refactor: rename Batch to Partition and Chunk to Batch, and update related types and channels for consistency

This commit is contained in:
2026-04-11 00:09:28 -05:00
parent cd0e53b1d2
commit 9eb9821daf
11 changed files with 69 additions and 48 deletions

View File

@@ -23,14 +23,17 @@ type JobConfig struct {
Retry RetryConfig `yaml:"retry"`
}
type TargetTableInfo struct {
type TableInfo struct {
Schema string `yaml:"schema"`
Table string `yaml:"table"`
}
type TargetTableInfo struct {
TableInfo `yaml:",inline"`
}
type SourceTableInfo struct {
Schema string `yaml:"schema"`
Table string `yaml:"table"`
TableInfo `yaml:",inline"`
PrimaryKey string `yaml:"primary_key"`
}

View File

@@ -10,7 +10,7 @@ import (
)
type ExtractorError struct {
Batch models.Batch
Batch models.Partition
LastId int64
HasLastId bool
Msg string
@@ -24,7 +24,7 @@ func ExtractorErrorHandler(
ctx context.Context,
maxRetryAttempts int,
chErrorsIn <-chan ExtractorError,
chBatchesOut chan<- models.Batch,
chBatchesOut chan<- models.Partition,
chJobErrorsOut chan<- JobError,
wgActiveBatches *sync.WaitGroup,
) {

View File

@@ -9,7 +9,7 @@ import (
)
type LoaderError struct {
models.Chunk
models.Batch
Msg string
}
@@ -21,7 +21,7 @@ func LoaderErrorHandler(
ctx context.Context,
maxRetryAttempts int,
chErrorsIn <-chan LoaderError,
chChunksOut chan<- models.Chunk,
chChunksOut chan<- models.Batch,
chJobErrorsOut chan<- JobError,
wgActiveChunks *sync.WaitGroup,
) {
@@ -59,7 +59,7 @@ func LoaderErrorHandler(
err.RetryCounter++
select {
case chChunksOut <- err.Chunk:
case chChunksOut <- err.Batch:
case <-ctx.Done():
return
}

View File

@@ -73,7 +73,7 @@ func buildExtractQueryMssql(
func extractorErrorFromLastRowMssql(
lastRow models.UnknownRowValues,
indexPrimaryKey int,
batch *models.Batch,
batch *models.Partition,
previousError error,
) *custom_errors.ExtractorError {
lastIdRawValue := lastRow[indexPrimaryKey]
@@ -103,9 +103,9 @@ func (mssqlEx *MssqlExtractor) ProcessBatch(
tableInfo config.SourceTableInfo,
columns []models.ColumnType,
chunkSize int,
batch models.Batch,
batch models.Partition,
indexPrimaryKey int,
chChunksOut chan<- models.Chunk,
chChunksOut chan<- models.Batch,
rowsRead *int64,
) error {
query := buildExtractQueryMssql(tableInfo, columns, batch.ShouldUseRange, batch.IsLowerLimitInclusive)
@@ -142,7 +142,7 @@ func (mssqlEx *MssqlExtractor) ProcessBatch(
lastRow := rowsChunk[len(rowsChunk)-1]
select {
case chChunksOut <- models.Chunk{Id: uuid.New(), BatchId: batch.Id, Data: rowsChunk, RetryCounter: 0}:
case chChunksOut <- models.Batch{Id: uuid.New(), PartitionId: batch.Id, Data: rowsChunk, RetryCounter: 0}:
case <-ctx.Done():
return nil
}
@@ -156,7 +156,7 @@ func (mssqlEx *MssqlExtractor) ProcessBatch(
if len(rowsChunk) >= chunkSize {
select {
case chChunksOut <- models.Chunk{Id: uuid.New(), BatchId: batch.Id, Data: rowsChunk, RetryCounter: 0}:
case chChunksOut <- models.Batch{Id: uuid.New(), PartitionId: batch.Id, Data: rowsChunk, RetryCounter: 0}:
case <-ctx.Done():
return nil
}
@@ -181,7 +181,7 @@ func (mssqlEx *MssqlExtractor) ProcessBatch(
if len(rowsChunk) > 0 {
select {
case chChunksOut <- models.Chunk{Id: uuid.New(), BatchId: batch.Id, Data: rowsChunk, RetryCounter: 0}:
case chChunksOut <- models.Batch{Id: uuid.New(), PartitionId: batch.Id, Data: rowsChunk, RetryCounter: 0}:
case <-ctx.Done():
return nil
}
@@ -197,8 +197,8 @@ func (mssqlEx *MssqlExtractor) Exec(
tableInfo config.SourceTableInfo,
columns []models.ColumnType,
chunkSize int,
chBatchesIn <-chan models.Batch,
chChunksOut chan<- models.Chunk,
chBatchesIn <-chan models.Partition,
chChunksOut chan<- models.Batch,
chErrorsOut chan<- custom_errors.ExtractorError,
chJobErrorsOut chan<- custom_errors.JobError,
wgActiveBatches *sync.WaitGroup,

View File

@@ -57,9 +57,9 @@ func (postgresEx *PostgresExtractor) ProcessBatch(
tableInfo config.SourceTableInfo,
columns []models.ColumnType,
chunkSize int,
batch models.Batch,
batch models.Partition,
indexPrimaryKey int,
chChunksOut chan<- models.Chunk,
chChunksOut chan<- models.Batch,
rowsRead *int64,
) error {
query := buildExtractQueryPostgres(tableInfo, columns)
@@ -86,7 +86,7 @@ func (postgresEx *PostgresExtractor) ProcessBatch(
if len(rowsChunk) >= chunkSize {
select {
case chChunksOut <- models.Chunk{Id: uuid.New(), BatchId: batch.Id, Data: rowsChunk, RetryCounter: 0}:
case chChunksOut <- models.Batch{Id: uuid.New(), PartitionId: batch.Id, Data: rowsChunk, RetryCounter: 0}:
case <-ctx.Done():
return nil
}
@@ -102,7 +102,7 @@ func (postgresEx *PostgresExtractor) ProcessBatch(
if len(rowsChunk) > 0 {
select {
case chChunksOut <- models.Chunk{Id: uuid.New(), BatchId: batch.Id, Data: rowsChunk, RetryCounter: 0}:
case chChunksOut <- models.Batch{Id: uuid.New(), PartitionId: batch.Id, Data: rowsChunk, RetryCounter: 0}:
case <-ctx.Done():
return nil
}
@@ -118,8 +118,8 @@ func (postgresEx *PostgresExtractor) Exec(
tableInfo config.SourceTableInfo,
columns []models.ColumnType,
chunkSize int,
chBatchesIn <-chan models.Batch,
chChunksOut chan<- models.Chunk,
chBatchesIn <-chan models.Partition,
chChunksOut chan<- models.Batch,
chErrorsOut chan<- custom_errors.ExtractorError,
chJobErrorsOut chan<- custom_errors.JobError,
wgActiveBatches *sync.WaitGroup,

View File

@@ -38,7 +38,7 @@ func (postgresLd *PostgresLoader) ProcessChunk(
ctx context.Context,
tableInfo config.TargetTableInfo,
colNames []string,
chunk models.Chunk,
chunk models.Batch,
) (int, error) {
tableId := pgx.Identifier{tableInfo.Schema, tableInfo.Table}
_, err := postgresLd.db.CopyFrom(
@@ -60,7 +60,7 @@ func (postgresLd *PostgresLoader) ProcessChunk(
}
}
return 0, &custom_errors.LoaderError{Chunk: chunk, Msg: err.Error()}
return 0, &custom_errors.LoaderError{Batch: chunk, Msg: err.Error()}
}
return len(chunk.Data), nil
@@ -70,7 +70,7 @@ func (postgresLd *PostgresLoader) Exec(
ctx context.Context,
tableInfo config.TargetTableInfo,
columns []models.ColumnType,
chChunksIn <-chan models.Chunk,
chChunksIn <-chan models.Batch,
chErrorsOut chan<- custom_errors.LoaderError,
chJobErrorsOut chan<- custom_errors.JobError,
wgActiveChunks *sync.WaitGroup,

View File

@@ -64,7 +64,7 @@ const processChunkCtxCheck = 4096
func (mssqlTr *MssqlTransformer) ProcessChunk(
ctx context.Context,
chunk *models.Chunk,
chunk *models.Batch,
transformationPlan []etl.ColumnTransformPlan,
) error {
for i, rowValues := range chunk.Data {
@@ -94,8 +94,8 @@ func (mssqlTr *MssqlTransformer) ProcessChunk(
func (mssqlTr *MssqlTransformer) Exec(
ctx context.Context,
columns []models.ColumnType,
chChunksIn <-chan models.Chunk,
chChunksOut chan<- models.Chunk,
chChunksIn <-chan models.Batch,
chChunksOut chan<- models.Batch,
chJobErrorsOut chan<- custom_errors.JobError,
wgActiveChunks *sync.WaitGroup,
) {

View File

@@ -15,9 +15,9 @@ type Extractor interface {
tableInfo config.SourceTableInfo,
columns []models.ColumnType,
chunkSize int,
batch models.Batch,
batch models.Partition,
indexPrimaryKey int,
chChunksOut chan<- models.Chunk,
chChunksOut chan<- models.Batch,
rowsRead *int64,
) error
@@ -26,8 +26,8 @@ type Extractor interface {
tableInfo config.SourceTableInfo,
columns []models.ColumnType,
chunkSize int,
chBatchesIn <-chan models.Batch,
chChunksOut chan<- models.Chunk,
chBatchesIn <-chan models.Partition,
chChunksOut chan<- models.Batch,
chErrorsOut chan<- custom_errors.ExtractorError,
chJobErrorsOut chan<- custom_errors.JobError,
wgActiveBatches *sync.WaitGroup,
@@ -45,15 +45,15 @@ type ColumnTransformPlan struct {
type Transformer interface {
ProcessChunk(
ctx context.Context,
chunk *models.Chunk,
chunk *models.Batch,
transformationPlan []ColumnTransformPlan,
) error
Exec(
ctx context.Context,
columns []models.ColumnType,
chChunksIn <-chan models.Chunk,
chChunksOut chan<- models.Chunk,
chChunksIn <-chan models.Batch,
chChunksOut chan<- models.Batch,
chJobErrorsOut chan<- custom_errors.JobError,
wgActiveChunks *sync.WaitGroup,
)
@@ -64,17 +64,35 @@ type Loader interface {
ctx context.Context,
tableInfo config.TargetTableInfo,
colNames []string,
chunk models.Chunk,
chunk models.Batch,
) (int, error)
Exec(
ctx context.Context,
tableInfo config.TargetTableInfo,
columns []models.ColumnType,
chChunksIn <-chan models.Chunk,
chChunksIn <-chan models.Batch,
chErrorsOut chan<- custom_errors.LoaderError,
chJobErrorsOut chan<- custom_errors.JobError,
wgActiveChunks *sync.WaitGroup,
rowsLoaded *int64,
)
}
// TableAnalizer declares the table-inspection operations used by this package.
// Every method takes a context and a config.TableInfo identifying the table to inspect.
// NOTE(review): the name is spelled "Analizer"; "Analyzer" is the usual spelling.
// Renaming would affect every implementer and caller, so confirm before changing it.
type TableAnalizer interface {
// QueryColumnTypes returns a slice of models.ColumnType for tableInfo, or an error.
// Presumably one entry per column of the table — confirm against implementations.
QueryColumnTypes(
ctx context.Context,
tableInfo config.TableInfo,
) ([]models.ColumnType, error)
// EstimateTotalRows returns a row count for tableInfo as an int64, or an error.
// Judging by the name the value may be approximate rather than exact — TODO confirm.
EstimateTotalRows(
ctx context.Context,
tableInfo config.TableInfo,
) (int64, error)
// CalculatePartitionRanges takes the desired number of partitions for tableInfo
// and returns a models.Partition, or an error.
// NOTE(review): the plural name and the totalPartitions parameter suggest several
// ranges, yet the return type is a single models.Partition — confirm whether
// []models.Partition was intended.
CalculatePartitionRanges(
ctx context.Context,
tableInfo config.TableInfo,
totalPartitions int,
) (models.Partition, error)
}

View File

@@ -4,14 +4,14 @@ import "github.com/google/uuid"
type UnknownRowValues = []any
type Chunk struct {
type Batch struct {
Id uuid.UUID
BatchId uuid.UUID
PartitionId uuid.UUID
Data []UnknownRowValues
RetryCounter int
}
type Batch struct {
type Partition struct {
Id uuid.UUID
ParentId uuid.UUID
LowerLimit int64