refactor: rename Batch to Partition and Chunk to Batch in extractor error handling and processing functions for consistency

2026-04-11 00:30:54 -05:00
parent 9eb9821daf
commit 955bc65ce9
10 changed files with 151 additions and 151 deletions
--- a/internal/app/etl/extractors/mssql.go
+++ b/internal/app/etl/extractors/mssql.go
@@ -70,20 +70,20 @@ func buildExtractQueryMssql(
 	return sbQuery.String()
 }

-func extractorErrorFromLastRowMssql(
+func errorFromLastRow(
 	lastRow models.UnknownRowValues,
 	indexPrimaryKey int,
-	batch *models.Partition,
+	partition *models.Partition,
 	previousError error,
 ) *custom_errors.ExtractorError {
 	lastIdRawValue := lastRow[indexPrimaryKey]

 	lastId, ok := convert.ToInt64(lastIdRawValue)
 	if !ok {
-		currentBatch := *batch
-		currentBatch.RetryCounter = 3
+		currentPartition := *partition
+		currentPartition.RetryCounter = 3
 		return &custom_errors.ExtractorError{
-			Batch:     currentBatch,
+			Partition: currentPartition,
 			HasLastId: true,
 			Msg:       fmt.Sprintf("Couldn't cast last id value as int: %s", previousError.Error()),
 		}
@@ -91,78 +91,78 @@ func extractorErrorFromLastRowMssql(
 	}

 	return &custom_errors.ExtractorError{
-		Batch:     *batch,
+		Partition: *partition,
 		HasLastId: true,
 		LastId:    lastId,
 		Msg:       previousError.Error(),
 	}
 }

-func (mssqlEx *MssqlExtractor) ProcessBatch(
+func (mssqlEx *MssqlExtractor) ProcessPartition(
 	ctx context.Context,
 	tableInfo config.SourceTableInfo,
 	columns []models.ColumnType,
-	chunkSize int,
-	batch models.Partition,
+	batchSize int,
+	partition models.Partition,
 	indexPrimaryKey int,
-	chChunksOut chan<- models.Batch,
+	chBatchesOut chan<- models.Batch,
 	rowsRead *int64,
 ) error {
-	query := buildExtractQueryMssql(tableInfo, columns, batch.ShouldUseRange, batch.IsLowerLimitInclusive)
+	query := buildExtractQueryMssql(tableInfo, columns, partition.ShouldUseRange, partition.IsLowerLimitInclusive)

 	var queryArgs []any
-	if batch.ShouldUseRange {
+	if partition.ShouldUseRange {
 		queryArgs = append(queryArgs,
-			sql.Named("min", batch.LowerLimit),
-			sql.Named("max", batch.UpperLimit),
+			sql.Named("min", partition.LowerLimit),
+			sql.Named("max", partition.UpperLimit),
 		)
 	}

 	rows, err := mssqlEx.db.QueryContext(ctx, query, queryArgs...)
 	if err != nil {
-		return &custom_errors.ExtractorError{Batch: batch, HasLastId: false, Msg: err.Error()}
+		return &custom_errors.ExtractorError{Partition: partition, HasLastId: false, Msg: err.Error()}
 	}
 	defer rows.Close()

-	rowsChunk := make([]models.UnknownRowValues, 0, chunkSize)
+	batchRows := make([]models.UnknownRowValues, 0, batchSize)

 	for rows.Next() {
-		values := make([]any, len(columns))
+		rowValues := make([]any, len(columns))
 		scanArgs := make([]any, len(columns))

-		for i := range values {
-			scanArgs[i] = &values[i]
+		for i := range rowValues {
+			scanArgs[i] = &rowValues[i]
 		}

 		if err := rows.Scan(scanArgs...); err != nil {
-			if len(rowsChunk) == 0 {
-				return &custom_errors.ExtractorError{Batch: batch, HasLastId: false, Msg: err.Error()}
+			if len(batchRows) == 0 {
+				return &custom_errors.ExtractorError{Partition: partition, HasLastId: false, Msg: err.Error()}
 			}

-			lastRow := rowsChunk[len(rowsChunk)-1]
+			lastRow := batchRows[len(batchRows)-1]

 			select {
-			case chChunksOut <- models.Batch{Id: uuid.New(), PartitionId: batch.Id, Data: rowsChunk, RetryCounter: 0}:
+			case chBatchesOut <- models.Batch{Id: uuid.New(), PartitionId: partition.Id, Rows: batchRows, RetryCounter: 0}:
 			case <-ctx.Done():
 				return nil
 			}

-			atomic.AddInt64(rowsRead, int64(len(rowsChunk)))
+			atomic.AddInt64(rowsRead, int64(len(batchRows)))

-			return extractorErrorFromLastRowMssql(lastRow, indexPrimaryKey, &batch, err)
+			return errorFromLastRow(lastRow, indexPrimaryKey, &partition, err)
 		}

-		rowsChunk = append(rowsChunk, values)
+		batchRows = append(batchRows, rowValues)

-		if len(rowsChunk) >= chunkSize {
+		if len(batchRows) >= batchSize {
 			select {
-			case chChunksOut <- models.Batch{Id: uuid.New(), PartitionId: batch.Id, Data: rowsChunk, RetryCounter: 0}:
+			case chBatchesOut <- models.Batch{Id: uuid.New(), PartitionId: partition.Id, Rows: batchRows, RetryCounter: 0}:
 			case <-ctx.Done():
 				return nil
 			}

-			atomic.AddInt64(rowsRead, int64(len(rowsChunk)))
-			rowsChunk = make([]models.UnknownRowValues, 0, chunkSize)
+			atomic.AddInt64(rowsRead, int64(len(batchRows)))
+			batchRows = make([]models.UnknownRowValues, 0, batchSize)
 		}
 	}

@@ -171,22 +171,22 @@ func (mssqlEx *MssqlExtractor) ProcessBatch(
 			return ctx.Err()
 		}

-		if len(rowsChunk) == 0 {
-			return &custom_errors.ExtractorError{Batch: batch, HasLastId: false, Msg: err.Error()}
+		if len(batchRows) == 0 {
+			return &custom_errors.ExtractorError{Partition: partition, HasLastId: false, Msg: err.Error()}
 		}

-		lastRow := rowsChunk[len(rowsChunk)-1]
-		return extractorErrorFromLastRowMssql(lastRow, indexPrimaryKey, &batch, err)
+		lastRow := batchRows[len(batchRows)-1]
+		return errorFromLastRow(lastRow, indexPrimaryKey, &partition, err)
 	}

-	if len(rowsChunk) > 0 {
+	if len(batchRows) > 0 {
 		select {
-		case chChunksOut <- models.Batch{Id: uuid.New(), PartitionId: batch.Id, Data: rowsChunk, RetryCounter: 0}:
+		case chBatchesOut <- models.Batch{Id: uuid.New(), PartitionId: partition.Id, Rows: batchRows, RetryCounter: 0}:
 		case <-ctx.Done():
 			return nil
 		}

-		atomic.AddInt64(rowsRead, int64(len(rowsChunk)))
+		atomic.AddInt64(rowsRead, int64(len(batchRows)))
 	}

 	return nil
@@ -196,12 +196,12 @@ func (mssqlEx *MssqlExtractor) Exec(
 	ctx context.Context,
 	tableInfo config.SourceTableInfo,
 	columns []models.ColumnType,
-	chunkSize int,
-	chBatchesIn <-chan models.Partition,
-	chChunksOut chan<- models.Batch,
+	batchSize int,
+	chPartitionsIn <-chan models.Partition,
+	chBatchesOut chan<- models.Batch,
 	chErrorsOut chan<- custom_errors.ExtractorError,
 	chJobErrorsOut chan<- custom_errors.JobError,
-	wgActiveBatches *sync.WaitGroup,
+	wgActivePartitions *sync.WaitGroup,
 	rowsRead *int64,
 ) {
 	indexPrimaryKey := slices.IndexFunc(columns, func(col models.ColumnType) bool {
@@ -229,19 +229,19 @@ func (mssqlEx *MssqlExtractor) Exec(
 		select {
 		case <-ctx.Done():
 			return
-		case batch, ok := <-chBatchesIn:
+		case partition, ok := <-chPartitionsIn:
 			if !ok {
 				return
 			}

-			err := mssqlEx.ProcessBatch(
+			err := mssqlEx.ProcessPartition(
 				ctx,
 				tableInfo,
 				columns,
-				chunkSize,
-				batch,
+				batchSize,
+				partition,
 				indexPrimaryKey,
-				chChunksOut,
+				chBatchesOut,
 				rowsRead,
 			)

@@ -267,7 +267,7 @@ func (mssqlEx *MssqlExtractor) Exec(
 				return
 			}

-			wgActiveBatches.Done()
+			wgActivePartitions.Done()
 		}
 	}
 }
--- a/internal/app/etl/extractors/postgres.go
+++ b/internal/app/etl/extractors/postgres.go
@@ -52,29 +52,29 @@ func buildExtractQueryPostgres(sourceDbInfo config.SourceTableInfo, columns []mo
 	return fmt.Sprintf(`SELECT %s FROM "%s"."%s"  ORDER BY "%s" ASC`, sbColumns.String(), sourceDbInfo.Schema, sourceDbInfo.Table, sourceDbInfo.PrimaryKey)
 }

-func (postgresEx *PostgresExtractor) ProcessBatch(
+func (postgresEx *PostgresExtractor) ProcessPartition(
 	ctx context.Context,
 	tableInfo config.SourceTableInfo,
 	columns []models.ColumnType,
-	chunkSize int,
-	batch models.Partition,
+	batchSize int,
+	partition models.Partition,
 	indexPrimaryKey int,
-	chChunksOut chan<- models.Batch,
+	chBatchesOut chan<- models.Batch,
 	rowsRead *int64,
 ) error {
 	query := buildExtractQueryPostgres(tableInfo, columns)

-	if batch.ShouldUseRange {
+	if partition.ShouldUseRange {
 		return errors.New("Batch config not yet supported")
 	}

 	rows, err := postgresEx.db.Query(ctx, query)
 	if err != nil {
-		return &custom_errors.ExtractorError{Batch: batch, HasLastId: false, Msg: err.Error()}
+		return &custom_errors.ExtractorError{Partition: partition, HasLastId: false, Msg: err.Error()}
 	}
 	defer rows.Close()

-	rowsChunk := make([]models.UnknownRowValues, 0, chunkSize)
+	batchRows := make([]models.UnknownRowValues, 0, batchSize)

 	for rows.Next() {
 		values, err := rows.Values()
@@ -82,17 +82,17 @@ func (postgresEx *PostgresExtractor) ProcessBatch(
 			return errors.New("Unexpected error reading rows from source")
 		}

-		rowsChunk = append(rowsChunk, values)
+		batchRows = append(batchRows, values)

-		if len(rowsChunk) >= chunkSize {
+		if len(batchRows) >= batchSize {
 			select {
-			case chChunksOut <- models.Batch{Id: uuid.New(), PartitionId: batch.Id, Data: rowsChunk, RetryCounter: 0}:
+			case chBatchesOut <- models.Batch{Id: uuid.New(), PartitionId: partition.Id, Rows: batchRows, RetryCounter: 0}:
 			case <-ctx.Done():
 				return nil
 			}

-			atomic.AddInt64(rowsRead, int64(len(rowsChunk)))
-			rowsChunk = make([]models.UnknownRowValues, 0, chunkSize)
+			atomic.AddInt64(rowsRead, int64(len(batchRows)))
+			batchRows = make([]models.UnknownRowValues, 0, batchSize)
 		}
 	}

@@ -100,14 +100,14 @@ func (postgresEx *PostgresExtractor) ProcessBatch(
 		return errors.New("Unexpected error reading rows from source")
 	}

-	if len(rowsChunk) > 0 {
+	if len(batchRows) > 0 {
 		select {
-		case chChunksOut <- models.Batch{Id: uuid.New(), PartitionId: batch.Id, Data: rowsChunk, RetryCounter: 0}:
+		case chBatchesOut <- models.Batch{Id: uuid.New(), PartitionId: partition.Id, Rows: batchRows, RetryCounter: 0}:
 		case <-ctx.Done():
 			return nil
 		}

-		atomic.AddInt64(rowsRead, int64(len(rowsChunk)))
+		atomic.AddInt64(rowsRead, int64(len(batchRows)))
 	}

 	return nil
@@ -117,12 +117,12 @@ func (postgresEx *PostgresExtractor) Exec(
 	ctx context.Context,
 	tableInfo config.SourceTableInfo,
 	columns []models.ColumnType,
-	chunkSize int,
-	chBatchesIn <-chan models.Partition,
-	chChunksOut chan<- models.Batch,
+	batchSize int,
+	chPartitionsIn <-chan models.Partition,
+	chBatchesOut chan<- models.Batch,
 	chErrorsOut chan<- custom_errors.ExtractorError,
 	chJobErrorsOut chan<- custom_errors.JobError,
-	wgActiveBatches *sync.WaitGroup,
+	wgActivePartitions *sync.WaitGroup,
 	rowsRead *int64,
 ) {
 }