refactor: simplify batch processing by removing partition dependency and introducing batch accumulator

2026-05-11 00:38:42 -05:00
parent 16217f6ee2
commit ab9a2d8694
5 changed files with 81 additions and 76 deletions
--- a/internal/app/etl/extractors/main.go
+++ b/internal/app/etl/extractors/main.go
@@ -27,7 +27,6 @@ func sendBatch(ctx context.Context, chBatchesOut chan<- models.Batch, batch mode

 func flush(
 	ctx context.Context,
-	partition *models.Partition,
 	batchSize int,
 	batchRows []models.UnknownRowValues,
 	chBatchesOut chan<- models.Batch,
@@ -36,7 +35,7 @@ func flush(
 		return nil
 	}

-	batch := models.Batch{Id: uuid.New(), PartitionId: partition.Id, Rows: batchRows}
+	batch := models.Batch{Id: uuid.New(), Rows: batchRows}
 	batchRows = make([]models.UnknownRowValues, 0, batchSize)
 	return sendBatch(ctx, chBatchesOut, batch)
 }
--- a/internal/app/etl/extractors/process.go
+++ b/internal/app/etl/extractors/process.go
@@ -90,7 +90,7 @@ func (ex *GenericExtractor) ProcessPartition(
 				return rowsRead, err
 			}

-			if err := flush(ctx, &partition, batchSize, batchRows, chBatchesOut); err != nil {
+			if err := flush(ctx, batchSize, batchRows, chBatchesOut); err != nil {
 				return rowsRead, err
 			}

@@ -102,7 +102,7 @@ func (ex *GenericExtractor) ProcessPartition(
 		batchRows = append(batchRows, rowValues)
 		if len(batchRows) >= batchSize {
 			// logrus.Debugf("Batch size reached, flushing batch with %v rows (rowsRead=%v)", len(batchRows), rowsRead)
-			if err := flush(ctx, &partition, batchSize, batchRows, chBatchesOut); err != nil {
+			if err := flush(ctx, batchSize, batchRows, chBatchesOut); err != nil {
 				// logrus.Warnf("Error flushing rows: %v", err)
 				return rowsRead, err
 			}
@@ -110,7 +110,7 @@ func (ex *GenericExtractor) ProcessPartition(
 		}
 	}

-	if err := flush(ctx, &partition, batchSize, batchRows, chBatchesOut); err != nil {
+	if err := flush(ctx, batchSize, batchRows, chBatchesOut); err != nil {
 		return rowsRead, err
 	}