refactor: simplify batch processing by removing partition dependency and introducing batch accumulator

This commit is contained in:
2026-05-11 00:38:42 -05:00
parent 16217f6ee2
commit ab9a2d8694
5 changed files with 81 additions and 76 deletions

View File

@@ -30,7 +30,7 @@ func (gl *GenericLoader) Consume(
})
var accRows []models.UnknownRowValues
-var parentBatchesId []uuid.UUID
+var parentBatches []models.BatchRef
pendingDone := 0
defer func() {
@@ -43,11 +43,11 @@ func (gl *GenericLoader) Consume(
if len(accRows) == 0 {
return true
}
-count := len(parentBatchesId)
+count := len(parentBatches)
superBatch := models.Batch{
-Id: uuid.New(),
-ParentBatchesId: parentBatchesId,
-Rows: accRows,
+Id: uuid.New(),
+ParentBatches: parentBatches,
+Rows: accRows,
}
processedRows, err := gl.ProcessBatchWithRetries(ctx, tableInfo, colNames, retryConfig, superBatch)
for range count {
@@ -55,7 +55,7 @@ func (gl *GenericLoader) Consume(
}
pendingDone -= count
accRows = nil
-parentBatchesId = nil
+parentBatches = nil
if err != nil {
atomic.AddInt32(failedBatchesCount, 1)
@@ -142,7 +142,7 @@ func (gl *GenericLoader) Consume(
pendingDone++
accRows = append(accRows, batch.Rows...)
-parentBatchesId = append(parentBatchesId, batch.Id)
+parentBatches = append(parentBatches, models.BatchRef{Id: batch.Id})
if len(accRows) >= batchSize {
if !flush() {