feat: refactor chunk handling in extractor and transformer for improved data processing

This commit is contained in:
2026-04-08 21:09:26 -05:00
parent 853be4a5a6
commit f6dfcd390f
3 changed files with 41 additions and 14 deletions

View File

@@ -8,18 +8,18 @@ import (
func transformRowsMssql(
columns []ColumnType,
chChunksIn <-chan []UnknownRowValues,
chChunksIn <-chan Chunk,
chChunksOut chan<- []UnknownRowValues,
chJobErrorsOut chan<- JobError,
) {
chunkCount := 0
totalRowsTransformed := 0
for rows := range chChunksIn {
for chunk := range chChunksIn {
chunkStartTime := time.Now()
log.Debugf("Chunk received, transforming %d rows...", len(rows))
log.Debugf("Chunk received, transforming %d rows...", len(chunk.Data))
for _, rowValues := range rows {
for _, rowValues := range chunk.Data {
for i, col := range columns {
value := rowValues[i]
@@ -61,12 +61,12 @@ func transformRowsMssql(
}
chunkCount++
totalRowsTransformed += len(rows)
totalRowsTransformed += len(chunk.Data)
chunkDuration := time.Since(chunkStartTime)
rowsPerSec := float64(len(rows)) / chunkDuration.Seconds()
rowsPerSec := float64(len(chunk.Data)) / chunkDuration.Seconds()
log.Infof("Transformed chunk: %d rows in %v (%.0f rows/sec) - Total: %d rows",
len(rows), chunkDuration, rowsPerSec, totalRowsTransformed)
len(chunk.Data), chunkDuration, rowsPerSec, totalRowsTransformed)
chChunksOut <- rows
chChunksOut <- chunk.Data
}
}