feat: enhance error handling and context management in MSSQL extraction process

This commit is contained in:
2026-04-08 22:39:07 -05:00
parent 554618daad
commit d3a3b26bb3

View File

@@ -3,6 +3,7 @@ package main
import ( import (
"context" "context"
"database/sql" "database/sql"
"errors"
"slices" "slices"
"strings" "strings"
"time" "time"
@@ -42,16 +43,47 @@ func extractFromMssql(
ShouldCancelJob: true, ShouldCancelJob: true,
Msg: "Primary key not found in provided columns", Msg: "Primary key not found in provided columns",
} }
select { select {
case chJobErrorsOut <- jobError:
case <-ctx.Done(): case <-ctx.Done():
return return
case chJobErrorsOut <- jobError:
} }
return return
} }
for batch := range chBatchesIn { for {
func() { if ctx.Err() != nil {
return
}
select {
case <-ctx.Done():
return
case batch, ok := <-chBatchesIn:
if !ok {
return
}
if abort := processBatch(ctx, db, job, columns, chunkSize, batch, indexPrimaryKey, chChunksOut, chErrorsOut); abort {
return
}
}
}
}
func processBatch(
ctx context.Context,
db *sql.DB,
job MigrationJob,
columns []ColumnType,
chunkSize int,
batch Batch,
indexPrimaryKey int,
chChunksOut chan<- Chunk,
chErrorsOut chan<- ExtractorError,
) (abort bool) {
query := buildExtractQueryMssql(job, columns, batch.ShouldUseRange, batch.IsLowerLimitInclusive) query := buildExtractQueryMssql(job, columns, batch.ShouldUseRange, batch.IsLowerLimitInclusive)
log.Debug("Query used to extract data from mssql: ", query) log.Debug("Query used to extract data from mssql: ", query)
@@ -66,13 +98,12 @@ func extractFromMssql(
queryStartTime := time.Now() queryStartTime := time.Now()
rows, err := db.QueryContext(ctx, query, queryArgs...) rows, err := db.QueryContext(ctx, query, queryArgs...)
if err != nil { if err != nil {
exError := ExtractorError{ select {
Batch: batch, case chErrorsOut <- ExtractorError{Batch: batch, HasLastId: false, Msg: err.Error()}:
HasLastId: false, case <-ctx.Done():
Msg: err.Error(), return true
} }
chErrorsOut <- exError return false
return
} }
defer rows.Close() defer rows.Close()
log.Debugf("Query executed in %v", time.Since(queryStartTime)) log.Debugf("Query executed in %v", time.Since(queryStartTime))
@@ -91,24 +122,28 @@ func extractFromMssql(
if err := rows.Scan(scanArgs...); err != nil { if err := rows.Scan(scanArgs...); err != nil {
if len(rowsChunk) == 0 { if len(rowsChunk) == 0 {
exError := ExtractorError{ select {
Batch: batch, case chErrorsOut <- ExtractorError{Batch: batch, HasLastId: false, Msg: err.Error()}:
HasLastId: false, case <-ctx.Done():
Msg: err.Error(), return true
} }
chErrorsOut <- exError return false
return
} }
lastRow := rowsChunk[len(rowsChunk)-1] lastRow := rowsChunk[len(rowsChunk)-1]
chErrorsOut <- ExtractorErrorFromLastRowMssql(lastRow, indexPrimaryKey, &batch, err) select {
chChunksOut <- Chunk{ case chErrorsOut <- ExtractorErrorFromLastRowMssql(lastRow, indexPrimaryKey, &batch, err):
Id: uuid.New(), case <-ctx.Done():
BatchId: batch.Id, return true
Data: rowsChunk,
RetryCounter: 0,
} }
return
select {
case chChunksOut <- Chunk{Id: uuid.New(), BatchId: batch.Id, Data: rowsChunk, RetryCounter: 0}:
case <-ctx.Done():
return true
}
return false
} }
rowsChunk = append(rowsChunk, values) rowsChunk = append(rowsChunk, values)
@@ -117,49 +152,54 @@ func extractFromMssql(
if len(rowsChunk) >= chunkSize { if len(rowsChunk) >= chunkSize {
chunkDuration := time.Since(chunkStartTime) chunkDuration := time.Since(chunkStartTime)
rowsPerSec := float64(chunkSize) / chunkDuration.Seconds() rowsPerSec := float64(chunkSize) / chunkDuration.Seconds()
log.Infof("Extracted chunk: %d rows in %v (%.0f rows/sec) - Total: %d rows", log.Infof("Extracted chunk: %d rows in %v (%.0f rows/sec) - Total: %d rows", len(rowsChunk), chunkDuration, rowsPerSec, totalRowsExtracted)
len(rowsChunk), chunkDuration, rowsPerSec, totalRowsExtracted)
chChunksOut <- Chunk{ select {
Id: uuid.New(), case chChunksOut <- Chunk{Id: uuid.New(), BatchId: batch.Id, Data: rowsChunk, RetryCounter: 0}:
BatchId: batch.Id, case <-ctx.Done():
Data: rowsChunk, return true
RetryCounter: 0,
} }
rowsChunk = make([]UnknownRowValues, 0, chunkSize) rowsChunk = make([]UnknownRowValues, 0, chunkSize)
chunkStartTime = time.Now() chunkStartTime = time.Now()
} }
} }
if len(rowsChunk) > 0 { if err := rows.Err(); err != nil {
chunkDuration := time.Since(chunkStartTime) if errors.Is(err, ctx.Err()) {
rowsPerSec := float64(len(rowsChunk)) / chunkDuration.Seconds() return true
log.Infof("Extracted final chunk: %d rows in %v (%.0f rows/sec) - Total: %d rows",
len(rowsChunk), chunkDuration, rowsPerSec, totalRowsExtracted)
chChunksOut <- Chunk{
Id: uuid.New(),
BatchId: batch.Id,
Data: rowsChunk,
RetryCounter: 0,
}
} }
if err := rows.Err(); err != nil {
if len(rowsChunk) == 0 { if len(rowsChunk) == 0 {
exError := ExtractorError{ select {
Batch: batch, case chErrorsOut <- ExtractorError{Batch: batch, HasLastId: false, Msg: err.Error()}:
HasLastId: false, case <-ctx.Done():
Msg: err.Error(), return true
} }
chErrorsOut <- exError return false
return
} }
lastRow := rowsChunk[len(rowsChunk)-1] lastRow := rowsChunk[len(rowsChunk)-1]
chErrorsOut <- ExtractorErrorFromLastRowMssql(lastRow, indexPrimaryKey, &batch, err) select {
return case chErrorsOut <- ExtractorErrorFromLastRowMssql(lastRow, indexPrimaryKey, &batch, err):
case <-ctx.Done():
return true
} }
}() return false
} }
if len(rowsChunk) > 0 {
chunkDuration := time.Since(chunkStartTime)
rowsPerSec := float64(len(rowsChunk)) / chunkDuration.Seconds()
log.Infof("Extracted final chunk: %d rows in %v (%.0f rows/sec) - Total: %d rows", len(rowsChunk), chunkDuration, rowsPerSec, totalRowsExtracted)
select {
case chChunksOut <- Chunk{Id: uuid.New(), BatchId: batch.Id, Data: rowsChunk, RetryCounter: 0}:
case <-ctx.Done():
return true
}
}
return false
} }
func extractFromPostgres(ctx context.Context, job MigrationJob, columns []ColumnType, chunkSize int, db *pgxpool.Pool, out chan<- []UnknownRowValues) error { func extractFromPostgres(ctx context.Context, job MigrationJob, columns []ColumnType, chunkSize int, db *pgxpool.Pool, out chan<- []UnknownRowValues) error {