feat: enhance error handling and context management in MSSQL extraction process

2026-04-08 22:39:07 -05:00
parent 554618daad
commit d3a3b26bb3
1 changed files with 144 additions and 104 deletions
--- a/cmd/go_migrate/extractor.go
+++ b/cmd/go_migrate/extractor.go
@@ -3,6 +3,7 @@ package main
 import (
 	"context"
 	"database/sql"
 	"errors"
 	"slices"
 	"strings"
 	"time"
@@ -42,16 +43,47 @@ func extractFromMssql(
 			ShouldCancelJob: true,
 			Msg:             "Primary key not found in provided columns",
 		}
 		select {
 		case chJobErrorsOut <- jobError:
 		case <-ctx.Done():
 			return
 		case chJobErrorsOut <- jobError:
 		}
 		return
 	}
-	for batch := range chBatchesIn {
+	for {
-		func() {
+		if ctx.Err() != nil {
 			return
 		}
 		select {
 		case <-ctx.Done():
 			return
 		case batch, ok := <-chBatchesIn:
 			if !ok {
 				return
 			}
 			if abort := processBatch(ctx, db, job, columns, chunkSize, batch, indexPrimaryKey, chChunksOut, chErrorsOut); abort {
 				return
 			}
 		}
 	}
 }
 func processBatch(
 	ctx context.Context,
 	db *sql.DB,
 	job MigrationJob,
 	columns []ColumnType,
 	chunkSize int,
 	batch Batch,
 	indexPrimaryKey int,
 	chChunksOut chan<- Chunk,
 	chErrorsOut chan<- ExtractorError,
 ) (abort bool) {
 	query := buildExtractQueryMssql(job, columns, batch.ShouldUseRange, batch.IsLowerLimitInclusive)
 	log.Debug("Query used to extract data from mssql: ", query)
@@ -66,13 +98,12 @@ func extractFromMssql(
 	queryStartTime := time.Now()
 	rows, err := db.QueryContext(ctx, query, queryArgs...)
 	if err != nil {
-				exError := ExtractorError{
+		select {
-					Batch:     batch,
+		case chErrorsOut <- ExtractorError{Batch: batch, HasLastId: false, Msg: err.Error()}:
-					HasLastId: false,
+		case <-ctx.Done():
-					Msg:       err.Error(),
+			return true
 		}
-				chErrorsOut <- exError
+		return false
 				return
 	}
 	defer rows.Close()
 	log.Debugf("Query executed in %v", time.Since(queryStartTime))
@@ -91,24 +122,28 @@ func extractFromMssql(
 		if err := rows.Scan(scanArgs...); err != nil {
 			if len(rowsChunk) == 0 {
-						exError := ExtractorError{
+				select {
-							Batch:     batch,
+				case chErrorsOut <- ExtractorError{Batch: batch, HasLastId: false, Msg: err.Error()}:
-							HasLastId: false,
+				case <-ctx.Done():
-							Msg:       err.Error(),
+					return true
 				}
-						chErrorsOut <- exError
+				return false
 						return
 			}
 			lastRow := rowsChunk[len(rowsChunk)-1]
-					chErrorsOut <- ExtractorErrorFromLastRowMssql(lastRow, indexPrimaryKey, &batch, err)
+			select {
-					chChunksOut <- Chunk{
+			case chErrorsOut <- ExtractorErrorFromLastRowMssql(lastRow, indexPrimaryKey, &batch, err):
-						Id:           uuid.New(),
+			case <-ctx.Done():
-						BatchId:      batch.Id,
+				return true
 						Data:         rowsChunk,
 						RetryCounter: 0,
 			}
-					return
+
 			select {
 			case chChunksOut <- Chunk{Id: uuid.New(), BatchId: batch.Id, Data: rowsChunk, RetryCounter: 0}:
 			case <-ctx.Done():
 				return true
 			}
 			return false
 		}
 		rowsChunk = append(rowsChunk, values)
@@ -117,49 +152,54 @@ func extractFromMssql(
 		if len(rowsChunk) >= chunkSize {
 			chunkDuration := time.Since(chunkStartTime)
 			rowsPerSec := float64(chunkSize) / chunkDuration.Seconds()
-					log.Infof("Extracted chunk: %d rows in %v (%.0f rows/sec) - Total: %d rows",
+			log.Infof("Extracted chunk: %d rows in %v (%.0f rows/sec) - Total: %d rows", len(rowsChunk), chunkDuration, rowsPerSec, totalRowsExtracted)
-						len(rowsChunk), chunkDuration, rowsPerSec, totalRowsExtracted)
+
-					chChunksOut <- Chunk{
+			select {
-						Id:           uuid.New(),
+			case chChunksOut <- Chunk{Id: uuid.New(), BatchId: batch.Id, Data: rowsChunk, RetryCounter: 0}:
-						BatchId:      batch.Id,
+			case <-ctx.Done():
-						Data:         rowsChunk,
+				return true
 						RetryCounter: 0,
 			}
 			rowsChunk = make([]UnknownRowValues, 0, chunkSize)
 			chunkStartTime = time.Now()
 		}
 	}
-			if len(rowsChunk) > 0 {
+	if err := rows.Err(); err != nil {
-				chunkDuration := time.Since(chunkStartTime)
+		if errors.Is(err, ctx.Err()) {
-				rowsPerSec := float64(len(rowsChunk)) / chunkDuration.Seconds()
+			return true
 				log.Infof("Extracted final chunk: %d rows in %v (%.0f rows/sec) - Total: %d rows",
 					len(rowsChunk), chunkDuration, rowsPerSec, totalRowsExtracted)
 				chChunksOut <- Chunk{
 					Id:           uuid.New(),
 					BatchId:      batch.Id,
 					Data:         rowsChunk,
 					RetryCounter: 0,
 				}
 		}
 			if err := rows.Err(); err != nil {
 		if len(rowsChunk) == 0 {
-					exError := ExtractorError{
+			select {
-						Batch:     batch,
+			case chErrorsOut <- ExtractorError{Batch: batch, HasLastId: false, Msg: err.Error()}:
-						HasLastId: false,
+			case <-ctx.Done():
-						Msg:       err.Error(),
+				return true
 			}
-					chErrorsOut <- exError
+			return false
 					return
 		}
 		lastRow := rowsChunk[len(rowsChunk)-1]
-				chErrorsOut <- ExtractorErrorFromLastRowMssql(lastRow, indexPrimaryKey, &batch, err)
+		select {
-				return
+		case chErrorsOut <- ExtractorErrorFromLastRowMssql(lastRow, indexPrimaryKey, &batch, err):
 		case <-ctx.Done():
 			return true
 		}
-		}()
+		return false
 	}
 	if len(rowsChunk) > 0 {
 		chunkDuration := time.Since(chunkStartTime)
 		rowsPerSec := float64(len(rowsChunk)) / chunkDuration.Seconds()
 		log.Infof("Extracted final chunk: %d rows in %v (%.0f rows/sec) - Total: %d rows", len(rowsChunk), chunkDuration, rowsPerSec, totalRowsExtracted)
 		select {
 		case chChunksOut <- Chunk{Id: uuid.New(), BatchId: batch.Id, Data: rowsChunk, RetryCounter: 0}:
 		case <-ctx.Done():
 			return true
 		}
 	}
 	return false
 }
 func extractFromPostgres(ctx context.Context, job MigrationJob, columns []ColumnType, chunkSize int, db *pgxpool.Pool, out chan<- []UnknownRowValues) error {