1 Commit

4 changed files with 22 additions and 66 deletions

View File

@@ -19,15 +19,10 @@ func (e *ExtractorError) Error() string {
 const maxRetryAttempts = 3
 
-func extractorErrorHandler(chErrorsIn <-chan ExtractorError, chBatchesOut chan<- Batch, chJobErrorsOut chan<- JobError) {
+func extractorErrorHandler(chErrorsIn <-chan ExtractorError, chBatchesOut chan<- Batch, chGlobalErrorsOut chan<- error) {
     for err := range chErrorsIn {
         if err.RetryCounter >= maxRetryAttempts {
-            jobError := JobError{
-                ShouldCancelJob: false,
-                Msg:             fmt.Sprintf("batch %v reached max retries (%d)", err.Id, maxRetryAttempts),
-                Prev:            &err,
-            }
-            chJobErrorsOut <- jobError
+            chGlobalErrorsOut <- fmt.Errorf("batch %v reached max retries (%d): %s", err.Id, maxRetryAttempts, err.Msg)
             continue
         }
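The retry branch of this handler falls outside the hunk. For orientation, a minimal sketch of how the whole function might read after this commit, assuming ExtractorError embeds Batch (which is what lets err.Id and err.RetryCounter resolve) and that the unshown branch requeues the batch; the requeue details are an assumption, not part of the diff:

// Sketch only; the diff shows just the give-up branch.
func extractorErrorHandler(chErrorsIn <-chan ExtractorError, chBatchesOut chan<- Batch, chGlobalErrorsOut chan<- error) {
    for err := range chErrorsIn {
        if err.RetryCounter >= maxRetryAttempts {
            // Give up: report on the job-wide error channel and move on.
            chGlobalErrorsOut <- fmt.Errorf("batch %v reached max retries (%d): %s", err.Id, maxRetryAttempts, err.Msg)
            continue
        }
        // Assumed requeue path: retry the batch with a bumped counter.
        batch := err.Batch
        batch.RetryCounter++
        chBatchesOut <- batch
    }
}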

View File

@@ -23,18 +23,20 @@ func extractFromMssql(
     chBatchesIn <-chan Batch,
     chChunksOut chan<- []UnknownRowValues,
     chErrorsOut chan<- ExtractorError,
-    chJobErrorsOut chan<- JobError,
 ) {
     indexPrimaryKey := slices.IndexFunc(columns, func(col ColumnType) bool {
         return strings.EqualFold(col.name, job.PrimaryKey)
     })
     if indexPrimaryKey == -1 {
-        exError := JobError{
-            ShouldCancelJob: true,
-            Msg:             "Primary key not found in provided columns",
+        exError := ExtractorError{
+            Batch: Batch{
+                RetryCounter: maxRetryAttempts,
+            },
+            HasLastId: false,
+            Msg:       "Primary key not found in columns provided",
         }
-        chJobErrorsOut <- exError
+        chErrorsOut <- exError
         return
     }
@@ -89,7 +91,6 @@ func extractFromMssql(
         }
         lastRow := rowsChunk[len(rowsChunk)-1]
-        chChunksOut <- rowsChunk
         chErrorsOut <- ExtractorErrorFromLastRowMssql(lastRow, indexPrimaryKey, &batch, err)
         return
     }
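Two details worth noting. Setting RetryCounter: maxRetryAttempts up front is a sentinel: the handler above routes a missing primary key straight to the global error channel instead of retrying it. And dropping chChunksOut <- rowsChunk means a chunk that failed mid-read is no longer forwarded downstream before its error is reported, so a retried batch won't deliver the same rows twice. A hypothetical definition of ExtractorError consistent with how this diff uses it (the real one is defined elsewhere in the repo and may differ):

// Hypothetical shape inferred from usage; not shown in this diff.
type ExtractorError struct {
    Batch            // embedded, so err.Id and err.RetryCounter are promoted fields
    HasLastId bool   // true when the extractor recorded a resume point
    LastId    any    // assumed companion field to HasLastId
    Msg       string
}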

View File

@@ -1,33 +0,0 @@
-package main
-
-import (
-    "fmt"
-
-    log "github.com/sirupsen/logrus"
-)
-
-type JobError struct {
-    ShouldCancelJob bool
-    Msg             string
-    Prev            error
-}
-
-func (e *JobError) Error() string {
-    if e.Prev != nil {
-        return fmt.Sprintf("%s: %v", e.Msg, e.Prev)
-    }
-    return e.Msg
-}
-
-func jobErrorHandler(chErrorsIn <-chan JobError) error {
-    for err := range chErrorsIn {
-        if err.ShouldCancelJob {
-            return &err
-        }
-        log.Error(err)
-    }
-    return nil
-}
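With JobError and its handler deleted, the fatal-versus-recoverable decision now happens at the send site: extractor errors are either requeued or wrapped into a plain error. One caveat: the diff creates chGlobalErrors but shows no consumer, and since it is unbuffered, a send blocks until something receives. A sketch of one possible drain, purely as an assumption about code outside these hunks:

// Assumed consumer; the diff creates chGlobalErrors but shows no reader.
// Without one, the first send in extractorErrorHandler would block forever.
go func() {
    for err := range chGlobalErrors {
        log.Error("Job error: ", err)
    }
}()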

View File

@@ -24,31 +24,18 @@ func processMigrationJob(sourceDb *sql.DB, targetDb *pgxpool.Pool, job Migration
     logColumnTypes(sourceColTypes, "Source col types")
     logColumnTypes(targetColTypes, "Target col types")
 
-    ctx, cancel := context.WithCancel(context.Background())
-    defer cancel()
-    batches, err := batchGeneratorMssql(ctx, sourceDb, job)
+    mssqlCtx := context.Background()
+    batches, err := batchGeneratorMssql(mssqlCtx, sourceDb, job)
     if err != nil {
         log.Error("Unexpected error calculating batch ranges: ", err)
     }
 
-    chJobErrors := make(chan JobError)
-    defer close(chJobErrors)
-    go func() {
-        if err := jobErrorHandler(chJobErrors); err != nil {
-            cancel()
-        }
-    }()
+    chGlobalErrors := make(chan error)
+    defer close(chGlobalErrors)
 
     chBatches := make(chan Batch, len(batches))
-    chExtractorErrors := make(chan ExtractorError, len(batches))
-    go func() {
-        extractorErrorHandler(chExtractorErrors, chBatches, chJobErrors)
-    }()
     chChunks := make(chan []UnknownRowValues, QueueSize)
+    chExtractorErrors := make(chan ExtractorError, len(batches))
 
     maxExtractors := min(NumExtractors, len(batches))
     var wgMssqlExtractors sync.WaitGroup
@@ -56,7 +43,7 @@ func processMigrationJob(sourceDb *sql.DB, targetDb *pgxpool.Pool, job Migration
     extractStartTime := time.Now()
     for range maxExtractors {
         wgMssqlExtractors.Go(func() {
-            extractFromMssql(ctx, sourceDb, job, sourceColTypes, ChunkSize, chBatches, chChunks, chExtractorErrors, chJobErrors)
+            extractFromMssql(mssqlCtx, sourceDb, job, sourceColTypes, ChunkSize, chBatches, chChunks, chExtractorErrors)
         })
     }
@@ -68,6 +55,10 @@ func processMigrationJob(sourceDb *sql.DB, targetDb *pgxpool.Pool, job Migration
         close(chExtractorErrors)
     }()
 
+    go func() {
+        extractorErrorHandler(chExtractorErrors, chBatches, chGlobalErrors)
+    }()
+
     go func() {
         wgMssqlExtractors.Wait()
         close(chChunks)
@@ -92,15 +83,17 @@ func processMigrationJob(sourceDb *sql.DB, targetDb *pgxpool.Pool, job Migration
     }()
 
     var wgPostgresLoaders sync.WaitGroup
+    postgresLoaderCtx := context.Background()
     log.Infof("Starting %d PostgreSQL loader(s)...", NumLoaders)
     loaderStartTime := time.Now()
     for range NumLoaders {
         wgPostgresLoaders.Go(func() {
-            if err := loadRowsPostgres(ctx, job, targetColTypes, targetDb, chRowsTransform); err != nil {
+            if err := loadRowsPostgres(postgresLoaderCtx, job, targetColTypes, targetDb, chRowsTransform); err != nil {
                 log.Error("Unexpected error loading data into postgres: ", err)
             }
+            // fakeLoader(job, sourceColTypes, chRowsTransform)
         })
     }
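After this commit the pipeline no longer uses a shared cancellable context; shutdown rests entirely on channel closure: a watcher goroutine closes each stage's output once the stage's WaitGroup drains, and the error handler keeps recycling batches until chExtractorErrors is closed. A standalone illustration of that close-when-done pattern (generic names, not from the repo; WaitGroup.Go requires Go 1.25, matching the wgMssqlExtractors.Go calls above):

package main

import (
    "fmt"
    "sync"
)

func main() {
    out := make(chan int)
    var wg sync.WaitGroup
    for i := range 3 {
        wg.Go(func() { out <- i * i }) // wg.Go = Add(1) + go + Done (Go 1.25)
    }
    go func() {
        wg.Wait()
        close(out) // safe: every sender has finished
    }()
    for v := range out { // terminates once out is closed
        fmt.Println(v)
    }
}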