refactor: enhance logging and batch processing in migration; adjust configuration parameters for improved performance

2026-05-09 01:16:34 -05:00
parent 68d983ea57
commit b690e580c5
8 changed files with 229 additions and 112 deletions
--- a/internal/app/db-wrapper/mssql.go
+++ b/internal/app/db-wrapper/mssql.go
@@ -10,7 +10,6 @@ import (
 	dbdialects "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/db-wrapper/db_dialects"
 	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
 	mssql "github.com/microsoft/go-mssqldb"
-	"github.com/sirupsen/logrus"
 )

 func init() {
@@ -188,8 +187,6 @@ func buildExtractQueryMssql(q ExtractionQuery) (string, error) {
 	hasRegularColumns := len(q.Columns) > 0
 	hasJsonColumns := len(q.FromJsonColumns) > 0

-	// logrus.Debugf("Extraction query: %+v", q)
-
 	resolvedJson := make(map[string][]config.FromJsonItem, len(q.FromJsonColumns))
 	if hasJsonColumns {
 		for _, jsonConfig := range q.FromJsonColumns {
@@ -296,7 +293,7 @@ func (mw *mssqlDbWrapper) QueryFromObject(ctx context.Context, q ExtractionQuery
 		return nil, err
 	}

-	logrus.Debugf("Query: %s", queryString)
+	// logrus.Debugf("Query: %s", queryString)

 	var queryArgs []any

--- a/internal/app/etl/loaders/consume.go
+++ b/internal/app/etl/loaders/consume.go
@@ -9,6 +9,8 @@ import (
 	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
 	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
 	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
+	"github.com/google/uuid"
+	"github.com/sirupsen/logrus"
 )

 func (gl *GenericLoader) Consume(
@@ -16,6 +18,7 @@ func (gl *GenericLoader) Consume(
 	tableInfo config.TargetTableInfo,
 	columns []models.ColumnType,
 	retryConfig config.RetryConfig,
+	batchSize int,
 	chBatchesIn <-chan models.Batch,
 	chErrorsOut chan<- custom_errors.JobError,
 	wgActiveBatches *sync.WaitGroup,
@@ -26,6 +29,66 @@ func (gl *GenericLoader) Consume(
 		return col.Name()
 	})

+	var accRows []models.UnknownRowValues
+	var parentBatchesId []uuid.UUID
+	pendingDone := 0
+
+	defer func() {
+		for range pendingDone {
+			wgActiveBatches.Done()
+		}
+	}()
+
+	flush := func() bool {
+		if len(accRows) == 0 {
+			return true
+		}
+		count := len(parentBatchesId)
+		superBatch := models.Batch{
+			Id:              uuid.New(),
+			ParentBatchesId: parentBatchesId,
+			Rows:            accRows,
+		}
+		processedRows, err := gl.ProcessBatchWithRetries(ctx, tableInfo, colNames, retryConfig, superBatch)
+		for range count {
+			wgActiveBatches.Done()
+		}
+		pendingDone -= count
+		accRows = nil
+		parentBatchesId = nil
+
+		if err != nil {
+			atomic.AddInt32(failedBatchesCount, 1)
+			if jobError, ok := errors.AsType[*custom_errors.JobError](err); ok {
+				select {
+				case <-ctx.Done():
+					return false
+				case chErrorsOut <- *jobError:
+				}
+			} else {
+				select {
+				case <-ctx.Done():
+					return false
+				case chErrorsOut <- custom_errors.JobError{ShouldCancelJob: false, Msg: err.Error(), Prev: err}:
+				}
+			}
+
+			if atomic.LoadInt32(failedBatchesCount) > int32(retryConfig.MaxFailedBatchesLoad) {
+				select {
+				case <-ctx.Done():
+				case chErrorsOut <- custom_errors.JobError{ShouldCancelJob: true, Msg: "Max failed batches (load) reached"}:
+				}
+				return false
+			}
+			return true
+		}
+
+		current := atomic.LoadInt64(rowsLoaded)
+		logrus.Debugf("Rows loaded: +%v [current=%v] (%s.%s)", processedRows, current, tableInfo.Schema, tableInfo.Table)
+		atomic.AddInt64(rowsLoaded, int64(processedRows))
+		return true
+	}
+
 	for {
 		if ctx.Err() != nil {
 			return
@@ -36,42 +99,56 @@ func (gl *GenericLoader) Consume(
 			return
 		case batch, ok := <-chBatchesIn:
 			if !ok {
+				flush()
 				return
 			}

-			processedRows, err := gl.ProcessBatchWithRetries(ctx, tableInfo, colNames, retryConfig, batch)
-			wgActiveBatches.Done()
+			if batchSize <= 0 {
+				processedRows, err := gl.ProcessBatchWithRetries(ctx, tableInfo, colNames, retryConfig, batch)
+				wgActiveBatches.Done()

-			if err != nil {
-				atomic.AddInt32(failedBatchesCount, 1)
-				if jobError, ok := errors.AsType[*custom_errors.JobError](err); ok {
-					select {
-					case <-ctx.Done():
-						return
-					case chErrorsOut <- *jobError:
+				if err != nil {
+					atomic.AddInt32(failedBatchesCount, 1)
+					if jobError, ok := errors.AsType[*custom_errors.JobError](err); ok {
+						select {
+						case <-ctx.Done():
+							return
+						case chErrorsOut <- *jobError:
+						}
+					} else {
+						select {
+						case <-ctx.Done():
+							return
+						case chErrorsOut <- custom_errors.JobError{ShouldCancelJob: false, Msg: err.Error(), Prev: err}:
+						}
 					}
-				} else {
-					select {
-					case <-ctx.Done():
-						return
-					case chErrorsOut <- custom_errors.JobError{ShouldCancelJob: false, Msg: err.Error(), Prev: err}:
-					}
-				}
-
-				currentFBCount := atomic.LoadInt32(failedBatchesCount)
-				if currentFBCount > int32(retryConfig.MaxFailedBatchesLoad) {
-					select {
-					case <-ctx.Done():
-						return
-					case chErrorsOut <- custom_errors.JobError{ShouldCancelJob: true, Msg: "Max failed batches (load) reached"}:
-						return
+
+					if atomic.LoadInt32(failedBatchesCount) > int32(retryConfig.MaxFailedBatchesLoad) {
+						select {
+						case <-ctx.Done():
+							return
+						case chErrorsOut <- custom_errors.JobError{ShouldCancelJob: true, Msg: "Max failed batches (load) reached"}:
+							return
+						}
 					}
+					continue
 				}

+				current := atomic.LoadInt64(rowsLoaded)
+				logrus.Debugf("Rows loaded: +%v [current=%v] (%s.%s)", processedRows, current, tableInfo.Schema, tableInfo.Table)
+				atomic.AddInt64(rowsLoaded, int64(processedRows))
 				continue
 			}

-			atomic.AddInt64(rowsLoaded, int64(processedRows))
+			pendingDone++
+			accRows = append(accRows, batch.Rows...)
+			parentBatchesId = append(parentBatchesId, batch.Id)
+
+			if len(accRows) >= batchSize {
+				if !flush() {
+					return
+				}
+			}
 		}
 	}
 }
--- a/internal/app/etl/transformers/consume.go
+++ b/internal/app/etl/transformers/consume.go
@@ -8,12 +8,14 @@ import (
 	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
 	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
 	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
+	"github.com/google/uuid"
 )

 func (mssqlTr *MssqlTransformer) Consume(
 	ctx context.Context,
 	columns []models.ColumnType,
 	retryConfig config.RetryConfig,
+	batchSize int,
 	chBatchesIn <-chan models.Batch,
 	chBatchesOut chan<- models.Batch,
 	chJobErrorsOut chan<- custom_errors.JobError,
@@ -23,6 +25,32 @@ func (mssqlTr *MssqlTransformer) Consume(
 	storagePlan := computeStorageTransformationPlan(ctx, mssqlTr.azureClient, mssqlTr.toStorage, columns, mssqlTr.sourceTable)
 	transformationPlan = append(transformationPlan, storagePlan...)

+	var accRows []models.UnknownRowValues
+	var parentBatchesId []uuid.UUID
+	var firstPartitionId uuid.UUID
+
+	flush := func() bool {
+		if len(accRows) == 0 {
+			return true
+		}
+		out := models.Batch{
+			Id:              uuid.New(),
+			PartitionId:     firstPartitionId,
+			ParentBatchesId: parentBatchesId,
+			Rows:            accRows,
+		}
+		select {
+		case chBatchesOut <- out:
+			wgActiveBatches.Add(1)
+		case <-ctx.Done():
+			return false
+		}
+		accRows = nil
+		parentBatchesId = nil
+		firstPartitionId = uuid.Nil
+		return true
+	}
+
 	for {
 		if ctx.Err() != nil {
 			return
@@ -34,49 +62,56 @@ func (mssqlTr *MssqlTransformer) Consume(

 		case batch, ok := <-chBatchesIn:
 			if !ok {
+				flush()
 				return
 			}

-			if len(transformationPlan) == 0 {
+			if len(transformationPlan) > 0 {
+				err := ProcessBatchWithRetries(ctx, &batch, transformationPlan, retryConfig)
+				if err != nil {
+					if errors.Is(err, ctx.Err()) {
+						return
+					}
+
+					if jobError, ok := errors.AsType[*custom_errors.JobError](err); ok {
+						select {
+						case chJobErrorsOut <- *jobError:
+						case <-ctx.Done():
+							return
+						}
+					} else {
+						select {
+						case chJobErrorsOut <- custom_errors.JobError{ShouldCancelJob: true, Msg: "Transformation failed", Prev: err}:
+						case <-ctx.Done():
+							return
+						}
+					}
+
+					return
+				}
+			}
+
+			if batchSize <= 0 {
 				select {
 				case chBatchesOut <- batch:
 					wgActiveBatches.Add(1)
-					continue
 				case <-ctx.Done():
 					return
 				}
+				continue
 			}

-			err := ProcessBatchWithRetries(ctx, &batch, transformationPlan, retryConfig)
-			if err != nil {
-				if errors.Is(err, ctx.Err()) {
+			if len(parentBatchesId) == 0 {
+				firstPartitionId = batch.PartitionId
+			}
+			accRows = append(accRows, batch.Rows...)
+			parentBatchesId = append(parentBatchesId, batch.Id)
+
+			if len(accRows) >= batchSize {
+				if !flush() {
 					return
 				}
-
-				if jobError, ok := errors.AsType[*custom_errors.JobError](err); ok {
-					select {
-					case chJobErrorsOut <- *jobError:
-					case <-ctx.Done():
-						return
-					}
-				} else {
-					select {
-					case chJobErrorsOut <- custom_errors.JobError{ShouldCancelJob: true, Msg: "Transformation failed", Prev: err}:
-					case <-ctx.Done():
-						return
-					}
-				}
-
-				return
 			}
-
-			select {
-			case chBatchesOut <- batch:
-			case <-ctx.Done():
-				return
-			}
-
-			wgActiveBatches.Add(1)
 		}
 	}
 }
--- a/internal/app/etl/types.go
+++ b/internal/app/etl/types.go
@@ -21,6 +21,7 @@ type Transformer interface {
 		ctx context.Context,
 		columns []models.ColumnType,
 		retryConfig config.RetryConfig,
+		batchSize int,
 		chBatchesIn <-chan models.Batch,
 		chBatchesOut chan<- models.Batch,
 		chJobErrorsOut chan<- custom_errors.JobError,
--- a/internal/app/models/main.go
+++ b/internal/app/models/main.go
@@ -9,10 +9,11 @@ import (
 type UnknownRowValues = []any

 type Batch struct {
-	Id           uuid.UUID
-	PartitionId  uuid.UUID
-	Rows         []UnknownRowValues
-	RetryCounter int
+	Id              uuid.UUID
+	PartitionId     uuid.UUID
+	ParentBatchesId []uuid.UUID
+	Rows            []UnknownRowValues
+	RetryCounter    int
 }

 type PartitionRange struct {