3 Commits

18 changed files with 373 additions and 418 deletions

View File

@@ -1,77 +0,0 @@
package main
import (
"context"
"database/sql"
"errors"
"fmt"
"sync"
"time"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
"github.com/jackc/pgx/v5/pgxpool"
_ "github.com/microsoft/go-mssqldb"
log "github.com/sirupsen/logrus"
)
func connectToSqlServer() (*sql.DB, error) {
db, err := sql.Open("sqlserver", config.App.SourceDbUrl)
if err != nil {
return nil, fmt.Errorf("Unable to connect to sqlserver: %w", err)
}
ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
defer cancel()
if err := db.PingContext(ctx); err != nil {
return nil, fmt.Errorf("Unable to ping sqlserver: %w", err)
}
return db, nil
}
func connectToPostgres() (*pgxpool.Pool, error) {
ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
defer cancel()
pool, err := pgxpool.New(ctx, config.App.TargetDbUrl)
if err != nil {
return nil, fmt.Errorf("Unable to connect to postgres: %w", err)
}
if err := pool.Ping(ctx); err != nil {
pool.Close()
return nil, fmt.Errorf("Unable to ping postgres: %w", err)
}
return pool, nil
}
func connectToDatabases() (*sql.DB, *pgxpool.Pool, error) {
var sourceDbErr, targetDbErr error
var sourceDb *sql.DB
var targetDb *pgxpool.Pool
var wg sync.WaitGroup
wg.Go(func() {
sourceDb, sourceDbErr = connectToSqlServer()
if sourceDbErr != nil {
log.Error("Unable to connect to source db: ", sourceDbErr)
}
})
wg.Go(func() {
targetDb, targetDbErr = connectToPostgres()
if targetDbErr != nil {
log.Error("Unable to connect to target db: ", targetDbErr)
}
})
wg.Wait()
if sourceDbErr != nil || targetDbErr != nil {
return nil, nil, errors.New("Unable to connect to databases")
}
return sourceDb, targetDb, nil
}
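
Note for reviewers: the fan-out above uses sync.WaitGroup.Go, which only exists from Go 1.25 onward. A minimal self-contained sketch of the same pattern for older toolchains (the connectBoth name and signature are illustrative, not from this repo):

package dbconn

import "sync"

// connectBoth mirrors connectToDatabases's concurrent fan-out on
// toolchains older than Go 1.25, where sync.WaitGroup has no Go method:
// Add(1) before each goroutine, Done() when it returns.
func connectBoth(connectA, connectB func() error) (errA, errB error) {
	var wg sync.WaitGroup
	wg.Add(2)
	go func() { defer wg.Done(); errA = connectA() }()
	go func() { defer wg.Done(); errB = connectB() }()
	wg.Wait()
	return errA, errB
}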

View File

@@ -11,6 +11,7 @@ import (
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/loaders"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/table_analyzers"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/transformers"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
log "github.com/sirupsen/logrus"
"golang.org/x/sync/errgroup"
)
@@ -95,10 +96,10 @@ func processMigrationJobs(
targetDb dbwrapper.DbWrapper,
jobs []config.Job,
maxParallelWorkers int,
) []JobResult {
) []models.JobResult {
if len(jobs) == 0 {
log.Info("No migration jobs configured")
return []JobResult{}
return []models.JobResult{}
}
if maxParallelWorkers <= 0 {
@@ -111,7 +112,7 @@ func processMigrationJobs(
log.Infof("Starting migration with %d parallel worker(s)", maxParallelWorkers)
chJobResults := make(chan JobResult, len(jobs))
chJobResults := make(chan models.JobResult, len(jobs))
chJobs := make(chan config.Job, len(jobs))
var wgJobs sync.WaitGroup
@@ -119,7 +120,7 @@ func processMigrationJobs(
targetTableAnalyzer := table_analyzers.NewPostgresTableAnalyzer(targetDb)
extractor := extractors.NewMssqlExtractor(sourceDb)
transformer := transformers.NewMssqlTransformer()
loader := loaders.NewPostgresLoader(targetDb)
loader := loaders.NewGenericLoader(targetDb)
for i := range maxParallelWorkers {
wgJobs.Go(func() {
@@ -151,7 +152,7 @@ func processMigrationJobs(
close(chJobResults)
}()
var finalResults []JobResult
var finalResults []models.JobResult
for res := range chJobResults {
finalResults = append(finalResults, res)
}

View File

@@ -1,13 +0,0 @@
package main
import "time"
type JobResult struct {
JobName string
StartTime time.Time
Duration time.Duration
RowsRead int64
RowsLoaded int64
RowsFailed int64
Error error
}

View File

@@ -11,7 +11,6 @@ import (
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
dbwrapper "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/db-wrapper"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/extractors"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/table_analyzers"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
log "github.com/sirupsen/logrus"
@@ -27,11 +26,11 @@ func processMigrationJob(
transformer etl.Transformer,
loader etl.Loader,
job config.Job,
) JobResult {
) models.JobResult {
localCtx, cancel := context.WithCancel(ctx)
defer cancel()
result := JobResult{
result := models.JobResult{
JobName: job.Name,
StartTime: time.Now(),
}
@@ -86,6 +85,7 @@ func processMigrationJob(
}
chJobErrors := make(chan custom_errors.JobError, job.QueueSize)
chExtractorErrors := make(chan custom_errors.ExtractorError, job.QueueSize)
chLoadersErrors := make(chan custom_errors.LoaderError, job.QueueSize)
chPartitions := make(chan models.Partition, job.QueueSize)
chBatchesRaw := make(chan models.Batch, job.QueueSize)
@@ -105,6 +105,15 @@ func processMigrationJob(
}
}()
go custom_errors.ExtractorErrorHandler(
localCtx,
job.Retry,
job.MaxPartitionErrors,
chExtractorErrors,
chPartitions,
chJobErrors,
&wgActivePartitions,
)
go custom_errors.LoaderErrorHandler(
localCtx,
job.Retry,
@@ -120,14 +129,14 @@ func processMigrationJob(
for range maxExtractors {
wgExtractors.Go(func() {
extractors.Consume(
extractor.Exec(
localCtx,
extractor,
job.SourceTable,
sourceColTypes,
job.BatchSize,
chPartitions,
chBatchesRaw,
chExtractorErrors,
chJobErrors,
&wgActivePartitions,
&rowsRead,
@@ -181,6 +190,8 @@ func processMigrationJob(
log.Debugf("wgActivePartitions is empty (%v)", job.Name)
close(chPartitions)
log.Debugf("chPartitions is closed (%v)", job.Name)
close(chExtractorErrors)
log.Debugf("chExtractorErrors is closed (%v)", job.Name)
wgExtractors.Wait()
log.Debugf("wgExtractors is empty (%v)", job.Name)

View File

@@ -1,6 +1,6 @@
max_parallel_workers: 4
source_db_type: sqlserver
target_db_type: postgres
target_db_type: sqlserver
defaults:
max_extractors: 2

View File

@@ -1,7 +1,13 @@
package custom_errors
import (
"context"
"fmt"
"sync"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
"github.com/google/uuid"
)
type ExtractorError struct {
@@ -14,3 +20,100 @@ type ExtractorError struct {
func (e *ExtractorError) Error() string {
return e.Msg
}
func ExtractorErrorHandler(
ctx context.Context,
retryConfig config.RetryConfig,
maxPartitionErrors int,
chErrorsIn <-chan ExtractorError,
chPartitionsOut chan<- models.Partition,
chJobErrorsOut chan<- JobError,
wgActivePartitions *sync.WaitGroup,
) {
definitiveErrors := 0
for {
if ctx.Err() != nil {
return
}
select {
case <-ctx.Done():
return
case err, ok := <-chErrorsIn:
if !ok {
return
}
if err.Partition.RetryCounter >= retryConfig.Attempts {
wgActivePartitions.Done()
definitiveErrors++
jobError := JobError{
ShouldCancelJob: false,
Msg: fmt.Sprintf("Partition %v reached max retries (%d)", err.Partition.Id, retryConfig.Attempts),
Prev: &err,
}
select {
case chJobErrorsOut <- jobError:
case <-ctx.Done():
return
}
if maxPartitionErrors > 0 && definitiveErrors >= maxPartitionErrors {
fatalError := JobError{
ShouldCancelJob: true,
Msg: fmt.Sprintf("Partition error limit reached (%d)", maxPartitionErrors),
Prev: &err,
}
select {
case chJobErrorsOut <- fatalError:
case <-ctx.Done():
return
}
}
continue
} else {
jobError := JobError{
ShouldCancelJob: false,
Msg: fmt.Sprintf("Temporal error in partition %v (retries: %d)", err.Partition.Id, err.Partition.RetryCounter),
Prev: &err,
}
select {
case chJobErrorsOut <- jobError:
case <-ctx.Done():
return
}
}
newPartition := err.Partition
newPartition.RetryCounter++
delay := computeBackoffDelay(
newPartition.RetryCounter,
retryConfig.BaseDelayMs,
retryConfig.MaxDelayMs,
retryConfig.MaxJitterMs,
)
if err.HasLastId {
newPartition.ParentId = err.Partition.Id
newPartition.Id = uuid.New()
newPartition.Range.Min = err.LastId
newPartition.Range.IsMinInclusive = false
}
requeueWithBackoff(ctx, delay, func() {
select {
case chPartitionsOut <- newPartition:
case <-ctx.Done():
return
}
})
}
}
}
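
computeBackoffDelay and requeueWithBackoff are referenced above but not shown in this diff. A sketch of what a capped exponential backoff with jitter typically looks like, assuming the *Ms config fields are millisecond counts (not the repository's actual implementation):

package custom_errors

import (
	"math/rand"
	"time"
)

// Sketch only: exponential growth from baseDelayMs, capped at
// maxDelayMs, with up to maxJitterMs of random jitter on top.
// retryCounter is >= 1 at the call site above (incremented first).
func computeBackoffDelay(retryCounter, baseDelayMs, maxDelayMs, maxJitterMs int) time.Duration {
	delayMs := baseDelayMs << (retryCounter - 1) // 1x, 2x, 4x, ...
	if delayMs <= 0 || delayMs > maxDelayMs {    // <= 0 guards shift overflow
		delayMs = maxDelayMs
	}
	if maxJitterMs > 0 {
		delayMs += rand.Intn(maxJitterMs)
	}
	return time.Duration(delayMs) * time.Millisecond
}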

View File

@@ -1,92 +0,0 @@
package extractors
import (
"context"
"errors"
"slices"
"strings"
"sync"
"sync/atomic"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
)
func Consume(
ctx context.Context,
extractor etl.Extractor,
tableInfo config.SourceTableInfo,
columns []models.ColumnType,
batchSize int,
chPartitionsIn <-chan models.Partition,
chBatchesOut chan<- models.Batch,
chErrorsOut chan<- custom_errors.JobError,
wgActivePartitions *sync.WaitGroup,
rowsRead *int64,
) {
indexPrimaryKey := slices.IndexFunc(columns, func(col models.ColumnType) bool {
return strings.EqualFold(col.Name(), tableInfo.PrimaryKey)
})
if indexPrimaryKey == -1 {
select {
case <-ctx.Done():
return
case chErrorsOut <- custom_errors.JobError{
ShouldCancelJob: true,
Msg: "Primary key not found in provided columns",
}:
}
return
}
for {
if ctx.Err() != nil {
return
}
select {
case <-ctx.Done():
return
case partition, ok := <-chPartitionsIn:
if !ok {
return
}
rowsReadResult, err := extractWithRetries(
ctx,
extractor,
tableInfo,
columns,
batchSize,
partition,
indexPrimaryKey,
chBatchesOut,
)
wgActivePartitions.Done()
if rowsReadResult > 0 {
atomic.AddInt64(rowsRead, rowsReadResult)
}
if err != nil {
var jobError *custom_errors.JobError
if errors.As(err, &jobError) {
select {
case <-ctx.Done():
return
case chErrorsOut <- *jobError:
}
} else {
select {
case <-ctx.Done():
return
case chErrorsOut <- custom_errors.JobError{ShouldCancelJob: false, Msg: err.Error(), Prev: err}:
}
}
}
}
}
}

View File

@@ -1,70 +0,0 @@
package extractors
import (
"context"
"errors"
"fmt"
"time"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
"github.com/google/uuid"
)
func extractWithRetries(
ctx context.Context,
extractor etl.Extractor,
tableInfo config.SourceTableInfo,
columns []models.ColumnType,
batchSize int,
partition models.Partition,
indexPrimaryKey int,
chBatchesOut chan<- models.Batch,
) (int64, error) {
var totalRowsRead int64
delay := time.Duration(time.Second * 1)
currentPartition := partition
for {
rowsRead, err := extractor.Exec(
ctx,
tableInfo,
columns,
batchSize,
currentPartition,
indexPrimaryKey,
chBatchesOut,
)
totalRowsRead += rowsRead
if err == nil {
return totalRowsRead, nil
}
var exError *custom_errors.ExtractorError
if errors.As(err, &exError) {
currentPartition.RetryCounter++
if currentPartition.RetryCounter > 3 {
return totalRowsRead, &custom_errors.JobError{
Msg: fmt.Sprintf("Partition %v reached max retries", exError.Partition.Id),
Prev: err,
}
}
if exError.HasLastId {
currentPartition.ParentId = exError.Partition.Id
currentPartition.Id = uuid.New()
currentPartition.Range.Min = exError.LastId
currentPartition.Range.IsMinInclusive = false
}
time.Sleep(delay)
continue
}
return totalRowsRead, err
}
}

View File

@@ -1,64 +0,0 @@
package extractors
import (
"context"
"fmt"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/convert"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
"github.com/google/uuid"
)
func errorFromLastPartitionRow(
lastRow models.UnknownRowValues,
indexPrimaryKey int,
partition models.Partition,
previousError error,
) error {
lastIdRawValue := lastRow[indexPrimaryKey]
lastId, ok := convert.ToInt64(lastIdRawValue)
if !ok {
currentPartition := partition
currentPartition.RetryCounter = 3
return &custom_errors.ExtractorError{
Partition: currentPartition,
HasLastId: true,
Msg: fmt.Sprintf("Couldn't cast last id value as int: %s", previousError.Error()),
}
}
return &custom_errors.ExtractorError{
Partition: partition,
HasLastId: true,
LastId: lastId,
Msg: previousError.Error(),
}
}
func sendBatch(ctx context.Context, chBatchesOut chan<- models.Batch, batch models.Batch) error {
select {
case chBatchesOut <- batch:
return nil
case <-ctx.Done():
return ctx.Err()
}
}
func flush(
ctx context.Context,
partition *models.Partition,
batchSize int,
batchRows []models.UnknownRowValues,
chBatchesOut chan<- models.Batch,
) error {
if len(batchRows) == 0 {
return nil
}
batch := models.Batch{Id: uuid.New(), PartitionId: partition.Id, Rows: batchRows}
batchRows = make([]models.UnknownRowValues, 0, batchSize)
return sendBatch(ctx, chBatchesOut, batch)
}

View File

@@ -3,13 +3,20 @@ package extractors
import (
"context"
"database/sql"
"errors"
"fmt"
"slices"
"strings"
"sync"
"sync/atomic"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/convert"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
dbwrapper "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/db-wrapper"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
"github.com/google/uuid"
)
type MssqlExtractor struct {
@@ -36,9 +43,9 @@ func buildExtractQueryMssql(
for i, col := range columns {
fmt.Fprintf(&sbQuery, "[%s]", col.Name())
if col.Type() == "GEOMETRY" {
fmt.Fprintf(&sbQuery, ".STAsBinary() AS [%s]", col.Name())
}
// if col.Type() == "GEOMETRY" {
// fmt.Fprintf(&sbQuery, ".STAsBinary() AS [%s]", col.Name())
// }
if i < len(columns)-1 {
sbQuery.WriteString(", ")
@@ -64,7 +71,35 @@ func buildExtractQueryMssql(
return sbQuery.String()
}
func (mssqlEx *MssqlExtractor) Exec(
func errorFromLastRow(
lastRow models.UnknownRowValues,
indexPrimaryKey int,
partition models.Partition,
previousError error,
) *custom_errors.ExtractorError {
lastIdRawValue := lastRow[indexPrimaryKey]
lastId, ok := convert.ToInt64(lastIdRawValue)
if !ok {
currentPartition := partition
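// Setting RetryCounter to 3 (the assumed retry ceiling) makes the
// error handler treat the failed cast as definitive instead of retrying.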
currentPartition.RetryCounter = 3
return &custom_errors.ExtractorError{
Partition: currentPartition,
HasLastId: true,
Msg: fmt.Sprintf("Couldn't cast last id value as int: %s", previousError.Error()),
}
}
return &custom_errors.ExtractorError{
Partition: partition,
HasLastId: true,
LastId: lastId,
Msg: previousError.Error(),
}
}
func (mssqlEx *MssqlExtractor) ProcessPartition(
ctx context.Context,
tableInfo config.SourceTableInfo,
columns []models.ColumnType,
@@ -72,50 +107,171 @@ func (mssqlEx *MssqlExtractor) Exec(
partition models.Partition,
indexPrimaryKey int,
chBatchesOut chan<- models.Batch,
) (int64, error) {
) (int, error) {
query := buildExtractQueryMssql(tableInfo, columns, partition.HasRange, partition.Range.IsMinInclusive)
var queryArgs []any
if partition.HasRange {
queryArgs = append(queryArgs, sql.Named("min", partition.Range.Min), sql.Named("max", partition.Range.Max))
queryArgs = append(queryArgs,
sql.Named("min", partition.Range.Min),
sql.Named("max", partition.Range.Max),
)
}
rowsRead := 0
rows, err := mssqlEx.db.Query(ctx, query, queryArgs...)
if err != nil {
return 0, err
return rowsRead, &custom_errors.ExtractorError{Partition: partition, HasLastId: false, Msg: err.Error()}
}
defer rows.Close()
batchRows := make([]models.UnknownRowValues, 0, batchSize)
var rowsRead int64 = 0
for rows.Next() {
values, err := rows.Values()
if err != nil {
if len(batchRows) == 0 {
return rowsRead, err
rowValues := make([]any, len(columns))
scanArgs := make([]any, len(columns))
for i := range rowValues {
scanArgs[i] = &rowValues[i]
}
if err := flush(ctx, &partition, batchSize, batchRows, chBatchesOut); err != nil {
return rowsRead, err
if err := rows.Scan(scanArgs...); err != nil {
if len(batchRows) == 0 {
return rowsRead, &custom_errors.ExtractorError{Partition: partition, HasLastId: false, Msg: err.Error()}
}
lastRow := batchRows[len(batchRows)-1]
return rowsRead, errorFromLastPartitionRow(lastRow, indexPrimaryKey, partition, err)
select {
case chBatchesOut <- models.Batch{Id: uuid.New(), PartitionId: partition.Id, Rows: batchRows, RetryCounter: 0}:
case <-ctx.Done():
return rowsRead, ctx.Err()
}
return rowsRead, errorFromLastRow(lastRow, indexPrimaryKey, partition, err)
}
rowsRead++
batchRows = append(batchRows, values)
batchRows = append(batchRows, rowValues)
if len(batchRows) >= batchSize {
if err := flush(ctx, &partition, batchSize, batchRows, chBatchesOut); err != nil {
return rowsRead, err
select {
case chBatchesOut <- models.Batch{Id: uuid.New(), PartitionId: partition.Id, Rows: batchRows, RetryCounter: 0}:
case <-ctx.Done():
return rowsRead, ctx.Err()
}
batchRows = make([]models.UnknownRowValues, 0, batchSize)
}
}
if err := rows.Err(); err != nil {
if errors.Is(err, ctx.Err()) {
return rowsRead, ctx.Err()
}
if len(batchRows) > 0 {
lastRow := batchRows[len(batchRows)-1]
return rowsRead, errorFromLastRow(lastRow, indexPrimaryKey, partition, err)
}
return rowsRead, &custom_errors.ExtractorError{Partition: partition, HasLastId: false, Msg: err.Error()}
}
if len(batchRows) > 0 {
select {
case chBatchesOut <- models.Batch{Id: uuid.New(), PartitionId: partition.Id, Rows: batchRows, RetryCounter: 0}:
case <-ctx.Done():
return rowsRead, ctx.Err()
}
}
if err := flush(ctx, &partition, batchSize, batchRows, chBatchesOut); err != nil {
return rowsRead, err
}
return rowsRead, rows.Err()
return rowsRead, nil
}
func (mssqlEx *MssqlExtractor) Exec(
ctx context.Context,
tableInfo config.SourceTableInfo,
columns []models.ColumnType,
batchSize int,
chPartitionsIn <-chan models.Partition,
chBatchesOut chan<- models.Batch,
chErrorsOut chan<- custom_errors.ExtractorError,
chJobErrorsOut chan<- custom_errors.JobError,
wgActivePartitions *sync.WaitGroup,
rowsRead *int64,
) {
indexPrimaryKey := slices.IndexFunc(columns, func(col models.ColumnType) bool {
return strings.EqualFold(col.Name(), tableInfo.PrimaryKey)
})
if indexPrimaryKey == -1 {
select {
case <-ctx.Done():
return
case chJobErrorsOut <- custom_errors.JobError{
ShouldCancelJob: true,
Msg: "Primary key not found in provided columns",
}:
}
return
}
for {
if ctx.Err() != nil {
return
}
select {
case <-ctx.Done():
return
case partition, ok := <-chPartitionsIn:
if !ok {
return
}
rowsReadResult, err := mssqlEx.ProcessPartition(
ctx,
tableInfo,
columns,
batchSize,
partition,
indexPrimaryKey,
chBatchesOut,
)
if rowsReadResult > 0 {
atomic.AddInt64(rowsRead, int64(rowsReadResult))
}
if err != nil {
var exError *custom_errors.ExtractorError
var jobError *custom_errors.JobError
if errors.As(err, &exError) {
select {
case <-ctx.Done():
return
case chErrorsOut <- *exError:
}
} else if errors.As(err, &jobError) {
select {
case <-ctx.Done():
return
case chJobErrorsOut <- *jobError:
}
} else {
select {
case <-ctx.Done():
return
case chErrorsOut <- custom_errors.ExtractorError{Partition: partition, Msg: err.Error()}:
}
}
continue
}
wgActivePartitions.Done()
}
}
}
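
For readers following the wgActivePartitions handoff: the producer Adds once per enqueued partition, Exec calls Done only after a partition succeeds, and ExtractorErrorHandler either Dones it (definitive failure) or re-sends it without touching the counter (retry). A sketch of the driving side, consistent with the close sequencing in the migration-job diff above (drivePartitions and its inputs are hypothetical):

// Assumes extractor workers are already consuming chPartitions.
func drivePartitions(initialPartitions []models.Partition, queueSize int) {
	var wgActivePartitions sync.WaitGroup
	chPartitions := make(chan models.Partition, queueSize)
	for _, p := range initialPartitions {
		wgActivePartitions.Add(1) // one Add per live partition
		chPartitions <- p
	}
	// Wait drains only when every partition was either extracted
	// successfully (Exec's Done) or failed for good (handler's Done);
	// a retried partition is re-sent instead of marked Done, so the
	// count cannot hit zero early.
	wgActivePartitions.Wait()
	close(chPartitions) // lets workers fall out of their receive loop
}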

View File

@@ -5,6 +5,7 @@ import (
"errors"
"fmt"
"strings"
"sync"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
@@ -50,7 +51,7 @@ func buildExtractQueryPostgres(sourceDbInfo config.SourceTableInfo, columns []mo
return fmt.Sprintf(`SELECT %s FROM "%s"."%s" ORDER BY "%s" ASC`, sbColumns.String(), sourceDbInfo.Schema, sourceDbInfo.Table, sourceDbInfo.PrimaryKey)
}
func (postgresEx *PostgresExtractor) Exec(
func (postgresEx *PostgresExtractor) ProcessPartition(
ctx context.Context,
tableInfo config.SourceTableInfo,
columns []models.ColumnType,
@@ -58,14 +59,14 @@ func (postgresEx *PostgresExtractor) Exec(
partition models.Partition,
indexPrimaryKey int,
chBatchesOut chan<- models.Batch,
) (int64, error) {
) (int, error) {
query := buildExtractQueryPostgres(tableInfo, columns)
if partition.HasRange {
return 0, errors.New("Batch config not yet supported")
}
var rowsRead int64 = 0
rowsRead := 0
rows, err := postgresEx.db.Query(ctx, query)
if err != nil {
return rowsRead, &custom_errors.ExtractorError{Partition: partition, HasLastId: false, Msg: err.Error()}
@@ -108,3 +109,17 @@ func (postgresEx *PostgresExtractor) Exec(
return rowsRead, nil
}
func (postgresEx *PostgresExtractor) Exec(
ctx context.Context,
tableInfo config.SourceTableInfo,
columns []models.ColumnType,
batchSize int,
chPartitionsIn <-chan models.Partition,
chBatchesOut chan<- models.Batch,
chErrorsOut chan<- custom_errors.ExtractorError,
chJobErrorsOut chan<- custom_errors.JobError,
wgActivePartitions *sync.WaitGroup,
rowsRead *int64,
) {
}

View File

@@ -15,31 +15,21 @@ import (
"github.com/jackc/pgx/v5/pgconn"
)
type PostgresLoader struct {
type GenericLoader struct {
db dbwrapper.DbWrapper
}
func NewPostgresLoader(db dbwrapper.DbWrapper) etl.Loader {
return &PostgresLoader{db: db}
func NewGenericLoader(db dbwrapper.DbWrapper) etl.Loader {
return &GenericLoader{db: db}
}
func mapSlice[T any, V any](input []T, mapper func(T) V) []V {
result := make([]V, len(input))
for i, v := range input {
result[i] = mapper(v)
}
return result
}
func (postgresLd *PostgresLoader) ProcessBatch(
func (gl *GenericLoader) ProcessBatch(
ctx context.Context,
tableInfo config.TargetTableInfo,
colNames []string,
batch models.Batch,
) (int, error) {
_, err := postgresLd.db.SaveMassive(
_, err := gl.db.SaveMassive(
ctx,
tableInfo.Schema,
tableInfo.Table,
@@ -65,7 +55,7 @@ func (postgresLd *PostgresLoader) ProcessBatch(
return len(batch.Rows), nil
}
func (postgresLd *PostgresLoader) Exec(
func (gl *GenericLoader) Exec(
ctx context.Context,
tableInfo config.TargetTableInfo,
columns []models.ColumnType,
@@ -92,7 +82,7 @@ func (postgresLd *PostgresLoader) Exec(
return
}
processedRows, err := postgresLd.ProcessBatch(ctx, tableInfo, colNames, batch)
processedRows, err := gl.ProcessBatch(ctx, tableInfo, colNames, batch)
if err != nil {
var ldError *custom_errors.LoaderError

View File

@@ -1 +0,0 @@
package loaders

View File

@@ -0,0 +1,11 @@
package loaders
func mapSlice[T any, V any](input []T, mapper func(T) V) []V {
result := make([]V, len(input))
for i, v := range input {
result[i] = mapper(v)
}
return result
}
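
A usage sketch for the extracted helper, mirroring how GenericLoader.Exec derives colNames from the analyzer's column metadata (the call shown is illustrative):

// Project loader column names out of the column metadata.
colNames := mapSlice(columns, func(c models.ColumnType) string {
	return c.Name()
})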

View File

@@ -39,6 +39,8 @@ JOIN sys.schemas s ON st.schema_id = s.schema_id
WHERE s.name = @schema AND st.name = @table AND (c.is_hidden = 0 OR (c.graph_type IS NOT NULL AND c.name LIKE '$%'))
ORDER BY c.column_id;`
// AND c.name NOT LIKE '$%'
type rawColumnMssql struct {
name string
userType string

View File

@@ -4,7 +4,6 @@ import (
"context"
"errors"
"sync"
"time"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
@@ -18,46 +17,7 @@ func NewMssqlTransformer() etl.Transformer {
}
func computeTransformationPlan(columns []models.ColumnType) []etl.ColumnTransformPlan {
var plan []etl.ColumnTransformPlan
for i, col := range columns {
switch col.SystemType() {
case "uniqueidentifier":
plan = append(plan, etl.ColumnTransformPlan{
Index: i,
Fn: func(v any) (any, error) {
if b, ok := v.([]byte); ok && b != nil {
return mssqlUuidToBigEndian(b)
}
return v, nil
},
})
case "geometry", "geography":
plan = append(plan, etl.ColumnTransformPlan{
Index: i,
Fn: func(v any) (any, error) {
if b, ok := v.([]byte); ok && b != nil {
return wkbToEwkbWithSrid(b, 4326)
}
return v, nil
},
})
case "datetime", "datetime2":
plan = append(plan, etl.ColumnTransformPlan{
Index: i,
Fn: func(v any) (any, error) {
if t, ok := v.(time.Time); ok {
return ensureUTC(t), nil
}
return v, nil
},
})
}
}
return plan
return []etl.ColumnTransformPlan{}
}
const processBatchCtxCheck = 4096
@@ -74,10 +34,6 @@ func (mssqlTr *MssqlTransformer) ProcessBatch(
}
}
if rowValues == nil {
continue
}
for _, task := range transformationPlan {
val := rowValues[task.Index]
if val == nil {

View File

@@ -10,7 +10,7 @@ import (
)
type Extractor interface {
Exec(
ProcessPartition(
ctx context.Context,
tableInfo config.SourceTableInfo,
columns []models.ColumnType,
@@ -18,7 +18,20 @@ type Extractor interface {
partition models.Partition,
indexPrimaryKey int,
chBatchesOut chan<- models.Batch,
) (int64, error)
) (int, error)
Exec(
ctx context.Context,
tableInfo config.SourceTableInfo,
columns []models.ColumnType,
batchSize int,
chPartitionsIn <-chan models.Partition,
chBatchesOut chan<- models.Batch,
chErrorsOut chan<- custom_errors.ExtractorError,
chJobErrorsOut chan<- custom_errors.JobError,
wgActivePartitions *sync.WaitGroup,
rowsRead *int64,
)
}
type TransformerFunc func(any) (any, error)

View File

@@ -1,6 +1,10 @@
package models
import "github.com/google/uuid"
import (
"time"
"github.com/google/uuid"
)
type UnknownRowValues = []any
@@ -25,3 +29,13 @@ type Partition struct {
HasRange bool
RetryCounter int
}
type JobResult struct {
JobName string
StartTime time.Time
Duration time.Duration
RowsRead int64
RowsLoaded int64
RowsFailed int64
Error error
}
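
Moving JobResult into models lets packages outside main aggregate run outcomes; a minimal sketch of such a consumer (Summary and Summarize are hypothetical, not part of this diff):

package models

import "time"

// Summary is a hypothetical roll-up of one migration run.
type Summary struct {
	Jobs, Failed         int
	RowsRead, RowsLoaded int64
	TotalDuration        time.Duration
}

func Summarize(results []JobResult) Summary {
	var s Summary
	for _, r := range results {
		s.Jobs++
		if r.Error != nil {
			s.Failed++
		}
		s.RowsRead += r.RowsRead
		s.RowsLoaded += r.RowsLoaded
		s.TotalDuration += r.Duration
	}
	return s
}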