feat: implement extractor error handling and batch processing for MSSQL and Postgres

feat: enhance migration job processing with detailed metrics and error handling
feat: enhance migration job processing with parallel execution and improved logging
2026-04-10 19:06:41 -05:00 · 2026-04-09 21:55:19 -05:00 · 2026-04-09 20:02:04 -05:00 · 2026-04-09 19:46:45 -05:00 · 2026-04-09 19:20:50 -05:00 · 2026-04-09 18:12:11 -05:00
21 changed files with 1036 additions and 102 deletions
--- a/cmd/go_migrate/batch-generator.go
+++ b/cmd/go_migrate/batch-generator.go
@@ -6,6 +6,7 @@ import (
 	"fmt"
 	"time"
 	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
 	"github.com/google/uuid"
 )
@@ -19,7 +20,7 @@ type Batch struct {
 	RetryCounter          int
 }
-func estimateTotalRowsMssql(ctx context.Context, db *sql.DB, job MigrationJob) (int64, error) {
+func estimateTotalRowsMssql(ctx context.Context, db *sql.DB, tableInfo config.SourceTableInfo) (int64, error) {
 	query := `
 SELECT
 	SUM(p.rows) AS count
@@ -33,7 +34,7 @@ GROUP BY t.name`
 	defer cancel()
 	var rowsCount int64
-	err := db.QueryRowContext(ctxTimeout, query, sql.Named("schema", job.Schema), sql.Named("table", job.Table)).Scan(&rowsCount)
+	err := db.QueryRowContext(ctxTimeout, query, sql.Named("schema", tableInfo.Schema), sql.Named("table", tableInfo.Table)).Scan(&rowsCount)
 	if err != nil {
 		return 0, err
 	}
@@ -41,7 +42,7 @@ GROUP BY t.name`
 	return rowsCount, nil
 }
-func calculateBatchesMssql(ctx context.Context, db *sql.DB, job MigrationJob, batchCount int64) ([]Batch, error) {
+func calculateBatchesMssql(ctx context.Context, db *sql.DB, tableInfo config.SourceTableInfo, batchCount int64) ([]Batch, error) {
 	query := fmt.Sprintf(`
 SELECT
 	MIN([%s]) AS lower_limit,
@@ -49,7 +50,13 @@ SELECT
 FROM
 	(SELECT [%s], NTILE(@batchCount) OVER (ORDER BY [%s]) AS batch_id FROM [%s].[%s]) AS T
 GROUP BY batch_id
-ORDER BY batch_id`, job.PrimaryKey, job.PrimaryKey, job.PrimaryKey, job.PrimaryKey, job.Schema, job.Table)
+ORDER BY batch_id`,
 		tableInfo.PrimaryKey,
 		tableInfo.PrimaryKey,
 		tableInfo.PrimaryKey,
 		tableInfo.PrimaryKey,
 		tableInfo.Schema,
 		tableInfo.Table)
 	ctxTimeout, cancel := context.WithTimeout(ctx, time.Second*20)
 	defer cancel()
@@ -84,15 +91,15 @@ ORDER BY batch_id`, job.PrimaryKey, job.PrimaryKey, job.PrimaryKey, job.PrimaryK
 	return batches, nil
 }
-func batchGeneratorMssql(ctx context.Context, db *sql.DB, job MigrationJob) ([]Batch, error) {
+func batchGeneratorMssql(ctx context.Context, db *sql.DB, tableInfo config.SourceTableInfo, rowsPerBatch int64) ([]Batch, error) {
-	rowsCount, err := estimateTotalRowsMssql(ctx, db, job)
+	rowsCount, err := estimateTotalRowsMssql(ctx, db, tableInfo)
 	if err != nil {
 		return nil, err
 	}
 	var batchCount int64 = 1
-	if rowsCount > RowsPerBatch {
+	if rowsCount > rowsPerBatch {
-		batchCount = rowsCount / RowsPerBatch
+		batchCount = rowsCount / rowsPerBatch
 	} else {
 		return []Batch{{
 			Id:             uuid.New(),
@@ -101,7 +108,7 @@ func batchGeneratorMssql(ctx context.Context, db *sql.DB, job MigrationJob) ([]B
 		}}, nil
 	}
-	batches, err := calculateBatchesMssql(ctx, db, job, batchCount)
+	batches, err := calculateBatchesMssql(ctx, db, tableInfo, batchCount)
 	if err != nil {
 		return nil, err
 	}
--- a/cmd/go_migrate/build-extract-query.go
+++ b/cmd/go_migrate/build-extract-query.go
@@ -3,9 +3,11 @@ package main
 import (
 	"fmt"
 	"strings"
 	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
 )
-func buildExtractQueryMssql(job MigrationJob, columns []ColumnType, includeRange bool, isMinInclusive bool) string {
+func buildExtractQueryMssql(sourceDbInfo config.SourceTableInfo, columns []ColumnType, includeRange bool, isMinInclusive bool) string {
 	var sbQuery strings.Builder
 	sbQuery.WriteString("SELECT ")
@@ -26,25 +28,25 @@ func buildExtractQueryMssql(job MigrationJob, columns []ColumnType, includeRange
 		}
 	}
-	fmt.Fprintf(&sbQuery, " FROM [%s].[%s]", job.Schema, job.Table)
+	fmt.Fprintf(&sbQuery, " FROM [%s].[%s]", sourceDbInfo.Schema, sourceDbInfo.Table)
 	if includeRange {
-		fmt.Fprintf(&sbQuery, " WHERE [%s]", job.PrimaryKey)
+		fmt.Fprintf(&sbQuery, " WHERE [%s]", sourceDbInfo.PrimaryKey)
 		if isMinInclusive {
 			sbQuery.WriteString(" >=")
 		} else {
 			sbQuery.WriteString(" >")
 		}
-		fmt.Fprintf(&sbQuery, " @min AND [%s] <= @max", job.PrimaryKey)
+		fmt.Fprintf(&sbQuery, " @min AND [%s] <= @max", sourceDbInfo.PrimaryKey)
 	}
-	fmt.Fprintf(&sbQuery, " ORDER BY [%s] ASC", job.PrimaryKey)
+	fmt.Fprintf(&sbQuery, " ORDER BY [%s] ASC", sourceDbInfo.PrimaryKey)
 	return sbQuery.String()
 }
-func buildExtractQueryPostgres(job MigrationJob, columns []ColumnType) string {
+func buildExtractQueryPostgres(sourceDbInfo config.SourceTableInfo, columns []ColumnType) string {
 	var sbColumns strings.Builder
 	if len(columns) == 0 {
@@ -69,5 +71,5 @@ func buildExtractQueryPostgres(job MigrationJob, columns []ColumnType) string {
 		}
 	}
-	return fmt.Sprintf(`SELECT %s FROM "%s"."%s"  ORDER BY "%s" ASC`, sbColumns.String(), job.Schema, job.Table, job.PrimaryKey)
+	return fmt.Sprintf(`SELECT %s FROM "%s"."%s"  ORDER BY "%s" ASC`, sbColumns.String(), sourceDbInfo.Schema, sourceDbInfo.Table, sourceDbInfo.PrimaryKey)
 }
--- a/cmd/go_migrate/extractor.go
+++ b/cmd/go_migrate/extractor.go
@@ -7,8 +7,10 @@ import (
 	"slices"
 	"strings"
 	"sync"
 	"sync/atomic"
 	"time"
 	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
 	"github.com/google/uuid"
 	"github.com/jackc/pgx/v5/pgxpool"
 	_ "github.com/microsoft/go-mssqldb"
@@ -27,7 +29,7 @@ type Chunk struct {
 func extractFromMssql(
 	ctx context.Context,
 	db *sql.DB,
-	job MigrationJob,
+	tableInfo config.SourceTableInfo,
 	columns []ColumnType,
 	chunkSize int,
 	chBatchesIn <-chan Batch,
@@ -35,9 +37,10 @@ func extractFromMssql(
 	chErrorsOut chan<- ExtractorError,
 	chJobErrorsOut chan<- JobError,
 	wgActiveBatches *sync.WaitGroup,
 	rowsRead *int64,
 ) {
 	indexPrimaryKey := slices.IndexFunc(columns, func(col ColumnType) bool {
-		return strings.EqualFold(col.name, job.PrimaryKey)
+		return strings.EqualFold(col.name, tableInfo.PrimaryKey)
 	})
 	if indexPrimaryKey == -1 {
@@ -68,7 +71,7 @@ func extractFromMssql(
 				return
 			}
-			if abort := processBatch(ctx, db, job, columns, chunkSize, batch, indexPrimaryKey, chChunksOut, chErrorsOut, wgActiveBatches); abort {
+			if abort := processBatch(ctx, db, tableInfo, columns, chunkSize, batch, indexPrimaryKey, chChunksOut, chErrorsOut, wgActiveBatches, rowsRead); abort {
 				return
 			}
 		}
@@ -78,7 +81,7 @@ func extractFromMssql(
 func processBatch(
 	ctx context.Context,
 	db *sql.DB,
-	job MigrationJob,
+	tableInfo config.SourceTableInfo,
 	columns []ColumnType,
 	chunkSize int,
 	batch Batch,
@@ -86,8 +89,9 @@ func processBatch(
 	chChunksOut chan<- Chunk,
 	chErrorsOut chan<- ExtractorError,
 	wgActiveBatches *sync.WaitGroup,
 	rowsRead *int64,
 ) (abort bool) {
-	query := buildExtractQueryMssql(job, columns, batch.ShouldUseRange, batch.IsLowerLimitInclusive)
+	query := buildExtractQueryMssql(tableInfo, columns, batch.ShouldUseRange, batch.IsLowerLimitInclusive)
 	log.Debug("Query used to extract data from mssql: ", query)
 	var queryArgs []any
@@ -146,6 +150,8 @@ func processBatch(
 				return true
 			}
 			atomic.AddInt64(rowsRead, int64(len(rowsChunk)))
 			return false
 		}
@@ -163,6 +169,7 @@ func processBatch(
 				return true
 			}
 			atomic.AddInt64(rowsRead, int64(len(rowsChunk)))
 			rowsChunk = make([]UnknownRowValues, 0, chunkSize)
 			chunkStartTime = time.Now()
 		}
@@ -200,14 +207,16 @@ func processBatch(
 		case <-ctx.Done():
 			return true
 		}
 		atomic.AddInt64(rowsRead, int64(len(rowsChunk)))
 	}
 	wgActiveBatches.Done()
 	return false
 }
-func extractFromPostgres(ctx context.Context, job MigrationJob, columns []ColumnType, chunkSize int, db *pgxpool.Pool, out chan<- []UnknownRowValues) error {
+func extractFromPostgres(ctx context.Context, tableInfo config.SourceTableInfo, columns []ColumnType, chunkSize int, db *pgxpool.Pool, out chan<- []UnknownRowValues) error {
-	query := buildExtractQueryPostgres(job, columns)
+	query := buildExtractQueryPostgres(tableInfo, columns)
 	log.Debug("Query used to extract data from postgres: ", query)
 	rows, err := db.Query(ctx, query)
@@ -229,13 +238,13 @@ func extractFromPostgres(ctx context.Context, job MigrationJob, columns []Column
 		if len(rowsChunk) >= chunkSize {
 			out <- rowsChunk
 			rowsChunk = make([]UnknownRowValues, 0, chunkSize)
-			log.Infof("Chunk send... %+v", job)
+			log.Infof("Chunk send... %+v", tableInfo)
 		}
 	}
 	if len(rowsChunk) > 0 {
 		out <- rowsChunk
-		log.Infof("Chunk send... %+v", job)
+		log.Infof("Chunk send... %+v", tableInfo)
 	}
 	return nil
--- a/cmd/go_migrate/inspect-columns.go
+++ b/cmd/go_migrate/inspect-columns.go
@@ -9,6 +9,7 @@ import (
 	"sync"
 	"time"
 	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
 	"github.com/jackc/pgx/v5/pgxpool"
 	_ "github.com/microsoft/go-mssqldb"
 	log "github.com/sirupsen/logrus"
@@ -111,7 +112,7 @@ func MapPostgresColumn(column ColumnType, maxLength *int64, precision *int64, sc
 	return column
 }
-func GetColumnTypesPostgres(db *pgxpool.Pool, migrationJob MigrationJob) ([]ColumnType, error) {
+func GetColumnTypesPostgres(db *pgxpool.Pool, tableInfo config.TargetTableInfo) ([]ColumnType, error) {
 	query := `
 SELECT 
  c.column_name AS name,
@@ -129,7 +130,7 @@ ORDER BY c.ordinal_position;
 	ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
 	defer cancel()
-	rows, err := db.Query(ctx, query, migrationJob.Schema, migrationJob.Table)
+	rows, err := db.Query(ctx, query, tableInfo.Schema, tableInfo.Table)
 	if err != nil {
 		return nil, fmt.Errorf("Error querying column types: %w", err)
 	}
@@ -197,7 +198,7 @@ func MapMssqlColumn(column ColumnType) ColumnType {
 	return column
 }
-func GetColumnTypesMssql(db *sql.DB, migrationJob MigrationJob) ([]ColumnType, error) {
+func GetColumnTypesMssql(db *sql.DB, tableInfo config.SourceTableInfo) ([]ColumnType, error) {
 	query := `
 SELECT 
 	c.name AS name,
@@ -219,7 +220,7 @@ ORDER BY c.column_id;
 	ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
 	defer cancel()
-	rows, err := db.QueryContext(ctx, query, sql.Named("schema", migrationJob.Schema), sql.Named("table", migrationJob.Table))
+	rows, err := db.QueryContext(ctx, query, sql.Named("schema", tableInfo.Schema), sql.Named("table", tableInfo.Table))
 	if err != nil {
 		return nil, fmt.Errorf("Error querying column types: %w", err)
 	}
@@ -252,7 +253,12 @@ ORDER BY c.column_id;
 	return colTypes, nil
 }
-func GetColumnTypes(sourceDb *sql.DB, targetDb *pgxpool.Pool, migrationJob MigrationJob) ([]ColumnType, []ColumnType, error) {
+func GetColumnTypes(
 	sourceDb *sql.DB,
 	targetDb *pgxpool.Pool,
 	sourceTable config.SourceTableInfo,
 	targetTable config.TargetTableInfo,
 ) ([]ColumnType, []ColumnType, error) {
 	var sourceDbErr error
 	var targetDbErr error
 	var sourceColTypes []ColumnType
@@ -260,14 +266,14 @@ func GetColumnTypes(sourceDb *sql.DB, targetDb *pgxpool.Pool, migrationJob Migra
 	var wg sync.WaitGroup
 	wg.Go(func() {
-		sourceColTypes, sourceDbErr = GetColumnTypesMssql(sourceDb, migrationJob)
+		sourceColTypes, sourceDbErr = GetColumnTypesMssql(sourceDb, sourceTable)
 		if sourceDbErr != nil {
 			log.Error("Error (sourceDb): ", sourceDbErr)
 		}
 	})
 	wg.Go(func() {
-		targetColTypes, targetDbErr = GetColumnTypesPostgres(targetDb, migrationJob)
+		targetColTypes, targetDbErr = GetColumnTypesPostgres(targetDb, targetTable)
 		if targetDbErr != nil {
 			log.Error("Error (targetDb): ", targetDbErr)
 		}
--- a/cmd/go_migrate/loader.go
+++ b/cmd/go_migrate/loader.go
@@ -6,8 +6,10 @@ import (
 	"errors"
 	"fmt"
 	"sync"
 	"sync/atomic"
 	"time"
 	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
 	"github.com/jackc/pgx/v5"
 	"github.com/jackc/pgx/v5/pgconn"
 	"github.com/jackc/pgx/v5/pgxpool"
@@ -18,14 +20,15 @@ import (
 func loadRowsPostgres(
 	ctx context.Context,
 	db *pgxpool.Pool,
-	job MigrationJob,
+	tableInfo config.TargetTableInfo,
 	columns []ColumnType,
 	chChunksIn <-chan Chunk,
 	chErrorsOut chan<- LoaderError,
 	chJobErrorsOut chan<- JobError,
 	wgActiveChunks *sync.WaitGroup,
 	rowsLoaded *int64,
 ) {
-	tableId := pgx.Identifier{job.Schema, job.Table}
+	tableId := pgx.Identifier{tableInfo.Schema, tableInfo.Table}
 	colNames := Map(columns, func(col ColumnType) string {
 		return col.name
 	})
@@ -43,7 +46,7 @@ func loadRowsPostgres(
 				return
 			}
-			if abort := loadChunkPostgres(ctx, db, tableId, colNames, chunk, chErrorsOut, chJobErrorsOut, wgActiveChunks); abort {
+			if abort := loadChunkPostgres(ctx, db, tableId, colNames, chunk, chErrorsOut, chJobErrorsOut, wgActiveChunks, rowsLoaded); abort {
 				return
 			}
 		}
@@ -59,6 +62,7 @@ func loadChunkPostgres(
 	chErrorsOut chan<- LoaderError,
 	chJobErrorsOut chan<- JobError,
 	wgActiveChunks *sync.WaitGroup,
 	rowsLoaded *int64,
 ) (abort bool) {
 	chunkStartTime := time.Now()
 	_, err := db.CopyFrom(
@@ -75,7 +79,7 @@ func loadChunkPostgres(
 				select {
 				case chJobErrorsOut <- JobError{
 					ShouldCancelJob: true,
-					Msg:             fmt.Sprintf("Fatal data integrity error in table %s", identifier.Sanitize()),
+					Msg:             fmt.Sprintf("Fatal error in table %s", identifier.Sanitize()),
 					Prev:            err,
 				}:
 				case <-ctx.Done():
@@ -98,11 +102,12 @@ func loadChunkPostgres(
 	log.Infof("Loaded chunk: %d rows in %v (%.0f rows/sec)", len(chunk.Data), chunkDuration, rowsPerSec)
 	atomic.AddInt64(rowsLoaded, int64(len(chunk.Data)))
 	wgActiveChunks.Done()
 	return false
 }
-func loadRowsMssql(ctx context.Context, job MigrationJob, columns []ColumnType, db *sql.DB, in <-chan []UnknownRowValues) error {
+func loadRowsMssql(ctx context.Context, tableInfo config.TargetTableInfo, columns []ColumnType, db *sql.DB, in <-chan []UnknownRowValues) error {
 	chunkCount := 0
 	totalRowsLoaded := 0
@@ -114,7 +119,7 @@ func loadRowsMssql(ctx context.Context, job MigrationJob, columns []ColumnType,
 			return fmt.Errorf("error starting transaction: %w", err)
 		}
-		fullTableName := fmt.Sprintf("[%s].[%s]", job.Schema, job.Table)
+		fullTableName := fmt.Sprintf("[%s].[%s]", tableInfo.Schema, tableInfo.Table)
 		colNames := Map(columns, func(col ColumnType) string {
 			return col.name
 		})
@@ -177,14 +182,13 @@ func Map[T any, V any](input []T, mapper func(T) V) []V {
 	return result
 }
-func fakeLoader(job MigrationJob, columns []ColumnType, in <-chan [][]any) {
+func fakeLoader(tableInfo config.TargetTableInfo, columns []ColumnType, in <-chan [][]any) {
 	for rows := range in {
 		log.Debugf("Chunk received, loading data into...")
 		for i, rowValues := range rows {
 			if i%100 == 0 {
-				logSampleRow(job, columns, rowValues, fmt.Sprintf("row %d", i))
+				logSampleRow(tableInfo.Schema, tableInfo.Table, columns, rowValues, fmt.Sprintf("row %d", i))
 			}
 		}
 	}
--- a/cmd/go_migrate/main.go
+++ b/cmd/go_migrate/main.go
@@ -2,48 +2,31 @@ package main
 import (
 	"context"
 	"database/sql"
 	"sync"
 	"time"
 	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
 	"github.com/jackc/pgx/v5/pgxpool"
 	log "github.com/sirupsen/logrus"
 )
 type MigrationJob struct {
 	Schema     string
 	Table      string
 	PrimaryKey string
 }
 var migrationJobs []MigrationJob = []MigrationJob{
 	{
 		Schema:     "Cartografia",
 		Table:      "MANZANA",
 		PrimaryKey: "GDB_ARCHIVE_OID",
 	},
 	{
 		Schema:     "Red",
 		Table:      "PUERTO",
 		PrimaryKey: "ID_PUERTO",
 	},
 }
 const (
 	NumExtractors  int   = 4
 	NumLoaders     int   = 8
 	ChunkSize      int   = 25000
 	QueueSize      int   = 8
 	ChunksPerBatch int   = 16
 	RowsPerBatch   int64 = int64(ChunkSize * ChunksPerBatch)
 )
 func main() {
 	configureLog()
 	migrationConfig, err := config.ReadMigrationConfig()
 	if err != nil {
 		log.Fatalf("error leyendo configuracion: %v", err)
 	}
 	log.Debugf("Config: %+v", migrationConfig)
 	startTime := time.Now()
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
 	log.Info("=== Starting migration ===")
 	log.Infof("Number of loaders: %d, Chunk size: %d", NumLoaders, ChunkSize)
 	sourceDb, targetDb, connError := connectToDatabases()
 	if connError != nil {
@@ -53,12 +36,81 @@ func main() {
 	defer sourceDb.Close()
 	defer targetDb.Close()
-	for _, job := range migrationJobs {
+	results := processMigrationJobs(ctx, sourceDb, targetDb, migrationConfig.Jobs, migrationConfig.MaxParallelWorkers)
-		log.Infof(">>> Processing job: %s.%s <<<", job.Schema, job.Table)
+
-		processMigrationJob(ctx, sourceDb, targetDb, job)
+	log.Info("=== RESUMEN DE MIGRACIÓN ===")
 	var totalProcessed, totalErrors int64
 	for _, res := range results {
 		status := "OK"
 		if res.Error != nil {
 			status = "FAILED"
 		}
 		log.Infof("[%s] Status: %s | Read: %d | Loaded: %d | Errors: %d | Time: %v", res.JobName, status, res.RowsRead, res.RowsLoaded, res.RowsFailed, res.Duration)
 		totalProcessed += res.RowsLoaded
 		if res.Error != nil {
 			totalErrors++
 		}
 	}
 	log.Infof("Migración terminada. Tablas: %d, Errores: %d, Filas totales: %d", len(results), totalErrors, totalProcessed)
 	totalDuration := time.Since(startTime)
 	log.Infof("=== Migration completed successfully! ===")
 	log.Infof("Total migration time: %v", totalDuration)
 }
 func processMigrationJobs(
 	ctx context.Context,
 	sourceDb *sql.DB,
 	targetDb *pgxpool.Pool,
 	jobs []config.Job,
 	maxParallelWorkers int,
 ) []JobResult {
 	if len(jobs) == 0 {
 		log.Info("No migration jobs configured")
 		return []JobResult{}
 	}
 	if maxParallelWorkers <= 0 {
 		maxParallelWorkers = 1
 	}
 	if maxParallelWorkers > len(jobs) {
 		maxParallelWorkers = len(jobs)
 	}
 	log.Infof("Starting migration with %d parallel worker(s)", maxParallelWorkers)
 	chJobResults := make(chan JobResult, len(jobs))
 	chJobs := make(chan config.Job, len(jobs))
 	var wgJobs sync.WaitGroup
 	for i := range maxParallelWorkers {
 		wgJobs.Go(func() {
 			for job := range chJobs {
 				log.Infof("[worker %d] >>> Processing job: %s.%s <<<", i, job.SourceTable.Schema, job.SourceTable.Table)
 				res := processMigrationJob(ctx, sourceDb, targetDb, job)
 				chJobResults <- res
 			}
 		})
 	}
 	for _, job := range jobs {
 		chJobs <- job
 	}
 	close(chJobs)
 	go func() {
 		wgJobs.Wait()
 		close(chJobResults)
 	}()
 	var finalResults []JobResult
 	for res := range chJobResults {
 		finalResults = append(finalResults, res)
 	}
 	return finalResults
 }
--- a/cmd/go_migrate/metrics.go
+++ b/cmd/go_migrate/metrics.go
@@ -0,0 +1,13 @@
 package main
 import "time"
 type JobResult struct {
 	JobName    string
 	StartTime  time.Time
 	Duration   time.Duration
 	RowsRead   int64
 	RowsLoaded int64
 	RowsFailed int64
 	Error      error
 }
--- a/cmd/go_migrate/process.go
+++ b/cmd/go_migrate/process.go
@@ -4,8 +4,10 @@ import (
 	"context"
 	"database/sql"
 	"sync"
 	"sync/atomic"
 	"time"
 	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
 	"github.com/jackc/pgx/v5/pgxpool"
 	_ "github.com/microsoft/go-mssqldb"
@@ -16,14 +18,19 @@ func processMigrationJob(
 	ctx context.Context,
 	sourceDb *sql.DB,
 	targetDb *pgxpool.Pool,
-	job MigrationJob,
+	job config.Job,
-) {
+) JobResult {
-	jobStartTime := time.Now()
+	result := JobResult{
-	log.Infof("Starting migration job: %s.%s [PK: %s]", job.Schema, job.Table, job.PrimaryKey)
+		JobName:   job.Name,
 		StartTime: time.Now(),
 	}
-	sourceColTypes, targetColTypes, err := GetColumnTypes(sourceDb, targetDb, job)
+	var rowsRead, rowsLoaded, rowsFailed int64
 	sourceColTypes, targetColTypes, err := GetColumnTypes(sourceDb, targetDb, job.SourceTable, job.TargetTable)
 	if err != nil {
-		log.Fatal("Unexpected error: ", err)
+		result.Error = err
 		return result
 	}
 	logColumnTypes(sourceColTypes, "Source col types")
@@ -32,17 +39,17 @@ func processMigrationJob(
 	jobCtx, cancel := context.WithCancel(ctx)
 	defer cancel()
-	batches, err := batchGeneratorMssql(jobCtx, sourceDb, job)
+	batches, err := batchGeneratorMssql(jobCtx, sourceDb, job.SourceTable, job.RowsPerBatch)
 	if err != nil {
 		log.Error("Unexpected error calculating batch ranges: ", err)
 	}
-	chJobErrors := make(chan JobError, 50)
+	chJobErrors := make(chan JobError, job.QueueSize)
-	chBatches := make(chan Batch, QueueSize)
+	chBatches := make(chan Batch, job.QueueSize)
-	chExtractorErrors := make(chan ExtractorError, QueueSize)
+	chExtractorErrors := make(chan ExtractorError, job.QueueSize)
-	chChunksRaw := make(chan Chunk, QueueSize)
+	chChunksRaw := make(chan Chunk, job.QueueSize)
-	chChunksTransformed := make(chan Chunk, QueueSize)
+	chChunksTransformed := make(chan Chunk, job.QueueSize)
-	chLoadersErrors := make(chan LoaderError, QueueSize)
+	chLoadersErrors := make(chan LoaderError, job.QueueSize)
 	var wgActiveBatches sync.WaitGroup
 	var wgActiveChunks sync.WaitGroup
@@ -53,19 +60,19 @@ func processMigrationJob(
 	go func() {
 		if err := jobErrorHandler(jobCtx, chJobErrors); err != nil {
 			cancel()
 			result.Error = err
 		}
 	}()
 	go extractorErrorHandler(jobCtx, chExtractorErrors, chBatches, chJobErrors, &wgActiveBatches)
 	go loaderErrorHandler(jobCtx, chLoadersErrors, chChunksTransformed, chJobErrors, &wgActiveChunks)
-	maxExtractors := min(NumExtractors, len(batches))
+	maxExtractors := min(job.MaxExtractors, len(batches))
-	log.Infof("Starting %d extractors...", maxExtractors)
+	log.Infof("Starting %d extractor(s)...", maxExtractors)
 	extractStartTime := time.Now()
 	for range maxExtractors {
 		wgExtractors.Go(func() {
-			extractFromMssql(jobCtx, sourceDb, job, sourceColTypes, ChunkSize, chBatches, chChunksRaw, chExtractorErrors, chJobErrors, &wgActiveBatches)
+			extractFromMssql(jobCtx, sourceDb, job.SourceTable, sourceColTypes, job.ChunkSize, chBatches, chChunksRaw, chExtractorErrors, chJobErrors, &wgActiveBatches, &rowsRead)
 		})
 	}
@@ -76,8 +83,7 @@ func processMigrationJob(
 		}
 	}()
-	log.Infof("Starting %d transformers...", maxExtractors)
+	log.Infof("Starting %d transformer(s)...", maxExtractors)
 	transformStartTime := time.Now()
 	for range maxExtractors {
 		wgTransformers.Go(func() {
@@ -85,12 +91,11 @@ func processMigrationJob(
 		})
 	}
-	log.Infof("Starting %d PostgreSQL loader(s)...", NumLoaders)
+	log.Infof("Starting %d loader(s)...", job.MaxLoaders)
 	loadStartTime := time.Now()
-	for range NumLoaders {
+	for range job.MaxLoaders {
 		wgLoaders.Go(func() {
-			loadRowsPostgres(jobCtx, targetDb, job, targetColTypes, chChunksTransformed, chLoadersErrors, chJobErrors, &wgActiveChunks)
+			loadRowsPostgres(jobCtx, targetDb, job.TargetTable, targetColTypes, chChunksTransformed, chLoadersErrors, chJobErrors, &wgActiveChunks, &rowsLoaded)
 		})
 	}
@@ -100,24 +105,31 @@ func processMigrationJob(
 		close(chExtractorErrors)
 		wgExtractors.Wait()
 		log.Infof("Extraction completed in %v", time.Since(extractStartTime))
 		close(chChunksRaw)
 		wgTransformers.Wait()
 		log.Infof("Transformation completed in %v", time.Since(transformStartTime))
 		wgActiveChunks.Wait()
 		close(chChunksTransformed)
 		close(chLoadersErrors)
 		wgLoaders.Wait()
 		log.Infof("Loading completed in %v", time.Since(loadStartTime))
 		cancel()
 	}()
 	<-jobCtx.Done()
-	log.Infof("Migration job completed. Total time: %v", time.Since(jobStartTime))
+
 	if ctx.Err() != nil {
 		result.Error = ctx.Err()
 	}
 	result.Duration = time.Since(result.StartTime)
 	result.RowsRead = atomic.LoadInt64(&rowsRead)
 	result.RowsLoaded = atomic.LoadInt64(&rowsLoaded)
 	result.RowsFailed = atomic.LoadInt64(&rowsFailed)
 	return result
 }
 func logColumnTypes(columnTypes []ColumnType, label string) {
@@ -128,8 +140,14 @@ func logColumnTypes(columnTypes []ColumnType, label string) {
 	}
 }
-func logSampleRow(job MigrationJob, columns []ColumnType, rowValues UnknownRowValues, tag string) {
+func logSampleRow(
-	log.Infof("[%s.%s] Sample row: (%s)", job.Schema, job.Table, tag)
+	schema string,
 	table string,
 	columns []ColumnType,
 	rowValues UnknownRowValues,
 	tag string,
 ) {
 	log.Infof("[%s.%s] Sample row: (%s)", schema, table, tag)
 	for i, col := range columns {
 		log.Infof("%s (%T): %v", col.Name(), rowValues[i], rowValues[i])
 	}
--- a/config.yaml
+++ b/config.yaml
@@ -0,0 +1,33 @@
 max_parallel_workers: 4
 defaults:
  max_extractors: 2
  max_loaders: 4
  queue_size: 8
  chunk_size: 25000
  chunks_per_batch: 8
  truncate_target: true
  truncate_method: TRUNCATE # TRUNCATE | DELETE
  retry:
    attempts: 3
 jobs:
  - name: cartografia_manzana
    enabled: true
    source:
      schema: Cartografia
      table: MANZANA
      primary_key: GDB_ARCHIVE_OID
    target:
      schema: Cartografia
      table: MANZANA
  - name: red_puerto
    enabled: true
    source:
      schema: Red
      table: PUERTO
      primary_key: ID_PUERTO
    target:
      schema: Red
      table: PUERTO
--- a/go.mod
+++ b/go.mod
@@ -10,6 +10,7 @@ require (
 	github.com/microsoft/go-mssqldb v1.9.8
 	github.com/sirupsen/logrus v1.9.4
 	github.com/twpayne/go-geom v1.6.1
 	gopkg.in/yaml.v3 v3.0.1
 )
 require (
@@ -18,6 +19,8 @@ require (
 	github.com/jackc/pgpassfile v1.0.0 // indirect
 	github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
 	github.com/jackc/puddle/v2 v2.2.2 // indirect
 	github.com/kr/text v0.2.0 // indirect
 	github.com/rogpeppe/go-internal v1.14.1 // indirect
 	github.com/shopspring/decimal v1.4.0 // indirect
 	golang.org/x/crypto v0.48.0 // indirect
 	golang.org/x/sync v0.19.0 // indirect
--- a/go.sum
+++ b/go.sum
@@ -16,6 +16,7 @@ github.com/alecthomas/assert/v2 v2.10.0 h1:jjRCHsj6hBJhkmhznrCzoNpbA3zqy0fYiUcYZ
 github.com/alecthomas/assert/v2 v2.10.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k=
 github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc=
 github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4=
 github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -41,6 +42,10 @@ github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo
 github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
 github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
 github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
 github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
 github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
 github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
 github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
 github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
 github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
 github.com/microsoft/go-mssqldb v1.9.8 h1:d4IFMvF/o+HdpXUqbBfzHvn/NlFA75YGcfHUUvDFJEM=
@@ -49,6 +54,8 @@ github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmd
 github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
 github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
 github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
 github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME=
 github.com/sirupsen/logrus v1.9.4 h1:TsZE7l11zFCLZnZ+teH4Umoq5BhEIfIzfRDZ1Uzql2w=
@@ -71,6 +78,8 @@ golang.org/x/sys v0.41.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
 golang.org/x/text v0.34.0 h1:oL/Qq0Kdaqxa1KbNeMKwQq0reLCCaFtqu2eNuSeNHbk=
 golang.org/x/text v0.34.0/go.mod h1:homfLqTYRFyVYemLBFl5GgL/DWEiH5wcsQ5gSh1yziA=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
--- a/internal/app/config/migration.go
+++ b/internal/app/config/migration.go
@@ -0,0 +1,118 @@
 package config
 import (
 	"fmt"
 	"os"
 	"gopkg.in/yaml.v3"
 )
 type RetryConfig struct {
 	Attempts int `yaml:"attempts"`
 }
 type JobConfig struct {
 	MaxExtractors  int `yaml:"max_extractors"`
 	MaxLoaders     int `yaml:"max_loaders"`
 	QueueSize      int `yaml:"queue_size"`
 	ChunkSize      int `yaml:"chunk_size"`
 	ChunksPerBatch int `yaml:"chunks_per_batch"`
 	RowsPerBatch   int64
 	TruncateTarget bool        `yaml:"truncate_target"`
 	TruncateMethod string      `yaml:"truncate_method"`
 	Retry          RetryConfig `yaml:"retry"`
 }
 type TargetTableInfo struct {
 	Schema string `yaml:"schema"`
 	Table  string `yaml:"table"`
 }
 type SourceTableInfo struct {
 	Schema     string `yaml:"schema"`
 	Table      string `yaml:"table"`
 	PrimaryKey string `yaml:"primary_key"`
 }
 type Job struct {
 	Name        string          `yaml:"name"`
 	Enabled     bool            `yaml:"enabled"`
 	SourceTable SourceTableInfo `yaml:"source"`
 	TargetTable TargetTableInfo `yaml:"target"`
 	PreSQL      []string        `yaml:"pre_sql"`
 	PostSQL     []string        `yaml:"post_sql"`
 	JobConfig   `yaml:",inline"`
 }
 type MigrationConfig struct {
 	MaxParallelWorkers int       `yaml:"max_parallel_workers"`
 	Defaults           JobConfig `yaml:"defaults"`
 	Jobs               []Job     `yaml:"jobs"`
 }
 type rawConfig struct {
 	MaxParallelWorkers int         `yaml:"max_parallel_workers"`
 	Defaults           JobConfig   `yaml:"defaults"`
 	Jobs               []yaml.Node `yaml:"jobs"`
 }
 func (c *MigrationConfig) UnmarshalYAML(value *yaml.Node) error {
 	var raw rawConfig
 	if err := value.Decode(&raw); err != nil {
 		return err
 	}
 	c.MaxParallelWorkers = raw.MaxParallelWorkers
 	c.Defaults = raw.Defaults
 	c.Defaults.RowsPerBatch = int64(raw.Defaults.ChunkSize * raw.Defaults.ChunksPerBatch)
 	for _, node := range raw.Jobs {
 		job := Job{
 			JobConfig: raw.Defaults,
 		}
 		if err := node.Decode(&job); err != nil {
 			return err
 		}
 		job.RowsPerBatch = int64(job.ChunkSize * job.ChunksPerBatch)
 		c.Jobs = append(c.Jobs, job)
 	}
 	return nil
 }
 const defaultConfigFileName string = "config.yaml"
 func filenamesOrDefault(filenames []string) []string {
 	if len(filenames) == 0 {
 		return []string{defaultConfigFileName}
 	}
 	return filenames
 }
 func ReadMigrationConfig(filenames ...string) (MigrationConfig, error) {
 	filenames = filenamesOrDefault(filenames)
 	var data []byte
 	var err error
 	for _, filename := range filenames {
 		data, err = os.ReadFile(filename)
 		if err != nil {
 			continue
 		}
 		break
 	}
 	if err != nil {
 		return MigrationConfig{}, fmt.Errorf("Error reading config file: %v", err)
 	}
 	var config MigrationConfig
 	if err := yaml.Unmarshal(data, &config); err != nil {
 		return MigrationConfig{}, fmt.Errorf("Error parsing config file: %v", err)
 	}
 	return config, nil
 }
--- a/internal/app/convert/main.go
+++ b/internal/app/convert/main.go
@@ -0,0 +1,18 @@
 package convert
 func ToInt64(v any) (int64, bool) {
 	switch t := v.(type) {
 	case int:
 		return int64(t), true
 	case int8:
 		return int64(t), true
 	case int16:
 		return int64(t), true
 	case int32:
 		return int64(t), true
 	case int64:
 		return int64(t), true
 	default:
 		return 0, false
 	}
 }
--- a/internal/app/custom_errors/extractor.error.go
+++ b/internal/app/custom_errors/extractor.error.go
@@ -0,0 +1,80 @@
 package custom_errors
 import (
 	"context"
 	"fmt"
 	"sync"
 	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
 	"github.com/google/uuid"
 )
 type ExtractorError struct {
 	Batch     models.Batch
 	LastId    int64
 	HasLastId bool
 	Msg       string
 }
 func (e *ExtractorError) Error() string {
 	return e.Msg
 }
 const maxRetryAttempts = 3
 func ExtractorErrorHandler(
 	ctx context.Context,
 	chErrorsIn <-chan ExtractorError,
 	chBatchesOut chan<- models.Batch,
 	chJobErrorsOut chan<- JobError,
 	wgActiveBatches *sync.WaitGroup,
 ) {
 	for {
 		if ctx.Err() != nil {
 			return
 		}
 		select {
 		case <-ctx.Done():
 			return
 		case err, ok := <-chErrorsIn:
 			if !ok {
 				return
 			}
 			if err.Batch.RetryCounter >= maxRetryAttempts {
 				jobError := JobError{
 					ShouldCancelJob: false,
 					Msg:             fmt.Sprintf("batch %v reached max retries (%d)", err.Batch.Id, maxRetryAttempts),
 					Prev:            &err,
 				}
 				select {
 				case chJobErrorsOut <- jobError:
 				case <-ctx.Done():
 					return
 				}
 				wgActiveBatches.Done()
 				continue
 			}
 			newBatch := err.Batch
 			newBatch.RetryCounter++
 			if err.HasLastId {
 				newBatch.ParentId = err.Batch.Id
 				newBatch.Id = uuid.New()
 				newBatch.LowerLimit = err.LastId
 				newBatch.IsLowerLimitInclusive = false
 			}
 			select {
 			case chBatchesOut <- newBatch:
 			case <-ctx.Done():
 				return
 			}
 		}
 	}
 }
--- a/internal/app/custom_errors/job.error.go
+++ b/internal/app/custom_errors/job.error.go
@@ -0,0 +1,47 @@
 package custom_errors
 import (
 	"context"
 	"fmt"
 	log "github.com/sirupsen/logrus"
 )
 type JobError struct {
 	ShouldCancelJob bool
 	Msg             string
 	Prev            error
 }
 func (e *JobError) Error() string {
 	if e.Prev != nil {
 		return fmt.Sprintf("%s: %v", e.Msg, e.Prev)
 	}
 	return e.Msg
 }
 func JobErrorHandler(ctx context.Context, chErrorsIn <-chan JobError) error {
 	for {
 		if ctx.Err() != nil {
 			return nil
 		}
 		select {
 		case <-ctx.Done():
 			return nil
 		case err, ok := <-chErrorsIn:
 			if !ok {
 				return nil
 			}
 			if err.ShouldCancelJob {
 				log.Error(err.Msg, " - ", err.Prev)
 				return &err
 			}
 			log.Error(err.Msg, " - ", err.Prev)
 		}
 	}
 }
--- a/internal/app/etl/extractor/main.go
+++ b/internal/app/etl/extractor/main.go
@@ -0,0 +1,36 @@
 package extractor
 import (
 	"context"
 	"sync"
 	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
 	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
 	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
 )
 type Extractor interface {
 	ProcessBatch(
 		ctx context.Context,
 		tableInfo config.SourceTableInfo,
 		columns []models.ColumnType,
 		chunkSize int,
 		batch models.Batch,
 		indexPrimaryKey int,
 		chChunksOut chan<- models.Chunk,
 		rowsRead *int64,
 	) error
 	Exec(
 		ctx context.Context,
 		tableInfo config.SourceTableInfo,
 		columns []models.ColumnType,
 		chunkSize int,
 		chBatchesIn <-chan models.Batch,
 		chChunksOut chan<- models.Chunk,
 		chErrorsOut chan<- custom_errors.ExtractorError,
 		chJobErrorsOut chan<- custom_errors.JobError,
 		wgActiveBatches *sync.WaitGroup,
 		rowsRead *int64,
 	)
 }
--- a/internal/app/etl/extractor/mssql.go
+++ b/internal/app/etl/extractor/mssql.go
@@ -0,0 +1,269 @@
 package extractor
 import (
 	"context"
 	"database/sql"
 	"errors"
 	"fmt"
 	"slices"
 	"strings"
 	"sync"
 	"sync/atomic"
 	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
 	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/convert"
 	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
 	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
 	"github.com/google/uuid"
 )
 type MssqlExtractor struct {
 	db *sql.DB
 }
 func NewMssqlExtractor(db *sql.DB) *MssqlExtractor {
 	return &MssqlExtractor{db: db}
 }
 func buildExtractQueryMssql(
 	tableInfo config.SourceTableInfo,
 	columns []models.ColumnType,
 	includeRange bool,
 	isMinInclusive bool,
 ) string {
 	var sbQuery strings.Builder
 	sbQuery.WriteString("SELECT ")
 	if len(columns) == 0 {
 		sbQuery.WriteString("*")
 	} else {
 		for i, col := range columns {
 			fmt.Fprintf(&sbQuery, "[%s]", col.Name())
 			if col.Type() == "GEOMETRY" {
 				fmt.Fprintf(&sbQuery, ".STAsBinary() AS [%s]", col.Name())
 			}
 			if i < len(columns)-1 {
 				sbQuery.WriteString(", ")
 			}
 		}
 	}
 	fmt.Fprintf(&sbQuery, " FROM [%s].[%s]", tableInfo.Schema, tableInfo.Table)
 	if includeRange {
 		fmt.Fprintf(&sbQuery, " WHERE [%s]", tableInfo.PrimaryKey)
 		if isMinInclusive {
 			sbQuery.WriteString(" >=")
 		} else {
 			sbQuery.WriteString(" >")
 		}
 		fmt.Fprintf(&sbQuery, " @min AND [%s] <= @max", tableInfo.PrimaryKey)
 	}
 	fmt.Fprintf(&sbQuery, " ORDER BY [%s] ASC", tableInfo.PrimaryKey)
 	return sbQuery.String()
 }
 func extractorErrorFromLastRowMssql(
 	lastRow models.UnknownRowValues,
 	indexPrimaryKey int,
 	batch *models.Batch,
 	previousError error,
 ) *custom_errors.ExtractorError {
 	lastIdRawValue := lastRow[indexPrimaryKey]
 	lastId, ok := convert.ToInt64(lastIdRawValue)
 	if !ok {
 		currentBatch := *batch
 		currentBatch.RetryCounter = 3
 		return &custom_errors.ExtractorError{
 			Batch:     currentBatch,
 			HasLastId: true,
 			Msg:       fmt.Sprintf("Couldn't cast last id value as int: %s", previousError.Error()),
 		}
 	}
 	return &custom_errors.ExtractorError{
 		Batch:     *batch,
 		HasLastId: true,
 		LastId:    lastId,
 		Msg:       previousError.Error(),
 	}
 }
 func (mssqlEx *MssqlExtractor) ProcessBatch(
 	ctx context.Context,
 	tableInfo config.SourceTableInfo,
 	columns []models.ColumnType,
 	chunkSize int,
 	batch models.Batch,
 	indexPrimaryKey int,
 	chChunksOut chan<- models.Chunk,
 	rowsRead *int64,
 ) error {
 	query := buildExtractQueryMssql(tableInfo, columns, batch.ShouldUseRange, batch.IsLowerLimitInclusive)
 	var queryArgs []any
 	if batch.ShouldUseRange {
 		queryArgs = append(queryArgs,
 			sql.Named("min", batch.LowerLimit),
 			sql.Named("max", batch.UpperLimit),
 		)
 	}
 	rows, err := mssqlEx.db.QueryContext(ctx, query, queryArgs...)
 	if err != nil {
 		return &custom_errors.ExtractorError{Batch: batch, HasLastId: false, Msg: err.Error()}
 	}
 	defer rows.Close()
 	rowsChunk := make([]models.UnknownRowValues, 0, chunkSize)
 	for rows.Next() {
 		values := make([]any, len(columns))
 		scanArgs := make([]any, len(columns))
 		for i := range values {
 			scanArgs[i] = &values[i]
 		}
 		if err := rows.Scan(scanArgs...); err != nil {
 			if len(rowsChunk) == 0 {
 				return &custom_errors.ExtractorError{Batch: batch, HasLastId: false, Msg: err.Error()}
 			}
 			lastRow := rowsChunk[len(rowsChunk)-1]
 			select {
 			case chChunksOut <- models.Chunk{Id: uuid.New(), BatchId: batch.Id, Data: rowsChunk, RetryCounter: 0}:
 			case <-ctx.Done():
 				return nil
 			}
 			atomic.AddInt64(rowsRead, int64(len(rowsChunk)))
 			return extractorErrorFromLastRowMssql(lastRow, indexPrimaryKey, &batch, err)
 		}
 		rowsChunk = append(rowsChunk, values)
 		if len(rowsChunk) >= chunkSize {
 			select {
 			case chChunksOut <- models.Chunk{Id: uuid.New(), BatchId: batch.Id, Data: rowsChunk, RetryCounter: 0}:
 			case <-ctx.Done():
 				return nil
 			}
 			atomic.AddInt64(rowsRead, int64(len(rowsChunk)))
 			rowsChunk = make([]models.UnknownRowValues, 0, chunkSize)
 		}
 	}
 	if err := rows.Err(); err != nil {
 		if errors.Is(err, ctx.Err()) {
 			return ctx.Err()
 		}
 		if len(rowsChunk) == 0 {
 			return &custom_errors.ExtractorError{Batch: batch, HasLastId: false, Msg: err.Error()}
 		}
 		lastRow := rowsChunk[len(rowsChunk)-1]
 		return extractorErrorFromLastRowMssql(lastRow, indexPrimaryKey, &batch, err)
 	}
 	if len(rowsChunk) > 0 {
 		select {
 		case chChunksOut <- models.Chunk{Id: uuid.New(), BatchId: batch.Id, Data: rowsChunk, RetryCounter: 0}:
 		case <-ctx.Done():
 			return nil
 		}
 		atomic.AddInt64(rowsRead, int64(len(rowsChunk)))
 	}
 	return nil
 }
 func (mssqlEx *MssqlExtractor) Exec(
 	ctx context.Context,
 	tableInfo config.SourceTableInfo,
 	columns []models.ColumnType,
 	chunkSize int,
 	chBatchesIn <-chan models.Batch,
 	chChunksOut chan<- models.Chunk,
 	chErrorsOut chan<- custom_errors.ExtractorError,
 	chJobErrorsOut chan<- custom_errors.JobError,
 	wgActiveBatches *sync.WaitGroup,
 	rowsRead *int64,
 ) {
 	indexPrimaryKey := slices.IndexFunc(columns, func(col models.ColumnType) bool {
 		return strings.EqualFold(col.Name(), tableInfo.PrimaryKey)
 	})
 	if indexPrimaryKey == -1 {
 		select {
 		case <-ctx.Done():
 			return
 		case chJobErrorsOut <- custom_errors.JobError{
 			ShouldCancelJob: true,
 			Msg:             "Primary key not found in provided columns",
 		}:
 		}
 		return
 	}
 	for {
 		if ctx.Err() != nil {
 			return
 		}
 		select {
 		case <-ctx.Done():
 			return
 		case batch, ok := <-chBatchesIn:
 			if !ok {
 				return
 			}
 			err := mssqlEx.ProcessBatch(
 				ctx,
 				tableInfo,
 				columns,
 				chunkSize,
 				batch,
 				indexPrimaryKey,
 				chChunksOut,
 				rowsRead,
 			)
 			if err != nil {
 				var exError *custom_errors.ExtractorError
 				if errors.As(err, &exError) {
 					select {
 					case <-ctx.Done():
 						return
 					case chErrorsOut <- *exError:
 					}
 				}
 				select {
 				case <-ctx.Done():
 					return
 				case chJobErrorsOut <- custom_errors.JobError{ShouldCancelJob: false, Prev: err}:
 				}
 				return
 			}
 			wgActiveBatches.Done()
 		}
 	}
 }
--- a/internal/app/etl/extractor/postgres.go
+++ b/internal/app/etl/extractor/postgres.go
@@ -0,0 +1,127 @@
 package extractor
 import (
 	"context"
 	"errors"
 	"fmt"
 	"strings"
 	"sync"
 	"sync/atomic"
 	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
 	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
 	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
 	"github.com/google/uuid"
 	"github.com/jackc/pgx/v5/pgxpool"
 )
 type PostgresExtractor struct {
 	db *pgxpool.Pool
 }
 func NewPostgresExtractor(pool *pgxpool.Pool) *PostgresExtractor {
 	return &PostgresExtractor{db: pool}
 }
 func buildExtractQueryPostgres(sourceDbInfo config.SourceTableInfo, columns []models.ColumnType) string {
 	var sbColumns strings.Builder
 	if len(columns) == 0 {
 		sbColumns.WriteString("*")
 	} else {
 		for i, col := range columns {
 			if col.Type() == "GEOMETRY" {
 				sbColumns.WriteString(`ST_AsEWKB("`)
 				sbColumns.WriteString(col.Name())
 				sbColumns.WriteString(`") AS "`)
 				sbColumns.WriteString(col.Name())
 				sbColumns.WriteString(`"`)
 			} else {
 				sbColumns.WriteString(`"`)
 				sbColumns.WriteString(col.Name())
 				sbColumns.WriteString(`"`)
 			}
 			if i < len(columns)-1 {
 				sbColumns.WriteString(", ")
 			}
 		}
 	}
 	return fmt.Sprintf(`SELECT %s FROM "%s"."%s"  ORDER BY "%s" ASC`, sbColumns.String(), sourceDbInfo.Schema, sourceDbInfo.Table, sourceDbInfo.PrimaryKey)
 }
 func (postgresEx *PostgresExtractor) ProcessBatch(
 	ctx context.Context,
 	tableInfo config.SourceTableInfo,
 	columns []models.ColumnType,
 	chunkSize int,
 	batch models.Batch,
 	indexPrimaryKey int,
 	chChunksOut chan<- models.Chunk,
 	rowsRead *int64,
 ) error {
 	query := buildExtractQueryPostgres(tableInfo, columns)
 	if batch.ShouldUseRange {
 		return errors.New("Batch config not yet supported")
 	}
 	rows, err := postgresEx.db.Query(ctx, query)
 	if err != nil {
 		return &custom_errors.ExtractorError{Batch: batch, HasLastId: false, Msg: err.Error()}
 	}
 	defer rows.Close()
 	rowsChunk := make([]models.UnknownRowValues, 0, chunkSize)
 	for rows.Next() {
 		values, err := rows.Values()
 		if err != nil {
 			return errors.New("Unexpected error reading rows from source")
 		}
 		rowsChunk = append(rowsChunk, values)
 		if len(rowsChunk) >= chunkSize {
 			select {
 			case chChunksOut <- models.Chunk{Id: uuid.New(), BatchId: batch.Id, Data: rowsChunk, RetryCounter: 0}:
 			case <-ctx.Done():
 				return nil
 			}
 			atomic.AddInt64(rowsRead, int64(len(rowsChunk)))
 			rowsChunk = make([]models.UnknownRowValues, 0, chunkSize)
 		}
 	}
 	if err := rows.Err(); err != nil {
 		return errors.New("Unexpected error reading rows from source")
 	}
 	if len(rowsChunk) > 0 {
 		select {
 		case chChunksOut <- models.Chunk{Id: uuid.New(), BatchId: batch.Id, Data: rowsChunk, RetryCounter: 0}:
 		case <-ctx.Done():
 			return nil
 		}
 		atomic.AddInt64(rowsRead, int64(len(rowsChunk)))
 	}
 	return nil
 }
 func (postgresEx *PostgresExtractor) Exec(
 	ctx context.Context,
 	tableInfo config.SourceTableInfo,
 	columns []models.ColumnType,
 	chunkSize int,
 	chBatchesIn <-chan models.Batch,
 	chChunksOut chan<- models.Chunk,
 	chErrorsOut chan<- custom_errors.ExtractorError,
 	chJobErrorsOut chan<- custom_errors.JobError,
 	wgActiveBatches *sync.WaitGroup,
 	rowsRead *int64,
 ) {
 }
--- a/internal/app/models/colum-type.go
+++ b/internal/app/models/colum-type.go
@@ -0,0 +1,44 @@
 package models
 type ColumnType struct {
 	name string
 	hasMaxLength      bool
 	hasPrecisionScale bool
 	userType    string
 	systemType  string
 	unifiedType string
 	nullable    bool
 	maxLength   int64
 	precision   int64
 	scale       int64
 }
 func (c *ColumnType) Name() string {
 	return c.name
 }
 func (c *ColumnType) UserType() string {
 	return c.userType
 }
 func (c *ColumnType) SystemType() string {
 	return c.systemType
 }
 func (c *ColumnType) Length() (length int64, ok bool) {
 	return c.maxLength, c.hasMaxLength
 }
 func (c *ColumnType) DecimalSize() (precision, scale int64, ok bool) {
 	return c.precision, c.scale, c.hasPrecisionScale
 }
 func (c *ColumnType) Nullable() bool {
 	return c.nullable
 }
 func (c *ColumnType) Type() string {
 	return c.unifiedType
 }
--- a/internal/app/models/main.go
+++ b/internal/app/models/main.go
@@ -0,0 +1,22 @@
 package models
 import "github.com/google/uuid"
 type UnknownRowValues = []any
 type Chunk struct {
 	Id           uuid.UUID
 	BatchId      uuid.UUID
 	Data         []UnknownRowValues
 	RetryCounter int
 }
 type Batch struct {
 	Id                    uuid.UUID
 	ParentId              uuid.UUID
 	LowerLimit            int64
 	UpperLimit            int64
 	IsLowerLimitInclusive bool
 	ShouldUseRange        bool
 	RetryCounter          int
 }
--- a/scripts/config-parser/main.go
+++ b/scripts/config-parser/main.go
@@ -0,0 +1,17 @@
 package main
 import (
 	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
 	log "github.com/sirupsen/logrus"
 )
 func main() {
 	log.SetLevel(log.DebugLevel)
 	migrationConfig, err := config.ReadMigrationConfig()
 	if err != nil {
 		log.Fatalf("error leyendo configuracion: %v", err)
 	}
 	log.Debugf("Config: %+v", migrationConfig)
 }
Author	SHA1	Message	Date
Kylesoda	c2ea84bfcf	feat: implement extractor error handling and batch processing for MSSQL and Postgres	2026-04-10 19:06:41 -05:00
Kylesoda	6345a0d694	feat: enhance migration job processing with detailed metrics and error handling	2026-04-09 21:55:19 -05:00
Kylesoda	1db35c796c	feat: enhance migration job processing with parallel execution and improved logging	2026-04-09 20:02:04 -05:00
Kylesoda	0d9f955b2f	feat: enhance batch processing by adding rowsPerBatch parameter and improving logging messages	2026-04-09 19:46:45 -05:00
Kylesoda	524d892a60	feat: refactor migration job structure to use SourceTableInfo and TargetTableInfo for improved configuration handling	2026-04-09 19:20:50 -05:00
Kylesoda	e8ace6ecf9	feat: implement job configuration structure and YAML parsing for migration jobs	2026-04-09 18:12:11 -05:00
Kylesoda	adbc962464	feat: add configuration parsing and job management in YAML format	2026-04-09 17:47:58 -05:00