feat: implement extractor error handling and batch processing for MSSQL and Postgres

2026-04-10 19:06:41 -05:00
parent 6345a0d694
commit c2ea84bfcf
8 changed files with 643 additions and 0 deletions

@@ -0,0 +1,127 @@
package extractor

import (
	"context"
	"errors"
	"fmt"
	"strings"
	"sync"
	"sync/atomic"

	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"

	"github.com/google/uuid"
	"github.com/jackc/pgx/v5/pgxpool"
)

// PostgresExtractor streams rows out of a source Postgres database for the migration pipeline.
type PostgresExtractor struct {
	db *pgxpool.Pool
}

// NewPostgresExtractor returns an extractor backed by the given pgx connection pool.
func NewPostgresExtractor(pool *pgxpool.Pool) *PostgresExtractor {
	return &PostgresExtractor{db: pool}
}
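
// Construction sketch: illustrative only, not code from this commit. The connection
// string and error handling below are placeholders.
//
//	pool, err := pgxpool.New(ctx, "postgres://user:pass@host:5432/sourcedb")
//	if err != nil {
//		return err
//	}
//	defer pool.Close()
//	ex := NewPostgresExtractor(pool)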

// buildExtractQueryPostgres builds the SELECT statement for a source table. All
// identifiers are double-quoted, GEOMETRY columns are wrapped in ST_AsEWKB so they
// come back as EWKB bytes, and rows are ordered by the primary key so batches are
// read in a deterministic order.
func buildExtractQueryPostgres(sourceDbInfo config.SourceTableInfo, columns []models.ColumnType) string {
	var sbColumns strings.Builder
	if len(columns) == 0 {
		sbColumns.WriteString("*")
	} else {
		for i, col := range columns {
			if col.Type() == "GEOMETRY" {
				sbColumns.WriteString(`ST_AsEWKB("`)
				sbColumns.WriteString(col.Name())
				sbColumns.WriteString(`") AS "`)
				sbColumns.WriteString(col.Name())
				sbColumns.WriteString(`"`)
			} else {
				sbColumns.WriteString(`"`)
				sbColumns.WriteString(col.Name())
				sbColumns.WriteString(`"`)
			}
			if i < len(columns)-1 {
				sbColumns.WriteString(", ")
			}
		}
	}
	return fmt.Sprintf(`SELECT %s FROM "%s"."%s" ORDER BY "%s" ASC`, sbColumns.String(), sourceDbInfo.Schema, sourceDbInfo.Table, sourceDbInfo.PrimaryKey)
}
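
// Query shape produced by buildExtractQueryPostgres, using hypothetical identifiers
// (schema "public", table "roads", primary key "id", and a GEOMETRY column "geom"):
//
//	SELECT "id", ST_AsEWKB("geom") AS "geom" FROM "public"."roads" ORDER BY "id" ASC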

// ProcessBatch runs the extract query for a single batch and streams the result set
// to chChunksOut in chunks of chunkSize rows. The running row count is reported via
// rowsRead so progress can be observed concurrently. A nil error is returned when the
// context is cancelled, since cancellation is handled by the caller.
func (postgresEx *PostgresExtractor) ProcessBatch(
	ctx context.Context,
	tableInfo config.SourceTableInfo,
	columns []models.ColumnType,
	chunkSize int,
	batch models.Batch,
	indexPrimaryKey int,
	chChunksOut chan<- models.Chunk,
	rowsRead *int64,
) error {
	query := buildExtractQueryPostgres(tableInfo, columns)
	if batch.ShouldUseRange {
		return errors.New("batch range config not yet supported")
	}

	rows, err := postgresEx.db.Query(ctx, query)
	if err != nil {
		return &custom_errors.ExtractorError{Batch: batch, HasLastId: false, Msg: err.Error()}
	}
	defer rows.Close()

	rowsChunk := make([]models.UnknownRowValues, 0, chunkSize)
	for rows.Next() {
		values, err := rows.Values()
		if err != nil {
			return fmt.Errorf("unexpected error reading rows from source: %w", err)
		}
		rowsChunk = append(rowsChunk, values)

		// Flush a full chunk downstream, then start accumulating a fresh one.
		if len(rowsChunk) >= chunkSize {
			select {
			case chChunksOut <- models.Chunk{Id: uuid.New(), BatchId: batch.Id, Data: rowsChunk, RetryCounter: 0}:
			case <-ctx.Done():
				return nil
			}
			atomic.AddInt64(rowsRead, int64(len(rowsChunk)))
			rowsChunk = make([]models.UnknownRowValues, 0, chunkSize)
		}
	}
	if err := rows.Err(); err != nil {
		return fmt.Errorf("unexpected error reading rows from source: %w", err)
	}

	// Flush any remaining rows that did not fill a whole chunk.
	if len(rowsChunk) > 0 {
		select {
		case chChunksOut <- models.Chunk{Id: uuid.New(), BatchId: batch.Id, Data: rowsChunk, RetryCounter: 0}:
		case <-ctx.Done():
			return nil
		}
		atomic.AddInt64(rowsRead, int64(len(rowsChunk)))
	}
	return nil
}
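
// runSingleBatch is an illustrative sketch, not part of this commit: the function
// name, the chunk size, the zero-valued batch, and the 0 indexPrimaryKey are
// assumptions. It shows that ProcessBatch needs a concurrent reader on the chunk
// channel, because sends block until a consumer is ready or the context is cancelled.
func runSingleBatch(ctx context.Context, ex *PostgresExtractor, tableInfo config.SourceTableInfo, columns []models.ColumnType) error {
	chunks := make(chan models.Chunk, 4)
	var rowsRead int64

	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		for range chunks {
			// A real pipeline would hand each chunk to a transformer/loader stage here.
		}
	}()

	var batch models.Batch // zero value for illustration; a real caller populates the batch fields
	err := ex.ProcessBatch(ctx, tableInfo, columns, 1000, batch, 0, chunks, &rowsRead)
	close(chunks)
	wg.Wait()
	return err
}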

func (postgresEx *PostgresExtractor) Exec(
	ctx context.Context,
	tableInfo config.SourceTableInfo,
	columns []models.ColumnType,
	chunkSize int,
	chBatchesIn <-chan models.Batch,
	chChunksOut chan<- models.Chunk,
	chErrorsOut chan<- custom_errors.ExtractorError,
	chJobErrorsOut chan<- custom_errors.JobError,
	wgActiveBatches *sync.WaitGroup,
	rowsRead *int64,
) {
}
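
// exampleExec is an illustrative sketch, not part of this commit: the Exec body is
// empty in the excerpt above, so this only shows one plausible way the batch channel
// could drive ProcessBatch. The function name, the per-batch error handling, and the
// 0 indexPrimaryKey are assumptions.
func exampleExec(
	ctx context.Context,
	ex *PostgresExtractor,
	tableInfo config.SourceTableInfo,
	columns []models.ColumnType,
	chunkSize int,
	chBatchesIn <-chan models.Batch,
	chChunksOut chan<- models.Chunk,
	chErrorsOut chan<- custom_errors.ExtractorError,
	wgActiveBatches *sync.WaitGroup,
	rowsRead *int64,
) {
	for batch := range chBatchesIn {
		// Extract each batch in turn, streaming its rows out as chunks.
		if err := ex.ProcessBatch(ctx, tableInfo, columns, chunkSize, batch, 0, chChunksOut, rowsRead); err != nil {
			// Report the failed batch so it can be retried or logged upstream.
			chErrorsOut <- custom_errors.ExtractorError{Batch: batch, HasLastId: false, Msg: err.Error()}
		}
		// Mark this batch as no longer in flight.
		wgActiveBatches.Done()
	}
}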