feat: implement MSSQL extractor, transformer, and Postgres loader for enhanced data migration

This commit is contained in:
2026-04-10 23:39:37 -05:00
parent 1be7018ba3
commit cd0e53b1d2
14 changed files with 135 additions and 135 deletions

View File

@@ -7,6 +7,9 @@ import (
"time" "time"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/extractors"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/loaders"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/transformers"
"github.com/jackc/pgx/v5/pgxpool" "github.com/jackc/pgx/v5/pgxpool"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
) )
@@ -87,11 +90,24 @@ func processMigrationJobs(
chJobs := make(chan config.Job, len(jobs)) chJobs := make(chan config.Job, len(jobs))
var wgJobs sync.WaitGroup var wgJobs sync.WaitGroup
extractor := extractors.NewMssqlExtractor(sourceDb)
transformer := transformers.NewMssqlTransformer()
loader := loaders.NewPostgresLoader(targetDb)
for i := range maxParallelWorkers { for i := range maxParallelWorkers {
wgJobs.Go(func() { wgJobs.Go(func() {
for job := range chJobs { for job := range chJobs {
log.Infof("[worker %d] >>> Processing job: %s.%s <<<", i, job.SourceTable.Schema, job.SourceTable.Table) log.Infof("[worker %d] >>> Processing job: %s.%s <<<", i, job.SourceTable.Schema, job.SourceTable.Table)
res := processMigrationJob(ctx, sourceDb, targetDb, job) res := processMigrationJob(
ctx,
sourceDb,
targetDb,
extractor,
transformer,
loader,
job,
)
chJobResults <- res chJobResults <- res
} }
}) })

View File

@@ -9,12 +9,9 @@ import (
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/extractor" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/loader"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/transformer"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
"github.com/jackc/pgx/v5/pgxpool" "github.com/jackc/pgx/v5/pgxpool"
_ "github.com/microsoft/go-mssqldb" _ "github.com/microsoft/go-mssqldb"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
) )
@@ -23,6 +20,9 @@ func processMigrationJob(
ctx context.Context, ctx context.Context,
sourceDb *sql.DB, sourceDb *sql.DB,
targetDb *pgxpool.Pool, targetDb *pgxpool.Pool,
extractor etl.Extractor,
transformer etl.Transformer,
loader etl.Loader,
job config.Job, job config.Job,
) JobResult { ) JobResult {
result := JobResult{ result := JobResult{
@@ -62,10 +62,6 @@ func processMigrationJob(
var wgTransformers sync.WaitGroup var wgTransformers sync.WaitGroup
var wgLoaders sync.WaitGroup var wgLoaders sync.WaitGroup
mssqlExtractor := extractor.NewMssqlExtractor(sourceDb)
mssqlToPostgresTransformer := transformer.NewMssqlTransformer()
postgresLoader := loader.NewPostgresLoader(targetDb)
go func() { go func() {
if err := custom_errors.JobErrorHandler(jobCtx, chJobErrors); err != nil { if err := custom_errors.JobErrorHandler(jobCtx, chJobErrors); err != nil {
cancel() cancel()
@@ -81,7 +77,7 @@ func processMigrationJob(
for range maxExtractors { for range maxExtractors {
wgExtractors.Go(func() { wgExtractors.Go(func() {
mssqlExtractor.Exec( extractor.Exec(
jobCtx, jobCtx,
job.SourceTable, job.SourceTable,
sourceColTypes, sourceColTypes,
@@ -107,7 +103,7 @@ func processMigrationJob(
for range maxExtractors { for range maxExtractors {
wgTransformers.Go(func() { wgTransformers.Go(func() {
mssqlToPostgresTransformer.Exec( transformer.Exec(
jobCtx, jobCtx,
sourceColTypes, sourceColTypes,
chChunksRaw, chChunksRaw,
@@ -122,7 +118,7 @@ func processMigrationJob(
for range job.MaxLoaders { for range job.MaxLoaders {
wgLoaders.Go(func() { wgLoaders.Go(func() {
postgresLoader.Exec( loader.Exec(
jobCtx, jobCtx,
job.TargetTable, job.TargetTable,
targetColTypes, targetColTypes,

View File

@@ -1,36 +0,0 @@
package extractor
import (
"context"
"sync"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
)
// Extractor reads rows from a source database table and emits them as
// chunks for downstream transformation.
type Extractor interface {
	// ProcessBatch reads the rows belonging to a single batch from the
	// source table and sends them on chChunksOut in chunks of at most
	// chunkSize rows, accumulating the number of rows read into *rowsRead.
	// indexPrimaryKey is the primary-key column's position within columns
	// — presumably used to bound the batch scan; confirm in implementations.
	ProcessBatch(
		ctx context.Context,
		tableInfo config.SourceTableInfo,
		columns []models.ColumnType,
		chunkSize int,
		batch models.Batch,
		indexPrimaryKey int,
		chChunksOut chan<- models.Chunk,
		rowsRead *int64,
	) error

	// Exec is the extractor worker loop: it consumes batches from
	// chBatchesIn, forwarding chunks to chChunksOut, per-batch failures to
	// chErrorsOut, and job-fatal failures to chJobErrorsOut.
	// wgActiveBatches presumably tracks in-flight batches so the caller can
	// wait for the pipeline to drain — confirm against callers.
	Exec(
		ctx context.Context,
		tableInfo config.SourceTableInfo,
		columns []models.ColumnType,
		chunkSize int,
		chBatchesIn <-chan models.Batch,
		chChunksOut chan<- models.Chunk,
		chErrorsOut chan<- custom_errors.ExtractorError,
		chJobErrorsOut chan<- custom_errors.JobError,
		wgActiveBatches *sync.WaitGroup,
		rowsRead *int64,
	)
}

View File

@@ -1,4 +1,4 @@
package extractor package extractors
import ( import (
"context" "context"
@@ -13,6 +13,7 @@ import (
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/convert" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/convert"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
"github.com/google/uuid" "github.com/google/uuid"
) )
@@ -21,7 +22,7 @@ type MssqlExtractor struct {
db *sql.DB db *sql.DB
} }
func NewMssqlExtractor(db *sql.DB) Extractor { func NewMssqlExtractor(db *sql.DB) etl.Extractor {
return &MssqlExtractor{db: db} return &MssqlExtractor{db: db}
} }

View File

@@ -1,4 +1,4 @@
package extractor package extractors
import ( import (
"context" "context"
@@ -10,6 +10,7 @@ import (
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
"github.com/google/uuid" "github.com/google/uuid"
"github.com/jackc/pgx/v5/pgxpool" "github.com/jackc/pgx/v5/pgxpool"
@@ -19,7 +20,7 @@ type PostgresExtractor struct {
db *pgxpool.Pool db *pgxpool.Pool
} }
func NewPostgresExtractor(pool *pgxpool.Pool) Extractor { func NewPostgresExtractor(pool *pgxpool.Pool) etl.Extractor {
return &PostgresExtractor{db: pool} return &PostgresExtractor{db: pool}
} }

View File

@@ -0,0 +1 @@
package extractors

View File

@@ -1,30 +0,0 @@
package loader
import (
"context"
"sync"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
)
// Loader writes transformed chunks into the target database table.
type Loader interface {
	// ProcessChunk writes a single chunk into the target table using the
	// given column names, returning an int count (presumably rows written
	// — confirm in implementations) and any error.
	ProcessChunk(
		ctx context.Context,
		tableInfo config.TargetTableInfo,
		colNames []string,
		chunk models.Chunk,
	) (int, error)

	// Exec is the loader worker loop: it consumes chunks from chChunksIn,
	// reporting chunk-level failures on chErrorsOut and job-fatal failures
	// on chJobErrorsOut, and accumulating loaded rows into *rowsLoaded.
	// wgActiveChunks presumably tracks in-flight chunks for pipeline
	// drain — confirm against callers.
	Exec(
		ctx context.Context,
		tableInfo config.TargetTableInfo,
		columns []models.ColumnType,
		chChunksIn <-chan models.Chunk,
		chErrorsOut chan<- custom_errors.LoaderError,
		chJobErrorsOut chan<- custom_errors.JobError,
		wgActiveChunks *sync.WaitGroup,
		rowsLoaded *int64,
	)
}

View File

@@ -1,4 +1,4 @@
package loader package loaders
import ( import (
"context" "context"
@@ -9,6 +9,7 @@ import (
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
"github.com/jackc/pgx/v5" "github.com/jackc/pgx/v5"
"github.com/jackc/pgx/v5/pgconn" "github.com/jackc/pgx/v5/pgconn"
@@ -19,7 +20,7 @@ type PostgresLoader struct {
db *pgxpool.Pool db *pgxpool.Pool
} }
func NewPostgresLoader(pool *pgxpool.Pool) Loader { func NewPostgresLoader(pool *pgxpool.Pool) etl.Loader {
return &PostgresLoader{db: pool} return &PostgresLoader{db: pool}
} }

View File

@@ -0,0 +1 @@
package loaders

View File

@@ -1,33 +0,0 @@
package transformer
import (
"context"
"sync"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
)
// transformerFunc converts a single column value into the representation
// the target database expects.
type transformerFunc func(any) (any, error)

// columnTransformPlan binds a transformerFunc to the index of the column
// it applies to within each row's value slice.
type columnTransformPlan struct {
	index int             // column position in each row of a chunk
	fn    transformerFunc // conversion applied to the value at index
}

// Transformer rewrites column values in extracted chunks before loading.
type Transformer interface {
	// ProcessChunk applies transformationPlan to chunk in place and
	// returns the first transformation error, if any.
	ProcessChunk(
		ctx context.Context,
		chunk *models.Chunk,
		transformationPlan []columnTransformPlan,
	) error

	// Exec is the transformer worker loop: it consumes chunks from
	// chChunksIn, transforms them, and forwards them on chChunksOut;
	// fatal failures go to chJobErrorsOut. wgActiveChunks presumably
	// tracks in-flight chunks for pipeline drain — confirm against callers.
	Exec(
		ctx context.Context,
		columns []models.ColumnType,
		chChunksIn <-chan models.Chunk,
		chChunksOut chan<- models.Chunk,
		chJobErrorsOut chan<- custom_errors.JobError,
		wgActiveChunks *sync.WaitGroup,
	)
}

View File

@@ -1,4 +1,4 @@
package transformer package transformers
import ( import (
"context" "context"
@@ -7,24 +7,25 @@ import (
"time" "time"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
) )
type MssqlTransformer struct{} type MssqlTransformer struct{}
func NewMssqlTransformer() Transformer { func NewMssqlTransformer() etl.Transformer {
return &MssqlTransformer{} return &MssqlTransformer{}
} }
func computeTransformationPlan(columns []models.ColumnType) []columnTransformPlan { func computeTransformationPlan(columns []models.ColumnType) []etl.ColumnTransformPlan {
var plan []columnTransformPlan var plan []etl.ColumnTransformPlan
for i, col := range columns { for i, col := range columns {
switch col.SystemType() { switch col.SystemType() {
case "uniqueidentifier": case "uniqueidentifier":
plan = append(plan, columnTransformPlan{ plan = append(plan, etl.ColumnTransformPlan{
index: i, Index: i,
fn: func(v any) (any, error) { Fn: func(v any) (any, error) {
if b, ok := v.([]byte); ok && b != nil { if b, ok := v.([]byte); ok && b != nil {
return mssqlUuidToBigEndian(b) return mssqlUuidToBigEndian(b)
} }
@@ -33,9 +34,9 @@ func computeTransformationPlan(columns []models.ColumnType) []columnTransformPla
}) })
case "geometry", "geography": case "geometry", "geography":
plan = append(plan, columnTransformPlan{ plan = append(plan, etl.ColumnTransformPlan{
index: i, Index: i,
fn: func(v any) (any, error) { Fn: func(v any) (any, error) {
if b, ok := v.([]byte); ok && b != nil { if b, ok := v.([]byte); ok && b != nil {
return wkbToEwkbWithSrid(b, 4326) return wkbToEwkbWithSrid(b, 4326)
} }
@@ -44,9 +45,9 @@ func computeTransformationPlan(columns []models.ColumnType) []columnTransformPla
}) })
case "datetime", "datetime2": case "datetime", "datetime2":
plan = append(plan, columnTransformPlan{ plan = append(plan, etl.ColumnTransformPlan{
index: i, Index: i,
fn: func(v any) (any, error) { Fn: func(v any) (any, error) {
if t, ok := v.(time.Time); ok { if t, ok := v.(time.Time); ok {
return ensureUTC(t), nil return ensureUTC(t), nil
} }
@@ -64,7 +65,7 @@ const processChunkCtxCheck = 4096
func (mssqlTr *MssqlTransformer) ProcessChunk( func (mssqlTr *MssqlTransformer) ProcessChunk(
ctx context.Context, ctx context.Context,
chunk *models.Chunk, chunk *models.Chunk,
transformationPlan []columnTransformPlan, transformationPlan []etl.ColumnTransformPlan,
) error { ) error {
for i, rowValues := range chunk.Data { for i, rowValues := range chunk.Data {
if i%processChunkCtxCheck == 0 { if i%processChunkCtxCheck == 0 {
@@ -74,16 +75,16 @@ func (mssqlTr *MssqlTransformer) ProcessChunk(
} }
for _, task := range transformationPlan { for _, task := range transformationPlan {
val := rowValues[task.index] val := rowValues[task.Index]
if val == nil { if val == nil {
continue continue
} }
transformed, err := task.fn(val) transformed, err := task.Fn(val)
if err != nil { if err != nil {
return err return err
} }
rowValues[task.index] = transformed rowValues[task.Index] = transformed
} }
} }

View File

@@ -0,0 +1 @@
package transformers

View File

@@ -1,4 +1,4 @@
package transformer package transformers
import ( import (
"encoding/binary" "encoding/binary"

80
internal/app/etl/types.go Normal file
View File

@@ -0,0 +1,80 @@
package etl
import (
"context"
"sync"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
)
// Extractor reads rows from a source database table and emits them as
// chunks for downstream transformation. Known implementations:
// extractors.MssqlExtractor and extractors.PostgresExtractor.
type Extractor interface {
	// ProcessBatch reads the rows belonging to a single batch from the
	// source table and sends them on chChunksOut in chunks of at most
	// chunkSize rows, accumulating the number of rows read into *rowsRead.
	// indexPrimaryKey is the primary-key column's position within columns
	// — presumably used to bound the batch scan; confirm in implementations.
	ProcessBatch(
		ctx context.Context,
		tableInfo config.SourceTableInfo,
		columns []models.ColumnType,
		chunkSize int,
		batch models.Batch,
		indexPrimaryKey int,
		chChunksOut chan<- models.Chunk,
		rowsRead *int64,
	) error

	// Exec is the extractor worker loop run by processMigrationJob: it
	// consumes batches from chBatchesIn, forwarding chunks to chChunksOut,
	// per-batch failures to chErrorsOut, and job-fatal failures to
	// chJobErrorsOut. wgActiveBatches presumably tracks in-flight batches
	// so the caller can wait for the pipeline to drain — confirm against
	// callers.
	Exec(
		ctx context.Context,
		tableInfo config.SourceTableInfo,
		columns []models.ColumnType,
		chunkSize int,
		chBatchesIn <-chan models.Batch,
		chChunksOut chan<- models.Chunk,
		chErrorsOut chan<- custom_errors.ExtractorError,
		chJobErrorsOut chan<- custom_errors.JobError,
		wgActiveBatches *sync.WaitGroup,
		rowsRead *int64,
	)
}
// TransformerFunc converts a single column value into the representation
// the target database expects (e.g. MSSQL uniqueidentifier bytes to a
// big-endian UUID — see MssqlTransformer).
type TransformerFunc func(any) (any, error)

// ColumnTransformPlan binds a TransformerFunc to the index of the column
// it applies to within each row's value slice. Nil values at Index are
// skipped by callers (see MssqlTransformer.ProcessChunk).
type ColumnTransformPlan struct {
	Index int             // column position in each row of a chunk
	Fn    TransformerFunc // conversion applied to the value at Index
}
// Transformer rewrites column values in extracted chunks into the form the
// target database expects. Known implementation: transformers.MssqlTransformer
// (uniqueidentifier, geometry/geography, and datetime conversions).
type Transformer interface {
	// ProcessChunk applies transformationPlan to chunk in place: for each
	// planned column, non-nil values are replaced by Fn's result. It checks
	// ctx periodically during long chunks and returns the first
	// transformation error encountered.
	ProcessChunk(
		ctx context.Context,
		chunk *models.Chunk,
		transformationPlan []ColumnTransformPlan,
	) error

	// Exec is the transformer worker loop: it consumes chunks from
	// chChunksIn, transforms them, and forwards them on chChunksOut;
	// job-fatal failures go to chJobErrorsOut. wgActiveChunks presumably
	// tracks in-flight chunks for pipeline drain — confirm against callers.
	Exec(
		ctx context.Context,
		columns []models.ColumnType,
		chChunksIn <-chan models.Chunk,
		chChunksOut chan<- models.Chunk,
		chJobErrorsOut chan<- custom_errors.JobError,
		wgActiveChunks *sync.WaitGroup,
	)
}
// Loader writes transformed chunks into the target database table.
// Known implementation: loaders.PostgresLoader.
type Loader interface {
	// ProcessChunk writes a single chunk into the target table using the
	// given column names, returning an int count (presumably rows written
	// — confirm in implementations) and any error.
	ProcessChunk(
		ctx context.Context,
		tableInfo config.TargetTableInfo,
		colNames []string,
		chunk models.Chunk,
	) (int, error)

	// Exec is the loader worker loop: it consumes chunks from chChunksIn,
	// reporting chunk-level failures on chErrorsOut and job-fatal failures
	// on chJobErrorsOut, and accumulating loaded rows into *rowsLoaded.
	// wgActiveChunks presumably tracks in-flight chunks for pipeline
	// drain — confirm against callers.
	Exec(
		ctx context.Context,
		tableInfo config.TargetTableInfo,
		columns []models.ColumnType,
		chChunksIn <-chan models.Chunk,
		chErrorsOut chan<- custom_errors.LoaderError,
		chJobErrorsOut chan<- custom_errors.JobError,
		wgActiveChunks *sync.WaitGroup,
		rowsLoaded *int64,
	)
}