feat: add MSSQL extractor and transformer implementations for improved data migration

This commit is contained in:
2026-04-10 19:59:44 -05:00
parent eb3c3bbfce
commit 053e6bd673
5 changed files with 114 additions and 74 deletions

View File

@@ -10,6 +10,7 @@ import (
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/extractor" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/extractor"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/transformer"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
"github.com/jackc/pgx/v5/pgxpool" "github.com/jackc/pgx/v5/pgxpool"
@@ -60,6 +61,9 @@ func processMigrationJob(
var wgTransformers sync.WaitGroup var wgTransformers sync.WaitGroup
var wgLoaders sync.WaitGroup var wgLoaders sync.WaitGroup
mssqlExtractor := extractor.NewMssqlExtractor(sourceDb)
mssqlToPostgresTransformer := transformer.NewMssqlTransformer()
go func() { go func() {
if err := custom_errors.JobErrorHandler(jobCtx, chJobErrors); err != nil { if err := custom_errors.JobErrorHandler(jobCtx, chJobErrors); err != nil {
cancel() cancel()
@@ -73,11 +77,9 @@ func processMigrationJob(
maxExtractors := min(job.MaxExtractors, len(batches)) maxExtractors := min(job.MaxExtractors, len(batches))
log.Infof("Starting %d extractor(s)...", maxExtractors) log.Infof("Starting %d extractor(s)...", maxExtractors)
exMssql := extractor.NewMssqlExtractor(sourceDb)
for range maxExtractors { for range maxExtractors {
wgExtractors.Go(func() { wgExtractors.Go(func() {
exMssql.Exec( mssqlExtractor.Exec(
jobCtx, jobCtx,
job.SourceTable, job.SourceTable,
sourceColTypes, sourceColTypes,
@@ -103,7 +105,14 @@ func processMigrationJob(
for range maxExtractors { for range maxExtractors {
wgTransformers.Go(func() { wgTransformers.Go(func() {
transformRowsMssql(jobCtx, sourceColTypes, chChunksRaw, chChunksTransformed, chJobErrors, &wgActiveChunks) mssqlToPostgresTransformer.Exec(
jobCtx,
sourceColTypes,
chChunksRaw,
chChunksTransformed,
chJobErrors,
&wgActiveChunks,
)
}) })
} }

View File

@@ -1,4 +1,4 @@
package main package transformer
import ( import (
"context" "context"
@@ -8,76 +8,12 @@ import (
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
log "github.com/sirupsen/logrus"
) )
type transformerFunc func(any) (any, error) type MssqlTransformer struct{}
type columnTransformPlan struct { func NewMssqlTransformer() *MssqlTransformer {
index int return &MssqlTransformer{}
fn transformerFunc
}
func transformRowsMssql(
ctx context.Context,
columns []models.ColumnType,
chChunksIn <-chan models.Chunk,
chChunksOut chan<- models.Chunk,
chJobErrorsOut chan<- custom_errors.JobError,
wgActiveChunks *sync.WaitGroup,
) {
transformationPlan := computeTransformationPlan(columns)
for {
if ctx.Err() != nil {
return
}
select {
case <-ctx.Done():
return
case chunk, ok := <-chChunksIn:
if !ok {
return
}
if len(transformationPlan) == 0 {
select {
case chChunksOut <- chunk:
wgActiveChunks.Add(1)
continue
case <-ctx.Done():
return
}
}
chunkStartTime := time.Now()
err := processChunk(ctx, &chunk, transformationPlan)
if err != nil {
if errors.Is(err, ctx.Err()) {
return
}
select {
case chJobErrorsOut <- custom_errors.JobError{ShouldCancelJob: true, Msg: "Transformation failed", Prev: err}:
case <-ctx.Done():
}
return
}
log.Infof("Transformed chunk %s: %d rows in %v", chunk.Id, len(chunk.Data), time.Since(chunkStartTime))
select {
case chChunksOut <- chunk:
case <-ctx.Done():
return
}
wgActiveChunks.Add(1)
}
}
} }
func computeTransformationPlan(columns []models.ColumnType) []columnTransformPlan { func computeTransformationPlan(columns []models.ColumnType) []columnTransformPlan {
@@ -125,7 +61,11 @@ func computeTransformationPlan(columns []models.ColumnType) []columnTransformPla
const processChunkCtxCheck = 4096 const processChunkCtxCheck = 4096
func processChunk(ctx context.Context, chunk *models.Chunk, transformationPlan []columnTransformPlan) error { func (mssqlTr *MssqlTransformer) ProcessChunk(
ctx context.Context,
chunk *models.Chunk,
transformationPlan []columnTransformPlan,
) error {
for i, rowValues := range chunk.Data { for i, rowValues := range chunk.Data {
if i%processChunkCtxCheck == 0 { if i%processChunkCtxCheck == 0 {
if err := ctx.Err(); err != nil { if err := ctx.Err(); err != nil {
@@ -149,3 +89,61 @@ func processChunk(ctx context.Context, chunk *models.Chunk, transformationPlan [
return nil return nil
} }
func (mssqlTr *MssqlTransformer) Exec(
ctx context.Context,
columns []models.ColumnType,
chChunksIn <-chan models.Chunk,
chChunksOut chan<- models.Chunk,
chJobErrorsOut chan<- custom_errors.JobError,
wgActiveChunks *sync.WaitGroup,
) {
transformationPlan := computeTransformationPlan(columns)
for {
if ctx.Err() != nil {
return
}
select {
case <-ctx.Done():
return
case chunk, ok := <-chChunksIn:
if !ok {
return
}
if len(transformationPlan) == 0 {
select {
case chChunksOut <- chunk:
wgActiveChunks.Add(1)
continue
case <-ctx.Done():
return
}
}
err := mssqlTr.ProcessChunk(ctx, &chunk, transformationPlan)
if err != nil {
if errors.Is(err, ctx.Err()) {
return
}
select {
case chJobErrorsOut <- custom_errors.JobError{ShouldCancelJob: true, Msg: "Transformation failed", Prev: err}:
case <-ctx.Done():
}
return
}
select {
case chChunksOut <- chunk:
case <-ctx.Done():
return
}
wgActiveChunks.Add(1)
}
}
}

View File

@@ -0,0 +1,33 @@
package transformer
import (
"context"
"sync"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
)
type transformerFunc func(any) (any, error)
type columnTransformPlan struct {
index int
fn transformerFunc
}
type Transformer interface {
ProcessChunk(
ctx context.Context,
chunk *models.Chunk,
transformationPlan []columnTransformPlan,
) error
Exec(
ctx context.Context,
columns []models.ColumnType,
chChunksIn <-chan models.Chunk,
chChunksOut chan<- models.Chunk,
chJobErrorsOut chan<- custom_errors.JobError,
wgActiveChunks *sync.WaitGroup,
)
}

View File

@@ -1,4 +1,4 @@
package main package transformer
import ( import (
"encoding/binary" "encoding/binary"