feat: add MSSQL extractor and transformer implementations for improved data migration
This commit is contained in:
@@ -10,6 +10,7 @@ import (
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/extractor"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/transformer"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
|
||||
"github.com/jackc/pgx/v5/pgxpool"
|
||||
|
||||
@@ -60,6 +61,9 @@ func processMigrationJob(
|
||||
var wgTransformers sync.WaitGroup
|
||||
var wgLoaders sync.WaitGroup
|
||||
|
||||
mssqlExtractor := extractor.NewMssqlExtractor(sourceDb)
|
||||
mssqlToPostgresTransformer := transformer.NewMssqlTransformer()
|
||||
|
||||
go func() {
|
||||
if err := custom_errors.JobErrorHandler(jobCtx, chJobErrors); err != nil {
|
||||
cancel()
|
||||
@@ -73,11 +77,9 @@ func processMigrationJob(
|
||||
maxExtractors := min(job.MaxExtractors, len(batches))
|
||||
log.Infof("Starting %d extractor(s)...", maxExtractors)
|
||||
|
||||
exMssql := extractor.NewMssqlExtractor(sourceDb)
|
||||
|
||||
for range maxExtractors {
|
||||
wgExtractors.Go(func() {
|
||||
exMssql.Exec(
|
||||
mssqlExtractor.Exec(
|
||||
jobCtx,
|
||||
job.SourceTable,
|
||||
sourceColTypes,
|
||||
@@ -103,7 +105,14 @@ func processMigrationJob(
|
||||
|
||||
for range maxExtractors {
|
||||
wgTransformers.Go(func() {
|
||||
transformRowsMssql(jobCtx, sourceColTypes, chChunksRaw, chChunksTransformed, chJobErrors, &wgActiveChunks)
|
||||
mssqlToPostgresTransformer.Exec(
|
||||
jobCtx,
|
||||
sourceColTypes,
|
||||
chChunksRaw,
|
||||
chChunksTransformed,
|
||||
chJobErrors,
|
||||
&wgActiveChunks,
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
package main
|
||||
package transformer
|
||||
|
||||
import (
|
||||
"context"
|
||||
@@ -8,76 +8,12 @@ import (
|
||||
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type transformerFunc func(any) (any, error)
|
||||
// MssqlTransformer is the MSSQL implementation of the chunk transformer.
// It carries no state; its behavior lives in the ProcessChunk and Exec
// methods. Create one with NewMssqlTransformer.
type MssqlTransformer struct{}
|
||||
|
||||
type columnTransformPlan struct {
|
||||
index int
|
||||
fn transformerFunc
|
||||
}
|
||||
|
||||
func transformRowsMssql(
|
||||
ctx context.Context,
|
||||
columns []models.ColumnType,
|
||||
chChunksIn <-chan models.Chunk,
|
||||
chChunksOut chan<- models.Chunk,
|
||||
chJobErrorsOut chan<- custom_errors.JobError,
|
||||
wgActiveChunks *sync.WaitGroup,
|
||||
) {
|
||||
transformationPlan := computeTransformationPlan(columns)
|
||||
|
||||
for {
|
||||
if ctx.Err() != nil {
|
||||
return
|
||||
}
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
|
||||
case chunk, ok := <-chChunksIn:
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
|
||||
if len(transformationPlan) == 0 {
|
||||
select {
|
||||
case chChunksOut <- chunk:
|
||||
wgActiveChunks.Add(1)
|
||||
continue
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
chunkStartTime := time.Now()
|
||||
|
||||
err := processChunk(ctx, &chunk, transformationPlan)
|
||||
if err != nil {
|
||||
if errors.Is(err, ctx.Err()) {
|
||||
return
|
||||
}
|
||||
|
||||
select {
|
||||
case chJobErrorsOut <- custom_errors.JobError{ShouldCancelJob: true, Msg: "Transformation failed", Prev: err}:
|
||||
case <-ctx.Done():
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
log.Infof("Transformed chunk %s: %d rows in %v", chunk.Id, len(chunk.Data), time.Since(chunkStartTime))
|
||||
|
||||
select {
|
||||
case chChunksOut <- chunk:
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
|
||||
wgActiveChunks.Add(1)
|
||||
}
|
||||
}
|
||||
func NewMssqlTransformer() *MssqlTransformer {
|
||||
return &MssqlTransformer{}
|
||||
}
|
||||
|
||||
func computeTransformationPlan(columns []models.ColumnType) []columnTransformPlan {
|
||||
@@ -125,7 +61,11 @@ func computeTransformationPlan(columns []models.ColumnType) []columnTransformPla
|
||||
|
||||
// processChunkCtxCheck is the row interval at which ProcessChunk polls
// ctx.Err(): the context is checked whenever the row index is a multiple of
// this value, so cancellation is noticed without paying for a check on
// every row.
const processChunkCtxCheck = 4096
|
||||
|
||||
func processChunk(ctx context.Context, chunk *models.Chunk, transformationPlan []columnTransformPlan) error {
|
||||
func (mssqlTr *MssqlTransformer) ProcessChunk(
|
||||
ctx context.Context,
|
||||
chunk *models.Chunk,
|
||||
transformationPlan []columnTransformPlan,
|
||||
) error {
|
||||
for i, rowValues := range chunk.Data {
|
||||
if i%processChunkCtxCheck == 0 {
|
||||
if err := ctx.Err(); err != nil {
|
||||
@@ -149,3 +89,61 @@ func processChunk(ctx context.Context, chunk *models.Chunk, transformationPlan [
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (mssqlTr *MssqlTransformer) Exec(
|
||||
ctx context.Context,
|
||||
columns []models.ColumnType,
|
||||
chChunksIn <-chan models.Chunk,
|
||||
chChunksOut chan<- models.Chunk,
|
||||
chJobErrorsOut chan<- custom_errors.JobError,
|
||||
wgActiveChunks *sync.WaitGroup,
|
||||
) {
|
||||
transformationPlan := computeTransformationPlan(columns)
|
||||
|
||||
for {
|
||||
if ctx.Err() != nil {
|
||||
return
|
||||
}
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
|
||||
case chunk, ok := <-chChunksIn:
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
|
||||
if len(transformationPlan) == 0 {
|
||||
select {
|
||||
case chChunksOut <- chunk:
|
||||
wgActiveChunks.Add(1)
|
||||
continue
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
err := mssqlTr.ProcessChunk(ctx, &chunk, transformationPlan)
|
||||
if err != nil {
|
||||
if errors.Is(err, ctx.Err()) {
|
||||
return
|
||||
}
|
||||
|
||||
select {
|
||||
case chJobErrorsOut <- custom_errors.JobError{ShouldCancelJob: true, Msg: "Transformation failed", Prev: err}:
|
||||
case <-ctx.Done():
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
select {
|
||||
case chChunksOut <- chunk:
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
|
||||
wgActiveChunks.Add(1)
|
||||
}
|
||||
}
|
||||
}
|
||||
33
internal/app/etl/transformer/types.go
Normal file
33
internal/app/etl/transformer/types.go
Normal file
@@ -0,0 +1,33 @@
|
||||
package transformer
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
|
||||
)
|
||||
|
||||
// transformerFunc is the signature of a single-value conversion: it takes one
// value and returns the converted value or an error. It is the type of the fn
// field of columnTransformPlan.
type transformerFunc func(any) (any, error)
|
||||
|
||||
// columnTransformPlan pairs an integer index with the transformerFunc to use
// for it. Slices of this type are passed to ProcessChunk as the
// transformation plan.
// NOTE(review): presumably index is the column's position within a row and fn
// is applied to the value at that position; the code that consumes the plan is
// not visible here — confirm.
type columnTransformPlan struct {
|
||||
// index selects the entry this plan step applies to.
index int
|
||||
// fn is the conversion associated with index.
fn transformerFunc
|
||||
}
|
||||
|
||||
// Transformer is the contract for chunk transformers in this package.
// NOTE(review): ProcessChunk takes []columnTransformPlan, an unexported type,
// so code outside this package cannot name that parameter type — confirm
// whether external implementations are intended.
type Transformer interface {
|
||||
// ProcessChunk applies transformationPlan to chunk, which is passed by
// pointer, and returns an error on failure.
ProcessChunk(
|
||||
ctx context.Context,
|
||||
chunk *models.Chunk,
|
||||
transformationPlan []columnTransformPlan,
|
||||
) error
|
||||
|
||||
// Exec is the worker entry point: it is given the column types, a
// receive-only channel of input chunks, send-only channels for output
// chunks and job errors, and a WaitGroup tracking active chunks. It
// returns nothing; failures are expected to travel through chJobErrorsOut.
Exec(
|
||||
ctx context.Context,
|
||||
columns []models.ColumnType,
|
||||
chChunksIn <-chan models.Chunk,
|
||||
chChunksOut chan<- models.Chunk,
|
||||
chJobErrorsOut chan<- custom_errors.JobError,
|
||||
wgActiveChunks *sync.WaitGroup,
|
||||
)
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
package main
|
||||
package transformer
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
Reference in New Issue
Block a user