feat: implement MSSQL extractor, transformer, and Postgres loader for enhanced data migration

This commit is contained in:
2026-04-10 23:39:37 -05:00
parent 1be7018ba3
commit cd0e53b1d2
14 changed files with 135 additions and 135 deletions

View File

@@ -1,36 +0,0 @@
package extractor
import (
"context"
"sync"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
)
type Extractor interface {
ProcessBatch(
ctx context.Context,
tableInfo config.SourceTableInfo,
columns []models.ColumnType,
chunkSize int,
batch models.Batch,
indexPrimaryKey int,
chChunksOut chan<- models.Chunk,
rowsRead *int64,
) error
Exec(
ctx context.Context,
tableInfo config.SourceTableInfo,
columns []models.ColumnType,
chunkSize int,
chBatchesIn <-chan models.Batch,
chChunksOut chan<- models.Chunk,
chErrorsOut chan<- custom_errors.ExtractorError,
chJobErrorsOut chan<- custom_errors.JobError,
wgActiveBatches *sync.WaitGroup,
rowsRead *int64,
)
}

View File

@@ -1,4 +1,4 @@
package extractor
package extractors
import (
"context"
@@ -13,6 +13,7 @@ import (
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/convert"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
"github.com/google/uuid"
)
@@ -21,7 +22,7 @@ type MssqlExtractor struct {
db *sql.DB
}
func NewMssqlExtractor(db *sql.DB) Extractor {
func NewMssqlExtractor(db *sql.DB) etl.Extractor {
return &MssqlExtractor{db: db}
}

View File

@@ -1,4 +1,4 @@
package extractor
package extractors
import (
"context"
@@ -10,6 +10,7 @@ import (
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
"github.com/google/uuid"
"github.com/jackc/pgx/v5/pgxpool"
@@ -19,7 +20,7 @@ type PostgresExtractor struct {
db *pgxpool.Pool
}
func NewPostgresExtractor(pool *pgxpool.Pool) Extractor {
func NewPostgresExtractor(pool *pgxpool.Pool) etl.Extractor {
return &PostgresExtractor{db: pool}
}

View File

@@ -0,0 +1 @@
package extractors

View File

@@ -1,30 +0,0 @@
package loader
import (
"context"
"sync"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
)
type Loader interface {
ProcessChunk(
ctx context.Context,
tableInfo config.TargetTableInfo,
colNames []string,
chunk models.Chunk,
) (int, error)
Exec(
ctx context.Context,
tableInfo config.TargetTableInfo,
columns []models.ColumnType,
chChunksIn <-chan models.Chunk,
chErrorsOut chan<- custom_errors.LoaderError,
chJobErrorsOut chan<- custom_errors.JobError,
wgActiveChunks *sync.WaitGroup,
rowsLoaded *int64,
)
}

View File

@@ -1,4 +1,4 @@
package loader
package loaders
import (
"context"
@@ -9,6 +9,7 @@ import (
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
"github.com/jackc/pgx/v5"
"github.com/jackc/pgx/v5/pgconn"
@@ -19,7 +20,7 @@ type PostgresLoader struct {
db *pgxpool.Pool
}
func NewPostgresLoader(pool *pgxpool.Pool) Loader {
func NewPostgresLoader(pool *pgxpool.Pool) etl.Loader {
return &PostgresLoader{db: pool}
}

View File

@@ -0,0 +1 @@
package loaders

View File

@@ -1,33 +0,0 @@
package transformer
import (
"context"
"sync"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
)
type transformerFunc func(any) (any, error)
type columnTransformPlan struct {
index int
fn transformerFunc
}
type Transformer interface {
ProcessChunk(
ctx context.Context,
chunk *models.Chunk,
transformationPlan []columnTransformPlan,
) error
Exec(
ctx context.Context,
columns []models.ColumnType,
chChunksIn <-chan models.Chunk,
chChunksOut chan<- models.Chunk,
chJobErrorsOut chan<- custom_errors.JobError,
wgActiveChunks *sync.WaitGroup,
)
}

View File

@@ -1,4 +1,4 @@
package transformer
package transformers
import (
"context"
@@ -7,24 +7,25 @@ import (
"time"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
)
type MssqlTransformer struct{}
func NewMssqlTransformer() Transformer {
func NewMssqlTransformer() etl.Transformer {
return &MssqlTransformer{}
}
func computeTransformationPlan(columns []models.ColumnType) []columnTransformPlan {
var plan []columnTransformPlan
func computeTransformationPlan(columns []models.ColumnType) []etl.ColumnTransformPlan {
var plan []etl.ColumnTransformPlan
for i, col := range columns {
switch col.SystemType() {
case "uniqueidentifier":
plan = append(plan, columnTransformPlan{
index: i,
fn: func(v any) (any, error) {
plan = append(plan, etl.ColumnTransformPlan{
Index: i,
Fn: func(v any) (any, error) {
if b, ok := v.([]byte); ok && b != nil {
return mssqlUuidToBigEndian(b)
}
@@ -33,9 +34,9 @@ func computeTransformationPlan(columns []models.ColumnType) []columnTransformPla
})
case "geometry", "geography":
plan = append(plan, columnTransformPlan{
index: i,
fn: func(v any) (any, error) {
plan = append(plan, etl.ColumnTransformPlan{
Index: i,
Fn: func(v any) (any, error) {
if b, ok := v.([]byte); ok && b != nil {
return wkbToEwkbWithSrid(b, 4326)
}
@@ -44,9 +45,9 @@ func computeTransformationPlan(columns []models.ColumnType) []columnTransformPla
})
case "datetime", "datetime2":
plan = append(plan, columnTransformPlan{
index: i,
fn: func(v any) (any, error) {
plan = append(plan, etl.ColumnTransformPlan{
Index: i,
Fn: func(v any) (any, error) {
if t, ok := v.(time.Time); ok {
return ensureUTC(t), nil
}
@@ -64,7 +65,7 @@ const processChunkCtxCheck = 4096
func (mssqlTr *MssqlTransformer) ProcessChunk(
ctx context.Context,
chunk *models.Chunk,
transformationPlan []columnTransformPlan,
transformationPlan []etl.ColumnTransformPlan,
) error {
for i, rowValues := range chunk.Data {
if i%processChunkCtxCheck == 0 {
@@ -74,16 +75,16 @@ func (mssqlTr *MssqlTransformer) ProcessChunk(
}
for _, task := range transformationPlan {
val := rowValues[task.index]
val := rowValues[task.Index]
if val == nil {
continue
}
transformed, err := task.fn(val)
transformed, err := task.Fn(val)
if err != nil {
return err
}
rowValues[task.index] = transformed
rowValues[task.Index] = transformed
}
}

View File

@@ -0,0 +1 @@
package transformers

View File

@@ -1,4 +1,4 @@
package transformer
package transformers
import (
"encoding/binary"

80
internal/app/etl/types.go Normal file
View File

@@ -0,0 +1,80 @@
package etl
import (
"context"
"sync"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
)
type Extractor interface {
ProcessBatch(
ctx context.Context,
tableInfo config.SourceTableInfo,
columns []models.ColumnType,
chunkSize int,
batch models.Batch,
indexPrimaryKey int,
chChunksOut chan<- models.Chunk,
rowsRead *int64,
) error
Exec(
ctx context.Context,
tableInfo config.SourceTableInfo,
columns []models.ColumnType,
chunkSize int,
chBatchesIn <-chan models.Batch,
chChunksOut chan<- models.Chunk,
chErrorsOut chan<- custom_errors.ExtractorError,
chJobErrorsOut chan<- custom_errors.JobError,
wgActiveBatches *sync.WaitGroup,
rowsRead *int64,
)
}
type TransformerFunc func(any) (any, error)
type ColumnTransformPlan struct {
Index int
Fn TransformerFunc
}
type Transformer interface {
ProcessChunk(
ctx context.Context,
chunk *models.Chunk,
transformationPlan []ColumnTransformPlan,
) error
Exec(
ctx context.Context,
columns []models.ColumnType,
chChunksIn <-chan models.Chunk,
chChunksOut chan<- models.Chunk,
chJobErrorsOut chan<- custom_errors.JobError,
wgActiveChunks *sync.WaitGroup,
)
}
type Loader interface {
ProcessChunk(
ctx context.Context,
tableInfo config.TargetTableInfo,
colNames []string,
chunk models.Chunk,
) (int, error)
Exec(
ctx context.Context,
tableInfo config.TargetTableInfo,
columns []models.ColumnType,
chChunksIn <-chan models.Chunk,
chErrorsOut chan<- custom_errors.LoaderError,
chJobErrorsOut chan<- custom_errors.JobError,
wgActiveChunks *sync.WaitGroup,
rowsLoaded *int64,
)
}