feat: implement MSSQL extractor, transformer, and Postgres loader for enhanced data migration
This commit is contained in:
@@ -1,36 +0,0 @@
|
||||
package extractor
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
|
||||
)
|
||||
|
||||
// Extractor is the extractor contract as declared in package extractor.
// NOTE(review): the enclosing hunk header (-1,36 +0,0) marks this file as
// removed, and the constructors later in this diff switch to returning
// etl.Extractor, so this looks like the pre-move copy of that interface.
type Extractor interface {
|
||||
// ProcessBatch takes one models.Batch for the given source table together
// with the column list, a chunk size, a send-only channel of chunks and a
// pointer to a rowsRead counter, and returns an error.
// NOTE(review): indexPrimaryKey is presumably the primary-key column's
// position within columns - confirm against the implementations.
ProcessBatch(
|
||||
ctx context.Context,
|
||||
tableInfo config.SourceTableInfo,
|
||||
columns []models.ColumnType,
|
||||
chunkSize int,
|
||||
batch models.Batch,
|
||||
indexPrimaryKey int,
|
||||
chChunksOut chan<- models.Chunk,
|
||||
rowsRead *int64,
|
||||
) error
|
||||
|
||||
// Exec is the channel-based entry point: it receives batches on
// chBatchesIn and is handed send-only channels for chunks, extractor
// errors and job errors, plus a WaitGroup pointer and the rowsRead
// counter pointer. It has no return value.
Exec(
|
||||
ctx context.Context,
|
||||
tableInfo config.SourceTableInfo,
|
||||
columns []models.ColumnType,
|
||||
chunkSize int,
|
||||
chBatchesIn <-chan models.Batch,
|
||||
chChunksOut chan<- models.Chunk,
|
||||
chErrorsOut chan<- custom_errors.ExtractorError,
|
||||
chJobErrorsOut chan<- custom_errors.JobError,
|
||||
wgActiveBatches *sync.WaitGroup,
|
||||
rowsRead *int64,
|
||||
)
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
package extractor
|
||||
package extractors
|
||||
|
||||
import (
|
||||
"context"
|
||||
@@ -13,6 +13,7 @@ import (
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/convert"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
@@ -21,7 +22,7 @@ type MssqlExtractor struct {
|
||||
db *sql.DB
|
||||
}
|
||||
|
||||
// NewMssqlExtractor returns a pointer to a MssqlExtractor whose db field is
// the supplied *sql.DB. Two signature lines appear because this is a diff
// view: the first returns the package-local Extractor, the second returns
// etl.Extractor.
func NewMssqlExtractor(db *sql.DB) Extractor {
|
||||
func NewMssqlExtractor(db *sql.DB) etl.Extractor {
|
||||
return &MssqlExtractor{db: db}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
package extractor
|
||||
package extractors
|
||||
|
||||
import (
|
||||
"context"
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
|
||||
"github.com/google/uuid"
|
||||
"github.com/jackc/pgx/v5/pgxpool"
|
||||
@@ -19,7 +20,7 @@ type PostgresExtractor struct {
|
||||
db *pgxpool.Pool
|
||||
}
|
||||
|
||||
// NewPostgresExtractor returns a pointer to a PostgresExtractor whose db
// field is the supplied pgx connection pool. Two signature lines appear
// because this is a diff view: the first returns the package-local
// Extractor, the second returns etl.Extractor.
func NewPostgresExtractor(pool *pgxpool.Pool) Extractor {
|
||||
func NewPostgresExtractor(pool *pgxpool.Pool) etl.Extractor {
|
||||
return &PostgresExtractor{db: pool}
|
||||
}
|
||||
|
||||
1
internal/app/etl/extractors/types.go
Normal file
1
internal/app/etl/extractors/types.go
Normal file
@@ -0,0 +1 @@
|
||||
package extractors
|
||||
@@ -1,30 +0,0 @@
|
||||
package loader
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
|
||||
)
|
||||
|
||||
// Loader is the loader contract as declared in package loader.
// NOTE(review): the enclosing hunk header (-1,30 +0,0) marks this file as
// removed, and NewPostgresLoader later in this diff switches to returning
// etl.Loader, so this looks like the pre-move copy of that interface.
type Loader interface {
|
||||
// ProcessChunk takes one models.Chunk for the given target table along
// with the column names, and returns an int and an error.
// NOTE(review): the int is presumably the number of rows written -
// confirm against the implementation.
ProcessChunk(
|
||||
ctx context.Context,
|
||||
tableInfo config.TargetTableInfo,
|
||||
colNames []string,
|
||||
chunk models.Chunk,
|
||||
) (int, error)
|
||||
|
||||
// Exec is the channel-based entry point: it receives chunks on chChunksIn
// and is handed send-only channels for loader errors and job errors, a
// WaitGroup pointer and a pointer to a rowsLoaded counter. It has no
// return value.
Exec(
|
||||
ctx context.Context,
|
||||
tableInfo config.TargetTableInfo,
|
||||
columns []models.ColumnType,
|
||||
chChunksIn <-chan models.Chunk,
|
||||
chErrorsOut chan<- custom_errors.LoaderError,
|
||||
chJobErrorsOut chan<- custom_errors.JobError,
|
||||
wgActiveChunks *sync.WaitGroup,
|
||||
rowsLoaded *int64,
|
||||
)
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
package loader
|
||||
package loaders
|
||||
|
||||
import (
|
||||
"context"
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
|
||||
"github.com/jackc/pgx/v5"
|
||||
"github.com/jackc/pgx/v5/pgconn"
|
||||
@@ -19,7 +20,7 @@ type PostgresLoader struct {
|
||||
db *pgxpool.Pool
|
||||
}
|
||||
|
||||
// NewPostgresLoader returns a pointer to a PostgresLoader whose db field is
// the supplied pgx connection pool. Two signature lines appear because this
// is a diff view: the first returns the package-local Loader, the second
// returns etl.Loader.
func NewPostgresLoader(pool *pgxpool.Pool) Loader {
|
||||
func NewPostgresLoader(pool *pgxpool.Pool) etl.Loader {
|
||||
return &PostgresLoader{db: pool}
|
||||
}
|
||||
|
||||
1
internal/app/etl/loaders/types.go
Normal file
1
internal/app/etl/loaders/types.go
Normal file
@@ -0,0 +1 @@
|
||||
package loaders
|
||||
@@ -1,33 +0,0 @@
|
||||
package transformer
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
|
||||
)
|
||||
|
||||
// transformerFunc maps one value to a converted value or an error. It is the
// unexported form; the code later in this diff uses etl.TransformerFunc.
type transformerFunc func(any) (any, error)
|
||||
|
||||
// columnTransformPlan pairs a column position with the function applied to
// that column's values. It is the unexported form; the code later in this
// diff uses etl.ColumnTransformPlan with exported fields.
type columnTransformPlan struct {
|
||||
// index is used to index into a row's values (rowValues[task.index]).
index int
|
||||
// fn is called with the non-nil value found at index.
fn transformerFunc
|
||||
}
|
||||
|
||||
// Transformer is the transformer contract as declared in package transformer.
// NOTE(review): the enclosing hunk header (-1,33 +0,0) marks this file as
// removed, and NewMssqlTransformer later in this diff switches to returning
// etl.Transformer, so this looks like the pre-move copy of that interface.
type Transformer interface {
|
||||
// ProcessChunk receives a pointer to a chunk and a list of per-column
// transform steps, and returns an error.
ProcessChunk(
|
||||
ctx context.Context,
|
||||
chunk *models.Chunk,
|
||||
transformationPlan []columnTransformPlan,
|
||||
) error
|
||||
|
||||
// Exec is the channel-based entry point: it receives chunks on chChunksIn
// and is handed a send-only chunk channel, a send-only job-error channel
// and a WaitGroup pointer. It has no return value.
Exec(
|
||||
ctx context.Context,
|
||||
columns []models.ColumnType,
|
||||
chChunksIn <-chan models.Chunk,
|
||||
chChunksOut chan<- models.Chunk,
|
||||
chJobErrorsOut chan<- custom_errors.JobError,
|
||||
wgActiveChunks *sync.WaitGroup,
|
||||
)
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
package transformer
|
||||
package transformers
|
||||
|
||||
import (
|
||||
"context"
|
||||
@@ -7,24 +7,25 @@ import (
|
||||
"time"
|
||||
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
|
||||
)
|
||||
|
||||
// MssqlTransformer is a field-less type; NewMssqlTransformer returns a
// pointer to it as the transformer interface.
type MssqlTransformer struct{}
|
||||
|
||||
// NewMssqlTransformer returns a pointer to a new, empty MssqlTransformer.
// Two signature lines appear because this is a diff view: the first returns
// the package-local Transformer, the second returns etl.Transformer.
func NewMssqlTransformer() Transformer {
|
||||
func NewMssqlTransformer() etl.Transformer {
|
||||
return &MssqlTransformer{}
|
||||
}
|
||||
|
||||
func computeTransformationPlan(columns []models.ColumnType) []columnTransformPlan {
|
||||
var plan []columnTransformPlan
|
||||
func computeTransformationPlan(columns []models.ColumnType) []etl.ColumnTransformPlan {
|
||||
var plan []etl.ColumnTransformPlan
|
||||
|
||||
for i, col := range columns {
|
||||
switch col.SystemType() {
|
||||
case "uniqueidentifier":
|
||||
plan = append(plan, columnTransformPlan{
|
||||
index: i,
|
||||
fn: func(v any) (any, error) {
|
||||
plan = append(plan, etl.ColumnTransformPlan{
|
||||
Index: i,
|
||||
Fn: func(v any) (any, error) {
|
||||
if b, ok := v.([]byte); ok && b != nil {
|
||||
return mssqlUuidToBigEndian(b)
|
||||
}
|
||||
@@ -33,9 +34,9 @@ func computeTransformationPlan(columns []models.ColumnType) []columnTransformPla
|
||||
})
|
||||
|
||||
case "geometry", "geography":
|
||||
plan = append(plan, columnTransformPlan{
|
||||
index: i,
|
||||
fn: func(v any) (any, error) {
|
||||
plan = append(plan, etl.ColumnTransformPlan{
|
||||
Index: i,
|
||||
Fn: func(v any) (any, error) {
|
||||
if b, ok := v.([]byte); ok && b != nil {
|
||||
return wkbToEwkbWithSrid(b, 4326)
|
||||
}
|
||||
@@ -44,9 +45,9 @@ func computeTransformationPlan(columns []models.ColumnType) []columnTransformPla
|
||||
})
|
||||
|
||||
case "datetime", "datetime2":
|
||||
plan = append(plan, columnTransformPlan{
|
||||
index: i,
|
||||
fn: func(v any) (any, error) {
|
||||
plan = append(plan, etl.ColumnTransformPlan{
|
||||
Index: i,
|
||||
Fn: func(v any) (any, error) {
|
||||
if t, ok := v.(time.Time); ok {
|
||||
return ensureUTC(t), nil
|
||||
}
|
||||
@@ -64,7 +65,7 @@ const processChunkCtxCheck = 4096
|
||||
func (mssqlTr *MssqlTransformer) ProcessChunk(
|
||||
ctx context.Context,
|
||||
chunk *models.Chunk,
|
||||
transformationPlan []columnTransformPlan,
|
||||
transformationPlan []etl.ColumnTransformPlan,
|
||||
) error {
|
||||
for i, rowValues := range chunk.Data {
|
||||
if i%processChunkCtxCheck == 0 {
|
||||
@@ -74,16 +75,16 @@ func (mssqlTr *MssqlTransformer) ProcessChunk(
|
||||
}
|
||||
|
||||
for _, task := range transformationPlan {
|
||||
val := rowValues[task.index]
|
||||
val := rowValues[task.Index]
|
||||
if val == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
transformed, err := task.fn(val)
|
||||
transformed, err := task.Fn(val)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
rowValues[task.index] = transformed
|
||||
rowValues[task.Index] = transformed
|
||||
}
|
||||
}
|
||||
|
||||
1
internal/app/etl/transformers/types.go
Normal file
1
internal/app/etl/transformers/types.go
Normal file
@@ -0,0 +1 @@
|
||||
package transformers
|
||||
@@ -1,4 +1,4 @@
|
||||
package transformer
|
||||
package transformers
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
80
internal/app/etl/types.go
Normal file
80
internal/app/etl/types.go
Normal file
@@ -0,0 +1,80 @@
|
||||
package etl
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
|
||||
)
|
||||
|
||||
// Extractor declares the methods an extractor must provide. In this diff the
// MSSQL and Postgres extractor constructors in package extractors both return
// this interface.
type Extractor interface {
|
||||
// ProcessBatch takes one models.Batch for the given source table together
// with the column list, a chunk size, a send-only channel of chunks and a
// pointer to a rowsRead counter, and returns an error.
// NOTE(review): indexPrimaryKey is presumably the primary-key column's
// position within columns, and rowsRead presumably accumulates the rows
// read so far - confirm against the implementations.
ProcessBatch(
|
||||
ctx context.Context,
|
||||
tableInfo config.SourceTableInfo,
|
||||
columns []models.ColumnType,
|
||||
chunkSize int,
|
||||
batch models.Batch,
|
||||
indexPrimaryKey int,
|
||||
chChunksOut chan<- models.Chunk,
|
||||
rowsRead *int64,
|
||||
) error
|
||||
|
||||
// Exec is the channel-based entry point: it receives batches on
// chBatchesIn and is handed send-only channels for chunks, extractor
// errors and job errors, plus a WaitGroup pointer and the rowsRead
// counter pointer. It has no return value.
// NOTE(review): presumably drains chBatchesIn and calls ProcessBatch per
// batch, using wgActiveBatches to track in-flight work - confirm against
// the implementations.
Exec(
|
||||
ctx context.Context,
|
||||
tableInfo config.SourceTableInfo,
|
||||
columns []models.ColumnType,
|
||||
chunkSize int,
|
||||
chBatchesIn <-chan models.Batch,
|
||||
chChunksOut chan<- models.Chunk,
|
||||
chErrorsOut chan<- custom_errors.ExtractorError,
|
||||
chJobErrorsOut chan<- custom_errors.JobError,
|
||||
wgActiveBatches *sync.WaitGroup,
|
||||
rowsRead *int64,
|
||||
)
|
||||
}
|
||||
|
||||
// TransformerFunc maps one value to a converted value or an error. It is the
// type of ColumnTransformPlan.Fn.
type TransformerFunc func(any) (any, error)
|
||||
|
||||
// ColumnTransformPlan pairs a column position with the function applied to
// that column's values. The MSSQL transformer in this diff builds one entry
// per column that needs conversion and, for every row, replaces the non-nil
// value at Index with the result of Fn.
type ColumnTransformPlan struct {
|
||||
// Index is the column's position; the MSSQL transformer sets it from the
// loop index over the columns slice and uses it to index a row's values.
Index int
|
||||
// Fn converts the value found at Index; a non-nil error aborts the
// MSSQL transformer's ProcessChunk.
Fn TransformerFunc
|
||||
}
|
||||
|
||||
// Transformer declares the methods a transformer must provide. In this diff
// NewMssqlTransformer in package transformers returns this interface.
type Transformer interface {
|
||||
// ProcessChunk receives a pointer to a chunk and a list of per-column
// transform steps, and returns an error. The MSSQL implementation shown
// in this diff walks chunk.Data row by row, skips nil values, writes each
// step's result back into the row and returns the first error from a
// step.
ProcessChunk(
|
||||
ctx context.Context,
|
||||
chunk *models.Chunk,
|
||||
transformationPlan []ColumnTransformPlan,
|
||||
) error
|
||||
|
||||
// Exec is the channel-based entry point: it receives chunks on chChunksIn
// and is handed a send-only chunk channel, a send-only job-error channel
// and a WaitGroup pointer. It has no return value.
// NOTE(review): presumably derives the transformation plan from columns
// and forwards processed chunks to chChunksOut - confirm against the
// implementation.
Exec(
|
||||
ctx context.Context,
|
||||
columns []models.ColumnType,
|
||||
chChunksIn <-chan models.Chunk,
|
||||
chChunksOut chan<- models.Chunk,
|
||||
chJobErrorsOut chan<- custom_errors.JobError,
|
||||
wgActiveChunks *sync.WaitGroup,
|
||||
)
|
||||
}
|
||||
|
||||
// Loader declares the methods a loader must provide. In this diff
// NewPostgresLoader in package loaders returns this interface.
type Loader interface {
|
||||
// ProcessChunk takes one models.Chunk for the given target table along
// with the column names, and returns an int and an error.
// NOTE(review): the int is presumably the number of rows written -
// confirm against the implementation.
ProcessChunk(
|
||||
ctx context.Context,
|
||||
tableInfo config.TargetTableInfo,
|
||||
colNames []string,
|
||||
chunk models.Chunk,
|
||||
) (int, error)
|
||||
|
||||
// Exec is the channel-based entry point: it receives chunks on chChunksIn
// and is handed send-only channels for loader errors and job errors, a
// WaitGroup pointer and a pointer to a rowsLoaded counter. It has no
// return value.
// NOTE(review): presumably calls ProcessChunk per received chunk and adds
// its count to rowsLoaded - confirm against the implementation.
Exec(
|
||||
ctx context.Context,
|
||||
tableInfo config.TargetTableInfo,
|
||||
columns []models.ColumnType,
|
||||
chChunksIn <-chan models.Chunk,
|
||||
chErrorsOut chan<- custom_errors.LoaderError,
|
||||
chJobErrorsOut chan<- custom_errors.JobError,
|
||||
wgActiveChunks *sync.WaitGroup,
|
||||
rowsLoaded *int64,
|
||||
)
|
||||
}
|
||||
Reference in New Issue
Block a user