feat: implement MSSQL extractor, transformer, and Postgres loader for enhanced data migration
This commit is contained in:
150
internal/app/etl/transformers/mssql.go
Normal file
150
internal/app/etl/transformers/mssql.go
Normal file
@@ -0,0 +1,150 @@
|
||||
package transformers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
|
||||
)
|
||||
|
||||
type MssqlTransformer struct{}
|
||||
|
||||
func NewMssqlTransformer() etl.Transformer {
|
||||
return &MssqlTransformer{}
|
||||
}
|
||||
|
||||
func computeTransformationPlan(columns []models.ColumnType) []etl.ColumnTransformPlan {
|
||||
var plan []etl.ColumnTransformPlan
|
||||
|
||||
for i, col := range columns {
|
||||
switch col.SystemType() {
|
||||
case "uniqueidentifier":
|
||||
plan = append(plan, etl.ColumnTransformPlan{
|
||||
Index: i,
|
||||
Fn: func(v any) (any, error) {
|
||||
if b, ok := v.([]byte); ok && b != nil {
|
||||
return mssqlUuidToBigEndian(b)
|
||||
}
|
||||
return v, nil
|
||||
},
|
||||
})
|
||||
|
||||
case "geometry", "geography":
|
||||
plan = append(plan, etl.ColumnTransformPlan{
|
||||
Index: i,
|
||||
Fn: func(v any) (any, error) {
|
||||
if b, ok := v.([]byte); ok && b != nil {
|
||||
return wkbToEwkbWithSrid(b, 4326)
|
||||
}
|
||||
return v, nil
|
||||
},
|
||||
})
|
||||
|
||||
case "datetime", "datetime2":
|
||||
plan = append(plan, etl.ColumnTransformPlan{
|
||||
Index: i,
|
||||
Fn: func(v any) (any, error) {
|
||||
if t, ok := v.(time.Time); ok {
|
||||
return ensureUTC(t), nil
|
||||
}
|
||||
return v, nil
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return plan
|
||||
}
|
||||
|
||||
const processChunkCtxCheck = 4096
|
||||
|
||||
func (mssqlTr *MssqlTransformer) ProcessChunk(
|
||||
ctx context.Context,
|
||||
chunk *models.Chunk,
|
||||
transformationPlan []etl.ColumnTransformPlan,
|
||||
) error {
|
||||
for i, rowValues := range chunk.Data {
|
||||
if i%processChunkCtxCheck == 0 {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
for _, task := range transformationPlan {
|
||||
val := rowValues[task.Index]
|
||||
if val == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
transformed, err := task.Fn(val)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
rowValues[task.Index] = transformed
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (mssqlTr *MssqlTransformer) Exec(
|
||||
ctx context.Context,
|
||||
columns []models.ColumnType,
|
||||
chChunksIn <-chan models.Chunk,
|
||||
chChunksOut chan<- models.Chunk,
|
||||
chJobErrorsOut chan<- custom_errors.JobError,
|
||||
wgActiveChunks *sync.WaitGroup,
|
||||
) {
|
||||
transformationPlan := computeTransformationPlan(columns)
|
||||
|
||||
for {
|
||||
if ctx.Err() != nil {
|
||||
return
|
||||
}
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
|
||||
case chunk, ok := <-chChunksIn:
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
|
||||
if len(transformationPlan) == 0 {
|
||||
select {
|
||||
case chChunksOut <- chunk:
|
||||
wgActiveChunks.Add(1)
|
||||
continue
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
err := mssqlTr.ProcessChunk(ctx, &chunk, transformationPlan)
|
||||
if err != nil {
|
||||
if errors.Is(err, ctx.Err()) {
|
||||
return
|
||||
}
|
||||
|
||||
select {
|
||||
case chJobErrorsOut <- custom_errors.JobError{ShouldCancelJob: true, Msg: "Transformation failed", Prev: err}:
|
||||
case <-ctx.Done():
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
select {
|
||||
case chChunksOut <- chunk:
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
|
||||
wgActiveChunks.Add(1)
|
||||
}
|
||||
}
|
||||
}
|
||||
1
internal/app/etl/transformers/types.go
Normal file
1
internal/app/etl/transformers/types.go
Normal file
@@ -0,0 +1 @@
|
||||
package transformers
|
||||
80
internal/app/etl/transformers/utils.go
Normal file
80
internal/app/etl/transformers/utils.go
Normal file
@@ -0,0 +1,80 @@
|
||||
package transformers
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"time"
|
||||
)
|
||||
|
||||
func mssqlUuidToBigEndian(mssqlUuid []byte) ([]byte, error) {
|
||||
if len(mssqlUuid) != 16 {
|
||||
return nil, errors.New("Invalid uuid")
|
||||
}
|
||||
|
||||
pgUuid := make([]byte, 16)
|
||||
pgUuid[0], pgUuid[1], pgUuid[2], pgUuid[3] = mssqlUuid[3], mssqlUuid[2], mssqlUuid[1], mssqlUuid[0]
|
||||
pgUuid[4], pgUuid[5] = mssqlUuid[5], mssqlUuid[4]
|
||||
pgUuid[6], pgUuid[7] = mssqlUuid[7], mssqlUuid[6]
|
||||
copy(pgUuid[8:], mssqlUuid[8:])
|
||||
|
||||
return pgUuid, nil
|
||||
}
|
||||
|
||||
const sridFlag = 0x20000000
|
||||
|
||||
func wkbToEwkbWithSrid(geometry []byte, srid int) ([]byte, error) {
|
||||
if len(geometry) < 5 {
|
||||
return nil, errors.New("Invalid wkb")
|
||||
}
|
||||
|
||||
var byteOrder binary.ByteOrder
|
||||
if geometry[0] == 0 {
|
||||
byteOrder = binary.BigEndian
|
||||
} else {
|
||||
byteOrder = binary.LittleEndian
|
||||
}
|
||||
|
||||
wkbType := byteOrder.Uint32(geometry[1:5])
|
||||
if wkbType&sridFlag != 0 {
|
||||
return geometry, nil
|
||||
}
|
||||
|
||||
ewkbType := wkbType | sridFlag
|
||||
|
||||
result := make([]byte, len(geometry)+4)
|
||||
|
||||
result[0] = geometry[0]
|
||||
|
||||
byteOrder.PutUint32(result[1:5], ewkbType)
|
||||
|
||||
byteOrder.PutUint32(result[5:9], uint32(srid))
|
||||
|
||||
copy(result[9:], geometry[5:])
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func ensureUTC(t time.Time) time.Time {
|
||||
if t.Location() == time.UTC {
|
||||
return t
|
||||
}
|
||||
|
||||
return time.Date(t.Year(), t.Month(), t.Day(), t.Hour(), t.Minute(), t.Second(), t.Nanosecond(), time.UTC)
|
||||
}
|
||||
|
||||
func ToInt64(v any) (int64, bool) {
|
||||
switch t := v.(type) {
|
||||
case int:
|
||||
return int64(t), true
|
||||
case int8:
|
||||
return int64(t), true
|
||||
case int16:
|
||||
return int64(t), true
|
||||
case int32:
|
||||
return int64(t), true
|
||||
case int64:
|
||||
return int64(t), true
|
||||
default:
|
||||
return 0, false
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user