refactor: implement bidirectional transformation support with PostgreSQL integration
This commit is contained in:
@@ -59,6 +59,49 @@ func computeTransformationPlan(columns []models.ColumnType) []etl.ColumnTransfor
|
||||
return plan
|
||||
}
|
||||
|
||||
func computePostgresTransformationPlan(columns []models.ColumnType) []etl.ColumnTransformPlan {
|
||||
var plan []etl.ColumnTransformPlan
|
||||
|
||||
for i, col := range columns {
|
||||
switch col.SystemType() {
|
||||
case "uuid":
|
||||
plan = append(plan, etl.ColumnTransformPlan{
|
||||
Index: i,
|
||||
Fn: func(v any) (any, error) {
|
||||
if b, ok := v.([]byte); ok && b != nil {
|
||||
return bigEndianToMssqlUuid(b)
|
||||
}
|
||||
return v, nil
|
||||
},
|
||||
})
|
||||
|
||||
case "geometry":
|
||||
plan = append(plan, etl.ColumnTransformPlan{
|
||||
Index: i,
|
||||
Fn: func(v any) (any, error) {
|
||||
if b, ok := v.([]byte); ok && b != nil {
|
||||
return ewkbToMssqlGeo(b, false)
|
||||
}
|
||||
return v, nil
|
||||
},
|
||||
})
|
||||
|
||||
case "geography":
|
||||
plan = append(plan, etl.ColumnTransformPlan{
|
||||
Index: i,
|
||||
Fn: func(v any) (any, error) {
|
||||
if b, ok := v.([]byte); ok && b != nil {
|
||||
return ewkbToMssqlGeo(b, true)
|
||||
}
|
||||
return v, nil
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return plan
|
||||
}
|
||||
|
||||
func computeStorageTransformationPlan(
|
||||
ctx context.Context,
|
||||
azureClient *azure.Client,
|
||||
|
||||
72
internal/app/etl/transformers/postgres.go
Normal file
72
internal/app/etl/transformers/postgres.go
Normal file
@@ -0,0 +1,72 @@
|
||||
package transformers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
|
||||
)
|
||||
|
||||
// PostgresTransformer is the transformer type whose Consume method applies
// the plan produced by computePostgresTransformationPlan to incoming batches.
type PostgresTransformer struct {
|
||||
// sourceTable is the source-table configuration supplied at construction.
// NOTE(review): it is not read by the Consume method visible in this file;
// confirm whether other code depends on it.
sourceTable config.SourceTableInfo
|
||||
}
|
||||
|
||||
func NewPostgresTransformer(sourceTable config.SourceTableInfo) etl.Transformer {
|
||||
return &PostgresTransformer{sourceTable: sourceTable}
|
||||
}
|
||||
|
||||
func (pgTr *PostgresTransformer) Consume(
|
||||
ctx context.Context,
|
||||
columns []models.ColumnType,
|
||||
retryConfig config.RetryConfig,
|
||||
batchSize int,
|
||||
chBatchesIn <-chan models.Batch,
|
||||
chBatchesOut chan<- models.Batch,
|
||||
chJobErrorsOut chan<- custom_errors.JobError,
|
||||
wgActiveBatches *sync.WaitGroup,
|
||||
) {
|
||||
transformationPlan := computePostgresTransformationPlan(columns)
|
||||
|
||||
acc := &batchAccumulator{batchSize: batchSize}
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
|
||||
case batch, ok := <-chBatchesIn:
|
||||
if !ok {
|
||||
acc.flush(ctx, chBatchesOut, wgActiveBatches)
|
||||
return
|
||||
}
|
||||
|
||||
if len(transformationPlan) > 0 {
|
||||
if err := ProcessBatchWithRetries(ctx, &batch, transformationPlan, retryConfig); err != nil {
|
||||
sendTransformError(ctx, err, chJobErrorsOut)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if batchSize <= 0 {
|
||||
wgActiveBatches.Add(1)
|
||||
select {
|
||||
case chBatchesOut <- batch:
|
||||
case <-ctx.Done():
|
||||
wgActiveBatches.Done()
|
||||
return
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
acc.add(batch)
|
||||
if acc.ready() {
|
||||
if !acc.flush(ctx, chBatchesOut, wgActiveBatches) {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -4,6 +4,8 @@ import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"time"
|
||||
|
||||
mssqlclrgeo "github.com/gaspardle/go-mssqlclrgeo"
|
||||
)
|
||||
|
||||
func mssqlUuidToBigEndian(mssqlUuid []byte) ([]byte, error) {
|
||||
@@ -62,6 +64,51 @@ func ensureUTC(t time.Time) time.Time {
|
||||
return time.Date(t.Year(), t.Month(), t.Day(), t.Hour(), t.Minute(), t.Second(), t.Nanosecond(), time.UTC)
|
||||
}
|
||||
|
||||
// bigEndianToMssqlUuid returns a new 16-byte slice containing pgUuid with its
// first three groups byte-reversed: bytes 0-3 are reversed as a unit, bytes
// 4-5 are swapped, bytes 6-7 are swapped, and bytes 8-15 are copied as-is.
// The input slice is not modified.
//
// It returns an error when pgUuid is not exactly 16 bytes long.
func bigEndianToMssqlUuid(pgUuid []byte) ([]byte, error) {
	// layout[dst] is the index in pgUuid that supplies output byte dst.
	layout := [16]int{
		3, 2, 1, 0, // first group, reversed
		5, 4, // second group, swapped
		7, 6, // third group, swapped
		8, 9, 10, 11, 12, 13, 14, 15, // remainder, unchanged
	}

	if len(pgUuid) != len(layout) {
		return nil, errors.New("Invalid uuid")
	}

	converted := make([]byte, len(layout))
	for dst, src := range layout {
		converted[dst] = pgUuid[src]
	}

	return converted, nil
}
|
||||
|
||||
func ewkbToMssqlGeo(ewkb []byte, isGeography bool) ([]byte, error) {
|
||||
if len(ewkb) < 5 {
|
||||
return nil, errors.New("Invalid ewkb")
|
||||
}
|
||||
|
||||
var byteOrder binary.ByteOrder
|
||||
if ewkb[0] == 0 {
|
||||
byteOrder = binary.BigEndian
|
||||
} else {
|
||||
byteOrder = binary.LittleEndian
|
||||
}
|
||||
|
||||
wkbType := byteOrder.Uint32(ewkb[1:5])
|
||||
|
||||
var wkb []byte
|
||||
if wkbType&sridFlag != 0 {
|
||||
if len(ewkb) < 9 {
|
||||
return nil, errors.New("Invalid ewkb: SRID flag set but data too short")
|
||||
}
|
||||
clearType := wkbType &^ uint32(sridFlag)
|
||||
wkb = make([]byte, len(ewkb)-4)
|
||||
wkb[0] = ewkb[0]
|
||||
byteOrder.PutUint32(wkb[1:5], clearType)
|
||||
copy(wkb[5:], ewkb[9:])
|
||||
} else {
|
||||
wkb = ewkb
|
||||
}
|
||||
|
||||
return mssqlclrgeo.WkbToUdtGeo(wkb, isGeography)
|
||||
}
|
||||
|
||||
func ToInt64(v any) (int64, bool) {
|
||||
switch t := v.(type) {
|
||||
case int:
|
||||
|
||||
Reference in New Issue
Block a user