feat: integrate Azure storage handling in migration process; update transformers and job processing logic
This commit is contained in:
@@ -3,18 +3,32 @@ package transformers
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/azure"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
|
||||
"github.com/google/uuid"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type MssqlTransformer struct{}
|
||||
type MssqlTransformer struct {
|
||||
toStorage config.ToStorageConfig
|
||||
sourceTable config.SourceTableInfo
|
||||
azureClient *azure.Client
|
||||
}
|
||||
|
||||
func NewMssqlTransformer() etl.Transformer {
|
||||
return &MssqlTransformer{}
|
||||
func NewMssqlTransformer(toStorage config.ToStorageConfig, sourceTable config.SourceTableInfo, azureClient *azure.Client) etl.Transformer {
|
||||
return &MssqlTransformer{
|
||||
toStorage: toStorage,
|
||||
sourceTable: sourceTable,
|
||||
azureClient: azureClient,
|
||||
}
|
||||
}
|
||||
|
||||
func computeTransformationPlan(columns []models.ColumnType) []etl.ColumnTransformPlan {
|
||||
@@ -60,6 +74,63 @@ func computeTransformationPlan(columns []models.ColumnType) []etl.ColumnTransfor
|
||||
return plan
|
||||
}
|
||||
|
||||
func computeStorageTransformationPlan(
|
||||
ctx context.Context,
|
||||
azureClient *azure.Client,
|
||||
toStorage config.ToStorageConfig,
|
||||
sourceColumns []models.ColumnType,
|
||||
sourceTable config.SourceTableInfo,
|
||||
) []etl.ColumnTransformPlan {
|
||||
if azureClient == nil || len(toStorage.Columns) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
colIndex := make(map[string]int, len(sourceColumns))
|
||||
for i, col := range sourceColumns {
|
||||
colIndex[strings.ToUpper(col.Name())] = i
|
||||
}
|
||||
|
||||
var plan []etl.ColumnTransformPlan
|
||||
for _, storageCol := range toStorage.Columns {
|
||||
if storageCol.Mode != "REFERENCE_ONLY" {
|
||||
log.Warnf("to_storage: unsupported mode %q for column %s — skipping", storageCol.Mode, storageCol.Source)
|
||||
continue
|
||||
}
|
||||
|
||||
idx, ok := colIndex[strings.ToUpper(storageCol.Source)]
|
||||
if !ok {
|
||||
log.Warnf("to_storage: source column %q not found in source schema — skipping", storageCol.Source)
|
||||
continue
|
||||
}
|
||||
|
||||
sourceColName := storageCol.Source
|
||||
schema := sourceTable.Schema
|
||||
table := sourceTable.Table
|
||||
|
||||
plan = append(plan, etl.ColumnTransformPlan{
|
||||
Index: idx,
|
||||
Fn: func(v any) (any, error) {
|
||||
if v == nil {
|
||||
return nil, nil
|
||||
}
|
||||
b, ok := v.([]byte)
|
||||
if !ok {
|
||||
log.Warnf("to_storage: expected []byte for %s.%s.%s, got %T — passing through",
|
||||
schema, table, sourceColName, v)
|
||||
return v, nil
|
||||
}
|
||||
blobPath := fmt.Sprintf("%s/%s/%s/%s.bin", schema, table, sourceColName, uuid.New().String())
|
||||
blobURL, err := azureClient.UploadAndGetURL(ctx, blobPath, b)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("uploading %s.%s.%s: %w", schema, table, sourceColName, err)
|
||||
}
|
||||
return blobURL, nil
|
||||
},
|
||||
})
|
||||
}
|
||||
return plan
|
||||
}
|
||||
|
||||
// processBatchCtxCheck presumably bounds how many rows ProcessBatch handles
// between context-cancellation checks — TODO confirm against ProcessBatch
// (its body is not fully visible in this chunk).
const processBatchCtxCheck = 4096
|
||||
|
||||
func (mssqlTr *MssqlTransformer) ProcessBatch(
|
||||
@@ -100,6 +171,8 @@ func (mssqlTr *MssqlTransformer) Exec(
|
||||
wgActiveBatches *sync.WaitGroup,
|
||||
) {
|
||||
transformationPlan := computeTransformationPlan(columns)
|
||||
storagePlan := computeStorageTransformationPlan(ctx, mssqlTr.azureClient, mssqlTr.toStorage, columns, mssqlTr.sourceTable)
|
||||
transformationPlan = append(transformationPlan, storagePlan...)
|
||||
|
||||
for {
|
||||
if ctx.Err() != nil {
|
||||
|
||||
Reference in New Issue
Block a user