feat: implement MSSQL data extraction and transformation process

This commit is contained in:
2026-04-06 20:36:11 -05:00
parent 382c2099f7
commit f589664320
4 changed files with 89 additions and 5 deletions

View File

@@ -17,7 +17,7 @@ func buildExtractQueryMssql(job MigrationJob, columns []ColumnType) string {
sbColumns.WriteString("]") sbColumns.WriteString("]")
if col.unifiedType == "GEOMETRY" { if col.unifiedType == "GEOMETRY" {
sbColumns.WriteString(".STAsWKB() AS [") sbColumns.WriteString(".STAsBinary() AS [")
sbColumns.WriteString(col.name) sbColumns.WriteString(col.name)
sbColumns.WriteString("]") sbColumns.WriteString("]")
} }

View File

@@ -0,0 +1,52 @@
package main
import (
"context"
"database/sql"
_ "github.com/microsoft/go-mssqldb"
log "github.com/sirupsen/logrus"
)
// UnknownRowValues holds one scanned row as a slice of column values whose
// concrete Go types are chosen by the database/sql driver at Scan time.
type UnknownRowValues []any
// extractFromMssql streams rows for the given migration job out of MSSQL in
// chunks of chunkSize, sending each chunk on out. It returns the first error
// encountered (query, scan, iteration, or context cancellation); on success
// any trailing partial chunk is flushed before returning nil.
//
// The caller owns out and is responsible for closing it after this function
// returns; extractFromMssql only sends.
func extractFromMssql(ctx context.Context, job MigrationJob, columns []ColumnType, chunkSize int, db *sql.DB, out chan<- []UnknownRowValues) error {
	query := buildExtractQueryMssql(job, columns)
	log.Debug("Query used to extract data from mssql: ", query)

	rows, err := db.QueryContext(ctx, query)
	if err != nil {
		return err
	}
	defer rows.Close()

	// sendChunk blocks until the consumer accepts the chunk or ctx is
	// cancelled, so a stalled consumer cannot wedge this goroutine forever.
	sendChunk := func(chunk []UnknownRowValues) error {
		select {
		case out <- chunk:
			log.Infof("Chunk sent... %+v", job)
			return nil
		case <-ctx.Done():
			return ctx.Err()
		}
	}

	rowsChunk := make([]UnknownRowValues, 0, chunkSize)
	for rows.Next() {
		// Scan through a parallel slice of *any so any column type is accepted.
		values := make([]any, len(columns))
		scanArgs := make([]any, len(columns))
		for i := range values {
			scanArgs[i] = &values[i]
		}
		if err := rows.Scan(scanArgs...); err != nil {
			return err
		}
		rowsChunk = append(rowsChunk, values)
		if len(rowsChunk) >= chunkSize {
			if err := sendChunk(rowsChunk); err != nil {
				return err
			}
			rowsChunk = make([]UnknownRowValues, 0, chunkSize)
		}
	}
	// rows.Next returns false on both normal exhaustion and iteration errors;
	// surface the latter instead of silently truncating the extracted data.
	if err := rows.Err(); err != nil {
		return err
	}
	if len(rowsChunk) > 0 {
		return sendChunk(rowsChunk)
	}
	return nil
}

View File

@@ -18,6 +18,12 @@ var migrationJobs []MigrationJob = []MigrationJob{
}, },
} }
// Tuning knobs for the extraction/transformation pipeline.
const (
// NumExtractors is the number of concurrent extraction workers.
// NOTE(review): not referenced in the visible code — confirm it is used elsewhere.
NumExtractors int = 2
// ChunkSize is how many rows are batched before a chunk is sent downstream.
ChunkSize int = 20
// QueueSize is the buffer capacity of the channel carrying chunks from
// extraction to transformation.
QueueSize int = 10
)
func main() { func main() {
configureLog() configureLog()
log.Info("Starting migration...") log.Info("Starting migration...")

View File

@@ -1,7 +1,10 @@
package main package main
import ( import (
"context"
"database/sql" "database/sql"
"fmt"
"github.com/jackc/pgx/v5/pgxpool" "github.com/jackc/pgx/v5/pgxpool"
_ "github.com/microsoft/go-mssqldb" _ "github.com/microsoft/go-mssqldb"
@@ -17,12 +20,16 @@ func processMigrationJob(sourceDb *sql.DB, targetDb *pgxpool.Pool, job Migration
logColumnTypes(sourceColTypes, "Source col types") logColumnTypes(sourceColTypes, "Source col types")
logColumnTypes(targetColTypes, "Target col types") logColumnTypes(targetColTypes, "Target col types")
sourceQuery := buildExtractQueryMssql(job, sourceColTypes) chRowsExtract := make(chan []UnknownRowValues, QueueSize)
log.Debug(sourceQuery) mssqlContext := context.Background()
targetQuery := buildExtractQueryPostgres(job, targetColTypes) if err := extractFromMssql(mssqlContext, job, sourceColTypes, ChunkSize, sourceDb, chRowsExtract); err != nil {
log.Debug(targetQuery) log.Fatal("Unexpected error extracting data from mssql: ", err)
}
close(chRowsExtract)
transformRowsMssql(job, sourceColTypes, chRowsExtract)
} }
func logColumnTypes(columnTypes []ColumnType, label string) { func logColumnTypes(columnTypes []ColumnType, label string) {
@@ -32,3 +39,22 @@ func logColumnTypes(columnTypes []ColumnType, label string) {
log.Infof("%+v", col) log.Infof("%+v", col)
} }
} }
// transformRowsMssql drains chunks of extracted rows from in until the
// channel is closed. For each chunk it logs a debug marker and emits a
// sample log line for every 100th row (index 0, 100, 200, ...).
func transformRowsMssql(job MigrationJob, columns []ColumnType, in <-chan []UnknownRowValues) {
	for chunk := range in {
		log.Debug("Chunk received, transforming...")
		for idx := range chunk {
			if idx%100 != 0 {
				continue
			}
			logSampleRow(job, columns, chunk[idx], fmt.Sprintf("row %d", idx))
		}
	}
}
// logSampleRow logs one extracted row at info level: a header line
// identifying the job's schema/table plus the given tag, followed by one
// "name: value" line per column.
func logSampleRow(job MigrationJob, columns []ColumnType, rowValues UnknownRowValues, tag string) {
	log.Infof("[%s.%s] Sample row: (%s)", job.Schema, job.Table, tag)
	for idx := range columns {
		log.Infof("%s: %v", columns[idx].Name(), rowValues[idx])
	}
}