feat: implement MSSQL data extraction and transformation process
This commit is contained in:
@@ -17,7 +17,7 @@ func buildExtractQueryMssql(job MigrationJob, columns []ColumnType) string {
|
|||||||
sbColumns.WriteString("]")
|
sbColumns.WriteString("]")
|
||||||
|
|
||||||
if col.unifiedType == "GEOMETRY" {
|
if col.unifiedType == "GEOMETRY" {
|
||||||
sbColumns.WriteString(".STAsWKB() AS [")
|
sbColumns.WriteString(".STAsBinary() AS [")
|
||||||
sbColumns.WriteString(col.name)
|
sbColumns.WriteString(col.name)
|
||||||
sbColumns.WriteString("]")
|
sbColumns.WriteString("]")
|
||||||
}
|
}
|
||||||
|
|||||||
52
cmd/go_migrate/extractor.go
Normal file
52
cmd/go_migrate/extractor.go
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"database/sql"
|
||||||
|
|
||||||
|
_ "github.com/microsoft/go-mssqldb"
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
)
|
||||||
|
|
||||||
|
// UnknownRowValues holds the scanned values of a single result row, one entry
// per selected column. The values are kept as `any` because their concrete
// types are whatever the database driver produced during Scan.
type UnknownRowValues []any
|
||||||
|
|
||||||
|
func extractFromMssql(ctx context.Context, job MigrationJob, columns []ColumnType, chunkSize int, db *sql.DB, out chan<- []UnknownRowValues) error {
|
||||||
|
query := buildExtractQueryMssql(job, columns)
|
||||||
|
log.Debug("Query used to extract data from mssql: ", query)
|
||||||
|
|
||||||
|
rows, err := db.QueryContext(ctx, query)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer rows.Close()
|
||||||
|
|
||||||
|
rowsChunk := make([]UnknownRowValues, 0, chunkSize)
|
||||||
|
|
||||||
|
for rows.Next() {
|
||||||
|
values := make([]any, len(columns))
|
||||||
|
scanArgs := make([]any, len(columns))
|
||||||
|
|
||||||
|
for i := range values {
|
||||||
|
scanArgs[i] = &values[i]
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := rows.Scan(scanArgs...); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
rowsChunk = append(rowsChunk, values)
|
||||||
|
|
||||||
|
if len(rowsChunk) >= chunkSize {
|
||||||
|
out <- rowsChunk
|
||||||
|
rowsChunk = make([]UnknownRowValues, 0, chunkSize)
|
||||||
|
log.Infof("Chunk send... %+v", job)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(rowsChunk) > 0 {
|
||||||
|
out <- rowsChunk
|
||||||
|
log.Infof("Chunk send... %+v", job)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
@@ -18,6 +18,12 @@ var migrationJobs []MigrationJob = []MigrationJob{
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Tuning parameters for the migration pipeline.
const (
	// NumExtractors is presumably the number of concurrent extractors; it is
	// not referenced in the code visible here. TODO confirm.
	NumExtractors int = 2

	// ChunkSize is the number of rows grouped into one chunk; it is passed
	// to extractFromMssql as chunkSize.
	ChunkSize int = 20

	// QueueSize is the buffer capacity of the channel that carries
	// extracted chunks.
	QueueSize int = 10
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
configureLog()
|
configureLog()
|
||||||
log.Info("Starting migration...")
|
log.Info("Starting migration...")
|
||||||
|
|||||||
@@ -1,7 +1,10 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"database/sql"
|
"database/sql"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
"github.com/jackc/pgx/v5/pgxpool"
|
"github.com/jackc/pgx/v5/pgxpool"
|
||||||
|
|
||||||
_ "github.com/microsoft/go-mssqldb"
|
_ "github.com/microsoft/go-mssqldb"
|
||||||
@@ -17,12 +20,16 @@ func processMigrationJob(sourceDb *sql.DB, targetDb *pgxpool.Pool, job Migration
|
|||||||
logColumnTypes(sourceColTypes, "Source col types")
|
logColumnTypes(sourceColTypes, "Source col types")
|
||||||
logColumnTypes(targetColTypes, "Target col types")
|
logColumnTypes(targetColTypes, "Target col types")
|
||||||
|
|
||||||
sourceQuery := buildExtractQueryMssql(job, sourceColTypes)
|
chRowsExtract := make(chan []UnknownRowValues, QueueSize)
|
||||||
|
|
||||||
log.Debug(sourceQuery)
|
mssqlContext := context.Background()
|
||||||
|
|
||||||
targetQuery := buildExtractQueryPostgres(job, targetColTypes)
|
if err := extractFromMssql(mssqlContext, job, sourceColTypes, ChunkSize, sourceDb, chRowsExtract); err != nil {
|
||||||
log.Debug(targetQuery)
|
log.Fatal("Unexpected error extrating data from mssql: ", err)
|
||||||
|
}
|
||||||
|
close(chRowsExtract)
|
||||||
|
|
||||||
|
transformRowsMssql(job, sourceColTypes, chRowsExtract)
|
||||||
}
|
}
|
||||||
|
|
||||||
func logColumnTypes(columnTypes []ColumnType, label string) {
|
func logColumnTypes(columnTypes []ColumnType, label string) {
|
||||||
@@ -32,3 +39,22 @@ func logColumnTypes(columnTypes []ColumnType, label string) {
|
|||||||
log.Infof("%+v", col)
|
log.Infof("%+v", col)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func transformRowsMssql(job MigrationJob, columns []ColumnType, in <-chan []UnknownRowValues) {
|
||||||
|
for rows := range in {
|
||||||
|
log.Debug("Chunk received, transforming...")
|
||||||
|
|
||||||
|
for i, rowValues := range rows {
|
||||||
|
if i%100 == 0 {
|
||||||
|
logSampleRow(job, columns, rowValues, fmt.Sprintf("row %d", i))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func logSampleRow(job MigrationJob, columns []ColumnType, rowValues UnknownRowValues, tag string) {
|
||||||
|
log.Infof("[%s.%s] Sample row: (%s)", job.Schema, job.Table, tag)
|
||||||
|
for i, col := range columns {
|
||||||
|
log.Infof("%s: %v", col.Name(), rowValues[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user