Compare commits: 1c3db39b21...feat/mssql
3 Commits

| Author | SHA1 | Date |
|---|---|---|
|  | 63cf26e1ab |  |
|  | 846a49d40c |  |
|  | 93b302db8e |  |
@@ -1,77 +0,0 @@
-package main
-
-import (
-    "context"
-    "database/sql"
-    "errors"
-    "fmt"
-    "sync"
-    "time"
-
-    "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
-    "github.com/jackc/pgx/v5/pgxpool"
-    _ "github.com/microsoft/go-mssqldb"
-    log "github.com/sirupsen/logrus"
-)
-
-func connectToSqlServer() (*sql.DB, error) {
-    db, err := sql.Open("sqlserver", config.App.SourceDbUrl)
-    if err != nil {
-        return nil, fmt.Errorf("Unable to connect to sqlserver: %w", err)
-    }
-
-    ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
-    defer cancel()
-
-    if err := db.PingContext(ctx); err != nil {
-        return nil, fmt.Errorf("Unable to ping sqlserver: %w", err)
-    }
-
-    return db, nil
-}
-
-func connectToPostgres() (*pgxpool.Pool, error) {
-    ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
-    defer cancel()
-
-    pool, err := pgxpool.New(ctx, config.App.TargetDbUrl)
-    if err != nil {
-        return nil, fmt.Errorf("Unable to connect to postgres: %w", err)
-    }
-
-    if err := pool.Ping(ctx); err != nil {
-        pool.Close()
-        return nil, fmt.Errorf("Unable to ping postgres: %w", err)
-    }
-
-    return pool, nil
-}
-
-func connectToDatabases() (*sql.DB, *pgxpool.Pool, error) {
-    var sourceDbErr, targetDbErr error
-    var sourceDb *sql.DB
-    var targetDb *pgxpool.Pool
-    var wg sync.WaitGroup
-
-    wg.Go(func() {
-        sourceDb, sourceDbErr = connectToSqlServer()
-        if sourceDbErr != nil {
-            log.Error("Unable to connect to source db: ", sourceDbErr)
-        }
-    })
-
-    wg.Go(func() {
-        targetDb, targetDbErr = connectToPostgres()
-        if targetDbErr != nil {
-            log.Error("Unable to connect to target db: ", targetDbErr)
-        }
-    })
-
-    wg.Wait()
-
-    if sourceDbErr != nil || targetDbErr != nil {
-        return nil, nil, errors.New("Unable to connect to databases")
-    }
-
-    return sourceDb, targetDb, nil
-}
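Reviewer note: the deleted file above fanned out the SQL Server and Postgres connections with `sync.WaitGroup.Go`, which requires Go 1.25+. Since the remaining code already imports `golang.org/x/sync/errgroup`, here is a minimal sketch of the same fan-out written with an errgroup. It assumes the `connectToSqlServer`/`connectToPostgres` helpers and the imports (`database/sql`, `pgxpool`, `fmt`, `errgroup`) shown above, and is illustrative only, not necessarily how this branch actually replaced the file.

```go
// Sketch only: the concurrent connection setup expressed with
// golang.org/x/sync/errgroup instead of sync.WaitGroup.Go (Go 1.25+).
// connectToSqlServer and connectToPostgres are the helpers from the
// deleted file above.
func connectToDatabases() (*sql.DB, *pgxpool.Pool, error) {
    var (
        sourceDb *sql.DB
        targetDb *pgxpool.Pool
    )

    var g errgroup.Group
    g.Go(func() error {
        var err error
        sourceDb, err = connectToSqlServer()
        return err
    })
    g.Go(func() error {
        var err error
        targetDb, err = connectToPostgres()
        return err
    })

    // Wait blocks until both goroutines finish and returns the first error.
    if err := g.Wait(); err != nil {
        return nil, nil, fmt.Errorf("unable to connect to databases: %w", err)
    }

    return sourceDb, targetDb, nil
}
```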
@@ -11,6 +11,7 @@ import (
     "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/loaders"
     "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/table_analyzers"
     "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/transformers"
+    "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
     log "github.com/sirupsen/logrus"
     "golang.org/x/sync/errgroup"
 )
@@ -95,10 +96,10 @@ func processMigrationJobs(
     targetDb dbwrapper.DbWrapper,
     jobs []config.Job,
     maxParallelWorkers int,
-) []JobResult {
+) []models.JobResult {
     if len(jobs) == 0 {
         log.Info("No migration jobs configured")
-        return []JobResult{}
+        return []models.JobResult{}
     }

     if maxParallelWorkers <= 0 {
@@ -111,7 +112,7 @@ func processMigrationJobs(

     log.Infof("Starting migration with %d parallel worker(s)", maxParallelWorkers)

-    chJobResults := make(chan JobResult, len(jobs))
+    chJobResults := make(chan models.JobResult, len(jobs))
     chJobs := make(chan config.Job, len(jobs))
     var wgJobs sync.WaitGroup

@@ -119,7 +120,7 @@ func processMigrationJobs(
     targetTableAnalyzer := table_analyzers.NewPostgresTableAnalyzer(targetDb)
     extractor := extractors.NewMssqlExtractor(sourceDb)
     transformer := transformers.NewMssqlTransformer()
-    loader := loaders.NewPostgresLoader(targetDb)
+    loader := loaders.NewGenericLoader(targetDb)

     for i := range maxParallelWorkers {
         wgJobs.Go(func() {
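Reviewer note: the worker loop above leans on two recent language features that pin the toolchain version: ranging over an integer (`for i := range maxParallelWorkers`, Go 1.22+) and `sync.WaitGroup.Go` (`wgJobs.Go(...)`, Go 1.25+). For reference, a minimal sketch of the pre-1.25 equivalent, assuming the same `wgJobs`, `maxParallelWorkers`, and channels from the surrounding function:

```go
// Sketch only: classic Add/Done form of the wgJobs.Go worker loop above.
for i := 0; i < maxParallelWorkers; i++ {
    wgJobs.Add(1)
    go func() {
        defer wgJobs.Done()
        // ... worker body: take jobs from chJobs, send results to chJobResults ...
    }()
}
```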
@@ -151,7 +152,7 @@ func processMigrationJobs(
         close(chJobResults)
     }()

-    var finalResults []JobResult
+    var finalResults []models.JobResult
     for res := range chJobResults {
         finalResults = append(finalResults, res)
     }

@@ -1,13 +0,0 @@
-package main
-
-import "time"
-
-type JobResult struct {
-    JobName    string
-    StartTime  time.Time
-    Duration   time.Duration
-    RowsRead   int64
-    RowsLoaded int64
-    RowsFailed int64
-    Error      error
-}
@@ -26,11 +26,11 @@ func processMigrationJob(
     transformer etl.Transformer,
     loader etl.Loader,
     job config.Job,
-) JobResult {
+) models.JobResult {
     localCtx, cancel := context.WithCancel(ctx)
     defer cancel()

-    result := JobResult{
+    result := models.JobResult{
         JobName:   job.Name,
         StartTime: time.Now(),
     }
@@ -129,7 +129,7 @@ func processMigrationJob(

     for range maxExtractors {
         wgExtractors.Go(func() {
-            extractor.Consume(
+            extractor.Exec(
                 localCtx,
                 job.SourceTable,
                 sourceColTypes,
@@ -1,6 +1,6 @@
 max_parallel_workers: 4
 source_db_type: sqlserver
-target_db_type: postgres
+target_db_type: sqlserver

 defaults:
   max_extractors: 2
@@ -9,7 +9,6 @@ import (
     "strings"
     "sync"
     "sync/atomic"
     "time"

     "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
     "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/convert"
@@ -44,9 +43,9 @@ func buildExtractQueryMssql(
     for i, col := range columns {
         fmt.Fprintf(&sbQuery, "[%s]", col.Name())

-        if col.Type() == "GEOMETRY" {
-            fmt.Fprintf(&sbQuery, ".STAsBinary() AS [%s]", col.Name())
-        }
+        // if col.Type() == "GEOMETRY" {
+        //     fmt.Fprintf(&sbQuery, ".STAsBinary() AS [%s]", col.Name())
+        // }

         if i < len(columns)-1 {
             sbQuery.WriteString(", ")
@@ -100,7 +99,7 @@ func errorFromLastRow(
     }
 }

-func (mssqlEx *MssqlExtractor) Extract(
+func (mssqlEx *MssqlExtractor) ProcessPartition(
     ctx context.Context,
     tableInfo config.SourceTableInfo,
     columns []models.ColumnType,
@@ -108,7 +107,7 @@ func (mssqlEx *MssqlExtractor) Extract(
     partition models.Partition,
     indexPrimaryKey int,
     chBatchesOut chan<- models.Batch,
-) (int64, error) {
+) (int, error) {
     query := buildExtractQueryMssql(tableInfo, columns, partition.HasRange, partition.Range.IsMinInclusive)

     var queryArgs []any
@@ -119,7 +118,7 @@ func (mssqlEx *MssqlExtractor) Extract(
         )
     }

-    var rowsRead int64 = 0
+    rowsRead := 0
     rows, err := mssqlEx.db.Query(ctx, query, queryArgs...)
     if err != nil {
         return rowsRead, &custom_errors.ExtractorError{Partition: partition, HasLastId: false, Msg: err.Error()}
@@ -190,67 +189,14 @@ func (mssqlEx *MssqlExtractor) Extract(
     return rowsRead, nil
 }

-func (mssqlEx *MssqlExtractor) ExtractWithRetries(
-    ctx context.Context,
-    tableInfo config.SourceTableInfo,
-    columns []models.ColumnType,
-    batchSize int,
-    partition models.Partition,
-    indexPrimaryKey int,
-    chBatchesOut chan<- models.Batch,
-) (int64, error) {
-    var totalRowsRead int64
-    var fatalErr error
-    delay := time.Duration(time.Second * 1)
-    currentParitition := partition
-
-    for fatalErr != nil || currentParitition.RetryCounter < 3 {
-        currentParitition.RetryCounter++
-        rowsRead, err := mssqlEx.Extract(
-            ctx,
-            tableInfo,
-            columns,
-            batchSize,
-            currentParitition,
-            indexPrimaryKey,
-            chBatchesOut,
-        )
-
-        if rowsRead > 0 {
-            totalRowsRead += int64(rowsRead)
-        }
-
-        if err != nil {
-            var exError *custom_errors.ExtractorError
-            if errors.As(err, &exError) {
-                if exError.HasLastId {
-                    currentParitition.ParentId = exError.Partition.Id
-                    currentParitition.Id = uuid.New()
-                    currentParitition.Range.Min = exError.LastId
-                    currentParitition.Range.IsMinInclusive = false
-                }
-
-                time.Sleep(delay)
-            } else {
-                fatalErr = err
-            }
-
-            continue
-        }
-
-        break
-    }
-
-    return totalRowsRead, fatalErr
-}
-
-func (mssqlEx *MssqlExtractor) Consume(
+func (mssqlEx *MssqlExtractor) Exec(
     ctx context.Context,
     tableInfo config.SourceTableInfo,
     columns []models.ColumnType,
     batchSize int,
     chPartitionsIn <-chan models.Partition,
     chBatchesOut chan<- models.Batch,
     chErrorsOut chan<- custom_errors.ExtractorError,
     chJobErrorsOut chan<- custom_errors.JobError,
     wgActivePartitions *sync.WaitGroup,
     rowsRead *int64,
@@ -285,7 +231,7 @@ func (mssqlEx *MssqlExtractor) Consume(
                 return
             }

-            rowsReadResult, err := mssqlEx.ExtractWithRetries(
+            rowsReadResult, err := mssqlEx.ProcessPartition(
                 ctx,
                 tableInfo,
                 columns,
@@ -300,8 +246,15 @@ func (mssqlEx *MssqlExtractor) Consume(
             }

             if err != nil {
+                var exError *custom_errors.ExtractorError
                 var jobError *custom_errors.JobError
-                if errors.As(err, &jobError) {
+                if errors.As(err, &exError) {
                     select {
                     case <-ctx.Done():
                         return
+                    case chErrorsOut <- *exError:
+                    }
+                } else if errors.As(err, &jobError) {
+                    select {
+                    case <-ctx.Done():
+                        return
@@ -311,7 +264,7 @@ func (mssqlEx *MssqlExtractor) Consume(
                 select {
                 case <-ctx.Done():
                     return
-                case chJobErrorsOut <- custom_errors.JobError{ShouldCancelJob: false, Msg: err.Error(), Prev: err}:
+                case chErrorsOut <- custom_errors.ExtractorError{Partition: partition, Msg: err.Error()}:
                 }
             }

@@ -51,7 +51,7 @@ func buildExtractQueryPostgres(sourceDbInfo config.SourceTableInfo, columns []mo
     return fmt.Sprintf(`SELECT %s FROM "%s"."%s" ORDER BY "%s" ASC`, sbColumns.String(), sourceDbInfo.Schema, sourceDbInfo.Table, sourceDbInfo.PrimaryKey)
 }

-func (postgresEx *PostgresExtractor) Extract(
+func (postgresEx *PostgresExtractor) ProcessPartition(
     ctx context.Context,
     tableInfo config.SourceTableInfo,
     columns []models.ColumnType,
@@ -110,7 +110,7 @@ func (postgresEx *PostgresExtractor) Extract(
     return rowsRead, nil
 }

-func (postgresEx *PostgresExtractor) Consume(
+func (postgresEx *PostgresExtractor) Exec(
     ctx context.Context,
     tableInfo config.SourceTableInfo,
     columns []models.ColumnType,
@@ -15,31 +15,21 @@ import (
     "github.com/jackc/pgx/v5/pgconn"
 )

-type PostgresLoader struct {
+type GenericLoader struct {
     db dbwrapper.DbWrapper
 }

-func NewPostgresLoader(db dbwrapper.DbWrapper) etl.Loader {
-    return &PostgresLoader{db: db}
+func NewGenericLoader(db dbwrapper.DbWrapper) etl.Loader {
+    return &GenericLoader{db: db}
 }

-func mapSlice[T any, V any](input []T, mapper func(T) V) []V {
-    result := make([]V, len(input))
-
-    for i, v := range input {
-        result[i] = mapper(v)
-    }
-
-    return result
-}
-
-func (postgresLd *PostgresLoader) ProcessBatch(
+func (gl *GenericLoader) ProcessBatch(
     ctx context.Context,
     tableInfo config.TargetTableInfo,
     colNames []string,
     batch models.Batch,
 ) (int, error) {
-    _, err := postgresLd.db.SaveMassive(
+    _, err := gl.db.SaveMassive(
         ctx,
         tableInfo.Schema,
         tableInfo.Table,
@@ -65,7 +55,7 @@ func (postgresLd *PostgresLoader) ProcessBatch(
     return len(batch.Rows), nil
 }

-func (postgresLd *PostgresLoader) Exec(
+func (gl *GenericLoader) Exec(
     ctx context.Context,
     tableInfo config.TargetTableInfo,
     columns []models.ColumnType,
@@ -92,7 +82,7 @@ func (postgresLd *PostgresLoader) Exec(
             return
         }

-        processedRows, err := postgresLd.ProcessBatch(ctx, tableInfo, colNames, batch)
+        processedRows, err := gl.ProcessBatch(ctx, tableInfo, colNames, batch)

         if err != nil {
             var ldError *custom_errors.LoaderError
@@ -1 +0,0 @@
-package loaders
internal/app/etl/loaders/utils.go (new file, 11 lines)
@@ -0,0 +1,11 @@
+package loaders
+
+func mapSlice[T any, V any](input []T, mapper func(T) V) []V {
+    result := make([]V, len(input))
+
+    for i, v := range input {
+        result[i] = mapper(v)
+    }
+
+    return result
+}
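Reviewer note: `mapSlice` is the same helper that was removed from the loader file above; it has simply been moved into its own `utils.go`. A self-contained sketch of how it behaves follows; the column-quoting closure and the `main` wrapper are made up purely for illustration.

```go
package main

import "fmt"

// Copied from internal/app/etl/loaders/utils.go (see the hunk above) so this
// sketch compiles on its own.
func mapSlice[T any, V any](input []T, mapper func(T) V) []V {
    result := make([]V, len(input))
    for i, v := range input {
        result[i] = mapper(v)
    }
    return result
}

func main() {
    cols := []string{"id", "name", "created_at"}
    // Wrap each column name in brackets, e.g. for a SQL Server query.
    quoted := mapSlice(cols, func(c string) string { return fmt.Sprintf("[%s]", c) })
    fmt.Println(quoted) // [[id] [name] [created_at]]
}
```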
@@ -39,6 +39,8 @@ JOIN sys.schemas s ON st.schema_id = s.schema_id
 WHERE s.name = @schema AND st.name = @table AND (c.is_hidden = 0 OR (c.graph_type IS NOT NULL AND c.name LIKE '$%'))
 ORDER BY c.column_id;`
+
+// AND c.name NOT LIKE '$%'

 type rawColumnMssql struct {
     name     string
     userType string
@@ -4,7 +4,6 @@ import (
     "context"
     "errors"
     "sync"
     "time"

     "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
     "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
@@ -18,46 +17,7 @@ func NewMssqlTransformer() etl.Transformer {
 }

 func computeTransformationPlan(columns []models.ColumnType) []etl.ColumnTransformPlan {
-    var plan []etl.ColumnTransformPlan
-
-    for i, col := range columns {
-        switch col.SystemType() {
-        case "uniqueidentifier":
-            plan = append(plan, etl.ColumnTransformPlan{
-                Index: i,
-                Fn: func(v any) (any, error) {
-                    if b, ok := v.([]byte); ok && b != nil {
-                        return mssqlUuidToBigEndian(b)
-                    }
-                    return v, nil
-                },
-            })
-
-        case "geometry", "geography":
-            plan = append(plan, etl.ColumnTransformPlan{
-                Index: i,
-                Fn: func(v any) (any, error) {
-                    if b, ok := v.([]byte); ok && b != nil {
-                        return wkbToEwkbWithSrid(b, 4326)
-                    }
-                    return v, nil
-                },
-            })
-
-        case "datetime", "datetime2":
-            plan = append(plan, etl.ColumnTransformPlan{
-                Index: i,
-                Fn: func(v any) (any, error) {
-                    if t, ok := v.(time.Time); ok {
-                        return ensureUTC(t), nil
-                    }
-                    return v, nil
-                },
-            })
-        }
-    }
-
-    return plan
+    return []etl.ColumnTransformPlan{}
 }

 const processBatchCtxCheck = 4096
@@ -10,7 +10,7 @@ import (
 )

 type Extractor interface {
-    Extract(
+    ProcessPartition(
         ctx context.Context,
         tableInfo config.SourceTableInfo,
         columns []models.ColumnType,
@@ -18,25 +18,16 @@ type Extractor interface {
         partition models.Partition,
         indexPrimaryKey int,
         chBatchesOut chan<- models.Batch,
-    ) (int64, error)
+    ) (int, error)

-    ExtractWithRetries(
-        ctx context.Context,
-        tableInfo config.SourceTableInfo,
-        columns []models.ColumnType,
-        batchSize int,
-        partition models.Partition,
-        indexPrimaryKey int,
-        chBatchesOut chan<- models.Batch,
-    ) (int64, error)
-
-    Consume(
+    Exec(
         ctx context.Context,
         tableInfo config.SourceTableInfo,
         columns []models.ColumnType,
         batchSize int,
         chPartitionsIn <-chan models.Partition,
         chBatchesOut chan<- models.Batch,
         chErrorsOut chan<- custom_errors.ExtractorError,
         chJobErrorsOut chan<- custom_errors.JobError,
         wgActivePartitions *sync.WaitGroup,
         rowsRead *int64,
@@ -1,6 +1,10 @@
 package models

-import "github.com/google/uuid"
+import (
+    "time"
+
+    "github.com/google/uuid"
+)

 type UnknownRowValues = []any

@@ -25,3 +29,13 @@ type Partition struct {
     HasRange     bool
     RetryCounter int
 }
+
+type JobResult struct {
+    JobName    string
+    StartTime  time.Time
+    Duration   time.Duration
+    RowsRead   int64
+    RowsLoaded int64
+    RowsFailed int64
+    Error      error
+}