package main

import (
	"context"
	"database/sql"
	"sync"
	"time"

	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
	"github.com/jackc/pgx/v5/pgxpool"
	_ "github.com/microsoft/go-mssqldb"
	log "github.com/sirupsen/logrus"
)

// processMigrationJob runs one table migration end-to-end: it computes batch
// ranges for the source table, then drives a bounded worker pipeline of
//
//	batches -> extractors (MSSQL) -> raw chunks -> transformers ->
//	transformed chunks -> loaders (PostgreSQL)
//
// with dedicated error-handler goroutines feeding a job-level error channel.
// The function blocks until jobCtx is cancelled, which happens either when the
// shutdown goroutine observes the pipeline fully drained or when
// jobErrorHandler reports an unrecoverable error.
func processMigrationJob(
	ctx context.Context,
	sourceDb *sql.DB,
	targetDb *pgxpool.Pool,
	job config.Job,
) {
	jobStartTime := time.Now()
	log.Infof("Starting migration job: %s.%s [PK: %s]",
		job.SourceTable.Schema, job.SourceTable.Table, job.SourceTable.PrimaryKey)

	// Column metadata drives both the transform and load stages; without it
	// the job cannot run at all, so failure here aborts the process.
	sourceColTypes, targetColTypes, err := GetColumnTypes(sourceDb, targetDb, job.SourceTable, job.TargetTable)
	if err != nil {
		log.Fatal("Unexpected error: ", err)
	}
	logColumnTypes(sourceColTypes, "Source col types")
	logColumnTypes(targetColTypes, "Target col types")

	jobCtx, cancel := context.WithCancel(ctx)
	defer cancel()

	batches, err := batchGeneratorMssql(jobCtx, sourceDb, job.SourceTable)
	if err != nil {
		// BUG FIX: previously execution fell through after logging, starting
		// the pipeline with a nil batch slice — zero extractors, an instantly
		// drained shutdown sequence, and a bogus "Migration job completed"
		// log line. Abort the job instead.
		log.Error("Unexpected error calculating batch ranges: ", err)
		return
	}

	chJobErrors := make(chan JobError, 50)
	chBatches := make(chan Batch, QueueSize)
	chExtractorErrors := make(chan ExtractorError, QueueSize)
	chChunksRaw := make(chan Chunk, QueueSize)
	chChunksTransformed := make(chan Chunk, QueueSize)
	chLoadersErrors := make(chan LoaderError, QueueSize)

	var wgActiveBatches sync.WaitGroup
	var wgActiveChunks sync.WaitGroup
	var wgExtractors sync.WaitGroup
	var wgTransformers sync.WaitGroup
	var wgLoaders sync.WaitGroup

	// A non-nil return from jobErrorHandler means the job is unrecoverable:
	// cancel the job context so every pipeline stage winds down.
	go func() {
		if err := jobErrorHandler(jobCtx, chJobErrors); err != nil {
			cancel()
		}
	}()
	go extractorErrorHandler(jobCtx, chExtractorErrors, chBatches, chJobErrors, &wgActiveBatches)
	go loaderErrorHandler(jobCtx, chLoadersErrors, chChunksTransformed, chJobErrors, &wgActiveChunks)

	// Never start more extractors than there are batches to hand out.
	maxExtractors := min(NumExtractors, len(batches))
	log.Infof("Starting %d extractors...", maxExtractors)
	extractStartTime := time.Now()
	for range maxExtractors {
		wgExtractors.Go(func() {
			extractFromMssql(jobCtx, sourceDb, job.SourceTable, sourceColTypes, ChunkSize, chBatches, chChunksRaw,
				chExtractorErrors, chJobErrors, &wgActiveBatches)
		})
	}

	// Register every batch with the WaitGroup before the first one is sent,
	// so the count can never transiently reach zero while work remains.
	wgActiveBatches.Add(len(batches))
	go func() {
		for _, batch := range batches {
			chBatches <- batch
		}
	}()

	log.Infof("Starting %d transformers...", maxExtractors)
	transformStartTime := time.Now()
	for range maxExtractors {
		wgTransformers.Go(func() {
			transformRowsMssql(jobCtx, sourceColTypes, chChunksRaw, chChunksTransformed, chJobErrors, &wgActiveChunks)
		})
	}

	log.Infof("Starting %d PostgreSQL loader(s)...", NumLoaders)
	loadStartTime := time.Now()
	for range NumLoaders {
		wgLoaders.Go(func() {
			loadRowsPostgres(jobCtx, targetDb, job.TargetTable, targetColTypes, chChunksTransformed, chLoadersErrors, chJobErrors, &wgActiveChunks)
		})
	}

	// Shutdown sequencing: each stage's input channel is closed only after the
	// upstream accounting WaitGroup drains, letting workers exit via their
	// normal receive loops; cancel() finally releases the blocked caller.
	go func() {
		wgActiveBatches.Wait()
		close(chBatches)
		// NOTE(review): chExtractorErrors is closed before wgExtractors.Wait();
		// this is only safe if extractors never send on it after their final
		// batch is acknowledged — confirm against extractFromMssql. The same
		// ordering question applies to chLoadersErrors below.
		close(chExtractorErrors)
		wgExtractors.Wait()
		log.Infof("Extraction completed in %v", time.Since(extractStartTime))
		close(chChunksRaw)
		wgTransformers.Wait()
		log.Infof("Transformation completed in %v", time.Since(transformStartTime))
		wgActiveChunks.Wait()
		close(chChunksTransformed)
		close(chLoadersErrors)
		wgLoaders.Wait()
		log.Infof("Loading completed in %v", time.Since(loadStartTime))
		cancel()
	}()

	// Block until the pipeline drains or a fatal job error cancels the context.
	<-jobCtx.Done()
	log.Infof("Migration job completed. Total time: %v", time.Since(jobStartTime))
}

// logColumnTypes debug-logs each column descriptor under the given label.
func logColumnTypes(columnTypes []ColumnType, label string) {
	log.Debug(label)
	for _, col := range columnTypes {
		log.Debugf("%+v", col)
	}
}

// logSampleRow info-logs one row's values alongside their column names and
// dynamic Go types, tagged so samples from different pipeline stages can be
// told apart. Assumes rowValues is positionally aligned with columns —
// an out-of-range index panics if it is shorter.
func logSampleRow(
	schema string,
	table string,
	columns []ColumnType,
	rowValues UnknownRowValues,
	tag string,
) {
	log.Infof("[%s.%s] Sample row: (%s)", schema, table, tag)
	for i, col := range columns {
		log.Infof("%s (%T): %v", col.Name(), rowValues[i], rowValues[i])
	}
}