Files
go-migrate/cmd/go_migrate/process.go

174 lines
4.0 KiB
Go

package main
import (
"context"
"database/sql"
"sync"
"sync/atomic"
"time"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
"github.com/jackc/pgx/v5/pgxpool"
_ "github.com/microsoft/go-mssqldb"
log "github.com/sirupsen/logrus"
)
// processMigrationJob runs a single ETL job end to end: it computes the
// source/target column types, partitions the source table into batch ranges,
// then fans work through extractor -> transformer -> loader goroutine stages
// connected by bounded channels, blocking until the pipeline drains, the job
// fails, or the parent ctx is cancelled.
//
// The returned JobResult carries the job name, wall-clock duration, atomic
// row counters, and the first fatal error (if any). The external interface is
// unchanged from the original.
func processMigrationJob(
	ctx context.Context,
	sourceDb *sql.DB,
	targetDb *pgxpool.Pool,
	extractor etl.Extractor,
	transformer etl.Transformer,
	loader etl.Loader,
	job config.Job,
) JobResult {
	result := JobResult{
		JobName:   job.Name,
		StartTime: time.Now(),
	}
	// Shared counters updated atomically by the worker stages.
	// NOTE(review): rowsFailed is never passed to any stage below, so
	// result.RowsFailed is always 0 — wire it into the error handlers.
	var rowsRead, rowsLoaded, rowsFailed int64

	sourceColTypes, targetColTypes, err := GetColumnTypes(sourceDb, targetDb, job.SourceTable, job.TargetTable)
	if err != nil {
		result.Error = err
		return result
	}
	logColumnTypes(sourceColTypes, "Source col types")
	logColumnTypes(targetColTypes, "Target col types")

	// jobCtx cancels every stage of this job without touching the parent ctx.
	jobCtx, cancel := context.WithCancel(ctx)
	defer cancel()

	partitions, err := partitionGeneratorMssql(jobCtx, sourceDb, job.SourceTable, job.RowsPerBatch)
	if err != nil {
		// FIX: previously this error was only logged and the job carried on
		// with zero partitions, silently reporting success with 0 rows.
		// Fail the job instead.
		log.Error("Unexpected error calculating batch ranges: ", err)
		result.Error = err
		return result
	}

	chJobErrors := make(chan custom_errors.JobError, job.QueueSize)
	chExtractorErrors := make(chan custom_errors.ExtractorError, job.QueueSize)
	chLoadersErrors := make(chan custom_errors.LoaderError, job.QueueSize)
	chPartitions := make(chan models.Partition, job.QueueSize)
	chBatchesRaw := make(chan models.Batch, job.QueueSize)
	chBatchesTransformed := make(chan models.Batch, job.QueueSize)

	var wgActivePartitions sync.WaitGroup // partitions not yet fully extracted (incl. retries)
	var wgActiveBatches sync.WaitGroup    // batches in flight between transform and load
	var wgExtractors sync.WaitGroup
	var wgTransformers sync.WaitGroup
	var wgLoaders sync.WaitGroup

	// FIX: the job-error handler runs concurrently with the result
	// bookkeeping after <-jobCtx.Done(), and the original wrote result.Error
	// AFTER calling cancel() — a data race that could also lose the error.
	// Capture the error under a mutex and read it back once we wake up;
	// writing before cancel() plus the mutex gives a happens-before edge.
	var jobErrMu sync.Mutex
	var jobErr error
	go func() {
		if err := custom_errors.JobErrorHandler(jobCtx, chJobErrors); err != nil {
			jobErrMu.Lock()
			jobErr = err
			jobErrMu.Unlock()
			cancel()
		}
	}()
	go custom_errors.ExtractorErrorHandler(jobCtx, job.Retry.Attempts, chExtractorErrors, chPartitions, chJobErrors, &wgActivePartitions)
	go custom_errors.LoaderErrorHandler(jobCtx, job.Retry.Attempts, chLoadersErrors, chBatchesTransformed, chJobErrors, &wgActiveBatches)

	// Never start more extractors than there are partitions to consume.
	maxExtractors := min(job.MaxExtractors, len(partitions))
	log.Infof("Starting %d extractor(s)...", maxExtractors)
	for range maxExtractors {
		wgExtractors.Go(func() {
			extractor.Exec(
				jobCtx,
				job.SourceTable,
				sourceColTypes,
				job.ChunkSize,
				chPartitions,
				chBatchesRaw,
				chExtractorErrors,
				chJobErrors,
				&wgActivePartitions,
				&rowsRead,
			)
		})
	}

	// Account for every partition BEFORE feeding any, so the shutdown
	// sequencer below cannot observe a transient zero count and close
	// chPartitions early.
	wgActivePartitions.Add(len(partitions))
	go func() {
		for _, partition := range partitions {
			chPartitions <- partition
		}
	}()

	log.Infof("Starting %d transformer(s)...", maxExtractors)
	for range maxExtractors {
		wgTransformers.Go(func() {
			transformer.Exec(
				jobCtx,
				sourceColTypes,
				chBatchesRaw,
				chBatchesTransformed,
				chJobErrors,
				&wgActiveBatches,
			)
		})
	}

	log.Infof("Starting %d loader(s)...", job.MaxLoaders)
	for range job.MaxLoaders {
		wgLoaders.Go(func() {
			loader.Exec(
				jobCtx,
				job.TargetTable,
				targetColTypes,
				chBatchesTransformed,
				chLoadersErrors,
				chJobErrors,
				&wgActiveBatches,
				&rowsLoaded,
			)
		})
	}

	// Shutdown sequencer: close each stage's input only once the upstream
	// stage is fully done, then cancel jobCtx to release the wait below.
	// Only this goroutine (the sole sender side) closes the channels.
	go func() {
		wgActivePartitions.Wait()
		close(chPartitions)
		close(chExtractorErrors)
		wgExtractors.Wait()
		close(chBatchesRaw)
		wgTransformers.Wait()
		wgActiveBatches.Wait()
		close(chBatchesTransformed)
		close(chLoadersErrors)
		wgLoaders.Wait()
		cancel()
	}()

	// Wake on pipeline completion, job failure, or parent cancellation.
	<-jobCtx.Done()

	jobErrMu.Lock()
	result.Error = jobErr
	jobErrMu.Unlock()
	// FIX: only report parent-context cancellation when no more specific
	// job error was recorded (the original unconditionally overwrote it).
	if result.Error == nil && ctx.Err() != nil {
		result.Error = ctx.Err()
	}

	result.Duration = time.Since(result.StartTime)
	result.RowsRead = atomic.LoadInt64(&rowsRead)
	result.RowsLoaded = atomic.LoadInt64(&rowsLoaded)
	result.RowsFailed = atomic.LoadInt64(&rowsFailed)
	return result
}
// logColumnTypes writes the given label at debug level, followed by one
// debug line per entry of columnTypes rendered with %+v (field names shown).
func logColumnTypes(columnTypes []models.ColumnType, label string) {
	log.Debug(label)
	for i := range columnTypes {
		log.Debugf("%+v", columnTypes[i])
	}
}