feat: enhance concurrency management by adding WaitGroup support in extractors and loaders

This commit is contained in:
2026-04-09 00:22:30 -05:00
parent dc632361e5
commit 51480015ba
8 changed files with 82 additions and 68 deletions

View File

@@ -32,89 +32,82 @@ func processMigrationJob(sourceDb *sql.DB, targetDb *pgxpool.Pool, job Migration
log.Error("Unexpected error calculating batch ranges: ", err)
}
chJobErrors := make(chan JobError)
defer close(chJobErrors)
go func() {
if err := jobErrorHandler(ctx, chJobErrors); err != nil {
if ctx.Err() == nil {
cancel()
}
}
}()
chBatches := make(chan Batch, len(batches))
chExtractorErrors := make(chan ExtractorError, len(batches))
go func() {
extractorErrorHandler(ctx, chExtractorErrors, chBatches, chJobErrors)
}()
chJobErrors := make(chan JobError, 100)
chBatches := make(chan Batch, QueueSize)
chExtractorErrors := make(chan ExtractorError, QueueSize)
chChunksRaw := make(chan Chunk, QueueSize)
maxExtractors := min(NumExtractors, len(batches))
var wgMssqlExtractors sync.WaitGroup
log.Infof("Starting %d MSSQL extractors...", maxExtractors)
extractStartTime := time.Now()
for range maxExtractors {
wgMssqlExtractors.Go(func() {
extractFromMssql(ctx, sourceDb, job, sourceColTypes, ChunkSize, chBatches, chChunksRaw, chExtractorErrors, chJobErrors)
})
}
go func() {
for _, br := range batches {
chBatches <- br
}
close(chBatches)
close(chExtractorErrors)
}()
go func() {
wgMssqlExtractors.Wait()
close(chChunksRaw)
log.Infof("Extraction completed in %v", time.Since(extractStartTime))
}()
chChunksTransformed := make(chan Chunk, QueueSize)
var wgMssqlTransformers sync.WaitGroup
chLoadersErrors := make(chan LoaderError, QueueSize)
var wgActiveBatches sync.WaitGroup
var wgActiveChunks sync.WaitGroup
var wgExtractors sync.WaitGroup
var wgTransformers sync.WaitGroup
var wgLoaders sync.WaitGroup
go jobErrorHandler(ctx, chJobErrors)
go extractorErrorHandler(ctx, chExtractorErrors, chBatches, chJobErrors, &wgActiveBatches)
go loaderErrorHandler(ctx, chLoadersErrors, chChunksTransformed, chJobErrors, &wgActiveChunks)
maxExtractors := min(NumExtractors, len(batches))
log.Infof("Starting %d extractors...", maxExtractors)
extractStartTime := time.Now()
log.Infof("Starting %d MSSQL transformers...", maxExtractors)
transformStartTime := time.Now()
for range maxExtractors {
wgMssqlTransformers.Go(func() {
transformRowsMssql(ctx, sourceColTypes, chChunksRaw, chChunksTransformed, chJobErrors)
wgExtractors.Go(func() {
extractFromMssql(ctx, sourceDb, job, sourceColTypes, ChunkSize, chBatches, chChunksRaw, chExtractorErrors, chJobErrors, &wgActiveBatches)
})
}
wgActiveBatches.Add(len(batches))
go func() {
wgMssqlTransformers.Wait()
close(chChunksTransformed)
log.Infof("Transformation completed in %v", time.Since(transformStartTime))
for _, batch := range batches {
chBatches <- batch
}
}()
var wgPostgresLoaders sync.WaitGroup
chLoadersErrors := make(chan LoaderError)
log.Infof("Starting %d transformers...", maxExtractors)
transformStartTime := time.Now()
go func() {
loaderErrorHandler(ctx, chLoadersErrors, chChunksTransformed, chJobErrors)
}()
for range maxExtractors {
wgTransformers.Go(func() {
transformRowsMssql(ctx, sourceColTypes, chChunksRaw, chChunksTransformed, chJobErrors, &wgActiveChunks)
})
}
log.Infof("Starting %d PostgreSQL loader(s)...", NumLoaders)
loaderStartTime := time.Now()
loadStartTime := time.Now()
for range NumLoaders {
wgPostgresLoaders.Go(func() {
loadRowsPostgres(ctx, targetDb, job, targetColTypes, chChunksTransformed, chLoadersErrors)
wgLoaders.Go(func() {
loadRowsPostgres(ctx, targetDb, job, targetColTypes, chChunksTransformed, chLoadersErrors, &wgActiveChunks)
})
}
wgPostgresLoaders.Wait()
close(chLoadersErrors)
log.Infof("Loading completed in %v", time.Since(loaderStartTime))
go func() {
wgActiveBatches.Wait()
close(chBatches)
close(chExtractorErrors)
totalDuration := time.Since(jobStartTime)
log.Infof("Migration job completed successfully! Total time: %v", totalDuration)
wgExtractors.Wait()
log.Infof("Extraction completed in %v", time.Since(extractStartTime))
close(chChunksRaw)
wgTransformers.Wait()
log.Infof("Transformation completed in %v", time.Since(transformStartTime))
wgActiveChunks.Wait()
close(chChunksTransformed)
close(chLoadersErrors)
wgLoaders.Wait()
log.Infof("Loading completed in %v", time.Since(loadStartTime))
cancel()
}()
<-ctx.Done()
log.Infof("Migration job completed. Total time: %v", time.Since(jobStartTime))
}
func logColumnTypes(columnTypes []ColumnType, label string) {