feat: implement batch processing for MSSQL extraction and transformation with range handling

This commit is contained in:
2026-04-07 23:39:55 -05:00
parent 8903a04f4d
commit 8afdb45318
5 changed files with 164 additions and 27 deletions

View File

@@ -24,24 +24,56 @@ func processMigrationJob(sourceDb *sql.DB, targetDb *pgxpool.Pool, job Migration
logColumnTypes(sourceColTypes, "Source col types")
logColumnTypes(targetColTypes, "Target col types")
chRowsExtract := make(chan []UnknownRowValues, QueueSize)
chRowsTransform := make(chan []UnknownRowValues)
mssqlCtx := context.Background()
batchRanges, err := calculateBatchMetrics(mssqlCtx, sourceDb, job)
if err != nil {
log.Error("Unexpected error calculating batch ranges: ", err)
}
chBatchRanges := make(chan BatchRange, len(batchRanges))
maxExtractors := min(NumExtractors, len(batchRanges))
chRowsExtract := make(chan []UnknownRowValues, QueueSize)
var wgMssqlExtractors sync.WaitGroup
log.Infof("Starting %d MSSQL extractors...", maxExtractors)
extractStartTime := time.Now()
for range maxExtractors {
wgMssqlExtractors.Go(func() {
for br := range chBatchRanges {
if err := extractFromMssql(mssqlCtx, sourceDb, job, sourceColTypes, ChunkSize, br, chRowsExtract); err != nil {
log.Error("Unexpected error extracting data from mssql: ", err)
}
}
})
}
go func() {
log.Info("Starting extraction from MSSQL...")
extractStartTime := time.Now()
if err := extractFromMssql(mssqlCtx, job, sourceColTypes, ChunkSize, sourceDb, chRowsExtract); err != nil {
log.Error("Unexpected error extracting data from mssql: ", err)
for _, br := range batchRanges {
chBatchRanges <- br
}
close(chBatchRanges)
}()
go func() {
wgMssqlExtractors.Wait()
close(chRowsExtract)
log.Infof("Extraction completed in %v", time.Since(extractStartTime))
}()
chRowsTransform := make(chan []UnknownRowValues, QueueSize)
var wgMssqlTransformers sync.WaitGroup
log.Infof("Starting %d MSSQL transformers...", maxExtractors)
transformStartTime := time.Now()
for range maxExtractors {
wgMssqlTransformers.Go(func() {
transformRowsMssql(sourceColTypes, chRowsExtract, chRowsTransform)
})
}
go func() {
log.Info("Starting transformation of rows...")
transformStartTime := time.Now()
transformRowsMssql(sourceColTypes, chRowsExtract, chRowsTransform)
wgMssqlTransformers.Wait()
close(chRowsTransform)
log.Infof("Transformation completed in %v", time.Since(transformStartTime))
}()