feat: implement batch processing for MSSQL extraction and transformation with range handling
This commit is contained in:
@@ -24,24 +24,56 @@ func processMigrationJob(sourceDb *sql.DB, targetDb *pgxpool.Pool, job Migration
|
||||
logColumnTypes(sourceColTypes, "Source col types")
|
||||
logColumnTypes(targetColTypes, "Target col types")
|
||||
|
||||
chRowsExtract := make(chan []UnknownRowValues, QueueSize)
|
||||
chRowsTransform := make(chan []UnknownRowValues)
|
||||
mssqlCtx := context.Background()
|
||||
batchRanges, err := calculateBatchMetrics(mssqlCtx, sourceDb, job)
|
||||
if err != nil {
|
||||
log.Error("Unexpected error calculating batch ranges: ", err)
|
||||
}
|
||||
|
||||
chBatchRanges := make(chan BatchRange, len(batchRanges))
|
||||
|
||||
maxExtractors := min(NumExtractors, len(batchRanges))
|
||||
chRowsExtract := make(chan []UnknownRowValues, QueueSize)
|
||||
var wgMssqlExtractors sync.WaitGroup
|
||||
|
||||
log.Infof("Starting %d MSSQL extractors...", maxExtractors)
|
||||
extractStartTime := time.Now()
|
||||
for range maxExtractors {
|
||||
wgMssqlExtractors.Go(func() {
|
||||
for br := range chBatchRanges {
|
||||
if err := extractFromMssql(mssqlCtx, sourceDb, job, sourceColTypes, ChunkSize, br, chRowsExtract); err != nil {
|
||||
log.Error("Unexpected error extracting data from mssql: ", err)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
go func() {
|
||||
log.Info("Starting extraction from MSSQL...")
|
||||
extractStartTime := time.Now()
|
||||
if err := extractFromMssql(mssqlCtx, job, sourceColTypes, ChunkSize, sourceDb, chRowsExtract); err != nil {
|
||||
log.Error("Unexpected error extracting data from mssql: ", err)
|
||||
for _, br := range batchRanges {
|
||||
chBatchRanges <- br
|
||||
}
|
||||
close(chBatchRanges)
|
||||
}()
|
||||
|
||||
go func() {
|
||||
wgMssqlExtractors.Wait()
|
||||
close(chRowsExtract)
|
||||
log.Infof("Extraction completed in %v", time.Since(extractStartTime))
|
||||
}()
|
||||
|
||||
chRowsTransform := make(chan []UnknownRowValues, QueueSize)
|
||||
var wgMssqlTransformers sync.WaitGroup
|
||||
|
||||
log.Infof("Starting %d MSSQL transformers...", maxExtractors)
|
||||
transformStartTime := time.Now()
|
||||
for range maxExtractors {
|
||||
wgMssqlTransformers.Go(func() {
|
||||
transformRowsMssql(sourceColTypes, chRowsExtract, chRowsTransform)
|
||||
})
|
||||
}
|
||||
|
||||
go func() {
|
||||
log.Info("Starting transformation of rows...")
|
||||
transformStartTime := time.Now()
|
||||
transformRowsMssql(sourceColTypes, chRowsExtract, chRowsTransform)
|
||||
wgMssqlTransformers.Wait()
|
||||
close(chRowsTransform)
|
||||
log.Infof("Transformation completed in %v", time.Since(transformStartTime))
|
||||
}()
|
||||
|
||||
Reference in New Issue
Block a user