feat: implement batch processing for MSSQL with improved structure and logging

This commit is contained in:
2026-04-08 17:35:12 -05:00
parent 51d83661a4
commit bc6f9a6a70
8 changed files with 343 additions and 168 deletions

View File

@@ -25,39 +25,43 @@ func processMigrationJob(sourceDb *sql.DB, targetDb *pgxpool.Pool, job Migration
logColumnTypes(targetColTypes, "Target col types")
mssqlCtx := context.Background()
batchRanges, err := calculateBatchMetrics(mssqlCtx, sourceDb, job)
batches, err := batchGeneratorMssql(mssqlCtx, sourceDb, job)
if err != nil {
log.Error("Unexpected error calculating batch ranges: ", err)
}
chBatchRanges := make(chan BatchRange, len(batchRanges))
chJobErrors := make(chan error)
defer close(chJobErrors)
maxExtractors := min(NumExtractors, len(batchRanges))
chRowsExtract := make(chan []UnknownRowValues, QueueSize)
chBatches := make(chan Batch, len(batches))
chChunks := make(chan []UnknownRowValues, QueueSize)
chExtractorErrors := make(chan ExtractorError, len(batches))
maxExtractors := min(NumExtractors, len(batches))
var wgMssqlExtractors sync.WaitGroup
log.Infof("Starting %d MSSQL extractors...", maxExtractors)
extractStartTime := time.Now()
for range maxExtractors {
wgMssqlExtractors.Go(func() {
for br := range chBatchRanges {
if err := extractFromMssql(mssqlCtx, sourceDb, job, sourceColTypes, ChunkSize, br, chRowsExtract); err != nil {
log.Error("Unexpected error extracting data from mssql: ", err)
}
}
extractFromMssql(mssqlCtx, sourceDb, job, sourceColTypes, ChunkSize, chBatches, chChunks, chExtractorErrors)
})
}
go func() {
for _, br := range batchRanges {
chBatchRanges <- br
for _, br := range batches {
chBatches <- br
}
close(chBatchRanges)
close(chBatches)
close(chExtractorErrors)
}()
go func() {
extractorErrorHandler(chExtractorErrors, chBatches, chJobErrors)
}()
go func() {
wgMssqlExtractors.Wait()
close(chRowsExtract)
close(chChunks)
log.Infof("Extraction completed in %v", time.Since(extractStartTime))
}()
@@ -68,7 +72,7 @@ func processMigrationJob(sourceDb *sql.DB, targetDb *pgxpool.Pool, job Migration
transformStartTime := time.Now()
for range maxExtractors {
wgMssqlTransformers.Go(func() {
transformRowsMssql(sourceColTypes, chRowsExtract, chRowsTransform)
transformRowsMssql(sourceColTypes, chChunks, chRowsTransform)
})
}
@@ -89,7 +93,6 @@ func processMigrationJob(sourceDb *sql.DB, targetDb *pgxpool.Pool, job Migration
if err := loadRowsPostgres(postgresLoaderCtx, job, targetColTypes, targetDb, chRowsTransform); err != nil {
log.Error("Unexpected error loading data into postgres: ", err)
}
// fakeLoader(job, sourceColTypes, chRowsTransform)
})
}