feat: enhance concurrency management by adding WaitGroup support in extractors and loaders
This commit is contained in:
@@ -3,6 +3,7 @@ package main
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"sync"
|
||||||
|
|
||||||
"github.com/google/uuid"
|
"github.com/google/uuid"
|
||||||
)
|
)
|
||||||
@@ -25,6 +26,7 @@ func extractorErrorHandler(
|
|||||||
chErrorsIn <-chan ExtractorError,
|
chErrorsIn <-chan ExtractorError,
|
||||||
chBatchesOut chan<- Batch,
|
chBatchesOut chan<- Batch,
|
||||||
chJobErrorsOut chan<- JobError,
|
chJobErrorsOut chan<- JobError,
|
||||||
|
wgActiveBatches *sync.WaitGroup,
|
||||||
) {
|
) {
|
||||||
for {
|
for {
|
||||||
if ctx.Err() != nil {
|
if ctx.Err() != nil {
|
||||||
@@ -52,6 +54,8 @@ func extractorErrorHandler(
|
|||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
wgActiveBatches.Done()
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import (
|
|||||||
"errors"
|
"errors"
|
||||||
"slices"
|
"slices"
|
||||||
"strings"
|
"strings"
|
||||||
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/google/uuid"
|
"github.com/google/uuid"
|
||||||
@@ -33,6 +34,7 @@ func extractFromMssql(
|
|||||||
chChunksOut chan<- Chunk,
|
chChunksOut chan<- Chunk,
|
||||||
chErrorsOut chan<- ExtractorError,
|
chErrorsOut chan<- ExtractorError,
|
||||||
chJobErrorsOut chan<- JobError,
|
chJobErrorsOut chan<- JobError,
|
||||||
|
wgActiveBatches *sync.WaitGroup,
|
||||||
) {
|
) {
|
||||||
indexPrimaryKey := slices.IndexFunc(columns, func(col ColumnType) bool {
|
indexPrimaryKey := slices.IndexFunc(columns, func(col ColumnType) bool {
|
||||||
return strings.EqualFold(col.name, job.PrimaryKey)
|
return strings.EqualFold(col.name, job.PrimaryKey)
|
||||||
@@ -66,7 +68,7 @@ func extractFromMssql(
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if abort := processBatch(ctx, db, job, columns, chunkSize, batch, indexPrimaryKey, chChunksOut, chErrorsOut); abort {
|
if abort := processBatch(ctx, db, job, columns, chunkSize, batch, indexPrimaryKey, chChunksOut, chErrorsOut, wgActiveBatches); abort {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -83,6 +85,7 @@ func processBatch(
|
|||||||
indexPrimaryKey int,
|
indexPrimaryKey int,
|
||||||
chChunksOut chan<- Chunk,
|
chChunksOut chan<- Chunk,
|
||||||
chErrorsOut chan<- ExtractorError,
|
chErrorsOut chan<- ExtractorError,
|
||||||
|
wgActiveBatches *sync.WaitGroup,
|
||||||
) (abort bool) {
|
) (abort bool) {
|
||||||
query := buildExtractQueryMssql(job, columns, batch.ShouldUseRange, batch.IsLowerLimitInclusive)
|
query := buildExtractQueryMssql(job, columns, batch.ShouldUseRange, batch.IsLowerLimitInclusive)
|
||||||
log.Debug("Query used to extract data from mssql: ", query)
|
log.Debug("Query used to extract data from mssql: ", query)
|
||||||
@@ -199,6 +202,7 @@ func processBatch(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
wgActiveBatches.Done()
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ func jobErrorHandler(ctx context.Context, chErrorsIn <-chan JobError) error {
|
|||||||
return &err
|
return &err
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Error(err)
|
log.Error(err.Msg, " - ", err.Prev)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ package main
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"sync"
|
||||||
)
|
)
|
||||||
|
|
||||||
type LoaderError struct {
|
type LoaderError struct {
|
||||||
@@ -19,6 +20,7 @@ func loaderErrorHandler(
|
|||||||
chErrorsIn <-chan LoaderError,
|
chErrorsIn <-chan LoaderError,
|
||||||
chChunksOut chan<- Chunk,
|
chChunksOut chan<- Chunk,
|
||||||
chJobErrorsOut chan<- JobError,
|
chJobErrorsOut chan<- JobError,
|
||||||
|
wgActiveChunks *sync.WaitGroup,
|
||||||
) {
|
) {
|
||||||
for {
|
for {
|
||||||
if ctx.Err() != nil {
|
if ctx.Err() != nil {
|
||||||
@@ -46,6 +48,8 @@ func loaderErrorHandler(
|
|||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
wgActiveChunks.Done()
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"database/sql"
|
"database/sql"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/jackc/pgx/v5"
|
"github.com/jackc/pgx/v5"
|
||||||
@@ -19,6 +20,7 @@ func loadRowsPostgres(
|
|||||||
columns []ColumnType,
|
columns []ColumnType,
|
||||||
chChunksIn <-chan Chunk,
|
chChunksIn <-chan Chunk,
|
||||||
chErrorsOut chan<- LoaderError,
|
chErrorsOut chan<- LoaderError,
|
||||||
|
wgActiveChunks *sync.WaitGroup,
|
||||||
) {
|
) {
|
||||||
tableId := pgx.Identifier{job.Schema, job.Table}
|
tableId := pgx.Identifier{job.Schema, job.Table}
|
||||||
colNames := Map(columns, func(col ColumnType) string {
|
colNames := Map(columns, func(col ColumnType) string {
|
||||||
@@ -38,7 +40,7 @@ func loadRowsPostgres(
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if abort := loadChunkPostgres(ctx, db, tableId, colNames, chunk, chErrorsOut); abort {
|
if abort := loadChunkPostgres(ctx, db, tableId, colNames, chunk, chErrorsOut, wgActiveChunks); abort {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -52,6 +54,7 @@ func loadChunkPostgres(
|
|||||||
colNames []string,
|
colNames []string,
|
||||||
chunk Chunk,
|
chunk Chunk,
|
||||||
chErrorsOut chan<- LoaderError,
|
chErrorsOut chan<- LoaderError,
|
||||||
|
wgActiveChunks *sync.WaitGroup,
|
||||||
) (abort bool) {
|
) (abort bool) {
|
||||||
chunkStartTime := time.Now()
|
chunkStartTime := time.Now()
|
||||||
_, err := db.CopyFrom(
|
_, err := db.CopyFrom(
|
||||||
@@ -75,6 +78,7 @@ func loadChunkPostgres(
|
|||||||
|
|
||||||
log.Infof("Loaded chunk: %d rows in %v (%.0f rows/sec)", len(chunk.Data), chunkDuration, rowsPerSec)
|
log.Infof("Loaded chunk: %d rows in %v (%.0f rows/sec)", len(chunk.Data), chunkDuration, rowsPerSec)
|
||||||
|
|
||||||
|
wgActiveChunks.Done()
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -13,5 +13,5 @@ func configureLog() {
|
|||||||
DisableSorting: false,
|
DisableSorting: false,
|
||||||
PadLevelText: true,
|
PadLevelText: true,
|
||||||
})
|
})
|
||||||
log.SetLevel(log.DebugLevel)
|
log.SetLevel(log.InfoLevel)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -32,89 +32,82 @@ func processMigrationJob(sourceDb *sql.DB, targetDb *pgxpool.Pool, job Migration
|
|||||||
log.Error("Unexpected error calculating batch ranges: ", err)
|
log.Error("Unexpected error calculating batch ranges: ", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
chJobErrors := make(chan JobError)
|
chJobErrors := make(chan JobError, 100)
|
||||||
defer close(chJobErrors)
|
chBatches := make(chan Batch, QueueSize)
|
||||||
|
chExtractorErrors := make(chan ExtractorError, QueueSize)
|
||||||
go func() {
|
|
||||||
if err := jobErrorHandler(ctx, chJobErrors); err != nil {
|
|
||||||
if ctx.Err() == nil {
|
|
||||||
cancel()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
|
|
||||||
chBatches := make(chan Batch, len(batches))
|
|
||||||
chExtractorErrors := make(chan ExtractorError, len(batches))
|
|
||||||
|
|
||||||
go func() {
|
|
||||||
extractorErrorHandler(ctx, chExtractorErrors, chBatches, chJobErrors)
|
|
||||||
}()
|
|
||||||
|
|
||||||
chChunksRaw := make(chan Chunk, QueueSize)
|
chChunksRaw := make(chan Chunk, QueueSize)
|
||||||
maxExtractors := min(NumExtractors, len(batches))
|
|
||||||
var wgMssqlExtractors sync.WaitGroup
|
|
||||||
|
|
||||||
log.Infof("Starting %d MSSQL extractors...", maxExtractors)
|
|
||||||
extractStartTime := time.Now()
|
|
||||||
for range maxExtractors {
|
|
||||||
wgMssqlExtractors.Go(func() {
|
|
||||||
extractFromMssql(ctx, sourceDb, job, sourceColTypes, ChunkSize, chBatches, chChunksRaw, chExtractorErrors, chJobErrors)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
go func() {
|
|
||||||
for _, br := range batches {
|
|
||||||
chBatches <- br
|
|
||||||
}
|
|
||||||
close(chBatches)
|
|
||||||
close(chExtractorErrors)
|
|
||||||
}()
|
|
||||||
|
|
||||||
go func() {
|
|
||||||
wgMssqlExtractors.Wait()
|
|
||||||
close(chChunksRaw)
|
|
||||||
log.Infof("Extraction completed in %v", time.Since(extractStartTime))
|
|
||||||
}()
|
|
||||||
|
|
||||||
chChunksTransformed := make(chan Chunk, QueueSize)
|
chChunksTransformed := make(chan Chunk, QueueSize)
|
||||||
var wgMssqlTransformers sync.WaitGroup
|
chLoadersErrors := make(chan LoaderError, QueueSize)
|
||||||
|
|
||||||
|
var wgActiveBatches sync.WaitGroup
|
||||||
|
var wgActiveChunks sync.WaitGroup
|
||||||
|
var wgExtractors sync.WaitGroup
|
||||||
|
var wgTransformers sync.WaitGroup
|
||||||
|
var wgLoaders sync.WaitGroup
|
||||||
|
|
||||||
|
go jobErrorHandler(ctx, chJobErrors)
|
||||||
|
go extractorErrorHandler(ctx, chExtractorErrors, chBatches, chJobErrors, &wgActiveBatches)
|
||||||
|
go loaderErrorHandler(ctx, chLoadersErrors, chChunksTransformed, chJobErrors, &wgActiveChunks)
|
||||||
|
|
||||||
|
maxExtractors := min(NumExtractors, len(batches))
|
||||||
|
log.Infof("Starting %d extractors...", maxExtractors)
|
||||||
|
extractStartTime := time.Now()
|
||||||
|
|
||||||
log.Infof("Starting %d MSSQL transformers...", maxExtractors)
|
|
||||||
transformStartTime := time.Now()
|
|
||||||
for range maxExtractors {
|
for range maxExtractors {
|
||||||
wgMssqlTransformers.Go(func() {
|
wgExtractors.Go(func() {
|
||||||
transformRowsMssql(ctx, sourceColTypes, chChunksRaw, chChunksTransformed, chJobErrors)
|
extractFromMssql(ctx, sourceDb, job, sourceColTypes, ChunkSize, chBatches, chChunksRaw, chExtractorErrors, chJobErrors, &wgActiveBatches)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
wgActiveBatches.Add(len(batches))
|
||||||
go func() {
|
go func() {
|
||||||
wgMssqlTransformers.Wait()
|
for _, batch := range batches {
|
||||||
close(chChunksTransformed)
|
chBatches <- batch
|
||||||
log.Infof("Transformation completed in %v", time.Since(transformStartTime))
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
var wgPostgresLoaders sync.WaitGroup
|
log.Infof("Starting %d transformers...", maxExtractors)
|
||||||
chLoadersErrors := make(chan LoaderError)
|
transformStartTime := time.Now()
|
||||||
|
|
||||||
go func() {
|
for range maxExtractors {
|
||||||
loaderErrorHandler(ctx, chLoadersErrors, chChunksTransformed, chJobErrors)
|
wgTransformers.Go(func() {
|
||||||
}()
|
transformRowsMssql(ctx, sourceColTypes, chChunksRaw, chChunksTransformed, chJobErrors, &wgActiveChunks)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
log.Infof("Starting %d PostgreSQL loader(s)...", NumLoaders)
|
log.Infof("Starting %d PostgreSQL loader(s)...", NumLoaders)
|
||||||
loaderStartTime := time.Now()
|
loadStartTime := time.Now()
|
||||||
|
|
||||||
for range NumLoaders {
|
for range NumLoaders {
|
||||||
wgPostgresLoaders.Go(func() {
|
wgLoaders.Go(func() {
|
||||||
loadRowsPostgres(ctx, targetDb, job, targetColTypes, chChunksTransformed, chLoadersErrors)
|
loadRowsPostgres(ctx, targetDb, job, targetColTypes, chChunksTransformed, chLoadersErrors, &wgActiveChunks)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
wgPostgresLoaders.Wait()
|
go func() {
|
||||||
close(chLoadersErrors)
|
wgActiveBatches.Wait()
|
||||||
log.Infof("Loading completed in %v", time.Since(loaderStartTime))
|
close(chBatches)
|
||||||
|
close(chExtractorErrors)
|
||||||
|
|
||||||
totalDuration := time.Since(jobStartTime)
|
wgExtractors.Wait()
|
||||||
log.Infof("Migration job completed successfully! Total time: %v", totalDuration)
|
log.Infof("Extraction completed in %v", time.Since(extractStartTime))
|
||||||
|
close(chChunksRaw)
|
||||||
|
|
||||||
|
wgTransformers.Wait()
|
||||||
|
log.Infof("Transformation completed in %v", time.Since(transformStartTime))
|
||||||
|
|
||||||
|
wgActiveChunks.Wait()
|
||||||
|
close(chChunksTransformed)
|
||||||
|
close(chLoadersErrors)
|
||||||
|
|
||||||
|
wgLoaders.Wait()
|
||||||
|
log.Infof("Loading completed in %v", time.Since(loadStartTime))
|
||||||
|
|
||||||
|
cancel()
|
||||||
|
}()
|
||||||
|
|
||||||
|
<-ctx.Done()
|
||||||
|
log.Infof("Migration job completed. Total time: %v", time.Since(jobStartTime))
|
||||||
}
|
}
|
||||||
|
|
||||||
func logColumnTypes(columnTypes []ColumnType, label string) {
|
func logColumnTypes(columnTypes []ColumnType, label string) {
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ package main
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
@@ -21,6 +22,7 @@ func transformRowsMssql(
|
|||||||
chChunksIn <-chan Chunk,
|
chChunksIn <-chan Chunk,
|
||||||
chChunksOut chan<- Chunk,
|
chChunksOut chan<- Chunk,
|
||||||
chJobErrorsOut chan<- JobError,
|
chJobErrorsOut chan<- JobError,
|
||||||
|
wgActiveChunks *sync.WaitGroup,
|
||||||
) {
|
) {
|
||||||
transformationPlan := computeTransformationPlan(columns)
|
transformationPlan := computeTransformationPlan(columns)
|
||||||
|
|
||||||
@@ -41,6 +43,7 @@ func transformRowsMssql(
|
|||||||
if len(transformationPlan) == 0 {
|
if len(transformationPlan) == 0 {
|
||||||
select {
|
select {
|
||||||
case chChunksOut <- chunk:
|
case chChunksOut <- chunk:
|
||||||
|
wgActiveChunks.Add(1)
|
||||||
continue
|
continue
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
return
|
return
|
||||||
@@ -69,6 +72,8 @@ func transformRowsMssql(
|
|||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
wgActiveChunks.Add(1)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user