refactor: update GenericLoader to use ProcessBatchWithRetries; enhance error handling and retry logic

This commit is contained in:
2026-05-08 07:50:54 -05:00
parent a216a8016f
commit c4e233401b
6 changed files with 101 additions and 121 deletions

View File

@@ -13,6 +13,7 @@ import (
dbwrapper "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/db-wrapper" dbwrapper "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/db-wrapper"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/extractors" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/extractors"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/loaders"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/table_analyzers" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/table_analyzers"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/transformers" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/transformers"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
@@ -43,7 +44,7 @@ func processMigrationJob(
targetTableAnalyzer etl.TableAnalyzer, targetTableAnalyzer etl.TableAnalyzer,
extractor extractors.GenericExtractor, extractor extractors.GenericExtractor,
azureClient *azure.Client, azureClient *azure.Client,
loader etl.Loader, loader loaders.GenericLoader,
job config.Job, job config.Job,
targetDbType string, targetDbType string,
) models.JobResult { ) models.JobResult {
@@ -189,12 +190,12 @@ func processMigrationJob(
for range job.MaxLoaders { for range job.MaxLoaders {
wgLoaders.Go(func() { wgLoaders.Go(func() {
loader.Exec( loader.Consume(
localCtx, localCtx,
job.TargetTable, job.TargetTable,
targetColTypes, targetColTypes,
job.Retry,
chBatchesTransformed, chBatchesTransformed,
chLoadersErrors,
chJobErrors, chJobErrors,
&wgActiveBatches, &wgActiveBatches,
&rowsLoaded, &rowsLoaded,

View File

@@ -38,7 +38,7 @@ func (gl *GenericLoader) Consume(
return return
} }
processedRows, err := gl.ProcessBatch(ctx, tableInfo, colNames, batch) processedRows, err := gl.ProcessBatchWithRetries(ctx, tableInfo, colNames, retryConfig, batch)
wgActiveBatches.Done() wgActiveBatches.Done()
if err != nil { if err != nil {

View File

@@ -1,116 +1,13 @@
package loaders package loaders
import ( import (
"context"
"errors"
"fmt"
"sync"
"sync/atomic"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
dbwrapper "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/db-wrapper" dbwrapper "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/db-wrapper"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
"github.com/jackc/pgx/v5/pgconn"
) )
type GenericLoader struct { type GenericLoader struct {
db dbwrapper.DbWrapper db dbwrapper.DbWrapper
} }
func NewGenericLoader(db dbwrapper.DbWrapper) etl.Loader { func NewGenericLoader(db dbwrapper.DbWrapper) GenericLoader {
return &GenericLoader{db: db} return GenericLoader{db: db}
}
func (gl *GenericLoader) ProcessBatch(
ctx context.Context,
tableInfo config.TargetTableInfo,
colNames []string,
batch models.Batch,
) (int, error) {
_, err := gl.db.SaveMassive(
ctx,
tableInfo.Schema,
tableInfo.Table,
colNames,
batch.Rows,
)
if err != nil {
if pgErr, ok := errors.AsType[*pgconn.PgError](err); ok {
if pgErr.Code == "23505" {
return 0, &custom_errors.JobError{
ShouldCancelJob: true,
Msg: fmt.Sprintf("Fatal error in table %s.%s", tableInfo.Schema, tableInfo.Table),
Prev: err,
}
}
}
return 0, &custom_errors.LoaderError{Batch: batch, Msg: err.Error()}
}
return len(batch.Rows), nil
}
func (gl *GenericLoader) Exec(
ctx context.Context,
tableInfo config.TargetTableInfo,
columns []models.ColumnType,
chBatchesIn <-chan models.Batch,
chErrorsOut chan<- custom_errors.LoaderError,
chJobErrorsOut chan<- custom_errors.JobError,
wgActiveBatches *sync.WaitGroup,
rowsLoaded *int64,
) {
colNames := mapSlice(columns, func(col models.ColumnType) string {
return col.Name()
})
for {
if ctx.Err() != nil {
return
}
select {
case <-ctx.Done():
return
case batch, ok := <-chBatchesIn:
if !ok {
return
}
processedRows, err := gl.ProcessBatch(ctx, tableInfo, colNames, batch)
if err != nil {
var ldError *custom_errors.LoaderError
var jobError *custom_errors.JobError
if errors.As(err, &ldError) {
select {
case <-ctx.Done():
return
case chErrorsOut <- *ldError:
}
} else if errors.As(err, &jobError) {
select {
case <-ctx.Done():
return
case chJobErrorsOut <- *jobError:
}
} else {
select {
case <-ctx.Done():
return
case chErrorsOut <- custom_errors.LoaderError{Batch: batch, Msg: err.Error()}:
}
}
continue
}
wgActiveBatches.Done()
atomic.AddInt64(rowsLoaded, int64(processedRows))
}
}
} }

View File

@@ -0,0 +1,50 @@
package loaders
import (
"context"
"errors"
"fmt"
"time"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
)
func (gl *GenericLoader) ProcessBatchWithRetries(
ctx context.Context,
tableInfo config.TargetTableInfo,
colNames []string,
retryConfig config.RetryConfig,
batch models.Batch,
) (int64, error) {
for {
rowsLoaded, err := gl.ProcessBatch(ctx, tableInfo, colNames, batch)
if err == nil {
return rowsLoaded, nil
}
if btError, ok := errors.AsType[*custom_errors.LoaderError](err); ok {
batch.RetryCounter++
if batch.RetryCounter >= retryConfig.Attempts {
return rowsLoaded, &custom_errors.JobError{
ShouldCancelJob: false,
Msg: fmt.Sprintf("Temporal error in batch %v (retries: %d)", btError.Batch.Id, btError.Batch.RetryCounter),
Prev: btError,
}
}
delay := custom_errors.ComputeBackoffDelay(
batch.RetryCounter,
retryConfig.BaseDelayMs,
retryConfig.MaxDelayMs,
retryConfig.MaxJitterMs,
)
time.Sleep(delay)
continue
}
return rowsLoaded, err
}
}

View File

@@ -0,0 +1,43 @@
package loaders
import (
"context"
"errors"
"fmt"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
"github.com/jackc/pgx/v5/pgconn"
)
func (gl *GenericLoader) ProcessBatch(
ctx context.Context,
tableInfo config.TargetTableInfo,
colNames []string,
batch models.Batch,
) (int64, error) {
_, err := gl.db.SaveMassive(
ctx,
tableInfo.Schema,
tableInfo.Table,
colNames,
batch.Rows,
)
if err != nil {
if pgErr, ok := errors.AsType[*pgconn.PgError](err); ok {
if pgErr.Code == "23505" {
return 0, &custom_errors.JobError{
ShouldCancelJob: true,
Msg: fmt.Sprintf("Fatal error in table %s.%s", tableInfo.Schema, tableInfo.Table),
Prev: err,
}
}
}
return 0, &custom_errors.LoaderError{Batch: batch, Msg: err.Error()}
}
return int64(len(batch.Rows)), nil
}

View File

@@ -51,18 +51,7 @@ type Loader interface {
tableInfo config.TargetTableInfo, tableInfo config.TargetTableInfo,
colNames []string, colNames []string,
batch models.Batch, batch models.Batch,
) (int, error) ) (int64, error)
Exec(
ctx context.Context,
tableInfo config.TargetTableInfo,
columns []models.ColumnType,
chBatchesIn <-chan models.Batch,
chErrorsOut chan<- custom_errors.LoaderError,
chJobErrorsOut chan<- custom_errors.JobError,
wgActiveBatches *sync.WaitGroup,
rowsLoaded *int64,
)
} }
type TableAnalyzer interface { type TableAnalyzer interface {