refactor: rename batch-related variables and functions for consistency and clarity

2026-04-11 00:44:12 -05:00
parent 955bc65ce9
commit 7830ae862d
5 changed files with 33 additions and 33 deletions

View File

@@ -33,13 +33,13 @@ GROUP BY t.name`
 	return rowsCount, nil
 }
 
-func calculateBatchesMssql(ctx context.Context, db *sql.DB, tableInfo config.SourceTableInfo, batchCount int64) ([]models.Partition, error) {
+func calculatePartitionRanges(ctx context.Context, db *sql.DB, tableInfo config.SourceTableInfo, maxPartitions int64) ([]models.Partition, error) {
 	query := fmt.Sprintf(`
 SELECT
 	MIN([%s]) AS lower_limit,
 	MAX([%s]) AS upper_limit
 FROM
-	(SELECT [%s], NTILE(@batchCount) OVER (ORDER BY [%s]) AS batch_id FROM [%s].[%s]) AS T
+	(SELECT [%s], NTILE(@maxPartitions) OVER (ORDER BY [%s]) AS batch_id FROM [%s].[%s]) AS T
 GROUP BY batch_id
 ORDER BY batch_id`,
 		tableInfo.PrimaryKey,
@@ -52,45 +52,45 @@ ORDER BY batch_id`,
 	ctxTimeout, cancel := context.WithTimeout(ctx, time.Second*20)
 	defer cancel()
 
-	rows, err := db.QueryContext(ctxTimeout, query, sql.Named("batchCount", batchCount))
+	rows, err := db.QueryContext(ctxTimeout, query, sql.Named("maxPartitions", maxPartitions))
 	if err != nil {
 		return nil, err
 	}
 	defer rows.Close()
 
-	batches := make([]models.Partition, 0, batchCount)
+	partitions := make([]models.Partition, 0, maxPartitions)
 	for rows.Next() {
-		batch := models.Partition{
+		partition := models.Partition{
 			Id:                    uuid.New(),
 			ShouldUseRange:        true,
 			RetryCounter:          0,
 			IsLowerLimitInclusive: true,
 		}
-		if err := rows.Scan(&batch.LowerLimit, &batch.UpperLimit); err != nil {
+		if err := rows.Scan(&partition.LowerLimit, &partition.UpperLimit); err != nil {
 			return nil, err
 		}
-		batches = append(batches, batch)
+		partitions = append(partitions, partition)
 	}
 	if err := rows.Err(); err != nil {
 		return nil, err
 	}
 
-	return batches, nil
+	return partitions, nil
 }
 
-func partitionGeneratorMssql(ctx context.Context, db *sql.DB, tableInfo config.SourceTableInfo, rowsPerBatch int64) ([]models.Partition, error) {
+func partitionGeneratorMssql(ctx context.Context, db *sql.DB, tableInfo config.SourceTableInfo, rowsPerPartition int64) ([]models.Partition, error) {
 	rowsCount, err := estimateTotalRowsMssql(ctx, db, tableInfo)
 	if err != nil {
 		return nil, err
 	}
 
-	var batchCount int64 = 1
-	if rowsCount > rowsPerBatch {
-		batchCount = rowsCount / rowsPerBatch
+	var partitionsCount int64 = 1
+	if rowsCount > rowsPerPartition {
+		partitionsCount = rowsCount / rowsPerPartition
 	} else {
 		return []models.Partition{{
 			Id: uuid.New(),
@@ -99,10 +99,10 @@ func partitionGeneratorMssql(ctx context.Context, db *sql.DB, tableInfo config.S
 		}}, nil
 	}
 
-	batches, err := calculateBatchesMssql(ctx, db, tableInfo, batchCount)
+	partitions, err := calculatePartitionRanges(ctx, db, tableInfo, partitionsCount)
 	if err != nil {
 		return nil, err
 	}
 
-	return batches, nil
+	return partitions, nil
 }
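
A note on the renamed helper: NTILE(@maxPartitions) deals the table's rows into that many near-equal buckets in primary-key order, so taking MIN/MAX per bucket yields contiguous key ranges that extractors can scan independently. The sketch below is illustrative only; it fills the query template with an invented dbo.orders table keyed by order_id to show the SQL the function would run.

// Illustrative sketch, not repo code: fills the fmt.Sprintf template from the
// diff above with invented identifiers (schema dbo, table orders, key
// order_id) so the generated SQL is visible.
package main

import "fmt"

func main() {
	pk, schema, table := "order_id", "dbo", "orders"
	query := fmt.Sprintf(`
SELECT
	MIN([%s]) AS lower_limit,
	MAX([%s]) AS upper_limit
FROM
	(SELECT [%s], NTILE(@maxPartitions) OVER (ORDER BY [%s]) AS batch_id FROM [%s].[%s]) AS T
GROUP BY batch_id
ORDER BY batch_id`, pk, pk, pk, pk, schema, table)

	// Each result row is one partition: NTILE assigns every source row a
	// bucket id, and MIN/MAX per bucket give the key range one extractor
	// will later scan.
	fmt.Println(query)
}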

View File

@@ -44,7 +44,7 @@ func processMigrationJob(
 	jobCtx, cancel := context.WithCancel(ctx)
 	defer cancel()
 
-	partitions, err := partitionGeneratorMssql(jobCtx, sourceDb, job.SourceTable, job.RowsPerBatch)
+	partitions, err := partitionGeneratorMssql(jobCtx, sourceDb, job.SourceTable, job.RowsPerPartition)
 	if err != nil {
 		log.Error("Unexpected error calculating batch ranges: ", err)
 	}
@@ -81,7 +81,7 @@ func processMigrationJob(
 		jobCtx,
 		job.SourceTable,
 		sourceColTypes,
-		job.ChunkSize,
+		job.BatchSize,
 		chPartitions,
 		chBatchesRaw,
 		chExtractorErrors,
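
The call site above also hints at the pipeline shape: generated partitions are handed to extractor workers over a channel, along with the per-fetch batch size. A generic, invented sketch of that handoff, assuming nothing about the repo's actual extractor beyond the channel names visible in the diff:

// Invented pipeline sketch: partitions flow to extractor workers over a
// buffered channel, mirroring the chPartitions handoff in the diff above.
package main

import (
	"fmt"
	"sync"
)

type Partition struct{ LowerLimit, UpperLimit int64 }

func main() {
	chPartitions := make(chan Partition, 8) // cf. queue_size in the config below

	var wg sync.WaitGroup
	for w := 0; w < 2; w++ { // cf. max_extractors
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			for p := range chPartitions {
				// A real extractor would page through the key range in
				// batch-size chunks; here we only report the range.
				fmt.Printf("extractor %d: rows %d..%d\n", id, p.LowerLimit, p.UpperLimit)
			}
		}(w)
	}

	for _, p := range []Partition{{1, 200000}, {200001, 400000}} {
		chPartitions <- p
	}
	close(chPartitions)
	wg.Wait()
}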

View File

@@ -6,8 +6,8 @@ defaults:
   max_extractors: 2
   max_loaders: 4
   queue_size: 8
-  chunk_size: 25000
-  chunks_per_batch: 8
+  batch_size: 25000
+  batches_per_partition: 8
   truncate_target: true
   truncate_method: TRUNCATE # TRUNCATE | DELETE
   retry:
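
For reference, a minimal sketch of decoding the renamed keys, assuming gopkg.in/yaml.v3 and a struct trimmed to just the two fields this commit renames:

// Minimal sketch of decoding the renamed keys with gopkg.in/yaml.v3; the
// struct is trimmed to the two fields this commit renames.
package main

import (
	"fmt"

	"gopkg.in/yaml.v3"
)

type defaults struct {
	BatchSize           int `yaml:"batch_size"`
	BatchesPerPartition int `yaml:"batches_per_partition"`
}

func main() {
	src := []byte("batch_size: 25000\nbatches_per_partition: 8\n")

	var d defaults
	if err := yaml.Unmarshal(src, &d); err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", d) // {BatchSize:25000 BatchesPerPartition:8}
}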

View File

@@ -15,12 +15,12 @@ type JobConfig struct {
 	MaxExtractors int         `yaml:"max_extractors"`
 	MaxLoaders    int         `yaml:"max_loaders"`
 	QueueSize     int         `yaml:"queue_size"`
-	ChunkSize      int   `yaml:"chunk_size"`
-	ChunksPerBatch int   `yaml:"chunks_per_batch"`
-	RowsPerBatch   int64
+	BatchSize           int `yaml:"batch_size"`
+	BatchesPerPartition int `yaml:"batches_per_partition"`
 	TruncateTarget bool        `yaml:"truncate_target"`
 	TruncateMethod string      `yaml:"truncate_method"`
 	Retry          RetryConfig `yaml:"retry"`
+	RowsPerPartition int64
 }
 
 type TableInfo struct {
@@ -71,7 +71,7 @@ func (c *MigrationConfig) UnmarshalYAML(value *yaml.Node) error {
 	c.MaxParallelWorkers = raw.MaxParallelWorkers
 	c.Defaults = raw.Defaults
-	c.Defaults.RowsPerBatch = int64(raw.Defaults.ChunkSize * raw.Defaults.ChunksPerBatch)
+	c.Defaults.RowsPerPartition = int64(raw.Defaults.BatchSize * raw.Defaults.BatchesPerPartition)
 
 	for _, node := range raw.Jobs {
 		job := Job{
@@ -82,7 +82,7 @@ func (c *MigrationConfig) UnmarshalYAML(value *yaml.Node) error {
 			return err
 		}
-		job.RowsPerBatch = int64(job.ChunkSize * job.ChunksPerBatch)
+		job.RowsPerPartition = int64(job.BatchSize * job.BatchesPerPartition)
 		c.Jobs = append(c.Jobs, job)
 	}
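
To make the derived field concrete: with the sample config above, RowsPerPartition works out to 25000 * 8 = 200000 rows, so a 1,000,000-row table would be split into 5 partitions. A minimal sketch of that arithmetic:

// Minimal sketch of the derived-field arithmetic performed in UnmarshalYAML
// above, using the sample values from the config file.
package main

import "fmt"

func main() {
	batchSize := 25000       // batch_size: rows moved per extract/load batch
	batchesPerPartition := 8 // batches_per_partition
	rowsPerPartition := int64(batchSize * batchesPerPartition)

	fmt.Println(rowsPerPartition) // 200000

	// Tables smaller than rowsPerPartition stay in a single partition;
	// a 1,000,000-row table yields 1000000 / 200000 = 5 partitions.
	fmt.Println(int64(1_000_000) / rowsPerPartition) // 5
}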

View File

@@ -64,7 +64,7 @@ type Loader interface {
 		ctx context.Context,
 		tableInfo config.TargetTableInfo,
 		colNames []string,
-		Batch models.Batch,
+		batch models.Batch,
 	) (int, error)
 
 	Exec(
@@ -79,7 +79,7 @@ type Loader interface {
 	)
 }
 
-type TableAnalizer interface {
+type TableAnalyzer interface {
 	QueryColumnTypes(
 		ctx context.Context,
 		tableInfo config.TableInfo,
@@ -93,6 +93,6 @@ type TableAnalizer interface {
 	CalculatePartitionRanges(
 		ctx context.Context,
 		tableInfo config.TableInfo,
-		totalPartitions int,
-	) (models.Partition, error)
+		maxPartitions int,
+	) ([]models.Partition, error)
 }
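
For context, a hedged sketch of an analyzer satisfying the corrected ([]models.Partition, error) return shape; the trimmed Partition type and the dense-integer-key assumption are inventions for illustration, not repo code:

// Hypothetical analyzer satisfying the corrected ([]Partition, error) shape;
// Partition is a trimmed stand-in for models.Partition, and the dense-key
// splitting logic is invented for illustration.
package main

import (
	"context"
	"fmt"
)

type Partition struct {
	LowerLimit, UpperLimit int64
	IsLowerLimitInclusive  bool
}

// splitEvenly assumes a dense integer key 1..totalRows and returns at most
// maxPartitions contiguous ranges covering it, the same contract the
// NTILE-based query fulfils for arbitrary key distributions.
func splitEvenly(_ context.Context, totalRows int64, maxPartitions int) ([]Partition, error) {
	if totalRows < 1 || maxPartitions < 1 {
		return nil, fmt.Errorf("invalid inputs: rows=%d, maxPartitions=%d", totalRows, maxPartitions)
	}
	size := (totalRows + int64(maxPartitions) - 1) / int64(maxPartitions) // ceiling division
	parts := make([]Partition, 0, maxPartitions)
	for lo := int64(1); lo <= totalRows; lo += size {
		hi := lo + size - 1
		if hi > totalRows {
			hi = totalRows
		}
		parts = append(parts, Partition{LowerLimit: lo, UpperLimit: hi, IsLowerLimitInclusive: true})
	}
	return parts, nil
}

func main() {
	parts, _ := splitEvenly(context.Background(), 1_000_000, 5)
	for i, p := range parts {
		fmt.Printf("partition %d: [%d, %d]\n", i, p.LowerLimit, p.UpperLimit)
	}
}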