diff --git a/cmd/go_migrate/batch-generator.go b/cmd/go_migrate/batch-generator.go index 78bc487..6c7a213 100644 --- a/cmd/go_migrate/batch-generator.go +++ b/cmd/go_migrate/batch-generator.go @@ -33,13 +33,13 @@ GROUP BY t.name` return rowsCount, nil } -func calculateBatchesMssql(ctx context.Context, db *sql.DB, tableInfo config.SourceTableInfo, batchCount int64) ([]models.Partition, error) { +func calculatePartitionRanges(ctx context.Context, db *sql.DB, tableInfo config.SourceTableInfo, maxPartitions int64) ([]models.Partition, error) { query := fmt.Sprintf(` SELECT MIN([%s]) AS lower_limit, MAX([%s]) AS upper_limit FROM - (SELECT [%s], NTILE(@batchCount) OVER (ORDER BY [%s]) AS batch_id FROM [%s].[%s]) AS T + (SELECT [%s], NTILE(@maxPartitions) OVER (ORDER BY [%s]) AS batch_id FROM [%s].[%s]) AS T GROUP BY batch_id ORDER BY batch_id`, tableInfo.PrimaryKey, @@ -52,45 +52,45 @@ ORDER BY batch_id`, ctxTimeout, cancel := context.WithTimeout(ctx, time.Second*20) defer cancel() - rows, err := db.QueryContext(ctxTimeout, query, sql.Named("batchCount", batchCount)) + rows, err := db.QueryContext(ctxTimeout, query, sql.Named("maxPartitions", maxPartitions)) if err != nil { return nil, err } defer rows.Close() - batches := make([]models.Partition, 0, batchCount) + partitions := make([]models.Partition, 0, maxPartitions) for rows.Next() { - batch := models.Partition{ + partition := models.Partition{ Id: uuid.New(), ShouldUseRange: true, RetryCounter: 0, IsLowerLimitInclusive: true, } - if err := rows.Scan(&batch.LowerLimit, &batch.UpperLimit); err != nil { + if err := rows.Scan(&partition.LowerLimit, &partition.UpperLimit); err != nil { return nil, err } - batches = append(batches, batch) + partitions = append(partitions, partition) } if err := rows.Err(); err != nil { return nil, err } - return batches, nil + return partitions, nil } -func partitionGeneratorMssql(ctx context.Context, db *sql.DB, tableInfo config.SourceTableInfo, rowsPerBatch int64) ([]models.Partition, error) { +func partitionGeneratorMssql(ctx context.Context, db *sql.DB, tableInfo config.SourceTableInfo, rowsPerPartition int64) ([]models.Partition, error) { rowsCount, err := estimateTotalRowsMssql(ctx, db, tableInfo) if err != nil { return nil, err } - var batchCount int64 = 1 - if rowsCount > rowsPerBatch { - batchCount = rowsCount / rowsPerBatch + var partitionsCount int64 = 1 + if rowsCount > rowsPerPartition { + partitionsCount = rowsCount / rowsPerPartition } else { return []models.Partition{{ Id: uuid.New(), @@ -99,10 +99,10 @@ func partitionGeneratorMssql(ctx context.Context, db *sql.DB, tableInfo config.S }}, nil } - batches, err := calculateBatchesMssql(ctx, db, tableInfo, batchCount) + partitions, err := calculatePartitionRanges(ctx, db, tableInfo, partitionsCount) if err != nil { return nil, err } - return batches, nil + return partitions, nil } diff --git a/cmd/go_migrate/process.go b/cmd/go_migrate/process.go index 626aad4..75ef517 100644 --- a/cmd/go_migrate/process.go +++ b/cmd/go_migrate/process.go @@ -44,7 +44,7 @@ func processMigrationJob( jobCtx, cancel := context.WithCancel(ctx) defer cancel() - partitions, err := partitionGeneratorMssql(jobCtx, sourceDb, job.SourceTable, job.RowsPerBatch) + partitions, err := partitionGeneratorMssql(jobCtx, sourceDb, job.SourceTable, job.RowsPerPartition) if err != nil { log.Error("Unexpected error calculating batch ranges: ", err) } @@ -81,7 +81,7 @@ func processMigrationJob( jobCtx, job.SourceTable, sourceColTypes, - job.ChunkSize, + job.BatchSize, chPartitions, chBatchesRaw, chExtractorErrors, diff --git a/config.yaml b/config.yaml index 92e09df..4012875 100644 --- a/config.yaml +++ b/config.yaml @@ -6,8 +6,8 @@ defaults: max_extractors: 2 max_loaders: 4 queue_size: 8 - chunk_size: 25000 - chunks_per_batch: 8 + batch_size: 25000 + batches_per_partition: 8 truncate_target: true truncate_method: TRUNCATE # TRUNCATE | DELETE retry: diff --git a/internal/app/config/migration.go b/internal/app/config/migration.go index 9bb6f4c..2574279 100644 --- a/internal/app/config/migration.go +++ b/internal/app/config/migration.go @@ -12,15 +12,15 @@ type RetryConfig struct { } type JobConfig struct { - MaxExtractors int `yaml:"max_extractors"` - MaxLoaders int `yaml:"max_loaders"` - QueueSize int `yaml:"queue_size"` - ChunkSize int `yaml:"chunk_size"` - ChunksPerBatch int `yaml:"chunks_per_batch"` - RowsPerBatch int64 - TruncateTarget bool `yaml:"truncate_target"` - TruncateMethod string `yaml:"truncate_method"` - Retry RetryConfig `yaml:"retry"` + MaxExtractors int `yaml:"max_extractors"` + MaxLoaders int `yaml:"max_loaders"` + QueueSize int `yaml:"queue_size"` + BatchSize int `yaml:"batch_size"` + BatchesPerPartition int `yaml:"batches_per_partition"` + TruncateTarget bool `yaml:"truncate_target"` + TruncateMethod string `yaml:"truncate_method"` + Retry RetryConfig `yaml:"retry"` + RowsPerPartition int64 } type TableInfo struct { @@ -71,7 +71,7 @@ func (c *MigrationConfig) UnmarshalYAML(value *yaml.Node) error { c.MaxParallelWorkers = raw.MaxParallelWorkers c.Defaults = raw.Defaults - c.Defaults.RowsPerBatch = int64(raw.Defaults.ChunkSize * raw.Defaults.ChunksPerBatch) + c.Defaults.RowsPerPartition = int64(raw.Defaults.BatchSize * raw.Defaults.BatchesPerPartition) for _, node := range raw.Jobs { job := Job{ @@ -82,7 +82,7 @@ func (c *MigrationConfig) UnmarshalYAML(value *yaml.Node) error { return err } - job.RowsPerBatch = int64(job.ChunkSize * job.ChunksPerBatch) + job.RowsPerPartition = int64(job.BatchSize * job.BatchesPerPartition) c.Jobs = append(c.Jobs, job) } diff --git a/internal/app/etl/types.go b/internal/app/etl/types.go index c3e2f60..09f4a9c 100644 --- a/internal/app/etl/types.go +++ b/internal/app/etl/types.go @@ -64,7 +64,7 @@ type Loader interface { ctx context.Context, tableInfo config.TargetTableInfo, colNames []string, - Batch models.Batch, + batch models.Batch, ) (int, error) Exec( @@ -79,7 +79,7 @@ type Loader interface { ) } -type TableAnalizer interface { +type TableAnalyzer interface { QueryColumnTypes( ctx context.Context, tableInfo config.TableInfo, @@ -93,6 +93,6 @@ type TableAnalizer interface { CalculatePartitionRanges( ctx context.Context, tableInfo config.TableInfo, - totalPartitions int, - ) (models.Partition, error) + maxPartitions int, + ) ([]models.Partition, error) }