refactor: add partition calculation strategy and implement estimation logic; enhance table analyzers for max/min column queries
This commit is contained in:
@@ -2,6 +2,7 @@ package table_analyzers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"math"
|
||||
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
|
||||
@@ -15,6 +16,7 @@ func PartitionRangeGenerator(
|
||||
tableAnalyzer etl.TableAnalyzer,
|
||||
tableInfo config.TableInfo,
|
||||
partitionColumn string,
|
||||
partitionCalculationStrategy string,
|
||||
rowsPerPartition int64,
|
||||
jobRange config.RangeConfig,
|
||||
) ([]models.Partition, error) {
|
||||
@@ -48,12 +50,67 @@ func PartitionRangeGenerator(
|
||||
}
|
||||
|
||||
partitionsCount := rowsCount / rowsPerPartition
|
||||
|
||||
if partitionCalculationStrategy == "ESTIMATION" {
|
||||
return calculatePartitionsEstimation(ctx, tableAnalyzer, tableInfo, partitionColumn, partitionsCount)
|
||||
}
|
||||
|
||||
partitions, err := tableAnalyzer.CalculatePartitionRanges(ctx, tableInfo, partitionColumn, partitionsCount)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// logrus.Debugf("Partitions: %+v (%s.%s)", partitions, tableInfo.Schema, tableInfo.Table)
|
||||
logrus.Debugf("Partitions: %+v (%s.%s)", partitions, tableInfo.Schema, tableInfo.Table)
|
||||
|
||||
return partitions, nil
|
||||
}
|
||||
|
||||
func calculatePartitionsEstimation(
|
||||
ctx context.Context,
|
||||
tableAnalyzer etl.TableAnalyzer,
|
||||
tableInfo config.TableInfo,
|
||||
partitionColumn string,
|
||||
partitionsCount int64,
|
||||
) ([]models.Partition, error) {
|
||||
result, err := tableAnalyzer.QueryMaxMinFromColumn(ctx, tableInfo, partitionColumn)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
logrus.Infof("Column range for %s.%s.%s: [%d, %d]", tableInfo.Schema, tableInfo.Table, partitionColumn, result.Min, result.Max)
|
||||
|
||||
minValue := result.Min
|
||||
maxValue := result.Max
|
||||
rangeSize := maxValue - minValue
|
||||
stepSize := int64(math.Ceil(float64(rangeSize) / float64(partitionsCount)))
|
||||
|
||||
partitions := make([]models.Partition, 0, partitionsCount)
|
||||
|
||||
for i := range partitionsCount {
|
||||
partitionMin := minValue + (i * stepSize)
|
||||
partitionMax := minValue + ((i + 1) * stepSize)
|
||||
|
||||
if i == partitionsCount-1 {
|
||||
partitionMax = maxValue
|
||||
}
|
||||
|
||||
// Only the first partition has IsMinInclusive=true to avoid overlap
|
||||
isMinInclusive := i == 0
|
||||
|
||||
partition := models.Partition{
|
||||
Id: uuid.New(),
|
||||
HasRange: true,
|
||||
RetryCounter: 0,
|
||||
Range: models.PartitionRange{
|
||||
Min: partitionMin,
|
||||
Max: partitionMax,
|
||||
IsMinInclusive: isMinInclusive,
|
||||
IsMaxInclusive: true,
|
||||
},
|
||||
}
|
||||
|
||||
partitions = append(partitions, partition)
|
||||
}
|
||||
|
||||
return partitions, nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user