refactor: update RangeConfig to use pointers for min and max; adjust partition calculation logic to handle nil values
This commit is contained in:
@@ -20,20 +20,6 @@ func PartitionRangeGenerator(
|
||||
rowsPerPartition int64,
|
||||
jobRange config.RangeConfig,
|
||||
) ([]models.Partition, error) {
|
||||
if jobRange.Min > 0 {
|
||||
return []models.Partition{{
|
||||
Id: uuid.New(),
|
||||
HasRange: true,
|
||||
RetryCounter: 0,
|
||||
Range: models.PartitionRange{
|
||||
Min: jobRange.Min,
|
||||
Max: jobRange.Max,
|
||||
IsMinInclusive: jobRange.IsMinInclusive,
|
||||
IsMaxInclusive: jobRange.IsMaxInclusive,
|
||||
},
|
||||
}}, nil
|
||||
}
|
||||
|
||||
rowsCount, err := tableAnalyzer.EstimateTotalRows(ctx, tableInfo)
|
||||
logrus.Infof("Estimated rows in source: %v (%s.%s)", rowsCount, tableInfo.Schema, tableInfo.Table)
|
||||
if err != nil {
|
||||
@@ -41,21 +27,33 @@ func PartitionRangeGenerator(
|
||||
}
|
||||
|
||||
if rowsCount <= rowsPerPartition {
|
||||
return []models.Partition{{
|
||||
Id: uuid.New(),
|
||||
HasRange: false,
|
||||
RetryCounter: 0,
|
||||
}}, nil
|
||||
|
||||
hasRange := jobRange.Min != nil || jobRange.Max != nil
|
||||
partition := models.Partition{Id: uuid.New(), HasRange: hasRange, RetryCounter: 0}
|
||||
if hasRange {
|
||||
var min, max int64
|
||||
if jobRange.Min != nil {
|
||||
min = *jobRange.Min
|
||||
}
|
||||
if jobRange.Max != nil {
|
||||
max = *jobRange.Max
|
||||
}
|
||||
partition.Range = models.PartitionRange{
|
||||
Min: min,
|
||||
Max: max,
|
||||
IsMinInclusive: jobRange.IsMinInclusive,
|
||||
IsMaxInclusive: jobRange.IsMaxInclusive,
|
||||
}
|
||||
}
|
||||
return []models.Partition{partition}, nil
|
||||
}
|
||||
|
||||
partitionsCount := rowsCount / rowsPerPartition
|
||||
|
||||
if partitionCalculationStrategy == "ESTIMATION" {
|
||||
return calculatePartitionsEstimation(ctx, tableAnalyzer, tableInfo, partitionColumn, partitionsCount)
|
||||
return calculatePartitionsEstimation(ctx, tableAnalyzer, tableInfo, partitionColumn, partitionsCount, jobRange)
|
||||
}
|
||||
|
||||
partitions, err := tableAnalyzer.CalculatePartitionRanges(ctx, tableInfo, partitionColumn, partitionsCount)
|
||||
partitions, err := tableAnalyzer.CalculatePartitionRanges(ctx, tableInfo, partitionColumn, partitionsCount, jobRange)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -71,16 +69,37 @@ func calculatePartitionsEstimation(
|
||||
tableInfo config.TableInfo,
|
||||
partitionColumn string,
|
||||
partitionsCount int64,
|
||||
rangeConstraint config.RangeConfig,
|
||||
) ([]models.Partition, error) {
|
||||
result, err := tableAnalyzer.QueryMaxMinFromColumn(ctx, tableInfo, partitionColumn)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
var minValue, maxValue int64
|
||||
|
||||
if rangeConstraint.Min != nil && rangeConstraint.Max != nil {
|
||||
minValue = *rangeConstraint.Min
|
||||
maxValue = *rangeConstraint.Max
|
||||
logrus.Infof("Column range for %s.%s.%s: [%d, %d] (user-defined)", tableInfo.Schema, tableInfo.Table, partitionColumn, minValue, maxValue)
|
||||
} else if rangeConstraint.Min != nil || rangeConstraint.Max != nil {
|
||||
result, err := tableAnalyzer.QueryMaxMinFromColumn(ctx, tableInfo, partitionColumn)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if rangeConstraint.Min != nil {
|
||||
minValue = *rangeConstraint.Min
|
||||
maxValue = result.Max
|
||||
logrus.Infof("Column range for %s.%s.%s: [%d, %d] (min user-defined)", tableInfo.Schema, tableInfo.Table, partitionColumn, minValue, maxValue)
|
||||
} else {
|
||||
minValue = result.Min
|
||||
maxValue = *rangeConstraint.Max
|
||||
logrus.Infof("Column range for %s.%s.%s: [%d, %d] (max user-defined)", tableInfo.Schema, tableInfo.Table, partitionColumn, minValue, maxValue)
|
||||
}
|
||||
} else {
|
||||
result, err := tableAnalyzer.QueryMaxMinFromColumn(ctx, tableInfo, partitionColumn)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
logrus.Infof("Column range for %s.%s.%s: [%d, %d]", tableInfo.Schema, tableInfo.Table, partitionColumn, result.Min, result.Max)
|
||||
minValue = result.Min
|
||||
maxValue = result.Max
|
||||
}
|
||||
|
||||
logrus.Infof("Column range for %s.%s.%s: [%d, %d]", tableInfo.Schema, tableInfo.Table, partitionColumn, result.Min, result.Max)
|
||||
|
||||
minValue := result.Min
|
||||
maxValue := result.Max
|
||||
rangeSize := maxValue - minValue
|
||||
stepSize := int64(math.Ceil(float64(rangeSize) / float64(partitionsCount)))
|
||||
|
||||
@@ -94,9 +113,7 @@ func calculatePartitionsEstimation(
|
||||
partitionMax = maxValue
|
||||
}
|
||||
|
||||
// Only the first partition has IsMinInclusive=true to avoid overlap
|
||||
isMinInclusive := i == 0
|
||||
|
||||
partition := models.Partition{
|
||||
Id: uuid.New(),
|
||||
HasRange: true,
|
||||
|
||||
Reference in New Issue
Block a user