117 lines
3.0 KiB
Go
117 lines
3.0 KiB
Go
package table_analyzers
|
|
|
|
import (
	"context"
	"fmt"
	"math"

	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
	"github.com/google/uuid"
	"github.com/sirupsen/logrus"
)
|
|
|
|
func PartitionRangeGenerator(
|
|
ctx context.Context,
|
|
tableAnalyzer etl.TableAnalyzer,
|
|
tableInfo config.TableInfo,
|
|
partitionColumn string,
|
|
partitionCalculationStrategy string,
|
|
rowsPerPartition int64,
|
|
jobRange config.RangeConfig,
|
|
) ([]models.Partition, error) {
|
|
if jobRange.Min > 0 {
|
|
return []models.Partition{{
|
|
Id: uuid.New(),
|
|
HasRange: true,
|
|
RetryCounter: 0,
|
|
Range: models.PartitionRange{
|
|
Min: jobRange.Min,
|
|
Max: jobRange.Max,
|
|
IsMinInclusive: jobRange.IsMinInclusive,
|
|
IsMaxInclusive: jobRange.IsMaxInclusive,
|
|
},
|
|
}}, nil
|
|
}
|
|
|
|
rowsCount, err := tableAnalyzer.EstimateTotalRows(ctx, tableInfo)
|
|
logrus.Infof("Estimated rows in source: %v (%s.%s)", rowsCount, tableInfo.Schema, tableInfo.Table)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if rowsCount <= rowsPerPartition {
|
|
return []models.Partition{{
|
|
Id: uuid.New(),
|
|
HasRange: false,
|
|
RetryCounter: 0,
|
|
}}, nil
|
|
|
|
}
|
|
|
|
partitionsCount := rowsCount / rowsPerPartition
|
|
|
|
if partitionCalculationStrategy == "ESTIMATION" {
|
|
return calculatePartitionsEstimation(ctx, tableAnalyzer, tableInfo, partitionColumn, partitionsCount)
|
|
}
|
|
|
|
partitions, err := tableAnalyzer.CalculatePartitionRanges(ctx, tableInfo, partitionColumn, partitionsCount)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
logrus.Debugf("Partitions: %+v (%s.%s)", partitions, tableInfo.Schema, tableInfo.Table)
|
|
|
|
return partitions, nil
|
|
}
|
|
|
|
func calculatePartitionsEstimation(
|
|
ctx context.Context,
|
|
tableAnalyzer etl.TableAnalyzer,
|
|
tableInfo config.TableInfo,
|
|
partitionColumn string,
|
|
partitionsCount int64,
|
|
) ([]models.Partition, error) {
|
|
result, err := tableAnalyzer.QueryMaxMinFromColumn(ctx, tableInfo, partitionColumn)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
logrus.Infof("Column range for %s.%s.%s: [%d, %d]", tableInfo.Schema, tableInfo.Table, partitionColumn, result.Min, result.Max)
|
|
|
|
minValue := result.Min
|
|
maxValue := result.Max
|
|
rangeSize := maxValue - minValue
|
|
stepSize := int64(math.Ceil(float64(rangeSize) / float64(partitionsCount)))
|
|
|
|
partitions := make([]models.Partition, 0, partitionsCount)
|
|
|
|
for i := range partitionsCount {
|
|
partitionMin := minValue + (i * stepSize)
|
|
partitionMax := minValue + ((i + 1) * stepSize)
|
|
|
|
if i == partitionsCount-1 {
|
|
partitionMax = maxValue
|
|
}
|
|
|
|
// Only the first partition has IsMinInclusive=true to avoid overlap
|
|
isMinInclusive := i == 0
|
|
|
|
partition := models.Partition{
|
|
Id: uuid.New(),
|
|
HasRange: true,
|
|
RetryCounter: 0,
|
|
Range: models.PartitionRange{
|
|
Min: partitionMin,
|
|
Max: partitionMax,
|
|
IsMinInclusive: isMinInclusive,
|
|
IsMaxInclusive: true,
|
|
},
|
|
}
|
|
|
|
partitions = append(partitions, partition)
|
|
}
|
|
|
|
return partitions, nil
|
|
}
|