Files
go-migrate/internal/app/etl/table_analyzers/main.go

134 lines
4.0 KiB
Go

package table_analyzers
import (
	"context"
	"fmt"
	"math"

	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
	"github.com/google/uuid"
	"github.com/sirupsen/logrus"
)
// PartitionRangeGenerator decides how a source table is split into partitions.
//
// It asks tableAnalyzer for an estimated row count. When the estimate does not
// exceed rowsPerPartition a single partition is returned; that partition
// carries a range only if jobRange sets Min and/or Max. Otherwise the table is
// divided into rowsCount / rowsPerPartition partitions, computed either by
// calculatePartitionsEstimation (strategy "ESTIMATION") or by
// tableAnalyzer.CalculatePartitionRanges (any other strategy value).
//
// An error is returned when the row estimate or the range calculation fails,
// or when the table has to be split but rowsPerPartition is not positive.
func PartitionRangeGenerator(
	ctx context.Context,
	tableAnalyzer etl.TableAnalyzer,
	tableInfo config.TableInfo,
	partitionColumn string,
	partitionCalculationStrategy string,
	rowsPerPartition int64,
	jobRange config.RangeConfig,
) ([]models.Partition, error) {
	rowsCount, err := tableAnalyzer.EstimateTotalRows(ctx, tableInfo)
	if err != nil {
		return nil, err
	}
	// Log only after the error check so a failed estimate never reports a
	// meaningless count.
	logrus.Infof("Estimated rows in source: %v (%s.%s)", rowsCount, tableInfo.Schema, tableInfo.Table)

	// Everything fits in one partition: no splitting required.
	if rowsCount <= rowsPerPartition {
		hasRange := jobRange.Min != nil || jobRange.Max != nil
		partition := models.Partition{Id: uuid.New(), HasRange: hasRange, RetryCounter: 0}
		if hasRange {
			// NOTE(review): when only one of Min/Max is configured, the missing
			// bound stays at 0 here, whereas calculatePartitionsEstimation fills
			// it from the column's actual min/max. Confirm that consumers of the
			// partition handle a half-open job range correctly.
			var lower, upper int64
			if jobRange.Min != nil {
				lower = *jobRange.Min
			}
			if jobRange.Max != nil {
				upper = *jobRange.Max
			}
			partition.Range = models.PartitionRange{
				Min:            lower,
				Max:            upper,
				IsMinInclusive: jobRange.IsMinInclusive,
				IsMaxInclusive: jobRange.IsMaxInclusive,
			}
		}
		return []models.Partition{partition}, nil
	}

	// Reaching this point with a non-positive rowsPerPartition would divide by
	// zero or produce a non-positive partition count, so reject it explicitly.
	if rowsPerPartition <= 0 {
		return nil, fmt.Errorf(
			"rows per partition must be positive to split %s.%s, got %d",
			tableInfo.Schema, tableInfo.Table, rowsPerPartition,
		)
	}
	partitionsCount := rowsCount / rowsPerPartition

	var partitions []models.Partition
	if partitionCalculationStrategy == "ESTIMATION" {
		partitions, err = calculatePartitionsEstimation(ctx, tableAnalyzer, tableInfo, partitionColumn, partitionsCount, jobRange)
	} else {
		partitions, err = tableAnalyzer.CalculatePartitionRanges(ctx, tableInfo, partitionColumn, partitionsCount, jobRange)
	}
	if err != nil {
		return nil, err
	}

	logrus.Debugf("Partitions count: %v (%s.%s)", len(partitions), tableInfo.Schema, tableInfo.Table)
	return partitions, nil
}
// calculatePartitionsEstimation slices the value range of partitionColumn into
// at most partitionsCount contiguous partitions of equal width.
//
// Bounds come from rangeConstraint when set; any bound that is not set is
// taken from tableAnalyzer.QueryMaxMinFromColumn. The first partition includes
// its lower bound, every later partition excludes it, and every partition
// includes its upper bound, so adjacent partitions do not overlap. No
// partition extends past the resolved maximum; fewer than partitionsCount
// partitions are returned when the range is too narrow to fill them all.
//
// When the resolved maximum is not greater than the minimum, a single
// partition carrying the bounds unchanged is returned.
//
// An error is returned when partitionsCount is not positive, when the column
// query fails, or when max - min does not fit in an int64.
func calculatePartitionsEstimation(
	ctx context.Context,
	tableAnalyzer etl.TableAnalyzer,
	tableInfo config.TableInfo,
	partitionColumn string,
	partitionsCount int64,
	rangeConstraint config.RangeConfig,
) ([]models.Partition, error) {
	if partitionsCount <= 0 {
		return nil, fmt.Errorf(
			"partitions count must be positive for %s.%s, got %d",
			tableInfo.Schema, tableInfo.Table, partitionsCount,
		)
	}

	// Resolve the bounds: user-defined values win, the column is only queried
	// when at least one bound is missing.
	var minValue, maxValue int64
	if rangeConstraint.Min != nil && rangeConstraint.Max != nil {
		minValue, maxValue = *rangeConstraint.Min, *rangeConstraint.Max
		logrus.Infof("Column range for %s.%s.%s: [%d, %d] (user-defined)", tableInfo.Schema, tableInfo.Table, partitionColumn, minValue, maxValue)
	} else {
		result, err := tableAnalyzer.QueryMaxMinFromColumn(ctx, tableInfo, partitionColumn)
		if err != nil {
			return nil, err
		}
		minValue, maxValue = result.Min, result.Max
		switch {
		case rangeConstraint.Min != nil:
			minValue = *rangeConstraint.Min
			logrus.Infof("Column range for %s.%s.%s: [%d, %d] (min user-defined)", tableInfo.Schema, tableInfo.Table, partitionColumn, minValue, maxValue)
		case rangeConstraint.Max != nil:
			maxValue = *rangeConstraint.Max
			logrus.Infof("Column range for %s.%s.%s: [%d, %d] (max user-defined)", tableInfo.Schema, tableInfo.Table, partitionColumn, minValue, maxValue)
		default:
			logrus.Infof("Column range for %s.%s.%s: [%d, %d]", tableInfo.Schema, tableInfo.Table, partitionColumn, minValue, maxValue)
		}
	}

	// NOTE(review): rangeConstraint.IsMinInclusive / IsMaxInclusive are not
	// consulted here (the outer bounds are always inclusive), unlike the
	// single-partition path in PartitionRangeGenerator. Confirm this is intended.
	newPartition := func(lower, upper int64, first bool) models.Partition {
		return models.Partition{
			Id:           uuid.New(),
			HasRange:     true,
			RetryCounter: 0,
			Range: models.PartitionRange{
				Min:            lower,
				Max:            upper,
				IsMinInclusive: first,
				IsMaxInclusive: true,
			},
		}
	}

	// Degenerate range: nothing to slice, so hand the bounds through as one
	// partition instead of emitting partitionsCount redundant ones.
	if maxValue <= minValue {
		if maxValue < minValue {
			logrus.Warnf("Column range for %s.%s.%s is inverted: [%d, %d]", tableInfo.Schema, tableInfo.Table, partitionColumn, minValue, maxValue)
		}
		return []models.Partition{newPartition(minValue, maxValue, true)}, nil
	}

	// maxValue - minValue overflows int64 when the bounds are far apart on
	// opposite sides of zero; detect that before subtracting.
	if minValue < 0 && maxValue > math.MaxInt64+minValue {
		return nil, fmt.Errorf(
			"column range [%d, %d] of %s.%s.%s is too wide to partition",
			minValue, maxValue, tableInfo.Schema, tableInfo.Table, partitionColumn,
		)
	}

	// Integer ceiling division: exact for the whole int64 domain, unlike a
	// float64 round trip. rangeSize > 0 here, so stepSize is at least 1.
	rangeSize := maxValue - minValue
	stepSize := rangeSize / partitionsCount
	if rangeSize%partitionsCount != 0 {
		stepSize++
	}

	// At most min(partitionsCount, rangeSize) partitions are produced because
	// stepSize >= 1 and stepSize*partitionsCount >= rangeSize.
	partitions := make([]models.Partition, 0, min(partitionsCount, rangeSize))
	for lower := minValue; lower < maxValue; {
		// Clamp to maxValue so rounding the step up can never push a
		// partition past the resolved upper bound.
		upper := maxValue
		if maxValue-lower > stepSize {
			upper = lower + stepSize
		}
		partitions = append(partitions, newPartition(lower, upper, len(partitions) == 0))
		lower = upper
	}
	return partitions, nil
}