package table_analyzers

import (
	"context"
	"fmt"
	"math"

	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
	"github.com/google/uuid"
	"github.com/sirupsen/logrus"
)

// PartitionRangeGenerator builds the list of partitions used to process a table.
//
// The decision is made in this order:
//  1. If jobRange.Min is greater than zero, a single partition carrying exactly
//     the configured jobRange bounds is returned and the table is not queried.
//  2. Otherwise the total row count is estimated via tableAnalyzer. If the
//     estimate does not exceed rowsPerPartition, a single partition without a
//     range (HasRange=false) is returned.
//  3. Otherwise the partition count is rowsCount / rowsPerPartition (integer
//     division). With partitionCalculationStrategy "ESTIMATION" the
//     [min, max] span of partitionColumn is split into equal-width ranges; any
//     other strategy delegates to tableAnalyzer.CalculatePartitionRanges.
//
// An error is returned when the analyzer fails, or when the table has to be
// split but rowsPerPartition is not positive.
func PartitionRangeGenerator(
	ctx context.Context,
	tableAnalyzer etl.TableAnalyzer,
	tableInfo config.TableInfo,
	partitionColumn string,
	partitionCalculationStrategy string,
	rowsPerPartition int64,
	jobRange config.RangeConfig,
) ([]models.Partition, error) {
	// NOTE(review): an explicit range is only recognised when Min > 0, so a
	// configured range starting at 0 (or below) falls through to the automatic
	// path. Confirm this is intended.
	if jobRange.Min > 0 {
		return []models.Partition{{
			Id:           uuid.New(),
			HasRange:     true,
			RetryCounter: 0,
			Range: models.PartitionRange{
				Min:            jobRange.Min,
				Max:            jobRange.Max,
				IsMinInclusive: jobRange.IsMinInclusive,
				IsMaxInclusive: jobRange.IsMaxInclusive,
			},
		}}, nil
	}

	rowsCount, err := tableAnalyzer.EstimateTotalRows(ctx, tableInfo)
	if err != nil {
		return nil, err
	}
	// Log only after the error check so a failed estimate is never reported as
	// if it were a real row count.
	logrus.Infof("Estimated rows in source: %v (%s.%s)", rowsCount, tableInfo.Schema, tableInfo.Table)

	if rowsCount <= rowsPerPartition {
		return []models.Partition{{
			Id:           uuid.New(),
			HasRange:     false,
			RetryCounter: 0,
		}}, nil
	}

	// Checked after the shortcut above so that inputs which previously produced
	// a single partition still do; from here on a non-positive value would
	// cause a division by zero or a non-positive partition count.
	if rowsPerPartition <= 0 {
		return nil, fmt.Errorf("rows per partition must be positive, got %d", rowsPerPartition)
	}

	// rowsCount > rowsPerPartition > 0, so partitionsCount is at least 1.
	partitionsCount := rowsCount / rowsPerPartition

	if partitionCalculationStrategy == "ESTIMATION" {
		return calculatePartitionsEstimation(ctx, tableAnalyzer, tableInfo, partitionColumn, partitionsCount)
	}

	partitions, err := tableAnalyzer.CalculatePartitionRanges(ctx, tableInfo, partitionColumn, partitionsCount)
	if err != nil {
		return nil, err
	}

	logrus.Debugf("Partitions: %+v (%s.%s)", partitions, tableInfo.Schema, tableInfo.Table)

	return partitions, nil
}

// calculatePartitionsEstimation splits the [min, max] span of partitionColumn
// into at most partitionsCount contiguous, non-overlapping ranges of equal
// width (the last one may be narrower).
//
// The first range is closed on both ends; every following range excludes its
// lower bound, which equals the previous range's upper bound. Upper bounds are
// clamped to the column maximum, and generation stops as soon as the maximum
// is reached, so fewer than partitionsCount partitions are returned when the
// value span is too small to fill them all.
//
// An error is returned when partitionsCount is not positive, when the analyzer
// query fails, when the reported maximum is below the minimum, or when the
// span does not fit in an int64.
func calculatePartitionsEstimation(
	ctx context.Context,
	tableAnalyzer etl.TableAnalyzer,
	tableInfo config.TableInfo,
	partitionColumn string,
	partitionsCount int64,
) ([]models.Partition, error) {
	if partitionsCount <= 0 {
		return nil, fmt.Errorf("partitions count must be positive, got %d", partitionsCount)
	}

	result, err := tableAnalyzer.QueryMaxMinFromColumn(ctx, tableInfo, partitionColumn)
	if err != nil {
		return nil, err
	}

	logrus.Infof("Column range for %s.%s.%s: [%d, %d]", tableInfo.Schema, tableInfo.Table, partitionColumn, result.Min, result.Max)

	minValue, maxValue := result.Min, result.Max
	if maxValue < minValue {
		return nil, fmt.Errorf("invalid column range [%d, %d]: max is below min", minValue, maxValue)
	}
	// maxValue - minValue wraps around silently when the span exceeds int64.
	if minValue < 0 && maxValue > math.MaxInt64+minValue {
		return nil, fmt.Errorf("column range [%d, %d] is too wide to partition", minValue, maxValue)
	}

	rangeSize := maxValue - minValue

	// Integer ceiling division: exact for the full int64 domain, unlike a
	// float64 round trip which loses precision above 2^53.
	stepSize := rangeSize / partitionsCount
	if rangeSize%partitionsCount != 0 {
		stepSize++
	}

	partitions := make([]models.Partition, 0, partitionsCount)
	lowerBound := minValue

	for i := range partitionsCount {
		// Clamp to maxValue. Comparing the remaining distance (instead of
		// computing lowerBound+stepSize first) cannot overflow.
		upperBound := maxValue
		if maxValue-lowerBound > stepSize {
			upperBound = lowerBound + stepSize
		}

		partitions = append(partitions, models.Partition{
			Id:           uuid.New(),
			HasRange:     true,
			RetryCounter: 0,
			Range: models.PartitionRange{
				Min: lowerBound,
				Max: upperBound,
				// Only the first partition has IsMinInclusive=true to avoid overlap
				IsMinInclusive: i == 0,
				IsMaxInclusive: true,
			},
		})

		// The whole span is covered; any further partition would be empty or
		// have Min > Max.
		if upperBound == maxValue {
			break
		}
		lowerBound = upperBound
	}

	return partitions, nil
}