package table_analyzers

import (
	"context"
	"fmt"
	"math"

	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
	"github.com/google/uuid"
	"github.com/sirupsen/logrus"
)

// PartitionRangeGenerator builds the list of partitions used to process
// tableInfo.
//
// It asks tableAnalyzer for an estimated row count. When that count does not
// exceed rowsPerPartition a single partition is returned; it carries jobRange
// as its range when jobRange has at least one bound, and no range otherwise.
// For larger tables the number of partitions is rowsCount / rowsPerPartition
// and the ranges are produced either by calculatePartitionsEstimation (when
// partitionCalculationStrategy is "ESTIMATION") or by
// tableAnalyzer.CalculatePartitionRanges (any other strategy value).
//
// An error is returned when the analyzer fails, or when the table needs to be
// split but rowsPerPartition is not positive.
func PartitionRangeGenerator(
	ctx context.Context,
	tableAnalyzer etl.TableAnalyzer,
	tableInfo config.TableInfo,
	partitionColumn string,
	partitionCalculationStrategy string,
	rowsPerPartition int64,
	jobRange config.RangeConfig,
) ([]models.Partition, error) {
	rowsCount, err := tableAnalyzer.EstimateTotalRows(ctx, tableInfo)
	if err != nil {
		return nil, err
	}
	// Log only after the error check so a failed estimate is never reported
	// as if it were a real row count.
	logrus.Infof("Estimated rows in source: %v (%s.%s)", rowsCount, tableInfo.Schema, tableInfo.Table)

	if rowsCount <= rowsPerPartition {
		hasRange := jobRange.Min != nil || jobRange.Max != nil
		partition := models.Partition{Id: uuid.New(), HasRange: hasRange, RetryCounter: 0}
		if hasRange {
			// NOTE(review): when only one bound is configured the other one
			// stays 0 (e.g. Min=100 with no Max yields [100, 0]). Confirm that
			// the consumers of models.PartitionRange expect this.
			var lower, upper int64
			if jobRange.Min != nil {
				lower = *jobRange.Min
			}
			if jobRange.Max != nil {
				upper = *jobRange.Max
			}
			partition.Range = models.PartitionRange{
				Min:            lower,
				Max:            upper,
				IsMinInclusive: jobRange.IsMinInclusive,
				IsMaxInclusive: jobRange.IsMaxInclusive,
			}
		}
		return []models.Partition{partition}, nil
	}

	// rowsPerPartition is used as a divisor below. It is validated here, after
	// the single-partition shortcut, so that inputs which never reached the
	// division keep behaving exactly as before.
	if rowsPerPartition <= 0 {
		return nil, fmt.Errorf("rows per partition must be positive, got %d", rowsPerPartition)
	}

	// rowsCount > rowsPerPartition > 0 here, so partitionsCount is at least 1.
	partitionsCount := rowsCount / rowsPerPartition

	if partitionCalculationStrategy == "ESTIMATION" {
		return calculatePartitionsEstimation(ctx, tableAnalyzer, tableInfo, partitionColumn, partitionsCount, jobRange)
	}

	partitions, err := tableAnalyzer.CalculatePartitionRanges(ctx, tableInfo, partitionColumn, partitionsCount, jobRange)
	if err != nil {
		return nil, err
	}

	logrus.Debugf("Partitions count: %v (%s.%s)", len(partitions), tableInfo.Schema, tableInfo.Table)
	return partitions, nil
}

// calculatePartitionsEstimation splits the value range of partitionColumn
// into at most partitionsCount contiguous partitions of equal width.
//
// Each bound of the range comes from rangeConstraint when that bound is set;
// a missing bound is filled from tableAnalyzer.QueryMaxMinFromColumn. The
// first partition includes its lower bound, every later partition excludes it,
// and every partition includes its upper bound, so consecutive partitions do
// not overlap. No partition extends past the upper bound of the range, which
// means fewer than partitionsCount partitions are returned when the range is
// narrower than partitionsCount values.
//
// When the upper bound is not greater than the lower bound a single partition
// [min, max] is returned. An error is returned when the analyzer query fails
// or when the width of the range cannot be represented as an int64.
//
// NOTE(review): rangeConstraint.IsMinInclusive and IsMaxInclusive are not
// consulted here, although the single-partition path of
// PartitionRangeGenerator copies them into the partition; confirm which
// behaviour is intended for user-defined bounds.
func calculatePartitionsEstimation(
	ctx context.Context,
	tableAnalyzer etl.TableAnalyzer,
	tableInfo config.TableInfo,
	partitionColumn string,
	partitionsCount int64,
	rangeConstraint config.RangeConfig,
) ([]models.Partition, error) {
	var minValue, maxValue int64

	if rangeConstraint.Min != nil && rangeConstraint.Max != nil {
		// Both bounds are user-defined: no need to query the column.
		minValue, maxValue = *rangeConstraint.Min, *rangeConstraint.Max
		logrus.Infof("Column range for %s.%s.%s: [%d, %d] (user-defined)", tableInfo.Schema, tableInfo.Table, partitionColumn, minValue, maxValue)
	} else {
		result, err := tableAnalyzer.QueryMaxMinFromColumn(ctx, tableInfo, partitionColumn)
		if err != nil {
			return nil, err
		}

		switch {
		case rangeConstraint.Min != nil:
			minValue, maxValue = *rangeConstraint.Min, result.Max
			logrus.Infof("Column range for %s.%s.%s: [%d, %d] (min user-defined)", tableInfo.Schema, tableInfo.Table, partitionColumn, minValue, maxValue)
		case rangeConstraint.Max != nil:
			minValue, maxValue = result.Min, *rangeConstraint.Max
			logrus.Infof("Column range for %s.%s.%s: [%d, %d] (max user-defined)", tableInfo.Schema, tableInfo.Table, partitionColumn, minValue, maxValue)
		default:
			minValue, maxValue = result.Min, result.Max
			logrus.Infof("Column range for %s.%s.%s: [%d, %d]", tableInfo.Schema, tableInfo.Table, partitionColumn, minValue, maxValue)
		}
	}

	newPartition := func(lower, upper int64, minInclusive bool) models.Partition {
		return models.Partition{
			Id:           uuid.New(),
			HasRange:     true,
			RetryCounter: 0,
			Range: models.PartitionRange{
				Min:            lower,
				Max:            upper,
				IsMinInclusive: minInclusive,
				IsMaxInclusive: true,
			},
		}
	}

	// Nothing to split: a single value, or a range that is empty because the
	// lower bound lies above the upper bound.
	if maxValue <= minValue {
		return []models.Partition{newPartition(minValue, maxValue, true)}, nil
	}

	// maxValue - minValue wraps around silently when the bounds are more than
	// math.MaxInt64 apart; refuse instead of producing garbage ranges.
	if minValue < 0 && maxValue > math.MaxInt64+minValue {
		return nil, fmt.Errorf(
			"column range [%d, %d] of %s.%s.%s is too wide to partition",
			minValue, maxValue, tableInfo.Schema, tableInfo.Table, partitionColumn,
		)
	}

	// Defensive: the only caller visible here passes a count >= 1, but a
	// non-positive count would divide by zero below.
	if partitionsCount < 1 {
		partitionsCount = 1
	}

	// Integer ceiling division; float64 cannot represent every int64 exactly.
	span := maxValue - minValue
	stepSize := span / partitionsCount
	if span%partitionsCount != 0 {
		stepSize++
	}

	// stepSize >= 1 and stepSize*partitionsCount >= span, so the loop below
	// emits at most min(partitionsCount, span) partitions.
	partitions := make([]models.Partition, 0, min(partitionsCount, span))
	for lower := minValue; lower < maxValue; {
		// Clamp to maxValue so that rounding the step up never pushes a
		// partition past the end of the range. The subtraction form avoids
		// overflowing lower + stepSize.
		upper := maxValue
		if maxValue-lower > stepSize {
			upper = lower + stepSize
		}
		partitions = append(partitions, newPartition(lower, upper, len(partitions) == 0))
		lower = upper
	}

	return partitions, nil
}