refactor: add partition calculation strategy and implement estimation logic; enhance table analyzers for max/min column queries
This commit is contained in:
107
internal/app/etl/table_analyzers/main_test.go
Normal file
107
internal/app/etl/table_analyzers/main_test.go
Normal file
@@ -0,0 +1,107 @@
|
||||
package table_analyzers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
|
||||
)
|
||||
|
||||
// MockTableAnalyzer is a test double for the table analyzer used by the
// partition estimation tests. Its QueryMaxMinFromColumn method reports the
// bounds stored here; its other methods return zero values.
type MockTableAnalyzer struct {
	// minValue and maxValue are the column bounds the mock reports.
	minValue, maxValue int64
}
|
||||
|
||||
func (m *MockTableAnalyzer) QueryColumnTypes(ctx context.Context, tableInfo config.TableInfo) ([]models.ColumnType, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (m *MockTableAnalyzer) EstimateTotalRows(ctx context.Context, tableInfo config.TableInfo) (int64, error) {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
func (m *MockTableAnalyzer) QueryMaxMinFromColumn(ctx context.Context, tableInfo config.TableInfo, columnName string) (etl.MaxMinColumnResult, error) {
|
||||
return etl.MaxMinColumnResult{Min: m.minValue, Max: m.maxValue}, nil
|
||||
}
|
||||
|
||||
func (m *MockTableAnalyzer) CalculatePartitionRanges(ctx context.Context, tableInfo config.TableInfo, partitionColumn string, maxPartitions int64) ([]models.Partition, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func TestCalculatePartitionsEstimation_NoOverlap(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
mock := &MockTableAnalyzer{minValue: 0, maxValue: 100}
|
||||
tableInfo := config.TableInfo{Schema: "dbo", Table: "test"}
|
||||
|
||||
partitions, err := calculatePartitionsEstimation(ctx, mock, tableInfo, "id", 4)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
|
||||
if len(partitions) != 4 {
|
||||
t.Errorf("expected 4 partitions, got %d", len(partitions))
|
||||
}
|
||||
|
||||
for i := 0; i < len(partitions)-1; i++ {
|
||||
current := partitions[i].Range
|
||||
next := partitions[i+1].Range
|
||||
|
||||
if current.Max == next.Min {
|
||||
if current.IsMaxInclusive && next.IsMinInclusive {
|
||||
t.Errorf("partition %d and %d overlap at value %d (both inclusive)", i, i+1, current.Max)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
t.Logf("Partitions generated:")
|
||||
for i, p := range partitions {
|
||||
t.Logf(" P%d: [%d, %d] (minInc=%v, maxInc=%v)", i, p.Range.Min, p.Range.Max, p.Range.IsMinInclusive, p.Range.IsMaxInclusive)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCalculatePartitionsEstimation_CoverageComplete(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
mock := &MockTableAnalyzer{minValue: 1000, maxValue: 2000}
|
||||
tableInfo := config.TableInfo{Schema: "dbo", Table: "test"}
|
||||
|
||||
partitions, err := calculatePartitionsEstimation(ctx, mock, tableInfo, "id", 5)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
|
||||
if partitions[0].Range.Min != 1000 || !partitions[0].Range.IsMinInclusive {
|
||||
t.Errorf("first partition should start at 1000 (inclusive), got %d (inclusive=%v)",
|
||||
partitions[0].Range.Min, partitions[0].Range.IsMinInclusive)
|
||||
}
|
||||
|
||||
if partitions[len(partitions)-1].Range.Max != 2000 {
|
||||
t.Errorf("last partition should end at 2000, got %d", partitions[len(partitions)-1].Range.Max)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCalculatePartitionsEstimation_FirstPartitionInclusive(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
mock := &MockTableAnalyzer{minValue: 50, maxValue: 70}
|
||||
tableInfo := config.TableInfo{Schema: "dbo", Table: "test"}
|
||||
|
||||
partitions, err := calculatePartitionsEstimation(ctx, mock, tableInfo, "id", 3)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
|
||||
if !partitions[0].Range.IsMinInclusive {
|
||||
t.Errorf("first partition should have IsMinInclusive=true")
|
||||
}
|
||||
|
||||
if partitions[0].Range.Min != 50 {
|
||||
t.Errorf("first partition should start at 50, got %d", partitions[0].Range.Min)
|
||||
}
|
||||
|
||||
for i := 1; i < len(partitions); i++ {
|
||||
if partitions[i].Range.IsMinInclusive {
|
||||
t.Errorf("partition %d should have IsMinInclusive=false to avoid overlap", i)
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user