refactor: implement bidirectional transformation support with PostgreSQL integration

This commit is contained in:
2026-05-29 12:17:16 -05:00
parent 537b7fbd28
commit 4ba26092a9
7 changed files with 430 additions and 6 deletions

View File

@@ -2,6 +2,7 @@ package table_analyzers
import (
"context"
"fmt"
"strings"
"time"
@@ -9,6 +10,7 @@ import (
dbwrapper "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/db-wrapper"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
"github.com/google/uuid"
)
type PostgresTableAnalyzer struct {
@@ -161,7 +163,30 @@ func (ta *PostgresTableAnalyzer) EstimateTotalRows(
ctx context.Context,
tableInfo config.TableInfo,
) (int64, error) {
return 0, nil
query := `
SELECT reltuples::bigint
FROM pg_class
JOIN pg_namespace ON pg_namespace.oid = pg_class.relnamespace
WHERE pg_namespace.nspname = $1 AND pg_class.relname = $2`
ctxTimeout, cancel := context.WithTimeout(ctx, 1*time.Minute)
defer cancel()
var estimate int64
err := ta.db.QueryRow(ctxTimeout, query, tableInfo.Schema, tableInfo.Table).Scan(&estimate)
if err != nil {
return 0, err
}
if estimate < 0 {
countQuery := fmt.Sprintf(`SELECT COUNT(*) FROM "%s"."%s"`, tableInfo.Schema, tableInfo.Table)
err = ta.db.QueryRow(ctxTimeout, countQuery).Scan(&estimate)
if err != nil {
return 0, err
}
}
return estimate, nil
}
func (ta *PostgresTableAnalyzer) QueryMaxMinFromColumn(
@@ -169,7 +194,19 @@ func (ta *PostgresTableAnalyzer) QueryMaxMinFromColumn(
tableInfo config.TableInfo,
columnName string,
) (etl.MaxMinColumnResult, error) {
return etl.MaxMinColumnResult{}, nil
query := fmt.Sprintf(`SELECT MIN("%s"), MAX("%s") FROM "%s"."%s"`,
columnName, columnName, tableInfo.Schema, tableInfo.Table)
ctxTimeout, cancel := context.WithTimeout(ctx, 1*time.Minute)
defer cancel()
result := etl.MaxMinColumnResult{}
err := ta.db.QueryRow(ctxTimeout, query).Scan(&result.Min, &result.Max)
if err != nil {
return etl.MaxMinColumnResult{}, err
}
return result, nil
}
func (ta *PostgresTableAnalyzer) CalculatePartitionRanges(
@@ -179,5 +216,78 @@ func (ta *PostgresTableAnalyzer) CalculatePartitionRanges(
maxPartitions int64,
rangeConstraint config.RangeConfig,
) ([]models.Partition, error) {
return []models.Partition{}, nil
whereClause := ""
args := []any{maxPartitions}
if rangeConstraint.Min != nil || rangeConstraint.Max != nil {
var conditions []string
if rangeConstraint.Min != nil {
minOp := ">"
if rangeConstraint.IsMinInclusive {
minOp = ">="
}
args = append(args, *rangeConstraint.Min)
conditions = append(conditions, fmt.Sprintf(`"%s" %s $%d`, partitionColumn, minOp, len(args)))
}
if rangeConstraint.Max != nil {
maxOp := "<"
if rangeConstraint.IsMaxInclusive {
maxOp = "<="
}
args = append(args, *rangeConstraint.Max)
conditions = append(conditions, fmt.Sprintf(`"%s" %s $%d`, partitionColumn, maxOp, len(args)))
}
whereClause = "WHERE " + strings.Join(conditions, " AND ")
}
query := fmt.Sprintf(`
SELECT MIN("%s") AS lower_limit, MAX("%s") AS upper_limit
FROM (
SELECT "%s", NTILE($1) OVER (ORDER BY "%s") AS batch_id
FROM "%s"."%s" %s
) AS t
GROUP BY batch_id
ORDER BY batch_id`,
partitionColumn,
partitionColumn,
partitionColumn,
partitionColumn,
tableInfo.Schema,
tableInfo.Table,
whereClause)
ctxTimeout, cancel := context.WithTimeout(ctx, 1*time.Minute)
defer cancel()
rows, err := ta.db.Query(ctxTimeout, query, args...)
if err != nil {
return nil, err
}
defer rows.Close()
partitions := make([]models.Partition, 0, maxPartitions)
for rows.Next() {
partition := models.Partition{
Id: uuid.New(),
HasRange: true,
RetryCounter: 0,
Range: models.PartitionRange{
IsMinInclusive: true,
IsMaxInclusive: true,
},
}
if err := rows.Scan(&partition.Range.Min, &partition.Range.Max); err != nil {
return nil, err
}
partitions = append(partitions, partition)
}
if err := rows.Err(); err != nil {
return nil, err
}
return partitions, nil
}