feat: add MSSQL table analyzer and integrate partition range generation for improved data migration
This commit is contained in:
40
internal/app/etl/table_analyzers/main.go
Normal file
40
internal/app/etl/table_analyzers/main.go
Normal file
@@ -0,0 +1,40 @@
|
||||
package table_analyzers
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
func PartitionRangeGenerator(
|
||||
ctx context.Context,
|
||||
tableAnalyzer etl.TableAnalyzer,
|
||||
tableInfo config.TableInfo,
|
||||
partitionColumn string,
|
||||
rowsPerPartition int64,
|
||||
) ([]models.Partition, error) {
|
||||
rowsCount, err := tableAnalyzer.EstimateTotalRows(ctx, tableInfo)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if rowsCount <= rowsPerPartition {
|
||||
return []models.Partition{{
|
||||
Id: uuid.New(),
|
||||
ShouldUseRange: false,
|
||||
RetryCounter: 0,
|
||||
}}, nil
|
||||
|
||||
}
|
||||
|
||||
partitionsCount := rowsCount / rowsPerPartition
|
||||
partitions, err := tableAnalyzer.CalculatePartitionRanges(ctx, tableInfo, partitionColumn, partitionsCount)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return partitions, nil
|
||||
}
|
||||
105
internal/app/etl/table_analyzers/mssql.go
Normal file
105
internal/app/etl/table_analyzers/mssql.go
Normal file
@@ -0,0 +1,105 @@
|
||||
package table_analyzers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
// MssqlTableAnalyzer runs table-analysis queries (row estimates, partition
// ranges) through a database/sql connection pool.
type MssqlTableAnalyzer struct {
	// db is the handle every query of this analyzer is executed on.
	db *sql.DB
}
|
||||
|
||||
func NewMssqlTableAnalyzer(db *sql.DB) etl.TableAnalyzer {
|
||||
return &MssqlTableAnalyzer{db: db}
|
||||
}
|
||||
|
||||
func (ta *MssqlTableAnalyzer) QueryColumnTypes(
|
||||
ctx context.Context,
|
||||
tableInfo config.TableInfo,
|
||||
) ([]models.ColumnType, error) {
|
||||
return []models.ColumnType{}, nil
|
||||
}
|
||||
|
||||
func (ta *MssqlTableAnalyzer) EstimateTotalRows(
|
||||
ctx context.Context,
|
||||
tableInfo config.TableInfo,
|
||||
) (int64, error) {
|
||||
query := `
|
||||
SELECT SUM(p.rows) AS count
|
||||
FROM sys.tables t
|
||||
JOIN sys.schemas s ON t.schema_id = s.schema_id
|
||||
JOIN sys.partitions p ON t.object_id = p.object_id
|
||||
WHERE s.name = @schema AND t.name = @table AND p.index_id IN (0, 1)
|
||||
GROUP BY t.name`
|
||||
|
||||
ctxTimeout, cancel := context.WithTimeout(ctx, time.Second*20)
|
||||
defer cancel()
|
||||
|
||||
var rowsCount int64
|
||||
err := ta.db.QueryRowContext(ctxTimeout, query, sql.Named("schema", tableInfo.Schema), sql.Named("table", tableInfo.Table)).Scan(&rowsCount)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return rowsCount, nil
|
||||
}
|
||||
|
||||
func (ta *MssqlTableAnalyzer) CalculatePartitionRanges(
|
||||
ctx context.Context,
|
||||
tableInfo config.TableInfo,
|
||||
partitionColumn string,
|
||||
maxPartitions int64,
|
||||
) ([]models.Partition, error) {
|
||||
query := fmt.Sprintf(`
|
||||
SELECT
|
||||
MIN([%s]) AS lower_limit,
|
||||
MAX([%s]) AS upper_limit
|
||||
FROM (SELECT [%s], NTILE(@maxPartitions) OVER (ORDER BY [%s]) AS batch_id FROM [%s].[%s]) AS T
|
||||
GROUP BY batch_id
|
||||
ORDER BY batch_id`,
|
||||
partitionColumn,
|
||||
partitionColumn,
|
||||
partitionColumn,
|
||||
partitionColumn,
|
||||
tableInfo.Schema,
|
||||
tableInfo.Table)
|
||||
|
||||
ctxTimeout, cancel := context.WithTimeout(ctx, time.Second*20)
|
||||
defer cancel()
|
||||
|
||||
rows, err := ta.db.QueryContext(ctxTimeout, query, sql.Named("maxPartitions", maxPartitions))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
partitions := make([]models.Partition, 0, maxPartitions)
|
||||
|
||||
for rows.Next() {
|
||||
partition := models.Partition{
|
||||
Id: uuid.New(),
|
||||
ShouldUseRange: true,
|
||||
RetryCounter: 0,
|
||||
IsLowerLimitInclusive: true,
|
||||
}
|
||||
|
||||
if err := rows.Scan(&partition.LowerLimit, &partition.UpperLimit); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
partitions = append(partitions, partition)
|
||||
}
|
||||
|
||||
if err := rows.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return partitions, nil
|
||||
}
|
||||
@@ -93,6 +93,7 @@ type TableAnalyzer interface {
|
||||
CalculatePartitionRanges(
|
||||
ctx context.Context,
|
||||
tableInfo config.TableInfo,
|
||||
maxPartitions int,
|
||||
partitionColumn string,
|
||||
maxPartitions int64,
|
||||
) ([]models.Partition, error)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user