feat: add MSSQL table analyzer and integrate partition range generation for improved data migration
This commit is contained in:
@@ -1,108 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
func estimateTotalRowsMssql(ctx context.Context, db *sql.DB, tableInfo config.SourceTableInfo) (int64, error) {
|
||||
query := `
|
||||
SELECT
|
||||
SUM(p.rows) AS count
|
||||
FROM sys.tables t
|
||||
JOIN sys.schemas s ON t.schema_id = s.schema_id
|
||||
JOIN sys.partitions p ON t.object_id = p.object_id
|
||||
WHERE s.name = @schema AND t.name = @table AND p.index_id IN (0, 1)
|
||||
GROUP BY t.name`
|
||||
|
||||
ctxTimeout, cancel := context.WithTimeout(ctx, time.Second*20)
|
||||
defer cancel()
|
||||
|
||||
var rowsCount int64
|
||||
err := db.QueryRowContext(ctxTimeout, query, sql.Named("schema", tableInfo.Schema), sql.Named("table", tableInfo.Table)).Scan(&rowsCount)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return rowsCount, nil
|
||||
}
|
||||
|
||||
func calculatePartitionRanges(ctx context.Context, db *sql.DB, tableInfo config.SourceTableInfo, maxPartitions int64) ([]models.Partition, error) {
|
||||
query := fmt.Sprintf(`
|
||||
SELECT
|
||||
MIN([%s]) AS lower_limit,
|
||||
MAX([%s]) AS upper_limit
|
||||
FROM
|
||||
(SELECT [%s], NTILE(@maxPartitions) OVER (ORDER BY [%s]) AS batch_id FROM [%s].[%s]) AS T
|
||||
GROUP BY batch_id
|
||||
ORDER BY batch_id`,
|
||||
tableInfo.PrimaryKey,
|
||||
tableInfo.PrimaryKey,
|
||||
tableInfo.PrimaryKey,
|
||||
tableInfo.PrimaryKey,
|
||||
tableInfo.Schema,
|
||||
tableInfo.Table)
|
||||
|
||||
ctxTimeout, cancel := context.WithTimeout(ctx, time.Second*20)
|
||||
defer cancel()
|
||||
|
||||
rows, err := db.QueryContext(ctxTimeout, query, sql.Named("maxPartitions", maxPartitions))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
partitions := make([]models.Partition, 0, maxPartitions)
|
||||
|
||||
for rows.Next() {
|
||||
partition := models.Partition{
|
||||
Id: uuid.New(),
|
||||
ShouldUseRange: true,
|
||||
RetryCounter: 0,
|
||||
IsLowerLimitInclusive: true,
|
||||
}
|
||||
|
||||
if err := rows.Scan(&partition.LowerLimit, &partition.UpperLimit); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
partitions = append(partitions, partition)
|
||||
}
|
||||
|
||||
if err := rows.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return partitions, nil
|
||||
}
|
||||
|
||||
func partitionGeneratorMssql(ctx context.Context, db *sql.DB, tableInfo config.SourceTableInfo, rowsPerPartition int64) ([]models.Partition, error) {
|
||||
rowsCount, err := estimateTotalRowsMssql(ctx, db, tableInfo)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var partitionsCount int64 = 1
|
||||
if rowsCount > rowsPerPartition {
|
||||
partitionsCount = rowsCount / rowsPerPartition
|
||||
} else {
|
||||
return []models.Partition{{
|
||||
Id: uuid.New(),
|
||||
ShouldUseRange: false,
|
||||
RetryCounter: 0,
|
||||
}}, nil
|
||||
}
|
||||
|
||||
partitions, err := calculatePartitionRanges(ctx, db, tableInfo, partitionsCount)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return partitions, nil
|
||||
}
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/extractors"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/loaders"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/table_analyzers"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/transformers"
|
||||
"github.com/jackc/pgx/v5/pgxpool"
|
||||
log "github.com/sirupsen/logrus"
|
||||
@@ -90,6 +91,7 @@ func processMigrationJobs(
|
||||
chJobs := make(chan config.Job, len(jobs))
|
||||
var wgJobs sync.WaitGroup
|
||||
|
||||
tableAnalyzer := table_analyzers.NewMssqlTableAnalyzer(sourceDb)
|
||||
extractor := extractors.NewMssqlExtractor(sourceDb)
|
||||
transformer := transformers.NewMssqlTransformer()
|
||||
loader := loaders.NewPostgresLoader(targetDb)
|
||||
@@ -102,6 +104,7 @@ func processMigrationJobs(
|
||||
ctx,
|
||||
sourceDb,
|
||||
targetDb,
|
||||
tableAnalyzer,
|
||||
extractor,
|
||||
transformer,
|
||||
loader,
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/table_analyzers"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
|
||||
"github.com/jackc/pgx/v5/pgxpool"
|
||||
_ "github.com/microsoft/go-mssqldb"
|
||||
@@ -20,6 +21,7 @@ func processMigrationJob(
|
||||
ctx context.Context,
|
||||
sourceDb *sql.DB,
|
||||
targetDb *pgxpool.Pool,
|
||||
tableAnalyzer etl.TableAnalyzer,
|
||||
extractor etl.Extractor,
|
||||
transformer etl.Transformer,
|
||||
loader etl.Loader,
|
||||
@@ -44,7 +46,13 @@ func processMigrationJob(
|
||||
jobCtx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
||||
partitions, err := partitionGeneratorMssql(jobCtx, sourceDb, job.SourceTable, job.RowsPerPartition)
|
||||
partitions, err := table_analyzers.PartitionRangeGenerator(
|
||||
jobCtx,
|
||||
tableAnalyzer,
|
||||
job.SourceTable.TableInfo,
|
||||
job.SourceTable.PrimaryKey,
|
||||
job.RowsPerPartition,
|
||||
)
|
||||
if err != nil {
|
||||
log.Error("Unexpected error calculating batch ranges: ", err)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user