Commits (1)

Author:  DiegoAlessandroMotta
SHA1:    1c3db39b21
Message: update extractor interface
Date:    2026-04-16 23:49:23 -05:00
11 changed files with 175 additions and 108 deletions

cmd/go_migrate/connect.go (new file, +77)

@@ -0,0 +1,77 @@
package main

import (
	"context"
	"database/sql"
	"errors"
	"fmt"
	"sync"
	"time"

	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
	"github.com/jackc/pgx/v5/pgxpool"
	_ "github.com/microsoft/go-mssqldb"
	log "github.com/sirupsen/logrus"
)

func connectToSqlServer() (*sql.DB, error) {
	db, err := sql.Open("sqlserver", config.App.SourceDbUrl)
	if err != nil {
		return nil, fmt.Errorf("Unable to connect to sqlserver: %w", err)
	}

	ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
	defer cancel()

	if err := db.PingContext(ctx); err != nil {
		db.Close() // don't leak the handle when the ping fails
		return nil, fmt.Errorf("Unable to ping sqlserver: %w", err)
	}

	return db, nil
}

func connectToPostgres() (*pgxpool.Pool, error) {
	ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
	defer cancel()

	pool, err := pgxpool.New(ctx, config.App.TargetDbUrl)
	if err != nil {
		return nil, fmt.Errorf("Unable to connect to postgres: %w", err)
	}

	if err := pool.Ping(ctx); err != nil {
		pool.Close()
		return nil, fmt.Errorf("Unable to ping postgres: %w", err)
	}

	return pool, nil
}

// connectToDatabases opens both connections concurrently and fails if either one fails.
func connectToDatabases() (*sql.DB, *pgxpool.Pool, error) {
	var sourceDbErr, targetDbErr error
	var sourceDb *sql.DB
	var targetDb *pgxpool.Pool

	var wg sync.WaitGroup
	wg.Go(func() {
		sourceDb, sourceDbErr = connectToSqlServer()
		if sourceDbErr != nil {
			log.Error("Unable to connect to source db: ", sourceDbErr)
		}
	})
	wg.Go(func() {
		targetDb, targetDbErr = connectToPostgres()
		if targetDbErr != nil {
			log.Error("Unable to connect to target db: ", targetDbErr)
		}
	})
	wg.Wait()

	if sourceDbErr != nil || targetDbErr != nil {
		// Close whichever connection did succeed before giving up.
		if sourceDb != nil {
			sourceDb.Close()
		}
		if targetDb != nil {
			targetDb.Close()
		}
		return nil, nil, errors.New("Unable to connect to databases")
	}

	return sourceDb, targetDb, nil
}
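
For orientation, a minimal sketch of how connectToDatabases might be wired into a caller; the run function below is hypothetical and not part of this commit:

// Hypothetical caller, assuming the two handles above.
func run() error {
	sourceDb, targetDb, err := connectToDatabases()
	if err != nil {
		return err
	}
	defer sourceDb.Close() // *sql.DB
	defer targetDb.Close() // *pgxpool.Pool; its Close returns no error
	// ... hand both handles to the migration pipeline ...
	return nil
}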


@@ -11,7 +11,6 @@ import (
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/loaders" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/loaders"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/table_analyzers" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/table_analyzers"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/transformers" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl/transformers"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
"golang.org/x/sync/errgroup" "golang.org/x/sync/errgroup"
) )
@@ -96,10 +95,10 @@ func processMigrationJobs(
 	targetDb dbwrapper.DbWrapper,
 	jobs []config.Job,
 	maxParallelWorkers int,
-) []models.JobResult {
+) []JobResult {
 	if len(jobs) == 0 {
 		log.Info("No migration jobs configured")
-		return []models.JobResult{}
+		return []JobResult{}
 	}

 	if maxParallelWorkers <= 0 {
@@ -112,7 +111,7 @@ func processMigrationJobs(
 	log.Infof("Starting migration with %d parallel worker(s)", maxParallelWorkers)
-	chJobResults := make(chan models.JobResult, len(jobs))
+	chJobResults := make(chan JobResult, len(jobs))
 	chJobs := make(chan config.Job, len(jobs))
 	var wgJobs sync.WaitGroup
@@ -152,7 +151,7 @@ func processMigrationJobs(
 		close(chJobResults)
 	}()
-	var finalResults []models.JobResult
+	var finalResults []JobResult
 	for res := range chJobResults {
 		finalResults = append(finalResults, res)
 	}
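
The hunks above only swap models.JobResult for the local JobResult; the fan-out/fan-in shape is unchanged. A stripped-down sketch of that pattern, with the worker body elided and everything outside these hunks assumed:

// Fan jobs out over a buffered channel, fan JobResult values back in.
chJobs := make(chan config.Job, len(jobs))
chJobResults := make(chan JobResult, len(jobs))
var wgJobs sync.WaitGroup
for range maxParallelWorkers {
	wgJobs.Go(func() {
		for job := range chJobs {
			chJobResults <- JobResult{JobName: job.Name} // the real worker runs the job here
		}
	})
}
for _, job := range jobs {
	chJobs <- job
}
close(chJobs)
go func() {
	wgJobs.Wait()
	close(chJobResults)
}()
var finalResults []JobResult
for res := range chJobResults {
	finalResults = append(finalResults, res)
}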

cmd/go_migrate/metrics.go (new file, +13)

@@ -0,0 +1,13 @@
package main

import "time"

type JobResult struct {
	JobName    string
	StartTime  time.Time
	Duration   time.Duration
	RowsRead   int64
	RowsLoaded int64
	RowsFailed int64
	Error      error
}
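
With JobResult now living next to main, a run summary stays local too; a small helper along these lines would be one option (hypothetical, not part of this commit):

// summarize tallies a slice of JobResult values into run totals.
func summarize(results []JobResult) (read, loaded, failed int64, errCount int) {
	for _, r := range results {
		read += r.RowsRead
		loaded += r.RowsLoaded
		failed += r.RowsFailed
		if r.Error != nil {
			errCount++
		}
	}
	return read, loaded, failed, errCount
}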


@@ -26,11 +26,11 @@ func processMigrationJob(
 	transformer etl.Transformer,
 	loader etl.Loader,
 	job config.Job,
-) models.JobResult {
+) JobResult {
 	localCtx, cancel := context.WithCancel(ctx)
 	defer cancel()

-	result := models.JobResult{
+	result := JobResult{
 		JobName:   job.Name,
 		StartTime: time.Now(),
 	}
@@ -129,7 +129,7 @@ func processMigrationJob(
 	for range maxExtractors {
 		wgExtractors.Go(func() {
-			extractor.Exec(
+			extractor.Consume(
 				localCtx,
 				job.SourceTable,
 				sourceColTypes,

go.mod (2 lines changed)

@@ -15,8 +15,6 @@ require (
 )

 require (
-	github.com/cenkalti/backoff v2.2.1+incompatible // indirect
-	github.com/cenkalti/backoff/v5 v5.0.3 // indirect
 	github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9 // indirect
 	github.com/golang-sql/sqlexp v0.1.0 // indirect
 	github.com/jackc/pgpassfile v1.0.0 // indirect

go.sum (4 lines changed)

@@ -16,10 +16,6 @@ github.com/alecthomas/assert/v2 v2.10.0 h1:jjRCHsj6hBJhkmhznrCzoNpbA3zqy0fYiUcYZ
 github.com/alecthomas/assert/v2 v2.10.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k=
 github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc=
 github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4=
-github.com/cenkalti/backoff v2.2.1+incompatible h1:tNowT99t7UNflLxfYYSlKYsBpXdEet03Pg2g16Swow4=
-github.com/cenkalti/backoff v2.2.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM=
-github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM=
-github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw=
 github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=


@@ -9,6 +9,7 @@ import (
"strings" "strings"
"sync" "sync"
"sync/atomic" "sync/atomic"
"time"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/convert" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/convert"
@@ -99,7 +100,7 @@ func errorFromLastRow(
 	}
 }

-func (mssqlEx *MssqlExtractor) ProcessPartition(
+func (mssqlEx *MssqlExtractor) Extract(
 	ctx context.Context,
 	tableInfo config.SourceTableInfo,
 	columns []models.ColumnType,
@@ -107,7 +108,7 @@ func (mssqlEx *MssqlExtractor) ProcessPartition(
 	partition models.Partition,
 	indexPrimaryKey int,
 	chBatchesOut chan<- models.Batch,
-) (int, error) {
+) (int64, error) {
 	query := buildExtractQueryMssql(tableInfo, columns, partition.HasRange, partition.Range.IsMinInclusive)

 	var queryArgs []any
@@ -118,7 +119,7 @@ func (mssqlEx *MssqlExtractor) ProcessPartition(
 		)
 	}

-	rowsRead := 0
+	var rowsRead int64 = 0
 	rows, err := mssqlEx.db.Query(ctx, query, queryArgs...)
 	if err != nil {
 		return rowsRead, &custom_errors.ExtractorError{Partition: partition, HasLastId: false, Msg: err.Error()}
@@ -189,14 +190,67 @@ func (mssqlEx *MssqlExtractor) ProcessPartition(
 	return rowsRead, nil
 }

+func (mssqlEx *MssqlExtractor) ExtractWithRetries(
+	ctx context.Context,
+	tableInfo config.SourceTableInfo,
+	columns []models.ColumnType,
+	batchSize int,
+	partition models.Partition,
+	indexPrimaryKey int,
+	chBatchesOut chan<- models.Batch,
+) (int64, error) {
+	var totalRowsRead int64
+	var fatalErr error
+	delay := time.Second
+	currentPartition := partition
+
+	for fatalErr == nil && currentPartition.RetryCounter < 3 {
+		currentPartition.RetryCounter++
+		rowsRead, err := mssqlEx.Extract(
+			ctx,
+			tableInfo,
+			columns,
+			batchSize,
+			currentPartition,
+			indexPrimaryKey,
+			chBatchesOut,
+		)
+		if rowsRead > 0 {
+			totalRowsRead += rowsRead
+		}
+		if err != nil {
+			var exError *custom_errors.ExtractorError
+			if errors.As(err, &exError) {
+				// Retriable: resume from the last extracted id as a new child partition.
+				if exError.HasLastId {
+					currentPartition.ParentId = exError.Partition.Id
+					currentPartition.Id = uuid.New()
+					currentPartition.Range.Min = exError.LastId
+					currentPartition.Range.IsMinInclusive = false
+				}
+				time.Sleep(delay)
+			} else {
+				// Anything that is not an ExtractorError ends the retry loop.
+				fatalErr = err
+			}
+			continue
+		}
+		break
+	}
+
+	return totalRowsRead, fatalErr
+}
-func (mssqlEx *MssqlExtractor) Exec(
+func (mssqlEx *MssqlExtractor) Consume(
 	ctx context.Context,
 	tableInfo config.SourceTableInfo,
 	columns []models.ColumnType,
 	batchSize int,
 	chPartitionsIn <-chan models.Partition,
 	chBatchesOut chan<- models.Batch,
-	chErrorsOut chan<- custom_errors.ExtractorError,
 	chJobErrorsOut chan<- custom_errors.JobError,
 	wgActivePartitions *sync.WaitGroup,
 	rowsRead *int64,
@@ -231,7 +285,7 @@ func (mssqlEx *MssqlExtractor) Exec(
 			return
 		}

-		rowsReadResult, err := mssqlEx.ProcessPartition(
+		rowsReadResult, err := mssqlEx.ExtractWithRetries(
 			ctx,
 			tableInfo,
 			columns,
@@ -246,15 +300,8 @@ func (mssqlEx *MssqlExtractor) Exec(
 		}

 		if err != nil {
-			var exError *custom_errors.ExtractorError
 			var jobError *custom_errors.JobError
-			if errors.As(err, &exError) {
-				select {
-				case <-ctx.Done():
-					return
-				case chErrorsOut <- *exError:
-				}
-			} else if errors.As(err, &jobError) {
+			if errors.As(err, &jobError) {
 				select {
 				case <-ctx.Done():
 					return
@@ -264,7 +311,7 @@ func (mssqlEx *MssqlExtractor) Exec(
 				select {
 				case <-ctx.Done():
 					return
-				case chErrorsOut <- custom_errors.ExtractorError{Partition: partition, Msg: err.Error()}:
+				case chJobErrorsOut <- custom_errors.JobError{ShouldCancelJob: false, Msg: err.Error(), Prev: err}:
 				}
 			}
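
With ExtractorError now handled inside ExtractWithRetries, Consume reports every surviving failure through the single JobError channel. A sketch of what the consuming side might look like (assumed, not shown in this diff; field names taken from the hunks above):

// Assumed drain loop on the job side.
for jobErr := range chJobErrors {
	if jobErr.ShouldCancelJob {
		cancel() // tear the whole job down
		log.Error("fatal job error: ", jobErr.Msg)
		continue
	}
	log.Warn("partition failed after retries: ", jobErr.Msg)
}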


@@ -51,7 +51,7 @@ func buildExtractQueryPostgres(sourceDbInfo config.SourceTableInfo, columns []mo
 	return fmt.Sprintf(`SELECT %s FROM "%s"."%s" ORDER BY "%s" ASC`, sbColumns.String(), sourceDbInfo.Schema, sourceDbInfo.Table, sourceDbInfo.PrimaryKey)
 }

-func (postgresEx *PostgresExtractor) ProcessPartition(
+func (postgresEx *PostgresExtractor) Extract(
 	ctx context.Context,
 	tableInfo config.SourceTableInfo,
 	columns []models.ColumnType,
@@ -110,7 +110,7 @@ func (postgresEx *PostgresExtractor) ProcessPartition(
 	return rowsRead, nil
 }

-func (postgresEx *PostgresExtractor) Exec(
+func (postgresEx *PostgresExtractor) Consume(
 	ctx context.Context,
 	tableInfo config.SourceTableInfo,
 	columns []models.ColumnType,


@@ -10,7 +10,7 @@ import (
 )

 type Extractor interface {
-	ProcessPartition(
+	Extract(
 		ctx context.Context,
 		tableInfo config.SourceTableInfo,
 		columns []models.ColumnType,
@@ -18,16 +18,25 @@ type Extractor interface {
 		partition models.Partition,
 		indexPrimaryKey int,
 		chBatchesOut chan<- models.Batch,
-	) (int, error)
+	) (int64, error)
+	ExtractWithRetries(
+		ctx context.Context,
+		tableInfo config.SourceTableInfo,
+		columns []models.ColumnType,
+		batchSize int,
+		partition models.Partition,
+		indexPrimaryKey int,
+		chBatchesOut chan<- models.Batch,
+	) (int64, error)
-	Exec(
+	Consume(
 		ctx context.Context,
 		tableInfo config.SourceTableInfo,
 		columns []models.ColumnType,
 		batchSize int,
 		chPartitionsIn <-chan models.Partition,
 		chBatchesOut chan<- models.Batch,
-		chErrorsOut chan<- custom_errors.ExtractorError,
 		chJobErrorsOut chan<- custom_errors.JobError,
 		wgActivePartitions *sync.WaitGroup,
 		rowsRead *int64,
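
One cheap way to keep both extractors honest against the widened interface is a pair of compile-time assertions; the placement and package qualifiers below are assumed:

// Compile-time checks that each concrete extractor still satisfies Extractor.
var (
	_ Extractor = (*extractors.MssqlExtractor)(nil)
	_ Extractor = (*extractors.PostgresExtractor)(nil)
)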


@@ -1,10 +1,6 @@
 package models

-import (
-	"time"
-
-	"github.com/google/uuid"
-)
+import "github.com/google/uuid"

 type UnknownRowValues = []any
@@ -29,13 +25,3 @@ type Partition struct {
 	HasRange     bool
 	RetryCounter int
 }
-
-type JobResult struct {
-	JobName    string
-	StartTime  time.Time
-	Duration   time.Duration
-	RowsRead   int64
-	RowsLoaded int64
-	RowsFailed int64
-	Error      error
-}


@@ -1,58 +0,0 @@
-package main
-
-import (
-	"context"
-	"errors"
-	"fmt"
-	"net/http"
-	"strconv"
-
-	"github.com/cenkalti/backoff/v5"
-)
-
-func ExampleRetry() {
-	// Define an operation function that returns a value and an error.
-	// The value can be any type.
-	// We'll pass this operation to Retry function.
-	operation := func() (string, error) {
-		// An example request that may fail.
-		resp, err := http.Get("http://httpbin.org/get")
-		if err != nil {
-			return "", err
-		}
-		defer resp.Body.Close()
-
-		// If we are being rate limited, return a RetryAfter to specify how long to wait.
-		// This will also reset the backoff policy.
-		if resp.StatusCode == 429 {
-			seconds, err := strconv.ParseInt(resp.Header.Get("Retry-After"), 10, 64)
-			if err == nil {
-				return "", backoff.RetryAfter(int(seconds))
-			}
-		}
-
-		// In case of non-retriable error, return Permanent error to stop retrying.
-		// For this HTTP example, client errors are non-retriable.
-		if resp.StatusCode >= 400 && resp.StatusCode < 500 {
-			return "", backoff.Permanent(errors.New("bad request"))
-		}
-
-		// Return successful response.
-		return "hello", nil
-	}
-
-	result, err := backoff.Retry(context.TODO(), operation, backoff.WithBackOff(backoff.NewExponentialBackOff()))
-	if err != nil {
-		fmt.Println("Error:", err)
-		return
-	}
-
-	// Operation is successful after retries.
-	fmt.Println(result)
-	// Output: hello
-}
-
-func main() {
-	ExampleRetry()
-}