192 lines
4.2 KiB
Go
192 lines
4.2 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"database/sql"
|
|
"errors"
|
|
"fmt"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/jackc/pgx/v5"
|
|
"github.com/jackc/pgx/v5/pgconn"
|
|
"github.com/jackc/pgx/v5/pgxpool"
|
|
mssql "github.com/microsoft/go-mssqldb"
|
|
log "github.com/sirupsen/logrus"
|
|
)
|
|
|
|
func loadRowsPostgres(
|
|
ctx context.Context,
|
|
db *pgxpool.Pool,
|
|
job MigrationJob,
|
|
columns []ColumnType,
|
|
chChunksIn <-chan Chunk,
|
|
chErrorsOut chan<- LoaderError,
|
|
chJobErrorsOut chan<- JobError,
|
|
wgActiveChunks *sync.WaitGroup,
|
|
) {
|
|
tableId := pgx.Identifier{job.Schema, job.Table}
|
|
colNames := Map(columns, func(col ColumnType) string {
|
|
return col.name
|
|
})
|
|
|
|
for {
|
|
if ctx.Err() != nil {
|
|
return
|
|
}
|
|
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
case chunk, ok := <-chChunksIn:
|
|
if !ok {
|
|
return
|
|
}
|
|
|
|
if abort := loadChunkPostgres(ctx, db, tableId, colNames, chunk, chErrorsOut, chJobErrorsOut, wgActiveChunks); abort {
|
|
return
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// loadChunkPostgres bulk-copies a single chunk into the given table using the
// Postgres COPY protocol. The returned abort flag is true when the caller must
// stop processing further chunks: either a fatal data-integrity error was
// reported on chJobErrorsOut, or the context was cancelled while reporting a
// retryable failure.
//
// NOTE(review): wgActiveChunks.Done() is called on the success path and on the
// fatal unique-violation path, but NOT when the chunk is handed to
// chErrorsOut — presumably the chunk stays "active" until a retry elsewhere
// completes it. Confirm against the consumer of chErrorsOut.
func loadChunkPostgres(
	ctx context.Context,
	db *pgxpool.Pool,
	identifier pgx.Identifier,
	colNames []string,
	chunk Chunk,
	chErrorsOut chan<- LoaderError,
	chJobErrorsOut chan<- JobError,
	wgActiveChunks *sync.WaitGroup,
) (abort bool) {
	chunkStartTime := time.Now()

	// COPY the entire chunk in a single round trip.
	_, err := db.CopyFrom(
		ctx,
		identifier,
		colNames,
		pgx.CopyFromRows(chunk.Data),
	)

	if err != nil {
		var pgErr *pgconn.PgError
		if errors.As(err, &pgErr) {
			// SQLSTATE 23505 = unique_violation: duplicate keys mean the
			// source data is inconsistent, so the whole job is cancelled.
			if pgErr.Code == "23505" {
				select {
				case chJobErrorsOut <- JobError{
					ShouldCancelJob: true,
					Msg: fmt.Sprintf("Fatal data integrity error in table %s", identifier.Sanitize()),
					Prev: err,
				}:
				case <-ctx.Done():
					// Cancelled while reporting; the report is dropped.
				}
				wgActiveChunks.Done()
				return true
			}
		}

		// Non-fatal failure: hand the chunk back for retry/inspection.
		// Note the chunk is NOT marked Done here (see NOTE above).
		select {
		case chErrorsOut <- LoaderError{Chunk: chunk, Msg: err.Error()}:
		case <-ctx.Done():
			return true
		}
		return false
	}

	chunkDuration := time.Since(chunkStartTime)
	rowsPerSec := float64(len(chunk.Data)) / chunkDuration.Seconds()

	log.Infof("Loaded chunk: %d rows in %v (%.0f rows/sec)", len(chunk.Data), chunkDuration, rowsPerSec)

	wgActiveChunks.Done()
	return false
}
|
|
|
|
func loadRowsMssql(ctx context.Context, job MigrationJob, columns []ColumnType, db *sql.DB, in <-chan []UnknownRowValues) error {
|
|
chunkCount := 0
|
|
totalRowsLoaded := 0
|
|
|
|
for rows := range in {
|
|
chunkStartTime := time.Now()
|
|
|
|
tx, err := db.BeginTx(ctx, nil)
|
|
if err != nil {
|
|
return fmt.Errorf("error starting transaction: %w", err)
|
|
}
|
|
|
|
fullTableName := fmt.Sprintf("[%s].[%s]", job.Schema, job.Table)
|
|
colNames := Map(columns, func(col ColumnType) string {
|
|
return col.name
|
|
})
|
|
|
|
stmt, err := tx.PrepareContext(ctx, mssql.CopyIn(fullTableName, mssql.BulkOptions{}, colNames...))
|
|
if err != nil {
|
|
tx.Rollback()
|
|
return fmt.Errorf("error preparing bulk copy statement: %w", err)
|
|
}
|
|
|
|
copyStartTime := time.Now()
|
|
|
|
for _, row := range rows {
|
|
_, err = stmt.ExecContext(ctx, row...)
|
|
if err != nil {
|
|
stmt.Close()
|
|
tx.Rollback()
|
|
return fmt.Errorf("error executing row insert: %w", err)
|
|
}
|
|
}
|
|
|
|
result, err := stmt.ExecContext(ctx)
|
|
if err != nil {
|
|
stmt.Close()
|
|
tx.Rollback()
|
|
return fmt.Errorf("error flushing bulk data: %w", err)
|
|
}
|
|
|
|
err = stmt.Close()
|
|
if err != nil {
|
|
tx.Rollback()
|
|
return fmt.Errorf("error closing statement: %w", err)
|
|
}
|
|
|
|
if err := tx.Commit(); err != nil {
|
|
return fmt.Errorf("error committing transaction: %w", err)
|
|
}
|
|
|
|
rowsAffected, _ := result.RowsAffected()
|
|
chunkCount++
|
|
totalRowsLoaded += int(rowsAffected)
|
|
|
|
copyDuration := time.Since(copyStartTime)
|
|
chunkDuration := time.Since(chunkStartTime)
|
|
rowsPerSec := float64(len(rows)) / chunkDuration.Seconds()
|
|
|
|
log.Infof("Loaded chunk #%d (MSSQL): %d rows in %v (copy: %v, %.0f rows/sec) - Total: %d rows", chunkCount, len(rows), chunkDuration, copyDuration, rowsPerSec, totalRowsLoaded)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Map applies mapper to every element of input and returns the transformed
// values in a new slice, preserving order and length.
func Map[T any, V any](input []T, mapper func(T) V) []V {
	out := make([]V, len(input))
	for i := range input {
		out[i] = mapper(input[i])
	}
	return out
}
|
|
|
|
func fakeLoader(job MigrationJob, columns []ColumnType, in <-chan [][]any) {
|
|
|
|
for rows := range in {
|
|
log.Debugf("Chunk received, loading data into...")
|
|
|
|
for i, rowValues := range rows {
|
|
if i%100 == 0 {
|
|
logSampleRow(job, columns, rowValues, fmt.Sprintf("row %d", i))
|
|
}
|
|
}
|
|
}
|
|
}
|