feat: implement batch processing for MSSQL with improved structure and logging
This commit is contained in:
@@ -3,6 +3,8 @@ package main
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"slices"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/jackc/pgx/v5/pgxpool"
|
||||
@@ -12,67 +14,126 @@ import (
|
||||
|
||||
type UnknownRowValues = []any
|
||||
|
||||
func extractFromMssql(ctx context.Context, db *sql.DB, job MigrationJob, columns []ColumnType, chunkSize int, batchRange BatchRange, out chan<- []UnknownRowValues) error {
|
||||
query := buildExtractQueryMssql(job, columns, batchRange.validRange)
|
||||
log.Debug("Query used to extract data from mssql: ", query)
|
||||
func extractFromMssql(
|
||||
ctx context.Context,
|
||||
db *sql.DB,
|
||||
job MigrationJob,
|
||||
columns []ColumnType,
|
||||
chunkSize int,
|
||||
chBatchesIn <-chan Batch,
|
||||
chChunksOut chan<- []UnknownRowValues,
|
||||
chErrorsOut chan<- ExtractorError,
|
||||
) {
|
||||
indexPrimaryKey := slices.IndexFunc(columns, func(col ColumnType) bool {
|
||||
return strings.EqualFold(col.name, job.PrimaryKey)
|
||||
})
|
||||
|
||||
var queryArgs []any
|
||||
if batchRange.validRange {
|
||||
queryArgs = append(queryArgs,
|
||||
sql.Named("minRange", batchRange.LowerLimit),
|
||||
sql.Named("maxRange", batchRange.UpperLimit),
|
||||
)
|
||||
}
|
||||
|
||||
queryStartTime := time.Now()
|
||||
rows, err := db.QueryContext(ctx, query, queryArgs...)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer rows.Close()
|
||||
log.Debugf("Query executed in %v", time.Since(queryStartTime))
|
||||
|
||||
rowsChunk := make([]UnknownRowValues, 0, chunkSize)
|
||||
totalRowsExtracted := 0
|
||||
chunkCount := 0
|
||||
chunkStartTime := time.Now()
|
||||
|
||||
for rows.Next() {
|
||||
values := make([]any, len(columns))
|
||||
scanArgs := make([]any, len(columns))
|
||||
|
||||
for i := range values {
|
||||
scanArgs[i] = &values[i]
|
||||
}
|
||||
|
||||
if err := rows.Scan(scanArgs...); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
rowsChunk = append(rowsChunk, values)
|
||||
totalRowsExtracted++
|
||||
|
||||
if len(rowsChunk) >= chunkSize {
|
||||
chunkCount++
|
||||
chunkDuration := time.Since(chunkStartTime)
|
||||
rowsPerSec := float64(chunkSize) / chunkDuration.Seconds()
|
||||
log.Infof("Extracted chunk #%d: %d rows in %v (%.0f rows/sec) - Total: %d rows", chunkCount, len(rowsChunk), chunkDuration, rowsPerSec, totalRowsExtracted)
|
||||
out <- rowsChunk
|
||||
rowsChunk = make([]UnknownRowValues, 0, chunkSize)
|
||||
chunkStartTime = time.Now()
|
||||
if indexPrimaryKey == -1 {
|
||||
exError := ExtractorError{
|
||||
Batch: Batch{
|
||||
RetryCounter: maxRetryAttempts,
|
||||
},
|
||||
HasLastId: false,
|
||||
Msg: "Primary key not found in columns provided",
|
||||
}
|
||||
chErrorsOut <- exError
|
||||
return
|
||||
}
|
||||
|
||||
if len(rowsChunk) > 0 {
|
||||
chunkCount++
|
||||
chunkDuration := time.Since(chunkStartTime)
|
||||
rowsPerSec := float64(len(rowsChunk)) / chunkDuration.Seconds()
|
||||
log.Infof("Extracted final chunk #%d: %d rows in %v (%.0f rows/sec) - Total: %d rows",
|
||||
chunkCount, len(rowsChunk), chunkDuration, rowsPerSec, totalRowsExtracted)
|
||||
out <- rowsChunk
|
||||
}
|
||||
for batch := range chBatchesIn {
|
||||
func() {
|
||||
query := buildExtractQueryMssql(job, columns, batch.ShouldUseRange, batch.IsLowerLimitInclusive)
|
||||
log.Debug("Query used to extract data from mssql: ", query)
|
||||
|
||||
return rows.Err()
|
||||
var queryArgs []any
|
||||
if batch.ShouldUseRange {
|
||||
queryArgs = append(queryArgs,
|
||||
sql.Named("min", batch.LowerLimit),
|
||||
sql.Named("max", batch.UpperLimit),
|
||||
)
|
||||
}
|
||||
|
||||
queryStartTime := time.Now()
|
||||
rows, err := db.QueryContext(ctx, query, queryArgs...)
|
||||
if err != nil {
|
||||
exError := ExtractorError{
|
||||
Batch: batch,
|
||||
HasLastId: false,
|
||||
Msg: err.Error(),
|
||||
}
|
||||
chErrorsOut <- exError
|
||||
return
|
||||
}
|
||||
defer rows.Close()
|
||||
log.Debugf("Query executed in %v", time.Since(queryStartTime))
|
||||
|
||||
rowsChunk := make([]UnknownRowValues, 0, chunkSize)
|
||||
totalRowsExtracted := 0
|
||||
chunkStartTime := time.Now()
|
||||
|
||||
for rows.Next() {
|
||||
values := make([]any, len(columns))
|
||||
scanArgs := make([]any, len(columns))
|
||||
|
||||
for i := range values {
|
||||
scanArgs[i] = &values[i]
|
||||
}
|
||||
|
||||
if err := rows.Scan(scanArgs...); err != nil {
|
||||
if len(rowsChunk) == 0 {
|
||||
exError := ExtractorError{
|
||||
Batch: batch,
|
||||
HasLastId: false,
|
||||
Msg: err.Error(),
|
||||
}
|
||||
chErrorsOut <- exError
|
||||
return
|
||||
}
|
||||
|
||||
lastRow := rowsChunk[len(rowsChunk)-1]
|
||||
chErrorsOut <- ExtractorErrorFromLastRowMssql(lastRow, indexPrimaryKey, &batch, err)
|
||||
return
|
||||
}
|
||||
|
||||
rowsChunk = append(rowsChunk, values)
|
||||
totalRowsExtracted++
|
||||
|
||||
if len(rowsChunk) >= chunkSize {
|
||||
chunkDuration := time.Since(chunkStartTime)
|
||||
rowsPerSec := float64(chunkSize) / chunkDuration.Seconds()
|
||||
log.Infof("Extracted chunk: %d rows in %v (%.0f rows/sec) - Total: %d rows",
|
||||
len(rowsChunk), chunkDuration, rowsPerSec, totalRowsExtracted)
|
||||
chChunksOut <- rowsChunk
|
||||
rowsChunk = make([]UnknownRowValues, 0, chunkSize)
|
||||
chunkStartTime = time.Now()
|
||||
}
|
||||
}
|
||||
|
||||
if len(rowsChunk) > 0 {
|
||||
chunkDuration := time.Since(chunkStartTime)
|
||||
rowsPerSec := float64(len(rowsChunk)) / chunkDuration.Seconds()
|
||||
log.Infof("Extracted final chunk: %d rows in %v (%.0f rows/sec) - Total: %d rows",
|
||||
len(rowsChunk), chunkDuration, rowsPerSec, totalRowsExtracted)
|
||||
chChunksOut <- rowsChunk
|
||||
}
|
||||
|
||||
if err := rows.Err(); err != nil {
|
||||
if len(rowsChunk) == 0 {
|
||||
exError := ExtractorError{
|
||||
Batch: batch,
|
||||
HasLastId: false,
|
||||
Msg: err.Error(),
|
||||
}
|
||||
chErrorsOut <- exError
|
||||
return
|
||||
}
|
||||
|
||||
lastRow := rowsChunk[len(rowsChunk)-1]
|
||||
chErrorsOut <- ExtractorErrorFromLastRowMssql(lastRow, indexPrimaryKey, &batch, err)
|
||||
return
|
||||
}
|
||||
}()
|
||||
}
|
||||
}
|
||||
|
||||
func extractFromPostgres(ctx context.Context, job MigrationJob, columns []ColumnType, chunkSize int, db *pgxpool.Pool, out chan<- []UnknownRowValues) error {
|
||||
|
||||
Reference in New Issue
Block a user