package main

import (
	"context"
	"database/sql"
	"slices"
	"strings"
	"time"

	"github.com/jackc/pgx/v5/pgxpool"
	_ "github.com/microsoft/go-mssqldb"
	log "github.com/sirupsen/logrus"
)

// UnknownRowValues is a single extracted row: one value per column, with
// concrete types only known at runtime (driver-dependent).
type UnknownRowValues = []any

// extractFromMssql consumes batches from chBatchesIn and, for each batch,
// runs the extraction query against the MSSQL database and streams the
// resulting rows to chChunksOut in chunks of chunkSize rows.
//
// Recoverable per-batch failures are reported on chErrorsOut; a missing
// primary key (required to build resumable error info) is reported on
// chJobErrorsOut with ShouldCancelJob set, and the extractor stops.
//
// NOTE(review): sends on the out channels do not select on ctx.Done(), so a
// stalled consumer can block this goroutine — presumably the consumers drain
// until close; verify against the callers.
func extractFromMssql(
	ctx context.Context,
	db *sql.DB,
	job MigrationJob,
	columns []ColumnType,
	chunkSize int,
	chBatchesIn <-chan Batch,
	chChunksOut chan<- []UnknownRowValues,
	chErrorsOut chan<- ExtractorError,
	chJobErrorsOut chan<- JobError,
) {
	// The primary key's column index is needed to build resume information
	// (last extracted id) when a batch fails mid-stream.
	indexPrimaryKey := slices.IndexFunc(columns, func(col ColumnType) bool {
		return strings.EqualFold(col.name, job.PrimaryKey)
	})
	if indexPrimaryKey == -1 {
		chJobErrorsOut <- JobError{
			ShouldCancelJob: true,
			Msg:             "Primary key not found in provided columns",
		}
		return
	}

	for batch := range chBatchesIn {
		// One helper call per batch so its defer (rows.Close) fires at the
		// end of each iteration, not at function exit.
		extractBatchMssql(ctx, db, job, columns, chunkSize, indexPrimaryKey, batch, chChunksOut, chErrorsOut)
	}
}

// extractBatchMssql runs the extraction query for a single batch and streams
// its rows to chChunksOut in chunks of chunkSize. Failures are reported on
// chErrorsOut; when at least one row was already buffered, the last buffered
// row supplies resume information via ExtractorErrorFromLastRowMssql.
func extractBatchMssql(
	ctx context.Context,
	db *sql.DB,
	job MigrationJob,
	columns []ColumnType,
	chunkSize int,
	indexPrimaryKey int,
	batch Batch,
	chChunksOut chan<- []UnknownRowValues,
	chErrorsOut chan<- ExtractorError,
) {
	query := buildExtractQueryMssql(job, columns, batch.ShouldUseRange, batch.IsLowerLimitInclusive)
	log.Debug("Query used to extract data from mssql: ", query)

	// Range-restricted batches bind their bounds as named parameters; the
	// query text decides whether the lower bound is inclusive.
	var queryArgs []any
	if batch.ShouldUseRange {
		queryArgs = append(queryArgs,
			sql.Named("min", batch.LowerLimit),
			sql.Named("max", batch.UpperLimit),
		)
	}

	queryStartTime := time.Now()
	rows, err := db.QueryContext(ctx, query, queryArgs...)
	if err != nil {
		chErrorsOut <- ExtractorError{
			Batch:     batch,
			HasLastId: false,
			Msg:       err.Error(),
		}
		return
	}
	defer rows.Close()
	log.Debugf("Query executed in %v", time.Since(queryStartTime))

	rowsChunk := make([]UnknownRowValues, 0, chunkSize)
	totalRowsExtracted := 0
	chunkStartTime := time.Now()

	for rows.Next() {
		// Scan into a fresh []any per row; scanArgs holds pointers into it.
		values := make([]any, len(columns))
		scanArgs := make([]any, len(columns))
		for i := range values {
			scanArgs[i] = &values[i]
		}
		if err := rows.Scan(scanArgs...); err != nil {
			if len(rowsChunk) == 0 {
				// Nothing buffered yet: no resume point is available.
				chErrorsOut <- ExtractorError{
					Batch:     batch,
					HasLastId: false,
					Msg:       err.Error(),
				}
				return
			}
			// Flush the partial chunk so its rows are not lost, then report
			// the error with the last successfully scanned row as resume point.
			lastRow := rowsChunk[len(rowsChunk)-1]
			chChunksOut <- rowsChunk
			chErrorsOut <- ExtractorErrorFromLastRowMssql(lastRow, indexPrimaryKey, &batch, err)
			return
		}

		rowsChunk = append(rowsChunk, values)
		totalRowsExtracted++
		if len(rowsChunk) >= chunkSize {
			chunkDuration := time.Since(chunkStartTime)
			rowsPerSec := float64(chunkSize) / chunkDuration.Seconds()
			log.Infof("Extracted chunk: %d rows in %v (%.0f rows/sec) - Total: %d rows",
				len(rowsChunk), chunkDuration, rowsPerSec, totalRowsExtracted)
			chChunksOut <- rowsChunk
			rowsChunk = make([]UnknownRowValues, 0, chunkSize)
			chunkStartTime = time.Now()
		}
	}

	// Flush the trailing partial chunk. Deliberately done before the
	// rows.Err() check: these rows were scanned successfully and, if
	// iteration ended with an error, the last of them doubles as the resume
	// point reported below (rowsChunk is intentionally not reset here).
	if len(rowsChunk) > 0 {
		chunkDuration := time.Since(chunkStartTime)
		rowsPerSec := float64(len(rowsChunk)) / chunkDuration.Seconds()
		log.Infof("Extracted final chunk: %d rows in %v (%.0f rows/sec) - Total: %d rows",
			len(rowsChunk), chunkDuration, rowsPerSec, totalRowsExtracted)
		chChunksOut <- rowsChunk
	}

	if err := rows.Err(); err != nil {
		if len(rowsChunk) == 0 {
			chErrorsOut <- ExtractorError{
				Batch:     batch,
				HasLastId: false,
				Msg:       err.Error(),
			}
			return
		}
		lastRow := rowsChunk[len(rowsChunk)-1]
		chErrorsOut <- ExtractorErrorFromLastRowMssql(lastRow, indexPrimaryKey, &batch, err)
		return
	}
}

// extractFromPostgres runs the extraction query for job against the Postgres
// pool and streams the resulting rows to out in chunks of chunkSize rows.
// It returns the first error encountered (query, scan, or iteration error);
// a nil return means every row was extracted and sent.
func extractFromPostgres(ctx context.Context, job MigrationJob, columns []ColumnType, chunkSize int, db *pgxpool.Pool, out chan<- []UnknownRowValues) error {
	query := buildExtractQueryPostgres(job, columns)
	log.Debug("Query used to extract data from postgres: ", query)

	rows, err := db.Query(ctx, query)
	if err != nil {
		return err
	}
	defer rows.Close()

	rowsChunk := make([]UnknownRowValues, 0, chunkSize)
	for rows.Next() {
		values, err := rows.Values()
		if err != nil {
			return err
		}
		rowsChunk = append(rowsChunk, values)
		if len(rowsChunk) >= chunkSize {
			out <- rowsChunk
			rowsChunk = make([]UnknownRowValues, 0, chunkSize)
			log.Infof("Chunk send... %+v", job)
		}
	}
	// BUG FIX: pgx reports mid-iteration failures (connection drop, cancel)
	// via rows.Err() after Next() returns false. The original never checked
	// it, so a truncated result set was silently reported as success. Checked
	// before flushing the trailing chunk so a possibly-incomplete tail is not
	// emitted on error.
	if err := rows.Err(); err != nil {
		return err
	}
	if len(rowsChunk) > 0 {
		out <- rowsChunk
		log.Infof("Chunk send... %+v", job)
	}
	return nil
}