package extractors import ( "context" "database/sql" "errors" "fmt" "slices" "strings" "sync" "sync/atomic" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/convert" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl" "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models" "github.com/google/uuid" ) type MssqlExtractor struct { db *sql.DB } func NewMssqlExtractor(db *sql.DB) etl.Extractor { return &MssqlExtractor{db: db} } func buildExtractQueryMssql( tableInfo config.SourceTableInfo, columns []models.ColumnType, includeRange bool, isMinInclusive bool, ) string { var sbQuery strings.Builder sbQuery.WriteString("SELECT ") if len(columns) == 0 { sbQuery.WriteString("*") } else { for i, col := range columns { fmt.Fprintf(&sbQuery, "[%s]", col.Name()) if col.Type() == "GEOMETRY" { fmt.Fprintf(&sbQuery, ".STAsBinary() AS [%s]", col.Name()) } if i < len(columns)-1 { sbQuery.WriteString(", ") } } } fmt.Fprintf(&sbQuery, " FROM [%s].[%s]", tableInfo.Schema, tableInfo.Table) if includeRange { fmt.Fprintf(&sbQuery, " WHERE [%s]", tableInfo.PrimaryKey) if isMinInclusive { sbQuery.WriteString(" >=") } else { sbQuery.WriteString(" >") } fmt.Fprintf(&sbQuery, " @min AND [%s] <= @max", tableInfo.PrimaryKey) } fmt.Fprintf(&sbQuery, " ORDER BY [%s] ASC", tableInfo.PrimaryKey) return sbQuery.String() } func errorFromLastRow( lastRow models.UnknownRowValues, indexPrimaryKey int, partition *models.Partition, previousError error, ) *custom_errors.ExtractorError { lastIdRawValue := lastRow[indexPrimaryKey] lastId, ok := convert.ToInt64(lastIdRawValue) if !ok { currentPartition := *partition currentPartition.RetryCounter = 3 return &custom_errors.ExtractorError{ Partition: currentPartition, HasLastId: true, Msg: fmt.Sprintf("Couldn't cast last id value as int: %s", previousError.Error()), } } return &custom_errors.ExtractorError{ Partition: *partition, HasLastId: true, LastId: lastId, Msg: previousError.Error(), } } func (mssqlEx *MssqlExtractor) ProcessPartition( ctx context.Context, tableInfo config.SourceTableInfo, columns []models.ColumnType, batchSize int, partition models.Partition, indexPrimaryKey int, chBatchesOut chan<- models.Batch, rowsRead *int64, ) error { query := buildExtractQueryMssql(tableInfo, columns, partition.ShouldUseRange, partition.IsLowerLimitInclusive) var queryArgs []any if partition.ShouldUseRange { queryArgs = append(queryArgs, sql.Named("min", partition.LowerLimit), sql.Named("max", partition.UpperLimit), ) } rows, err := mssqlEx.db.QueryContext(ctx, query, queryArgs...) if err != nil { return &custom_errors.ExtractorError{Partition: partition, HasLastId: false, Msg: err.Error()} } defer rows.Close() batchRows := make([]models.UnknownRowValues, 0, batchSize) for rows.Next() { rowValues := make([]any, len(columns)) scanArgs := make([]any, len(columns)) for i := range rowValues { scanArgs[i] = &rowValues[i] } if err := rows.Scan(scanArgs...); err != nil { if len(batchRows) == 0 { return &custom_errors.ExtractorError{Partition: partition, HasLastId: false, Msg: err.Error()} } lastRow := batchRows[len(batchRows)-1] select { case chBatchesOut <- models.Batch{Id: uuid.New(), PartitionId: partition.Id, Rows: batchRows, RetryCounter: 0}: case <-ctx.Done(): return nil } atomic.AddInt64(rowsRead, int64(len(batchRows))) return errorFromLastRow(lastRow, indexPrimaryKey, &partition, err) } batchRows = append(batchRows, rowValues) if len(batchRows) >= batchSize { select { case chBatchesOut <- models.Batch{Id: uuid.New(), PartitionId: partition.Id, Rows: batchRows, RetryCounter: 0}: case <-ctx.Done(): return nil } atomic.AddInt64(rowsRead, int64(len(batchRows))) batchRows = make([]models.UnknownRowValues, 0, batchSize) } } if err := rows.Err(); err != nil { if errors.Is(err, ctx.Err()) { return ctx.Err() } if len(batchRows) == 0 { return &custom_errors.ExtractorError{Partition: partition, HasLastId: false, Msg: err.Error()} } lastRow := batchRows[len(batchRows)-1] return errorFromLastRow(lastRow, indexPrimaryKey, &partition, err) } if len(batchRows) > 0 { select { case chBatchesOut <- models.Batch{Id: uuid.New(), PartitionId: partition.Id, Rows: batchRows, RetryCounter: 0}: case <-ctx.Done(): return nil } atomic.AddInt64(rowsRead, int64(len(batchRows))) } return nil } func (mssqlEx *MssqlExtractor) Exec( ctx context.Context, tableInfo config.SourceTableInfo, columns []models.ColumnType, batchSize int, chPartitionsIn <-chan models.Partition, chBatchesOut chan<- models.Batch, chErrorsOut chan<- custom_errors.ExtractorError, chJobErrorsOut chan<- custom_errors.JobError, wgActivePartitions *sync.WaitGroup, rowsRead *int64, ) { indexPrimaryKey := slices.IndexFunc(columns, func(col models.ColumnType) bool { return strings.EqualFold(col.Name(), tableInfo.PrimaryKey) }) if indexPrimaryKey == -1 { select { case <-ctx.Done(): return case chJobErrorsOut <- custom_errors.JobError{ ShouldCancelJob: true, Msg: "Primary key not found in provided columns", }: } return } for { if ctx.Err() != nil { return } select { case <-ctx.Done(): return case partition, ok := <-chPartitionsIn: if !ok { return } err := mssqlEx.ProcessPartition( ctx, tableInfo, columns, batchSize, partition, indexPrimaryKey, chBatchesOut, rowsRead, ) if err != nil { var exError *custom_errors.ExtractorError var jobError *custom_errors.JobError if errors.As(err, &exError) { select { case <-ctx.Done(): return case chErrorsOut <- *exError: } } else if errors.As(err, &jobError) { select { case <-ctx.Done(): return case chJobErrorsOut <- *jobError: } } else { select { case <-ctx.Done(): return case chErrorsOut <- custom_errors.ExtractorError{Partition: partition, Msg: err.Error()}: } } continue } wgActivePartitions.Done() } } }