feat: refactor extractor interface and implement Consume function for ETL process
This commit is contained in:
101
internal/app/etl/extractors/main.go
Normal file
101
internal/app/etl/extractors/main.go
Normal file
@@ -0,0 +1,101 @@
|
||||
package extractors
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"slices"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/etl"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
|
||||
)
|
||||
|
||||
func Consume(
|
||||
ctx context.Context,
|
||||
extractor etl.Extractor,
|
||||
tableInfo config.SourceTableInfo,
|
||||
columns []models.ColumnType,
|
||||
batchSize int,
|
||||
chPartitionsIn <-chan models.Partition,
|
||||
chBatchesOut chan<- models.Batch,
|
||||
chErrorsOut chan<- custom_errors.ExtractorError,
|
||||
chJobErrorsOut chan<- custom_errors.JobError,
|
||||
wgActivePartitions *sync.WaitGroup,
|
||||
rowsRead *int64,
|
||||
) {
|
||||
indexPrimaryKey := slices.IndexFunc(columns, func(col models.ColumnType) bool {
|
||||
return strings.EqualFold(col.Name(), tableInfo.PrimaryKey)
|
||||
})
|
||||
|
||||
if indexPrimaryKey == -1 {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case chJobErrorsOut <- custom_errors.JobError{
|
||||
ShouldCancelJob: true,
|
||||
Msg: "Primary key not found in provided columns",
|
||||
}:
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
for {
|
||||
if ctx.Err() != nil {
|
||||
return
|
||||
}
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case partition, ok := <-chPartitionsIn:
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
|
||||
rowsReadResult, err := extractor.Exec(
|
||||
ctx,
|
||||
tableInfo,
|
||||
columns,
|
||||
batchSize,
|
||||
partition,
|
||||
indexPrimaryKey,
|
||||
chBatchesOut,
|
||||
)
|
||||
|
||||
if rowsReadResult > 0 {
|
||||
atomic.AddInt64(rowsRead, int64(rowsReadResult))
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
if exError, ok := errors.AsType[*custom_errors.ExtractorError](err); ok {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case chErrorsOut <- *exError:
|
||||
}
|
||||
} else if jobError, ok := errors.AsType[*custom_errors.JobError](err); ok {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case chJobErrorsOut <- *jobError:
|
||||
}
|
||||
} else {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case chErrorsOut <- custom_errors.ExtractorError{Partition: partition, Msg: err.Error()}:
|
||||
}
|
||||
}
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
wgActivePartitions.Done()
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -5,10 +5,7 @@ import (
|
||||
"database/sql"
|
||||
"errors"
|
||||
"fmt"
|
||||
"slices"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/convert"
|
||||
@@ -99,7 +96,7 @@ func errorFromLastRow(
|
||||
}
|
||||
}
|
||||
|
||||
func (mssqlEx *MssqlExtractor) ProcessPartition(
|
||||
func (mssqlEx *MssqlExtractor) Exec(
|
||||
ctx context.Context,
|
||||
tableInfo config.SourceTableInfo,
|
||||
columns []models.ColumnType,
|
||||
@@ -188,90 +185,3 @@ func (mssqlEx *MssqlExtractor) ProcessPartition(
|
||||
|
||||
return rowsRead, nil
|
||||
}
|
||||
|
||||
func (mssqlEx *MssqlExtractor) Exec(
|
||||
ctx context.Context,
|
||||
tableInfo config.SourceTableInfo,
|
||||
columns []models.ColumnType,
|
||||
batchSize int,
|
||||
chPartitionsIn <-chan models.Partition,
|
||||
chBatchesOut chan<- models.Batch,
|
||||
chErrorsOut chan<- custom_errors.ExtractorError,
|
||||
chJobErrorsOut chan<- custom_errors.JobError,
|
||||
wgActivePartitions *sync.WaitGroup,
|
||||
rowsRead *int64,
|
||||
) {
|
||||
indexPrimaryKey := slices.IndexFunc(columns, func(col models.ColumnType) bool {
|
||||
return strings.EqualFold(col.Name(), tableInfo.PrimaryKey)
|
||||
})
|
||||
|
||||
if indexPrimaryKey == -1 {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case chJobErrorsOut <- custom_errors.JobError{
|
||||
ShouldCancelJob: true,
|
||||
Msg: "Primary key not found in provided columns",
|
||||
}:
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
for {
|
||||
if ctx.Err() != nil {
|
||||
return
|
||||
}
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case partition, ok := <-chPartitionsIn:
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
|
||||
rowsReadResult, err := mssqlEx.ProcessPartition(
|
||||
ctx,
|
||||
tableInfo,
|
||||
columns,
|
||||
batchSize,
|
||||
partition,
|
||||
indexPrimaryKey,
|
||||
chBatchesOut,
|
||||
)
|
||||
|
||||
if rowsReadResult > 0 {
|
||||
atomic.AddInt64(rowsRead, int64(rowsReadResult))
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
var exError *custom_errors.ExtractorError
|
||||
var jobError *custom_errors.JobError
|
||||
if errors.As(err, &exError) {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case chErrorsOut <- *exError:
|
||||
}
|
||||
} else if errors.As(err, &jobError) {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case chJobErrorsOut <- *jobError:
|
||||
}
|
||||
} else {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case chErrorsOut <- custom_errors.ExtractorError{Partition: partition, Msg: err.Error()}:
|
||||
}
|
||||
}
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
wgActivePartitions.Done()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,6 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
|
||||
"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
|
||||
@@ -51,7 +50,7 @@ func buildExtractQueryPostgres(sourceDbInfo config.SourceTableInfo, columns []mo
|
||||
return fmt.Sprintf(`SELECT %s FROM "%s"."%s" ORDER BY "%s" ASC`, sbColumns.String(), sourceDbInfo.Schema, sourceDbInfo.Table, sourceDbInfo.PrimaryKey)
|
||||
}
|
||||
|
||||
func (postgresEx *PostgresExtractor) ProcessPartition(
|
||||
func (postgresEx *PostgresExtractor) Exec(
|
||||
ctx context.Context,
|
||||
tableInfo config.SourceTableInfo,
|
||||
columns []models.ColumnType,
|
||||
@@ -109,17 +108,3 @@ func (postgresEx *PostgresExtractor) ProcessPartition(
|
||||
|
||||
return rowsRead, nil
|
||||
}
|
||||
|
||||
func (postgresEx *PostgresExtractor) Exec(
|
||||
ctx context.Context,
|
||||
tableInfo config.SourceTableInfo,
|
||||
columns []models.ColumnType,
|
||||
batchSize int,
|
||||
chPartitionsIn <-chan models.Partition,
|
||||
chBatchesOut chan<- models.Batch,
|
||||
chErrorsOut chan<- custom_errors.ExtractorError,
|
||||
chJobErrorsOut chan<- custom_errors.JobError,
|
||||
wgActivePartitions *sync.WaitGroup,
|
||||
rowsRead *int64,
|
||||
) {
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user