go-migrate/internal/app/etl/loaders/consume.go

package loaders

import (
	"context"
	"errors"
	"sync"
	"sync/atomic"

	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/custom_errors"
	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/models"
	"github.com/google/uuid"
	"github.com/sirupsen/logrus"
)

// Consume drains chBatchesIn and loads every received batch into the target
// table through gl.ProcessBatchWithRetries, until the channel is closed or ctx
// is cancelled.
//
// Loading mode depends on batchSize:
//   - batchSize <= 0: each incoming batch is loaded on its own.
//   - batchSize > 0: rows of incoming batches are accumulated and loaded as a
//     single "super batch" (new Id, ParentBatchesId listing the merged
//     batches) as soon as at least batchSize rows are buffered, and once more
//     for any remainder when chBatchesIn is closed.
//
// Bookkeeping performed on the shared arguments:
//   - wgActiveBatches.Done() is called once for every batch received from
//     chBatchesIn. Batches that are still buffered when Consume returns are
//     released by a deferred loop.
//   - rowsLoaded is atomically increased by the processed row count after
//     each successful load.
//   - failedBatchesCount is atomically increased by one per failed load (a
//     failed super batch counts once, regardless of how many batches it
//     merged).
//   - Every failed load is reported on chErrorsOut. A *custom_errors.JobError
//     found in the error chain is forwarded as-is; any other error is wrapped
//     in a JobError with ShouldCancelJob set to false. When the failure
//     counter exceeds retryConfig.MaxFailedBatchesLoad, an additional
//     JobError with ShouldCancelJob set to true is sent and Consume returns.
//
// All sends on chErrorsOut race against ctx.Done(), so a cancelled context
// never leaves Consume blocked on the error channel.
func (gl *GenericLoader) Consume(
	ctx context.Context,
	tableInfo config.TargetTableInfo,
	columns []models.ColumnType,
	retryConfig config.RetryConfig,
	batchSize int,
	chBatchesIn <-chan models.Batch,
	chErrorsOut chan<- custom_errors.JobError,
	wgActiveBatches *sync.WaitGroup,
	rowsLoaded *int64,
	failedBatchesCount *int32,
) {
	colNames := mapSlice(columns, func(col models.ColumnType) string {
		return col.Name()
	})

	// State of the super batch currently being accumulated (batchSize > 0).
	var accRows []models.UnknownRowValues
	var parentBatchesId []uuid.UUID
	// pendingDone counts the received batches whose wgActiveBatches.Done()
	// has not been issued yet; it always equals len(parentBatchesId).
	pendingDone := 0

	// Release whatever is still buffered on any exit path so that waiters on
	// wgActiveBatches are never left hanging.
	defer func() {
		for range pendingDone {
			wgActiveBatches.Done()
		}
	}()

	// reportFailure records one failed load and publishes it on chErrorsOut.
	// It is the single failure path shared by the batched and unbatched
	// modes. It returns false when Consume must stop: either ctx was
	// cancelled while reporting, or the failure threshold has been exceeded.
	reportFailure := func(err error) bool {
		atomic.AddInt32(failedBatchesCount, 1)

		var report custom_errors.JobError
		if jobError, ok := errors.AsType[*custom_errors.JobError](err); ok {
			report = *jobError
		} else {
			report = custom_errors.JobError{ShouldCancelJob: false, Msg: err.Error(), Prev: err}
		}

		select {
		case <-ctx.Done():
			return false
		case chErrorsOut <- report:
		}

		if atomic.LoadInt32(failedBatchesCount) > int32(retryConfig.MaxFailedBatchesLoad) {
			// Best effort: the job is being stopped either way, so a
			// cancelled context is as good as a delivered cancel request.
			select {
			case <-ctx.Done():
			case chErrorsOut <- custom_errors.JobError{ShouldCancelJob: true, Msg: "Max failed batches (load) reached"}:
			}
			return false
		}
		return true
	}

	// flush loads the accumulated rows as one super batch and resets the
	// accumulator. It returns false when Consume must stop.
	flush := func() bool {
		if len(accRows) == 0 {
			return true
		}
		count := len(parentBatchesId)
		superBatch := models.Batch{
			Id:              uuid.New(),
			ParentBatchesId: parentBatchesId,
			Rows:            accRows,
		}
		processedRows, err := gl.ProcessBatchWithRetries(ctx, tableInfo, colNames, retryConfig, superBatch)
		// NOTE(review): the WaitGroup is released before a failure is
		// reported; confirm that no waiter closes chErrorsOut as soon as
		// the group drains.
		for range count {
			wgActiveBatches.Done()
		}
		pendingDone -= count
		// Start fresh slices instead of truncating: superBatch still
		// references the old backing arrays.
		accRows = nil
		parentBatchesId = nil

		if err != nil {
			return reportFailure(err)
		}

		current := atomic.LoadInt64(rowsLoaded)
		logrus.Debugf("Rows loaded (batch loaded): +%v [current=%v] (%s.%s)", processedRows, current, tableInfo.Schema, tableInfo.Table)
		atomic.AddInt64(rowsLoaded, int64(processedRows))
		return true
	}

	for {
		// Checked separately so that cancellation wins even when a batch is
		// ready at the same time (select picks among ready cases at random).
		if ctx.Err() != nil {
			return
		}

		select {
		case <-ctx.Done():
			return
		case batch, ok := <-chBatchesIn:
			if !ok {
				// Producer is done: load the remainder, then exit.
				flush()
				return
			}

			if batchSize <= 0 {
				// Unbatched mode: load the incoming batch as-is.
				processedRows, err := gl.ProcessBatchWithRetries(ctx, tableInfo, colNames, retryConfig, batch)
				wgActiveBatches.Done()

				if err != nil {
					if !reportFailure(err) {
						return
					}
					continue
				}

				current := atomic.LoadInt64(rowsLoaded)
				logrus.Debugf("Rows loaded: +%v [current=%v] (%s.%s)", processedRows, current, tableInfo.Schema, tableInfo.Table)
				atomic.AddInt64(rowsLoaded, int64(processedRows))
				continue
			}

			// Batched mode: buffer the rows and remember the source batch.
			pendingDone++
			accRows = append(accRows, batch.Rows...)
			parentBatchesId = append(parentBatchesId, batch.Id)

			if len(accRows) >= batchSize {
				if !flush() {
					return
				}
			}
		}
	}
}