Compare commits: 4434054b21...d32d4df6e4 (4 commits)
| Author | SHA1 | Date |
|---|---|---|
|  | d32d4df6e4 |  |
|  | 51d83661a4 |  |
|  | 75b04d4b2e |  |
|  | 3765e8adb3 |  |
cmd/go_migrate/batch-generator.go — new file, 110 lines
@@ -0,0 +1,110 @@
+package main
+
+import (
+    "context"
+    "database/sql"
+    "fmt"
+    "time"
+
+    "github.com/google/uuid"
+)
+
+type Batch struct {
+    Id                    uuid.UUID
+    ParentId              uuid.UUID
+    LowerLimit            int64
+    UpperLimit            int64
+    IsLowerLimitInclusive bool
+    ShouldUseRange        bool
+    RetryCounter          int
+}
+
+func estimateTotalRowsMssql(ctx context.Context, db *sql.DB, job MigrationJob) (int64, error) {
+    query := `
+        SELECT
+            SUM(p.rows) AS count
+        FROM sys.tables t
+        JOIN sys.schemas s ON t.schema_id = s.schema_id
+        JOIN sys.partitions p ON t.object_id = p.object_id
+        WHERE s.name = @schema AND t.name = @table AND p.index_id IN (0, 1)
+        GROUP BY t.name`
+
+    ctxTimeout, cancel := context.WithTimeout(ctx, time.Second*20)
+    defer cancel()
+
+    var rowsCount int64
+    err := db.QueryRowContext(ctxTimeout, query, sql.Named("schema", job.Schema), sql.Named("table", job.Table)).Scan(&rowsCount)
+    if err != nil {
+        return 0, err
+    }
+
+    return rowsCount, nil
+}
+
+func calculateBatchesMssql(ctx context.Context, db *sql.DB, job MigrationJob, batchCount int64) ([]Batch, error) {
+    query := fmt.Sprintf(`
+        SELECT
+            MIN([%s]) AS lower_limit,
+            MAX([%s]) AS upper_limit
+        FROM
+            (SELECT [%s], NTILE(@batchCount) OVER (ORDER BY [%s]) AS batch_id FROM [%s].[%s]) AS T
+        GROUP BY batch_id
+        ORDER BY batch_id`, job.PrimaryKey, job.PrimaryKey, job.PrimaryKey, job.PrimaryKey, job.Schema, job.Table)
+
+    ctxTimeout, cancel := context.WithTimeout(ctx, time.Second*20)
+    defer cancel()
+
+    rows, err := db.QueryContext(ctxTimeout, query, sql.Named("batchCount", batchCount))
+    if err != nil {
+        return nil, err
+    }
+    defer rows.Close()
+
+    batches := make([]Batch, 0, batchCount)
+
+    for rows.Next() {
+        batch := Batch{
+            Id:                    uuid.New(),
+            ShouldUseRange:        true,
+            RetryCounter:          0,
+            IsLowerLimitInclusive: true,
+        }
+
+        if err := rows.Scan(&batch.LowerLimit, &batch.UpperLimit); err != nil {
+            return nil, err
+        }
+
+        batches = append(batches, batch)
+    }
+
+    if err := rows.Err(); err != nil {
+        return nil, err
+    }
+
+    return batches, nil
+}
+
+func batchGeneratorMssql(ctx context.Context, db *sql.DB, job MigrationJob) ([]Batch, error) {
+    rowsCount, err := estimateTotalRowsMssql(ctx, db, job)
+    if err != nil {
+        return nil, err
+    }
+
+    var batchCount int64 = 1
+    if rowsCount > RowsPerBatch {
+        batchCount = rowsCount / RowsPerBatch
+    } else {
+        return []Batch{{
+            Id:             uuid.New(),
+            ShouldUseRange: false,
+            RetryCounter:   0,
+        }}, nil
+    }
+
+    batches, err := calculateBatchesMssql(ctx, db, job, batchCount)
+    if err != nil {
+        return nil, err
+    }
+
+    return batches, nil
+}
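NTILE(@batchCount) assigns each row to one of @batchCount roughly equal buckets ordered by the primary key, so MIN/MAX per bucket yields contiguous key ranges. A minimal usage sketch (hypothetical wiring; assumes an open *sql.DB and the MANZANA job defined in main.go):

```go
// Sketch only: enumerate the ranges batchGeneratorMssql produces for one job.
batches, err := batchGeneratorMssql(context.Background(), db, MigrationJob{
    Schema:     "Cartografia",
    Table:      "MANZANA",
    PrimaryKey: "GDB_ARCHIVE_OID",
})
if err != nil {
    log.Fatal(err)
}
for _, b := range batches {
    fmt.Printf("batch %s: [%d, %d] useRange=%v\n", b.Id, b.LowerLimit, b.UpperLimit, b.ShouldUseRange)
}
```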
@@ -5,7 +5,7 @@ import (
     "strings"
 )
 
-func buildExtractQueryMssql(job MigrationJob, columns []ColumnType, includeRange bool) string {
+func buildExtractQueryMssql(job MigrationJob, columns []ColumnType, includeRange bool, isMinInclusive bool) string {
     var sbQuery strings.Builder
 
     sbQuery.WriteString("SELECT ")
@@ -29,7 +29,14 @@ func buildExtractQueryMssql(job MigrationJob, columns []ColumnType, includeRange
     fmt.Fprintf(&sbQuery, " FROM [%s].[%s]", job.Schema, job.Table)
 
     if includeRange {
-        fmt.Fprintf(&sbQuery, " WHERE [%s] BETWEEN @minRange AND @maxRange", job.PrimaryKey)
+        fmt.Fprintf(&sbQuery, " WHERE [%s]", job.PrimaryKey)
+        if isMinInclusive {
+            sbQuery.WriteString(" >=")
+        } else {
+            sbQuery.WriteString(" >")
+        }
+
+        fmt.Fprintf(&sbQuery, " @min AND [%s] <= @max", job.PrimaryKey)
     }
 
     fmt.Fprintf(&sbQuery, " ORDER BY [%s] ASC", job.PrimaryKey)
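For reference, these are the two clause shapes the new code emits, assuming a primary key of GDB_ARCHIVE_OID (an assumption for illustration): a first attempt uses an inclusive lower bound; a retry that resumes after the last successfully extracted id uses an exclusive one.

```go
// Illustrative output, not generated code:
const whereFirstAttempt = ` WHERE [GDB_ARCHIVE_OID] >= @min AND [GDB_ARCHIVE_OID] <= @max`
const whereRetryResume = ` WHERE [GDB_ARCHIVE_OID] > @min AND [GDB_ARCHIVE_OID] <= @max`
```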
@@ -1,91 +0,0 @@
-package main
-
-import (
-    "context"
-    "database/sql"
-    "fmt"
-)
-
-type BatchRange struct {
-    LowerLimit int
-    UpperLimit int
-    validRange bool
-}
-
-func estimateTotalRowsMssql(ctx context.Context, db *sql.DB, job MigrationJob) (int, error) {
-    query := `
-        SELECT
-            SUM(p.rows) AS count
-        FROM sys.tables t
-        JOIN sys.schemas s ON t.schema_id = s.schema_id
-        JOIN sys.partitions p ON t.object_id = p.object_id
-        WHERE s.name = @schema AND t.name = @table AND p.index_id IN (0, 1)
-        GROUP BY t.name`
-
-    var rowsCount int
-    err := db.QueryRowContext(ctx, query, sql.Named("schema", job.Schema), sql.Named("table", job.Table)).Scan(&rowsCount)
-    if err != nil {
-        return 0, err
-    }
-
-    return rowsCount, nil
-}
-
-func calculateChunkRangesMssql(ctx context.Context, db *sql.DB, job MigrationJob, batchCount int) ([]BatchRange, error) {
-    query := fmt.Sprintf(`
-        SELECT
-            MIN([%s]) AS lower_limit,
-            MAX([%s]) AS upper_limit
-        FROM
-            (SELECT [%s], NTILE(@batchCount) OVER (ORDER BY [%s]) AS chunk_id FROM [%s].[%s]) AS T
-        GROUP BY chunk_id
-        ORDER BY chunk_id`, job.PrimaryKey, job.PrimaryKey, job.PrimaryKey, job.PrimaryKey, job.Schema, job.Table)
-
-    rows, err := db.QueryContext(ctx, query, sql.Named("batchCount", batchCount))
-    if err != nil {
-        return nil, err
-    }
-    defer rows.Close()
-
-    batchRanges := make([]BatchRange, 0, batchCount)
-
-    for rows.Next() {
-        var br BatchRange
-        br.validRange = true
-
-        if err := rows.Scan(&br.LowerLimit, &br.UpperLimit); err != nil {
-            return nil, err
-        }
-
-        batchRanges = append(batchRanges, br)
-    }
-
-    if err := rows.Err(); err != nil {
-        return nil, err
-    }
-
-    return batchRanges, nil
-}
-
-const estimatedRowsPerBatch = 100_000
-
-func calculateBatchMetrics(ctx context.Context, db *sql.DB, job MigrationJob) ([]BatchRange, error) {
-    rowsCount, err := estimateTotalRowsMssql(ctx, db, job)
-    if err != nil {
-        return nil, err
-    }
-
-    batchCount := 1
-    if rowsCount > estimatedRowsPerBatch {
-        batchCount = rowsCount / estimatedRowsPerBatch
-    } else {
-        return []BatchRange{{validRange: false}}, nil
-    }
-
-    chunksRange, err := calculateChunkRangesMssql(ctx, db, job, batchCount)
-    if err != nil {
-        return nil, err
-    }
-
-    return chunksRange, nil
-}
cmd/go_migrate/extractor-error-handler.go — new file, 66 lines
@@ -0,0 +1,66 @@
+package main
+
+import (
+    "fmt"
+
+    "github.com/google/uuid"
+)
+
+type ExtractorError struct {
+    Batch
+    LastId    int64
+    HasLastId bool
+    Msg       string
+}
+
+func (e *ExtractorError) Error() string {
+    return e.Msg
+}
+
+const maxRetryAttempts = 3
+
+func extractorErrorHandler(chErrorsIn <-chan ExtractorError, chBatchesOut chan<- Batch, chGlobalErrorsOut chan<- error) {
+    for err := range chErrorsIn {
+        if err.RetryCounter >= maxRetryAttempts {
+            chGlobalErrorsOut <- fmt.Errorf("batch %v reached max retries (%d): %s", err.Id, maxRetryAttempts, err.Msg)
+            continue
+        }
+
+        newBatch := err.Batch
+        newBatch.RetryCounter++
+
+        if err.HasLastId {
+            newBatch.ParentId = err.Id
+            newBatch.Id = uuid.New()
+            newBatch.LowerLimit = err.LastId
+            newBatch.IsLowerLimitInclusive = false
+        }
+
+        chBatchesOut <- newBatch
+    }
+}
+
+func ExtractorErrorFromLastRowMssql(lastRow UnknownRowValues, indexPrimaryKey int, batch *Batch, previousError error) ExtractorError {
+    lastIdRawValue := lastRow[indexPrimaryKey]
+
+    lastId, ok := ToInt64(lastIdRawValue)
+    if !ok {
+        currentBatch := *batch
+        currentBatch.RetryCounter = maxRetryAttempts
+        exError := ExtractorError{
+            Batch:     currentBatch,
+            HasLastId: true,
+            Msg:       fmt.Sprintf("Couldn't cast last id value as int: %s", previousError.Error()),
+        }
+        return exError
+    }
+
+    exError := ExtractorError{
+        Batch:     *batch,
+        HasLastId: true,
+        LastId:    lastId,
+        Msg:       previousError.Error(),
+    }
+    return exError
+}
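A minimal sketch of the retry contract (hypothetical wiring; buffered channels so the example does not block): a failed batch carrying a LastId comes back as a child batch that resumes exclusively after that id.

```go
chErrs := make(chan ExtractorError, 1)
chBatches := make(chan Batch, 1)
chGlobal := make(chan error, 1)
go extractorErrorHandler(chErrs, chBatches, chGlobal)

failed := Batch{Id: uuid.New(), LowerLimit: 0, UpperLimit: 1000, ShouldUseRange: true, IsLowerLimitInclusive: true}
chErrs <- ExtractorError{Batch: failed, HasLastId: true, LastId: 420, Msg: "connection reset"}

retry := <-chBatches
// retry.ParentId == failed.Id, retry.LowerLimit == 420,
// retry.IsLowerLimitInclusive == false, retry.RetryCounter == 1
```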
@@ -3,6 +3,8 @@ package main
 import (
     "context"
     "database/sql"
+    "slices"
+    "strings"
     "time"
 
     "github.com/jackc/pgx/v5/pgxpool"
@@ -12,67 +14,126 @@ import (
 
 type UnknownRowValues = []any
 
-func extractFromMssql(ctx context.Context, db *sql.DB, job MigrationJob, columns []ColumnType, chunkSize int, batchRange BatchRange, out chan<- []UnknownRowValues) error {
-    query := buildExtractQueryMssql(job, columns, batchRange.validRange)
-    log.Debug("Query used to extract data from mssql: ", query)
-
-    var queryArgs []any
-    if batchRange.validRange {
-        queryArgs = append(queryArgs,
-            sql.Named("minRange", batchRange.LowerLimit),
-            sql.Named("maxRange", batchRange.UpperLimit),
-        )
-    }
-
-    queryStartTime := time.Now()
-    rows, err := db.QueryContext(ctx, query, queryArgs...)
-    if err != nil {
-        return err
-    }
-    defer rows.Close()
-    log.Debugf("Query executed in %v", time.Since(queryStartTime))
-
-    rowsChunk := make([]UnknownRowValues, 0, chunkSize)
-    totalRowsExtracted := 0
-    chunkCount := 0
-    chunkStartTime := time.Now()
-
-    for rows.Next() {
-        values := make([]any, len(columns))
-        scanArgs := make([]any, len(columns))
-
-        for i := range values {
-            scanArgs[i] = &values[i]
-        }
-
-        if err := rows.Scan(scanArgs...); err != nil {
-            return err
-        }
-
-        rowsChunk = append(rowsChunk, values)
-        totalRowsExtracted++
-
-        if len(rowsChunk) >= chunkSize {
-            chunkCount++
-            chunkDuration := time.Since(chunkStartTime)
-            rowsPerSec := float64(chunkSize) / chunkDuration.Seconds()
-            log.Infof("Extracted chunk #%d: %d rows in %v (%.0f rows/sec) - Total: %d rows", chunkCount, len(rowsChunk), chunkDuration, rowsPerSec, totalRowsExtracted)
-            out <- rowsChunk
-            rowsChunk = make([]UnknownRowValues, 0, chunkSize)
-            chunkStartTime = time.Now()
-        }
-    }
-
-    if len(rowsChunk) > 0 {
-        chunkCount++
-        chunkDuration := time.Since(chunkStartTime)
-        rowsPerSec := float64(len(rowsChunk)) / chunkDuration.Seconds()
-        log.Infof("Extracted final chunk #%d: %d rows in %v (%.0f rows/sec) - Total: %d rows",
-            chunkCount, len(rowsChunk), chunkDuration, rowsPerSec, totalRowsExtracted)
-        out <- rowsChunk
-    }
-
-    return rows.Err()
-}
+func extractFromMssql(
+    ctx context.Context,
+    db *sql.DB,
+    job MigrationJob,
+    columns []ColumnType,
+    chunkSize int,
+    chBatchesIn <-chan Batch,
+    chChunksOut chan<- []UnknownRowValues,
+    chErrorsOut chan<- ExtractorError,
+) {
+    indexPrimaryKey := slices.IndexFunc(columns, func(col ColumnType) bool {
+        return strings.EqualFold(col.name, job.PrimaryKey)
+    })
+
+    if indexPrimaryKey == -1 {
+        exError := ExtractorError{
+            Batch: Batch{
+                RetryCounter: maxRetryAttempts,
+            },
+            HasLastId: false,
+            Msg:       "Primary key not found in columns provided",
+        }
+        chErrorsOut <- exError
+        return
+    }
+
+    for batch := range chBatchesIn {
+        func() {
+            query := buildExtractQueryMssql(job, columns, batch.ShouldUseRange, batch.IsLowerLimitInclusive)
+            log.Debug("Query used to extract data from mssql: ", query)
+
+            var queryArgs []any
+            if batch.ShouldUseRange {
+                queryArgs = append(queryArgs,
+                    sql.Named("min", batch.LowerLimit),
+                    sql.Named("max", batch.UpperLimit),
+                )
+            }
+
+            queryStartTime := time.Now()
+            rows, err := db.QueryContext(ctx, query, queryArgs...)
+            if err != nil {
+                exError := ExtractorError{
+                    Batch:     batch,
+                    HasLastId: false,
+                    Msg:       err.Error(),
+                }
+                chErrorsOut <- exError
+                return
+            }
+            defer rows.Close()
+            log.Debugf("Query executed in %v", time.Since(queryStartTime))
+
+            rowsChunk := make([]UnknownRowValues, 0, chunkSize)
+            totalRowsExtracted := 0
+            chunkStartTime := time.Now()
+
+            for rows.Next() {
+                values := make([]any, len(columns))
+                scanArgs := make([]any, len(columns))
+
+                for i := range values {
+                    scanArgs[i] = &values[i]
+                }
+
+                if err := rows.Scan(scanArgs...); err != nil {
+                    if len(rowsChunk) == 0 {
+                        exError := ExtractorError{
+                            Batch:     batch,
+                            HasLastId: false,
+                            Msg:       err.Error(),
+                        }
+                        chErrorsOut <- exError
+                        return
+                    }
+
+                    lastRow := rowsChunk[len(rowsChunk)-1]
+                    chErrorsOut <- ExtractorErrorFromLastRowMssql(lastRow, indexPrimaryKey, &batch, err)
+                    return
+                }
+
+                rowsChunk = append(rowsChunk, values)
+                totalRowsExtracted++
+
+                if len(rowsChunk) >= chunkSize {
+                    chunkDuration := time.Since(chunkStartTime)
+                    rowsPerSec := float64(chunkSize) / chunkDuration.Seconds()
+                    log.Infof("Extracted chunk: %d rows in %v (%.0f rows/sec) - Total: %d rows",
+                        len(rowsChunk), chunkDuration, rowsPerSec, totalRowsExtracted)
+                    chChunksOut <- rowsChunk
+                    rowsChunk = make([]UnknownRowValues, 0, chunkSize)
+                    chunkStartTime = time.Now()
+                }
+            }
+
+            if len(rowsChunk) > 0 {
+                chunkDuration := time.Since(chunkStartTime)
+                rowsPerSec := float64(len(rowsChunk)) / chunkDuration.Seconds()
+                log.Infof("Extracted final chunk: %d rows in %v (%.0f rows/sec) - Total: %d rows",
+                    len(rowsChunk), chunkDuration, rowsPerSec, totalRowsExtracted)
+                chChunksOut <- rowsChunk
+            }
+
+            if err := rows.Err(); err != nil {
+                if len(rowsChunk) == 0 {
+                    exError := ExtractorError{
+                        Batch:     batch,
+                        HasLastId: false,
+                        Msg:       err.Error(),
+                    }
+                    chErrorsOut <- exError
+                    return
+                }
+
+                lastRow := rowsChunk[len(rowsChunk)-1]
+                chErrorsOut <- ExtractorErrorFromLastRowMssql(lastRow, indexPrimaryKey, &batch, err)
+                return
+            }
+        }()
+    }
+}
 
 func extractFromPostgres(ctx context.Context, job MigrationJob, columns []ColumnType, chunkSize int, db *pgxpool.Pool, out chan<- []UnknownRowValues) error {
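The anonymous func() wrapper around each batch exists so defer rows.Close() fires at the end of every iteration rather than only when extractFromMssql returns. The general shape of the pattern, with hypothetical open/process helpers for illustration:

```go
for item := range in {
    func() {
        res, err := open(item) // hypothetical helper
        if err != nil {
            return
        }
        defer res.Close() // released per iteration, not per function
        process(res) // hypothetical helper
    }()
}
```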
@@ -13,6 +13,11 @@ type MigrationJob struct {
 }
 
 var migrationJobs []MigrationJob = []MigrationJob{
+    {
+        Schema:     "Cartografia",
+        Table:      "MANZANA",
+        PrimaryKey: "GDB_ARCHIVE_OID",
+    },
     {
         Schema: "Red",
         Table:  "PUERTO",
@@ -21,10 +26,12 @@ var migrationJobs []MigrationJob = []MigrationJob{
 }
 
 const (
-    NumExtractors int = 4
-    NumLoaders    int = 8
-    ChunkSize     int = 25000
-    QueueSize     int = 8
+    NumExtractors  int   = 4
+    NumLoaders     int   = 8
+    ChunkSize      int   = 25000
+    QueueSize      int   = 8
+    ChunksPerBatch int   = 16
+    RowsPerBatch   int64 = int64(ChunkSize * ChunksPerBatch)
 )
 
 func main() {
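With these constants, each ranged batch targets ChunkSize × ChunksPerBatch = 25,000 × 16 = 400,000 rows; a table only gets split into multiple batches once it exceeds RowsPerBatch, and because batchGeneratorMssql uses integer division for batchCount, actual rows per batch can run slightly above that target.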
@@ -25,39 +25,43 @@ func processMigrationJob(sourceDb *sql.DB, targetDb *pgxpool.Pool, job Migration
     logColumnTypes(targetColTypes, "Target col types")
 
     mssqlCtx := context.Background()
-    batchRanges, err := calculateBatchMetrics(mssqlCtx, sourceDb, job)
+    batches, err := batchGeneratorMssql(mssqlCtx, sourceDb, job)
     if err != nil {
         log.Error("Unexpected error calculating batch ranges: ", err)
     }
 
-    chBatchRanges := make(chan BatchRange, len(batchRanges))
+    chGlobalErrors := make(chan error)
+    defer close(chGlobalErrors)
 
-    maxExtractors := min(NumExtractors, len(batchRanges))
-    chRowsExtract := make(chan []UnknownRowValues, QueueSize)
+    chBatches := make(chan Batch, len(batches))
+    chChunks := make(chan []UnknownRowValues, QueueSize)
+    chExtractorErrors := make(chan ExtractorError, len(batches))
+    maxExtractors := min(NumExtractors, len(batches))
     var wgMssqlExtractors sync.WaitGroup
 
     log.Infof("Starting %d MSSQL extractors...", maxExtractors)
     extractStartTime := time.Now()
     for range maxExtractors {
         wgMssqlExtractors.Go(func() {
-            for br := range chBatchRanges {
-                if err := extractFromMssql(mssqlCtx, sourceDb, job, sourceColTypes, ChunkSize, br, chRowsExtract); err != nil {
-                    log.Error("Unexpected error extracting data from mssql: ", err)
-                }
-            }
+            extractFromMssql(mssqlCtx, sourceDb, job, sourceColTypes, ChunkSize, chBatches, chChunks, chExtractorErrors)
         })
     }
 
     go func() {
-        for _, br := range batchRanges {
-            chBatchRanges <- br
+        for _, br := range batches {
+            chBatches <- br
         }
-        close(chBatchRanges)
+        close(chBatches)
+        close(chExtractorErrors)
     }()
 
+    go func() {
+        extractorErrorHandler(chExtractorErrors, chBatches, chGlobalErrors)
+    }()
+
     go func() {
         wgMssqlExtractors.Wait()
-        close(chRowsExtract)
+        close(chChunks)
         log.Infof("Extraction completed in %v", time.Since(extractStartTime))
     }()
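One subtlety worth checking here: chBatches and chExtractorErrors are closed as soon as the initial batches have been enqueued, while extractors may still be running; a late extractor error (or a retried batch written back into chBatches by extractorErrorHandler) would then be a send on a closed channel, which panics. A sketch of one safer direction (an assumption, not what this changeset does) is to close both only once all in-flight batches have reached a terminal outcome:

```go
// Sketch with an assumed in-flight counter: extractors/handler would call
// inFlight.Done() when a batch succeeds or is given up on, and Add(1) when
// a retry batch is re-enqueued, so the close happens only after the drain.
var inFlight sync.WaitGroup
inFlight.Add(len(batches))
go func() {
    inFlight.Wait()
    close(chBatches)
    close(chExtractorErrors)
}()
```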
@@ -68,7 +72,7 @@ func processMigrationJob(sourceDb *sql.DB, targetDb *pgxpool.Pool, job Migration
     transformStartTime := time.Now()
     for range maxExtractors {
         wgMssqlTransformers.Go(func() {
-            transformRowsMssql(sourceColTypes, chRowsExtract, chRowsTransform)
+            transformRowsMssql(sourceColTypes, chChunks, chRowsTransform)
         })
     }
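The wg.Go calls used throughout this file are the sync.WaitGroup.Go helper added in Go 1.25; each is shorthand for the classic pattern:

```go
wg.Add(1)
go func() {
    defer wg.Done()
    transformRowsMssql(sourceColTypes, chChunks, chRowsTransform)
}()
```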
@@ -79,17 +83,17 @@ func processMigrationJob(sourceDb *sql.DB, targetDb *pgxpool.Pool, job Migration
     }()
 
     var wgPostgresLoaders sync.WaitGroup
-    // postgresLoaderCtx := context.Background()
+    postgresLoaderCtx := context.Background()
 
     log.Infof("Starting %d PostgreSQL loader(s)...", NumLoaders)
     loaderStartTime := time.Now()
 
     for range NumLoaders {
         wgPostgresLoaders.Go(func() {
-            // if err := loadRowsPostgres(postgresLoaderCtx, job, sourceColTypes, targetDb, chRowsTransform); err != nil {
-            //     log.Error("Unexpected error loading data into postgres: ", err)
-            // }
-            fakeLoader(job, sourceColTypes, chRowsTransform)
+            if err := loadRowsPostgres(postgresLoaderCtx, job, targetColTypes, targetDb, chRowsTransform); err != nil {
+                log.Error("Unexpected error loading data into postgres: ", err)
+            }
+            // fakeLoader(job, sourceColTypes, chRowsTransform)
         })
     }
@@ -43,3 +43,20 @@ func transformRowsMssql(columns []ColumnType, in <-chan []UnknownRowValues, out
         out <- rows
     }
 }
+
+func ToInt64(v any) (int64, bool) {
+    switch t := v.(type) {
+    case int:
+        return int64(t), true
+    case int8:
+        return int64(t), true
+    case int16:
+        return int64(t), true
+    case int32:
+        return int64(t), true
+    case int64:
+        return int64(t), true
+    default:
+        return 0, false
+    }
+}
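database/sql drivers can surface integer columns as any of the signed widths, which is why ToInt64 switches over all of them. A quick demonstration:

```go
for _, v := range []any{int8(7), int32(7), int64(7), "7"} {
    n, ok := ToInt64(v)
    fmt.Println(n, ok) // 7 true / 7 true / 7 true / 0 false
}
```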
@@ -14,8 +14,6 @@ import (
 const (
     totalRows int    = 1_000_000
     chunkSize int    = 50_000
-    schema    string = "Cartografia"
-    table     string = "MANZANA"
     queueSize int    = 4
 )
scripts/mssql-copy-in/puerto.go — new file, 225 lines
@@ -0,0 +1,225 @@
+package main
+
+import (
+    "context"
+    "database/sql"
+    "math/rand"
+    "time"
+
+    log "github.com/sirupsen/logrus"
+)
+
+func seedPuertos(ctx context.Context, db *sql.DB) {
+    rowsChan := make(chan []UnknownRowValues, queueSize)
+
+    // Column names for PUERTO table (excluding ID_PUERTO which is IDENTITY)
+    colNames := []string{
+        "ID_EQUIPO",
+        "ID_TERMINAL",
+        "ID_TIPO_EQUIPO",
+        "ID_PROYECTO_RESERVA",
+        "ID_TIPO_PUERTO",
+        "NUMERO",
+        "CODIGO",
+        "ESTADO",
+        "FECHA_ALTA",
+        "FECHA_ACT",
+        "ID_SITE_HOLDER",
+        "ID_PROYECTO_RESERVA_INICIAL",
+        "ID_DIRECCION",
+        "ID_TIPO_PUERTO_MEMORY",
+    }
+
+    // Start the data generator goroutine
+    go generatePuertoRows(ctx, totalRows, chunkSize, rowsChan)
+
+    // Load rows into MSSQL
+    job := MigrationJob{
+        Schema: "Red",
+        Table:  "PUERTO",
+    }
+
+    if err := loadRowsMssql(ctx, job, colNames, db, rowsChan); err != nil {
+        log.Fatal("Error loading PUERTO rows: ", err)
+    }
+
+    log.Info("PUERTO data generation and loading completed successfully")
+}
+
+// generatePuertoRows creates random row data for the PUERTO table and sends it through a channel
+func generatePuertoRows(
+    ctx context.Context,
+    totalRows int,
+    chunkSize int,
+    out chan<- []UnknownRowValues,
+) {
+    defer close(out)
+
+    rowsGenerated := 0
+    currentChunk := make([]UnknownRowValues, 0, chunkSize)
+
+    for range totalRows {
+        row := generatePuertoRow()
+        currentChunk = append(currentChunk, row)
+        rowsGenerated++
+
+        // Send chunk when it reaches the desired size
+        if len(currentChunk) == chunkSize {
+            select {
+            case out <- currentChunk:
+                log.Debugf("Sent PUERTO chunk with %d rows", len(currentChunk))
+            case <-ctx.Done():
+                log.Info("Context cancelled, stopping PUERTO row generation")
+                return
+            }
+            currentChunk = make([]UnknownRowValues, 0, chunkSize)
+        }
+
+        if rowsGenerated%100_000 == 0 {
+            logPuertoSampleRow(rowsGenerated, row)
+        }
+    }
+
+    // Send remaining rows
+    if len(currentChunk) > 0 {
+        select {
+        case out <- currentChunk:
+            log.Debugf("Sent final PUERTO chunk with %d rows", len(currentChunk))
+        case <-ctx.Done():
+            log.Info("Context cancelled, stopping PUERTO row generation")
+        }
+    }
+
+    log.Infof("Finished generating %d PUERTO rows", rowsGenerated)
+}
+
+// generatePuertoRow creates a single random row for the PUERTO table
+func generatePuertoRow() UnknownRowValues {
+    dateLowerLimit, _ := time.Parse(time.RFC3339, "2020-12-31T23:59:59Z")
+    dateUpperLimit, _ := time.Parse(time.RFC3339, "2025-12-31T23:59:59Z")
+
+    // Required columns
+    idEquipo := rand.Intn(10000) + 1    // ID_EQUIPO (1-10000)
+    idTipoEquipo := rand.Intn(100) + 1  // ID_TIPO_EQUIPO (1-100)
+    idTipoPuerto := rand.Intn(50) + 1   // ID_TIPO_PUERTO (1-50)
+    numero := rand.Intn(1000) + 1       // NUMERO (1-1000)
+    codigo := generateRandomString(100) // CODIGO: Random alphanumeric (up to 100 chars)
+
+    // Optional columns - randomly decide whether to include NULL or a value
+    var idTerminal any
+    if rand.Intn(2) == 0 {
+        idTerminal = rand.Intn(5000) + 1
+    } else {
+        idTerminal = nil
+    }
+
+    var idProyectoReserva any
+    if rand.Intn(2) == 0 {
+        idProyectoReserva = rand.Intn(1000) + 1
+    } else {
+        idProyectoReserva = nil
+    }
+
+    var estado any
+    if rand.Intn(2) == 0 {
+        estados := []string{"ACTIVO", "LIBRE", "DISPONIBLE", "MANTENIMIENTO", "RESERVADO"}
+        estado = estados[rand.Intn(len(estados))]
+    } else {
+        estado = nil
+    }
+
+    var fechaAlta any
+    if rand.Intn(2) == 0 {
+        fechaAlta = generateRandomTimestamp(dateLowerLimit, dateUpperLimit)
+    } else {
+        fechaAlta = nil
+    }
+
+    var fechaAct any
+    if rand.Intn(2) == 0 {
+        fechaAct = generateRandomTimestamp(dateLowerLimit, dateUpperLimit)
+    } else {
+        fechaAct = nil
+    }
+
+    var idSiteHolder any
+    if rand.Intn(2) == 0 {
+        idSiteHolder = rand.Intn(500) + 1
+    } else {
+        idSiteHolder = nil
+    }
+
+    var idProyectoReservaInicial any
+    if rand.Intn(2) == 0 {
+        idProyectoReservaInicial = rand.Intn(1000) + 1
+    } else {
+        idProyectoReservaInicial = nil
+    }
+
+    var idDireccion any
+    if rand.Intn(2) == 0 {
+        idDireccion = rand.Intn(100) + 1
+    } else {
+        idDireccion = nil
+    }
+
+    var idTipoPuertoMemory any
+    if rand.Intn(2) == 0 {
+        idTipoPuertoMemory = rand.Intn(50) + 1
+    } else {
+        idTipoPuertoMemory = nil
+    }
+
+    return UnknownRowValues{
+        idEquipo,
+        idTerminal,
+        idTipoEquipo,
+        idProyectoReserva,
+        idTipoPuerto,
+        numero,
+        codigo,
+        estado,
+        fechaAlta,
+        fechaAct,
+        idSiteHolder,
+        idProyectoReservaInicial,
+        idDireccion,
+        idTipoPuertoMemory,
+    }
+}
+
+func logPuertoSampleRow(id int, rowValues UnknownRowValues) {
+    log.Infof(`
+        Sample row #%d:
+        ID_EQUIPO (%T): %v
+        ID_TERMINAL (%T): %v
+        ID_TIPO_EQUIPO (%T): %v
+        ID_PROYECTO_RESERVA (%T): %v
+        ID_TIPO_PUERTO (%T): %v
+        NUMERO (%T): %v
+        CODIGO (%T): %v
+        ESTADO (%T): %v
+        FECHA_ALTA (%T): %v
+        FECHA_ACT (%T): %v
+        ID_SITE_HOLDER (%T): %v
+        ID_PROYECTO_RESERVA_INICIAL (%T): %v
+        ID_DIRECCION (%T): %v
+        ID_TIPO_PUERTO_MEMORY (%T): %v
+        `,
+        id,
+        rowValues[0], rowValues[0],
+        rowValues[1], rowValues[1],
+        rowValues[2], rowValues[2],
+        rowValues[3], rowValues[3],
+        rowValues[4], rowValues[4],
+        rowValues[5], rowValues[5],
+        rowValues[6], rowValues[6],
+        rowValues[7], rowValues[7],
+        rowValues[8], rowValues[8],
+        rowValues[9], rowValues[9],
+        rowValues[10], rowValues[10],
+        rowValues[11], rowValues[11],
+        rowValues[12], rowValues[12],
+        rowValues[13], rowValues[13],
+    )
+}
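The nine near-identical 50%-NULL blocks above could be collapsed with a small generic helper; a sketch (not part of this changeset):

```go
// maybeNull returns nil half the time, otherwise gen()'s value boxed as any.
func maybeNull[T any](gen func() T) any {
    if rand.Intn(2) == 0 {
        return gen()
    }
    return nil
}

// e.g. idTerminal := maybeNull(func() int { return rand.Intn(5000) + 1 })
```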
@@ -13,13 +13,18 @@ import (
     log "github.com/sirupsen/logrus"
 )
 
+var manzanaJob = MigrationJob{
+    Schema: "Cartografia",
+    Table:  "MANZANA",
+}
+
 func getMaxGDBArchiveOid(ctx context.Context, db *sql.DB) (int, error) {
     var maxOid sql.NullInt64
 
     query := fmt.Sprintf(`
         SELECT ISNULL(MAX(GDB_ARCHIVE_OID), 0)
         FROM [%s].[%s]
-    `, schema, table)
+    `, manzanaJob.Schema, manzanaJob.Table)
 
     err := db.QueryRowContext(ctx, query).Scan(&maxOid)
     if err != nil && err != sql.ErrNoRows {
@@ -96,8 +101,7 @@ func generateManzanaRow(gdbArchiveOid int) UnknownRowValues {
     fechaAct := generateRandomTimestamp(dateLowerLimit, dateUpperLimit)
     shapeWKB := generateRandomPolygonWKB()
     geoData := []byte{}
-    id := uuid.New()
-    globalID := id[:]
+    globalID, _ := uuid.New().MarshalBinary()
     gdbFromDate := fechaAct
     gdbToDate, _ := time.Parse(time.RFC3339, "9999-12-31T23:59:59Z")
     objectID := gdbArchiveOid
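Both forms yield the same 16 raw bytes, since github.com/google/uuid's UUID is a [16]byte and its MarshalBinary never fails; the replacement just drops the temporary:

```go
id := uuid.New()
a := id[:]                 // slice over the array value
b, _ := id.MarshalBinary() // same 16 bytes, error is always nil
// bytes.Equal(a, b) == true
```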
@@ -211,12 +215,7 @@ func seedManzanas(ctx context.Context, db *sql.DB) error {
         "OBJECTID",
     }
 
-    job := MigrationJob{
-        Schema: schema,
-        Table:  table,
-    }
-
-    if err := loadRowsMssql(ctx, job, columns, db, rowsChan); err != nil {
+    if err := loadRowsMssql(ctx, manzanaJob, columns, db, rowsChan); err != nil {
         return fmt.Errorf("Error loading rows (MANZANA): %w", err)
     }