feat: implement data generation and loading for MANZANA with improved structure and logging
This commit is contained in:
227
scripts/mssql-copy-in/seed-manzana.go
Normal file
227
scripts/mssql-copy-in/seed-manzana.go
Normal file
@@ -0,0 +1,227 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/gaspardle/go-mssqlclrgeo"
|
||||
"github.com/google/uuid"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
func getMaxGDBArchiveOid(ctx context.Context, db *sql.DB) (int, error) {
|
||||
var maxOid sql.NullInt64
|
||||
|
||||
query := fmt.Sprintf(`
|
||||
SELECT ISNULL(MAX(GDB_ARCHIVE_OID), 0)
|
||||
FROM [%s].[%s]
|
||||
`, schema, table)
|
||||
|
||||
err := db.QueryRowContext(ctx, query).Scan(&maxOid)
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if !maxOid.Valid {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
return int(maxOid.Int64), nil
|
||||
}
|
||||
|
||||
func generateManzanaRows(
|
||||
ctx context.Context,
|
||||
startOid int,
|
||||
totalRows int,
|
||||
chunkSize int,
|
||||
out chan<- []UnknownRowValues,
|
||||
) {
|
||||
defer close(out)
|
||||
|
||||
rowsGenerated := 0
|
||||
currentChunk := make([]UnknownRowValues, 0, chunkSize)
|
||||
|
||||
for i := range totalRows {
|
||||
gdbArchiveOid := startOid + i + 1
|
||||
row := generateManzanaRow(gdbArchiveOid)
|
||||
currentChunk = append(currentChunk, row)
|
||||
rowsGenerated++
|
||||
|
||||
if len(currentChunk) == chunkSize {
|
||||
select {
|
||||
case out <- currentChunk:
|
||||
log.Debugf("Sent chunk with %d rows", len(currentChunk))
|
||||
case <-ctx.Done():
|
||||
log.Info("Context cancelled, stopping row generation")
|
||||
return
|
||||
}
|
||||
currentChunk = make([]UnknownRowValues, 0, chunkSize)
|
||||
}
|
||||
|
||||
if rowsGenerated%100_000 == 0 {
|
||||
logManzanaSampleRow(rowsGenerated, row)
|
||||
}
|
||||
}
|
||||
|
||||
if len(currentChunk) > 0 {
|
||||
select {
|
||||
case out <- currentChunk:
|
||||
log.Debugf("Sent final chunk with %d rows", len(currentChunk))
|
||||
case <-ctx.Done():
|
||||
log.Info("Context cancelled, stopping row generation")
|
||||
}
|
||||
}
|
||||
|
||||
log.Infof("Finished generating %d rows", rowsGenerated)
|
||||
}
|
||||
|
||||
func generateManzanaRow(gdbArchiveOid int) UnknownRowValues {
|
||||
dateLowerLimit, _ := time.Parse(time.RFC3339, "2020-12-31T23:59:59Z")
|
||||
dateUpperLimit, _ := time.Parse(time.RFC3339, "2025-12-31T23:59:59Z")
|
||||
|
||||
rowID := gdbArchiveOid
|
||||
distrito := fmt.Sprintf("D%d", rand.Intn(100))
|
||||
nombre := generateRandomString(15)
|
||||
codigo := generateRandomString(15)
|
||||
cantidadTotal := rand.Intn(1000)
|
||||
ocupacionResidencial := rand.Intn(1000)
|
||||
ocupacionNegocio := rand.Intn(1000)
|
||||
ocupacionDepartamento := rand.Intn(1000)
|
||||
indicador := rand.Intn(10000)
|
||||
fechaAlta := generateRandomTimestamp(dateLowerLimit, dateUpperLimit)
|
||||
fechaAct := generateRandomTimestamp(dateLowerLimit, dateUpperLimit)
|
||||
shapeWKB := generateRandomPolygonWKB()
|
||||
geoData := []byte{}
|
||||
id := uuid.New()
|
||||
globalID := id[:]
|
||||
gdbFromDate := fechaAct
|
||||
gdbToDate, _ := time.Parse(time.RFC3339, "9999-12-31T23:59:59Z")
|
||||
objectID := gdbArchiveOid
|
||||
|
||||
shapeMssql, err := mssqlclrgeo.WkbToUdtGeo(shapeWKB, false)
|
||||
if err != nil {
|
||||
log.Errorf("Error convirtiendo WKB a formato MSSQL: %v", err)
|
||||
shapeMssql = []byte{}
|
||||
}
|
||||
|
||||
return UnknownRowValues{
|
||||
gdbArchiveOid,
|
||||
rowID,
|
||||
distrito,
|
||||
nombre,
|
||||
codigo,
|
||||
cantidadTotal,
|
||||
ocupacionResidencial,
|
||||
ocupacionNegocio,
|
||||
ocupacionDepartamento,
|
||||
indicador,
|
||||
fechaAlta,
|
||||
fechaAct,
|
||||
shapeMssql,
|
||||
geoData,
|
||||
globalID,
|
||||
gdbFromDate,
|
||||
gdbToDate,
|
||||
objectID,
|
||||
}
|
||||
}
|
||||
|
||||
func logManzanaSampleRow(id int, rowValues UnknownRowValues) {
|
||||
log.Infof(`
|
||||
Sample row #%d:
|
||||
GDB_ARCHIVE_OID (%T): %v
|
||||
ID_MANZANA (%T): %v
|
||||
ID_DISTRITO (%T): %v
|
||||
NOMBRE (%T): %v
|
||||
CODIGO (%T): %v
|
||||
CANTIDAD_TOTAL (%T): %v
|
||||
OCUPACION_RESIDENCIAL (%T): %v
|
||||
OCUPACION_NEGOCIO (%T): %v
|
||||
OCUPACION_DEPARTAMENTO (%T): %v
|
||||
INDICADOR (%T): %v
|
||||
FECHA_ALTA (%T): %v
|
||||
FECHA_ACT (%T): %v
|
||||
Shape (%T): %v
|
||||
GDB_GEOMATTR_DATA (%T): %v
|
||||
GlobalID (%T): %v
|
||||
GDB_FROM_DATE (%T): %v
|
||||
GDB_TO_DATE (%T): %v
|
||||
OBJECTID (%T): %v
|
||||
`,
|
||||
id,
|
||||
rowValues[0], rowValues[0],
|
||||
rowValues[1], rowValues[1],
|
||||
rowValues[2], rowValues[2],
|
||||
rowValues[3], rowValues[3],
|
||||
rowValues[4], rowValues[4],
|
||||
rowValues[5], rowValues[5],
|
||||
rowValues[6], rowValues[6],
|
||||
rowValues[7], rowValues[7],
|
||||
rowValues[8], rowValues[8],
|
||||
rowValues[9], rowValues[9],
|
||||
rowValues[10], rowValues[10],
|
||||
rowValues[11], rowValues[11],
|
||||
rowValues[12], rowValues[12],
|
||||
rowValues[13], rowValues[13],
|
||||
rowValues[14], rowValues[14],
|
||||
rowValues[15], rowValues[15],
|
||||
rowValues[16], rowValues[16],
|
||||
rowValues[17], rowValues[17],
|
||||
)
|
||||
}
|
||||
|
||||
func seedManzanas(ctx context.Context, db *sql.DB) error {
|
||||
maxOid, err := getMaxGDBArchiveOid(ctx, db)
|
||||
if err != nil {
|
||||
log.Fatal("Error getting max GDB_ARCHIVE_OID: ", err)
|
||||
}
|
||||
|
||||
log.Infof("Starting data generation from GDB_ARCHIVE_OID: %d", maxOid+1)
|
||||
|
||||
rowsChan := make(chan []UnknownRowValues, queueSize)
|
||||
|
||||
var wgRowGenerator sync.WaitGroup
|
||||
|
||||
wgRowGenerator.Go(func() {
|
||||
generateManzanaRows(ctx, maxOid, totalRows, chunkSize, rowsChan)
|
||||
})
|
||||
|
||||
columns := []string{
|
||||
"GDB_ARCHIVE_OID",
|
||||
"ID_MANZANA",
|
||||
"ID_DISTRITO",
|
||||
"NOMBRE",
|
||||
"CODIGO",
|
||||
"CANTIDAD_TOTAL",
|
||||
"OCUPACION_RESIDENCIAL",
|
||||
"OCUPACION_NEGOCIO",
|
||||
"OCUPACION_DEPARTAMENTO",
|
||||
"INDICADOR",
|
||||
"FECHA_ALTA",
|
||||
"FECHA_ACT",
|
||||
"Shape",
|
||||
"GDB_GEOMATTR_DATA",
|
||||
"GlobalID",
|
||||
"GDB_FROM_DATE",
|
||||
"GDB_TO_DATE",
|
||||
"OBJECTID",
|
||||
}
|
||||
|
||||
job := MigrationJob{
|
||||
Schema: schema,
|
||||
Table: table,
|
||||
}
|
||||
|
||||
if err := loadRowsMssql(ctx, job, columns, db, rowsChan); err != nil {
|
||||
return fmt.Errorf("Error loading rows (MANZANA): %w", err)
|
||||
}
|
||||
|
||||
log.Info("Data generation and loading completed successfully (MANZANA)")
|
||||
wgRowGenerator.Wait()
|
||||
|
||||
return nil
|
||||
}
|
||||
Reference in New Issue
Block a user