feat: implement data generation and loading for MANZANA with improved structure and logging
This commit is contained in:
@@ -4,16 +4,11 @@ import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/gaspardle/go-mssqlclrgeo"
|
||||
"github.com/google/uuid"
|
||||
mssql "github.com/microsoft/go-mssqldb"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"github.com/twpayne/go-geom"
|
||||
"github.com/twpayne/go-geom/encoding/wkb"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -41,53 +36,13 @@ func main() {
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
maxOid, err := getMaxGDBArchiveOid(ctx, db)
|
||||
if err != nil {
|
||||
log.Fatal("Error getting max GDB_ARCHIVE_OID: ", err)
|
||||
}
|
||||
var wgSeed sync.WaitGroup
|
||||
|
||||
log.Infof("Starting data generation from GDB_ARCHIVE_OID: %d", maxOid+1)
|
||||
|
||||
rowsChan := make(chan []UnknownRowValues, queueSize)
|
||||
|
||||
var wgRowGenerator sync.WaitGroup
|
||||
|
||||
wgRowGenerator.Go(func() {
|
||||
generateManzanaRows(ctx, maxOid, totalRows, chunkSize, rowsChan)
|
||||
wgSeed.Go(func() {
|
||||
seedManzanas(ctx, db)
|
||||
})
|
||||
|
||||
columns := []string{
|
||||
"GDB_ARCHIVE_OID",
|
||||
"ID_MANZANA",
|
||||
"ID_DISTRITO",
|
||||
"NOMBRE",
|
||||
"CODIGO",
|
||||
"CANTIDAD_TOTAL",
|
||||
"OCUPACION_RESIDENCIAL",
|
||||
"OCUPACION_NEGOCIO",
|
||||
"OCUPACION_DEPARTAMENTO",
|
||||
"INDICADOR",
|
||||
"FECHA_ALTA",
|
||||
"FECHA_ACT",
|
||||
"Shape",
|
||||
"GDB_GEOMATTR_DATA",
|
||||
"GlobalID",
|
||||
"GDB_FROM_DATE",
|
||||
"GDB_TO_DATE",
|
||||
"OBJECTID",
|
||||
}
|
||||
|
||||
job := MigrationJob{
|
||||
Schema: schema,
|
||||
Table: table,
|
||||
}
|
||||
|
||||
if err := loadRowsMssql(ctx, job, columns, db, rowsChan); err != nil {
|
||||
log.Fatal("Error loading rows: ", err)
|
||||
}
|
||||
|
||||
log.Info("Data generation and loading completed successfully")
|
||||
wgRowGenerator.Wait()
|
||||
wgSeed.Wait()
|
||||
}
|
||||
|
||||
func loadRowsMssql(ctx context.Context, job MigrationJob, colNames []string, db *sql.DB, in <-chan []UnknownRowValues) error {
|
||||
@@ -151,162 +106,3 @@ func loadRowsMssql(ctx context.Context, job MigrationJob, colNames []string, db
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func generateRandomPolygonWKB() []byte {
|
||||
minX := rand.Float64()*180 - 90
|
||||
minY := rand.Float64()*180 - 90
|
||||
|
||||
size := 0.01
|
||||
|
||||
coords := []geom.Coord{
|
||||
{minX, minY},
|
||||
{minX + size, minY},
|
||||
{minX + size, minY + size},
|
||||
{minX, minY + size},
|
||||
{minX, minY},
|
||||
}
|
||||
|
||||
polygon := geom.NewPolygon(geom.XY).MustSetCoords([][]geom.Coord{coords})
|
||||
|
||||
polygonWkb, _ := wkb.Marshal(polygon, wkb.NDR)
|
||||
|
||||
return polygonWkb
|
||||
}
|
||||
|
||||
func getMaxGDBArchiveOid(ctx context.Context, db *sql.DB) (int, error) {
|
||||
var maxOid sql.NullInt64
|
||||
|
||||
query := fmt.Sprintf(`
|
||||
SELECT ISNULL(MAX(GDB_ARCHIVE_OID), 0)
|
||||
FROM [%s].[%s]
|
||||
`, schema, table)
|
||||
|
||||
err := db.QueryRowContext(ctx, query).Scan(&maxOid)
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if !maxOid.Valid {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
return int(maxOid.Int64), nil
|
||||
}
|
||||
|
||||
func generateManzanaRows(
|
||||
ctx context.Context,
|
||||
startOid int,
|
||||
totalRows int,
|
||||
chunkSize int,
|
||||
out chan<- []UnknownRowValues,
|
||||
) {
|
||||
defer close(out)
|
||||
|
||||
rowsGenerated := 0
|
||||
currentChunk := make([]UnknownRowValues, 0, chunkSize)
|
||||
|
||||
for i := range totalRows {
|
||||
gdbArchiveOid := startOid + i + 1
|
||||
row := generateManzanaRow(gdbArchiveOid)
|
||||
currentChunk = append(currentChunk, row)
|
||||
rowsGenerated++
|
||||
|
||||
if len(currentChunk) == chunkSize {
|
||||
select {
|
||||
case out <- currentChunk:
|
||||
log.Debugf("Sent chunk with %d rows", len(currentChunk))
|
||||
case <-ctx.Done():
|
||||
log.Info("Context cancelled, stopping row generation")
|
||||
return
|
||||
}
|
||||
currentChunk = make([]UnknownRowValues, 0, chunkSize)
|
||||
}
|
||||
|
||||
if rowsGenerated%100_000 == 0 {
|
||||
logManzanaSampleRow(rowsGenerated, row)
|
||||
}
|
||||
}
|
||||
|
||||
if len(currentChunk) > 0 {
|
||||
select {
|
||||
case out <- currentChunk:
|
||||
log.Debugf("Sent final chunk with %d rows", len(currentChunk))
|
||||
case <-ctx.Done():
|
||||
log.Info("Context cancelled, stopping row generation")
|
||||
}
|
||||
}
|
||||
|
||||
log.Infof("Finished generating %d rows", rowsGenerated)
|
||||
}
|
||||
|
||||
func generateManzanaRow(gdbArchiveOid int) UnknownRowValues {
|
||||
dateLowerLimit, _ := time.Parse(time.RFC3339, "2020-12-31T23:59:59Z")
|
||||
dateUpperLimit, _ := time.Parse(time.RFC3339, "2025-12-31T23:59:59Z")
|
||||
|
||||
rowID := gdbArchiveOid
|
||||
distrito := fmt.Sprintf("D%d", rand.Intn(100))
|
||||
nombre := generateRandomString(15)
|
||||
codigo := generateRandomString(15)
|
||||
cantidadTotal := rand.Intn(1000)
|
||||
ocupacionResidencial := rand.Intn(1000)
|
||||
ocupacionNegocio := rand.Intn(1000)
|
||||
ocupacionDepartamento := rand.Intn(1000)
|
||||
indicador := rand.Intn(10000)
|
||||
fechaAlta := generateRandomTimestamp(dateLowerLimit, dateUpperLimit)
|
||||
fechaAct := generateRandomTimestamp(dateLowerLimit, dateUpperLimit)
|
||||
shapeWKB := generateRandomPolygonWKB()
|
||||
geoData := []byte{}
|
||||
id := uuid.New()
|
||||
globalID := id[:]
|
||||
gdbFromDate := fechaAct
|
||||
gdbToDate, _ := time.Parse(time.RFC3339, "9999-12-31T23:59:59Z")
|
||||
objectID := gdbArchiveOid
|
||||
|
||||
shapeMssql, err := mssqlclrgeo.WkbToUdtGeo(shapeWKB, false)
|
||||
if err != nil {
|
||||
log.Errorf("Error convirtiendo WKB a formato MSSQL: %v", err)
|
||||
shapeMssql = []byte{}
|
||||
}
|
||||
|
||||
return UnknownRowValues{
|
||||
gdbArchiveOid,
|
||||
rowID,
|
||||
distrito,
|
||||
nombre,
|
||||
codigo,
|
||||
cantidadTotal,
|
||||
ocupacionResidencial,
|
||||
ocupacionNegocio,
|
||||
ocupacionDepartamento,
|
||||
indicador,
|
||||
fechaAlta,
|
||||
fechaAct,
|
||||
shapeMssql,
|
||||
geoData,
|
||||
globalID,
|
||||
gdbFromDate,
|
||||
gdbToDate,
|
||||
objectID,
|
||||
}
|
||||
}
|
||||
|
||||
func generateRandomTimestamp(min, max time.Time) time.Time {
|
||||
minUnix := min.Unix()
|
||||
maxUnix := max.Unix()
|
||||
|
||||
delta := maxUnix - minUnix
|
||||
secAleatorios := rand.Int63n(delta)
|
||||
|
||||
return time.Unix(minUnix+secAleatorios, 0)
|
||||
}
|
||||
|
||||
func generateRandomString(maxLength int) string {
|
||||
const charset = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
|
||||
length := min(rand.Intn(maxLength)+1, maxLength)
|
||||
|
||||
b := make([]byte, length)
|
||||
for i := range b {
|
||||
b[i] = charset[rand.Intn(len(charset))]
|
||||
}
|
||||
return string(b)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user