Compare commits

4 Commits: `c318428563...4434054b21`

- `4434054b21`
- `1e2a37e59f`
- `8afdb45318`
- `8903a04f4d`
```diff
@@ -5,30 +5,36 @@ import (
     "strings"
 )
 
-func buildExtractQueryMssql(job MigrationJob, columns []ColumnType) string {
-    var sbColumns strings.Builder
+func buildExtractQueryMssql(job MigrationJob, columns []ColumnType, includeRange bool) string {
+    var sbQuery strings.Builder
 
+    sbQuery.WriteString("SELECT ")
+
     if len(columns) == 0 {
-        sbColumns.WriteString("*")
+        sbQuery.WriteString("*")
     } else {
         for i, col := range columns {
-            sbColumns.WriteString("[")
-            sbColumns.WriteString(col.name)
-            sbColumns.WriteString("]")
+            fmt.Fprintf(&sbQuery, "[%s]", col.name)
 
             if col.unifiedType == "GEOMETRY" {
-                sbColumns.WriteString(".STAsBinary() AS [")
-                sbColumns.WriteString(col.name)
-                sbColumns.WriteString("]")
+                fmt.Fprintf(&sbQuery, ".STAsBinary() AS [%s]", col.name)
             }
 
             if i < len(columns)-1 {
-                sbColumns.WriteString(", ")
+                sbQuery.WriteString(", ")
             }
         }
     }
 
-    return fmt.Sprintf(`SELECT %s FROM [%s].[%s] ORDER BY [%s] ASC`, sbColumns.String(), job.Schema, job.Table, job.PrimaryKey)
+    fmt.Fprintf(&sbQuery, " FROM [%s].[%s]", job.Schema, job.Table)
+
+    if includeRange {
+        fmt.Fprintf(&sbQuery, " WHERE [%s] BETWEEN @minRange AND @maxRange", job.PrimaryKey)
+    }
+
+    fmt.Fprintf(&sbQuery, " ORDER BY [%s] ASC", job.PrimaryKey)
+
+    return sbQuery.String()
 }
 
 func buildExtractQueryPostgres(job MigrationJob, columns []ColumnType) string {
```
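For a concrete feel of the new builder, here is a minimal, self-contained sketch of the same `fmt.Fprintf`-into-`strings.Builder` pattern, using the `Red.PUERTO` job from later in this compare as sample values (the comma handling is simplified relative to the real function):

```go
package main

import (
    "fmt"
    "strings"
)

func main() {
    var sb strings.Builder
    sb.WriteString("SELECT ")
    for i, col := range []string{"ID_PUERTO", "NOMBRE"} {
        if i > 0 {
            sb.WriteString(", ")
        }
        fmt.Fprintf(&sb, "[%s]", col) // bracket-quote each column name
    }
    fmt.Fprintf(&sb, " FROM [%s].[%s]", "Red", "PUERTO")
    fmt.Fprintf(&sb, " WHERE [%s] BETWEEN @minRange AND @maxRange", "ID_PUERTO")
    fmt.Fprintf(&sb, " ORDER BY [%s] ASC", "ID_PUERTO")
    fmt.Println(sb.String())
    // SELECT [ID_PUERTO], [NOMBRE] FROM [Red].[PUERTO] WHERE [ID_PUERTO] BETWEEN @minRange AND @maxRange ORDER BY [ID_PUERTO] ASC
}
```

Keeping `@minRange`/`@maxRange` as named placeholders rather than interpolating values means the same query text can be reused unchanged for every batch.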
`cmd/go_migrate/chunk-planner.go` (new file, 91 lines):

```go
package main

import (
    "context"
    "database/sql"
    "fmt"
)

type BatchRange struct {
    LowerLimit int
    UpperLimit int
    validRange bool
}

func estimateTotalRowsMssql(ctx context.Context, db *sql.DB, job MigrationJob) (int, error) {
    query := `
        SELECT
            SUM(p.rows) AS count
        FROM sys.tables t
        JOIN sys.schemas s ON t.schema_id = s.schema_id
        JOIN sys.partitions p ON t.object_id = p.object_id
        WHERE s.name = @schema AND t.name = @table AND p.index_id IN (0, 1)
        GROUP BY t.name`

    var rowsCount int
    err := db.QueryRowContext(ctx, query, sql.Named("schema", job.Schema), sql.Named("table", job.Table)).Scan(&rowsCount)
    if err != nil {
        return 0, err
    }

    return rowsCount, nil
}

func calculateChunkRangesMssql(ctx context.Context, db *sql.DB, job MigrationJob, batchCount int) ([]BatchRange, error) {
    query := fmt.Sprintf(`
        SELECT
            MIN([%s]) AS lower_limit,
            MAX([%s]) AS upper_limit
        FROM
            (SELECT [%s], NTILE(@batchCount) OVER (ORDER BY [%s]) AS chunk_id FROM [%s].[%s]) AS T
        GROUP BY chunk_id
        ORDER BY chunk_id`, job.PrimaryKey, job.PrimaryKey, job.PrimaryKey, job.PrimaryKey, job.Schema, job.Table)

    rows, err := db.QueryContext(ctx, query, sql.Named("batchCount", batchCount))
    if err != nil {
        return nil, err
    }
    defer rows.Close()

    batchRanges := make([]BatchRange, 0, batchCount)

    for rows.Next() {
        var br BatchRange
        br.validRange = true

        if err := rows.Scan(&br.LowerLimit, &br.UpperLimit); err != nil {
            return nil, err
        }

        batchRanges = append(batchRanges, br)
    }

    if err := rows.Err(); err != nil {
        return nil, err
    }

    return batchRanges, nil
}

const estimatedRowsPerBatch = 100_000

func calculateBatchMetrics(ctx context.Context, db *sql.DB, job MigrationJob) ([]BatchRange, error) {
    rowsCount, err := estimateTotalRowsMssql(ctx, db, job)
    if err != nil {
        return nil, err
    }

    batchCount := 1
    if rowsCount > estimatedRowsPerBatch {
        batchCount = rowsCount / estimatedRowsPerBatch
    } else {
        return []BatchRange{{validRange: false}}, nil
    }

    chunksRange, err := calculateChunkRangesMssql(ctx, db, job, batchCount)
    if err != nil {
        return nil, err
    }

    return chunksRange, nil
}
```
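The `NTILE(@batchCount)` window function does the partitioning server-side: it assigns each row of the ordered key space to one of `batchCount` contiguous tiles, and the outer `MIN`/`MAX` per `chunk_id` collapse each tile to a `[lower, upper]` pair. A small in-memory sketch of the same distribution rule, purely for intuition (not code from this repo):

```go
package main

import "fmt"

// Pair mirrors BatchRange without the validRange flag.
type Pair struct{ LowerLimit, UpperLimit int }

// ntileRanges reproduces NTILE's rule: with n rows and b tiles, the first
// n%b tiles get n/b+1 rows and the remaining tiles get n/b.
func ntileRanges(sortedKeys []int, batchCount int) []Pair {
    n := len(sortedKeys)
    size, extra := n/batchCount, n%batchCount
    ranges := make([]Pair, 0, batchCount)
    lo := 0
    for i := 0; i < batchCount; i++ {
        s := size
        if i < extra {
            s++
        }
        if s == 0 {
            continue // more tiles than rows: trailing tiles are empty
        }
        ranges = append(ranges, Pair{sortedKeys[lo], sortedKeys[lo+s-1]})
        lo += s
    }
    return ranges
}

func main() {
    fmt.Println(ntileRanges([]int{1, 2, 3, 5, 8, 13, 21}, 3))
    // Output: [{1 3} {5 8} {13 21}]
}
```

Because each range is a min/max over actual key values, `BETWEEN` stays correct even when the primary key has gaps.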
```diff
@@ -12,12 +12,20 @@ import (
 
 type UnknownRowValues = []any
 
-func extractFromMssql(ctx context.Context, job MigrationJob, columns []ColumnType, chunkSize int, db *sql.DB, out chan<- []UnknownRowValues) error {
-    query := buildExtractQueryMssql(job, columns)
+func extractFromMssql(ctx context.Context, db *sql.DB, job MigrationJob, columns []ColumnType, chunkSize int, batchRange BatchRange, out chan<- []UnknownRowValues) error {
+    query := buildExtractQueryMssql(job, columns, batchRange.validRange)
     log.Debug("Query used to extract data from mssql: ", query)
 
+    var queryArgs []any
+    if batchRange.validRange {
+        queryArgs = append(queryArgs,
+            sql.Named("minRange", batchRange.LowerLimit),
+            sql.Named("maxRange", batchRange.UpperLimit),
+        )
+    }
+
     queryStartTime := time.Now()
-    rows, err := db.QueryContext(ctx, query)
+    rows, err := db.QueryContext(ctx, query, queryArgs...)
     if err != nil {
         return err
     }
```
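The variadic spread is what lets one call site serve both modes: when the range is invalid, `queryArgs` stays nil and spreads to zero parameters. A compile-only sketch of the idea (illustrative, not a file from this compare; `github.com/microsoft/go-mssqldb` resolves `sql.Named` values against the `@minRange`/`@maxRange` placeholders):

```go
package extract // illustrative package name

import (
    "context"
    "database/sql"
)

// BatchRange mirrors the type defined in cmd/go_migrate/chunk-planner.go.
type BatchRange struct {
    LowerLimit int
    UpperLimit int
    validRange bool
}

// queryRange issues the extract query with or without range parameters.
func queryRange(ctx context.Context, db *sql.DB, query string, br BatchRange) (*sql.Rows, error) {
    var args []any
    if br.validRange {
        args = append(args,
            sql.Named("minRange", br.LowerLimit),
            sql.Named("maxRange", br.UpperLimit),
        )
    }
    return db.QueryContext(ctx, query, args...) // nil args => no parameters
}
```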
```diff
@@ -242,6 +242,10 @@ ORDER BY c.column_id;
             return nil, fmt.Errorf("Error scanning column type results: %W", err)
         }
 
+        if strings.HasPrefix(column.name, "graph_id") && column.systemType == "bigint" {
+            continue
+        }
+
         colTypes = append(colTypes, MapMssqlColumn(column))
     }
 
```
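SQL Server graph tables carry internal bookkeeping columns named `graph_id_<suffix>` with system type `bigint`; the new check skips them so they are never selected or mapped. A standalone sketch of the same predicate with stand-in types:

```go
package main

import (
    "fmt"
    "strings"
)

type column struct{ name, systemType string }

func main() {
    cols := []column{
        {"ID_PUERTO", "int"},
        {"graph_id_4BDE9A55", "bigint"}, // hypothetical internal graph column
        {"NOMBRE", "varchar"},
    }
    kept := make([]column, 0, len(cols))
    for _, c := range cols {
        if strings.HasPrefix(c.name, "graph_id") && c.systemType == "bigint" {
            continue // same filter as in the diff above
        }
        kept = append(kept, c)
    }
    fmt.Println(kept) // [{ID_PUERTO int} {NOMBRE varchar}]
}
```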
```diff
@@ -13,5 +13,5 @@ func configureLog() {
         DisableSorting: false,
         PadLevelText:   true,
     })
-    log.SetLevel(log.InfoLevel)
+    log.SetLevel(log.DebugLevel)
 }
```
```diff
@@ -14,17 +14,17 @@ type MigrationJob struct {
 
 var migrationJobs []MigrationJob = []MigrationJob{
     {
-        Schema: "Cartografia",
-        Table: "MANZANA",
-        PrimaryKey: "GDB_ARCHIVE_OID",
+        Schema: "Red",
+        Table: "PUERTO",
+        PrimaryKey: "ID_PUERTO",
     },
 }
 
 const (
-    NumExtractors int = 1
-    NumLoaders int = 4
-    ChunkSize int = 50000
-    QueueSize int = 10
+    NumExtractors int = 4
+    NumLoaders int = 8
+    ChunkSize int = 25000
+    QueueSize int = 8
 )
 
 func main() {
```
```diff
@@ -24,39 +24,72 @@ func processMigrationJob(sourceDb *sql.DB, targetDb *pgxpool.Pool, job Migration
     logColumnTypes(sourceColTypes, "Source col types")
     logColumnTypes(targetColTypes, "Target col types")
 
-    chRowsExtract := make(chan []UnknownRowValues, QueueSize)
-    chRowsTransform := make(chan []UnknownRowValues)
     mssqlCtx := context.Background()
+    batchRanges, err := calculateBatchMetrics(mssqlCtx, sourceDb, job)
+    if err != nil {
+        log.Error("Unexpected error calculating batch ranges: ", err)
+    }
+
+    chBatchRanges := make(chan BatchRange, len(batchRanges))
+
+    maxExtractors := min(NumExtractors, len(batchRanges))
+    chRowsExtract := make(chan []UnknownRowValues, QueueSize)
+    var wgMssqlExtractors sync.WaitGroup
+
+    log.Infof("Starting %d MSSQL extractors...", maxExtractors)
+    extractStartTime := time.Now()
+    for range maxExtractors {
+        wgMssqlExtractors.Go(func() {
+            for br := range chBatchRanges {
+                if err := extractFromMssql(mssqlCtx, sourceDb, job, sourceColTypes, ChunkSize, br, chRowsExtract); err != nil {
+                    log.Error("Unexpected error extracting data from mssql: ", err)
+                }
+            }
+        })
+    }
+
     go func() {
-        log.Info("Starting extraction from MSSQL...")
-        extractStartTime := time.Now()
-        if err := extractFromMssql(mssqlCtx, job, sourceColTypes, ChunkSize, sourceDb, chRowsExtract); err != nil {
-            log.Error("Unexpected error extracting data from mssql: ", err)
+        for _, br := range batchRanges {
+            chBatchRanges <- br
         }
+        close(chBatchRanges)
+    }()
+
+    go func() {
+        wgMssqlExtractors.Wait()
         close(chRowsExtract)
         log.Infof("Extraction completed in %v", time.Since(extractStartTime))
     }()
 
+    chRowsTransform := make(chan []UnknownRowValues, QueueSize)
+    var wgMssqlTransformers sync.WaitGroup
+
+    log.Infof("Starting %d MSSQL transformers...", maxExtractors)
+    transformStartTime := time.Now()
+    for range maxExtractors {
+        wgMssqlTransformers.Go(func() {
+            transformRowsMssql(sourceColTypes, chRowsExtract, chRowsTransform)
+        })
+    }
+
     go func() {
-        log.Info("Starting transformation of rows...")
-        transformStartTime := time.Now()
-        transformRowsMssql(sourceColTypes, chRowsExtract, chRowsTransform)
+        wgMssqlTransformers.Wait()
         close(chRowsTransform)
         log.Infof("Transformation completed in %v", time.Since(transformStartTime))
     }()
 
     var wgPostgresLoaders sync.WaitGroup
-    postgresLoaderCtx := context.Background()
+    // postgresLoaderCtx := context.Background()
 
     log.Infof("Starting %d PostgreSQL loader(s)...", NumLoaders)
     loaderStartTime := time.Now()
 
     for range NumLoaders {
         wgPostgresLoaders.Go(func() {
-            if err := loadRowsPostgres(postgresLoaderCtx, job, sourceColTypes, targetDb, chRowsTransform); err != nil {
-                log.Error("Unexpected error loading data into postgres: ", err)
-            }
+            // if err := loadRowsPostgres(postgresLoaderCtx, job, sourceColTypes, targetDb, chRowsTransform); err != nil {
+            //     log.Error("Unexpected error loading data into postgres: ", err)
+            // }
+            fakeLoader(job, sourceColTypes, chRowsTransform)
         })
     }
 
```
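The restructured pipeline is a classic bounded fan-out: a feeder goroutine queues the batch ranges, a fixed pool of extractor goroutines drains the queue, and a closer goroutine shuts the output channel once the pool is done. A minimal sketch of that shape with ints standing in for `BatchRange` values (assumes Go 1.25+ for `sync.WaitGroup.Go` and Go 1.22+ for `for range n`, matching the code above):

```go
package main

import (
    "fmt"
    "sync"
)

func main() {
    const workers = 4
    jobs := make(chan int, 8)
    results := make(chan int, 8)

    var wg sync.WaitGroup
    for range workers {
        wg.Go(func() {
            for j := range jobs {
                results <- j * j // stand-in for extractFromMssql
            }
        })
    }

    go func() { // feeder: queue every job, then close
        for i := 1; i <= 6; i++ {
            jobs <- i
        }
        close(jobs)
    }()

    go func() { // closer: close results only after all workers exit
        wg.Wait()
        close(results)
    }()

    for r := range results {
        fmt.Println(r) // squares, in nondeterministic order
    }
}
```

Closing `chRowsExtract` from a dedicated goroutine after `Wait()` is the key detail: closing it from any single extractor would panic the others on send.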
```diff
@@ -4,20 +4,15 @@ import (
     "context"
     "database/sql"
     "fmt"
-    "math/rand"
     "sync"
     "time"
 
-    "github.com/gaspardle/go-mssqlclrgeo"
-    "github.com/google/uuid"
     mssql "github.com/microsoft/go-mssqldb"
     log "github.com/sirupsen/logrus"
-    "github.com/twpayne/go-geom"
-    "github.com/twpayne/go-geom/encoding/wkb"
 )
 
 const (
-    totalRows int = 5_000_000
+    totalRows int = 1_000_000
     chunkSize int = 50_000
     schema string = "Cartografia"
     table string = "MANZANA"
@@ -41,53 +36,13 @@ func main() {
 
     ctx := context.Background()
 
-    maxOid, err := getMaxGDBArchiveOid(ctx, db)
-    if err != nil {
-        log.Fatal("Error getting max GDB_ARCHIVE_OID: ", err)
-    }
+    var wgSeed sync.WaitGroup
 
-    log.Infof("Starting data generation from GDB_ARCHIVE_OID: %d", maxOid+1)
-
-    rowsChan := make(chan []UnknownRowValues, queueSize)
-
-    var wgRowGenerator sync.WaitGroup
-
-    wgRowGenerator.Go(func() {
-        generateManzanaRows(ctx, maxOid, totalRows, chunkSize, rowsChan)
+    wgSeed.Go(func() {
+        seedManzanas(ctx, db)
     })
 
-    columns := []string{
-        "GDB_ARCHIVE_OID",
-        "ID_MANZANA",
-        "ID_DISTRITO",
-        "NOMBRE",
-        "CODIGO",
-        "CANTIDAD_TOTAL",
-        "OCUPACION_RESIDENCIAL",
-        "OCUPACION_NEGOCIO",
-        "OCUPACION_DEPARTAMENTO",
-        "INDICADOR",
-        "FECHA_ALTA",
-        "FECHA_ACT",
-        "Shape",
-        "GDB_GEOMATTR_DATA",
-        "GlobalID",
-        "GDB_FROM_DATE",
-        "GDB_TO_DATE",
-        "OBJECTID",
-    }
-
-    job := MigrationJob{
-        Schema: schema,
-        Table: table,
-    }
-
-    if err := loadRowsMssql(ctx, job, columns, db, rowsChan); err != nil {
-        log.Fatal("Error loading rows: ", err)
-    }
-
-    log.Info("Data generation and loading completed successfully")
-    wgRowGenerator.Wait()
+    wgSeed.Wait()
 }
 
 func loadRowsMssql(ctx context.Context, job MigrationJob, colNames []string, db *sql.DB, in <-chan []UnknownRowValues) error {
@@ -151,162 +106,3 @@ func loadRowsMssql(ctx context.Context, job MigrationJob, colNames []string, db
 
     return nil
 }
-
-func generateRandomPolygonWKB() []byte {
-    minX := rand.Float64()*180 - 90
-    minY := rand.Float64()*180 - 90
-
-    size := 0.01
-
-    coords := []geom.Coord{
-        {minX, minY},
-        {minX + size, minY},
-        {minX + size, minY + size},
-        {minX, minY + size},
-        {minX, minY},
-    }
-
-    polygon := geom.NewPolygon(geom.XY).MustSetCoords([][]geom.Coord{coords})
-
-    polygonWkb, _ := wkb.Marshal(polygon, wkb.NDR)
-
-    return polygonWkb
-}
-
-func getMaxGDBArchiveOid(ctx context.Context, db *sql.DB) (int, error) {
-    var maxOid sql.NullInt64
-
-    query := fmt.Sprintf(`
-        SELECT ISNULL(MAX(GDB_ARCHIVE_OID), 0)
-        FROM [%s].[%s]
-    `, schema, table)
-
-    err := db.QueryRowContext(ctx, query).Scan(&maxOid)
-    if err != nil && err != sql.ErrNoRows {
-        return 0, err
-    }
-
-    if !maxOid.Valid {
-        return 0, nil
-    }
-
-    return int(maxOid.Int64), nil
-}
-
-func generateManzanaRows(
-    ctx context.Context,
-    startOid int,
-    totalRows int,
-    chunkSize int,
-    out chan<- []UnknownRowValues,
-) {
-    defer close(out)
-
-    rowsGenerated := 0
-    currentChunk := make([]UnknownRowValues, 0, chunkSize)
-
-    for i := range totalRows {
-        gdbArchiveOid := startOid + i + 1
-        row := generateManzanaRow(gdbArchiveOid)
-        currentChunk = append(currentChunk, row)
-        rowsGenerated++
-
-        if len(currentChunk) == chunkSize {
-            select {
-            case out <- currentChunk:
-                log.Debugf("Sent chunk with %d rows", len(currentChunk))
-            case <-ctx.Done():
-                log.Info("Context cancelled, stopping row generation")
-                return
-            }
-            currentChunk = make([]UnknownRowValues, 0, chunkSize)
-        }
-
-        if rowsGenerated%100_000 == 0 {
-            logManzanaSampleRow(rowsGenerated, row)
-        }
-    }
-
-    if len(currentChunk) > 0 {
-        select {
-        case out <- currentChunk:
-            log.Debugf("Sent final chunk with %d rows", len(currentChunk))
-        case <-ctx.Done():
-            log.Info("Context cancelled, stopping row generation")
-        }
-    }
-
-    log.Infof("Finished generating %d rows", rowsGenerated)
-}
-
-func generateManzanaRow(gdbArchiveOid int) UnknownRowValues {
-    dateLowerLimit, _ := time.Parse(time.RFC3339, "2020-12-31T23:59:59Z")
-    dateUpperLimit, _ := time.Parse(time.RFC3339, "2025-12-31T23:59:59Z")
-
-    rowID := gdbArchiveOid
-    distrito := fmt.Sprintf("D%d", rand.Intn(100))
-    nombre := generateRandomString(15)
-    codigo := generateRandomString(15)
-    cantidadTotal := rand.Intn(1000)
-    ocupacionResidencial := rand.Intn(1000)
-    ocupacionNegocio := rand.Intn(1000)
-    ocupacionDepartamento := rand.Intn(1000)
-    indicador := rand.Intn(10000)
-    fechaAlta := generateRandomTimestamp(dateLowerLimit, dateUpperLimit)
-    fechaAct := generateRandomTimestamp(dateLowerLimit, dateUpperLimit)
-    shapeWKB := generateRandomPolygonWKB()
-    geoData := []byte{}
-    id := uuid.New()
-    globalID := id[:]
-    gdbFromDate := fechaAct
-    gdbToDate, _ := time.Parse(time.RFC3339, "9999-12-31T23:59:59Z")
-    objectID := gdbArchiveOid
-
-    shapeMssql, err := mssqlclrgeo.WkbToUdtGeo(shapeWKB, false)
-    if err != nil {
-        log.Errorf("Error converting WKB to MSSQL format: %v", err)
-        shapeMssql = []byte{}
-    }
-
-    return UnknownRowValues{
-        gdbArchiveOid,
-        rowID,
-        distrito,
-        nombre,
-        codigo,
-        cantidadTotal,
-        ocupacionResidencial,
-        ocupacionNegocio,
-        ocupacionDepartamento,
-        indicador,
-        fechaAlta,
-        fechaAct,
-        shapeMssql,
-        geoData,
-        globalID,
-        gdbFromDate,
-        gdbToDate,
-        objectID,
-    }
-}
-
-func generateRandomTimestamp(min, max time.Time) time.Time {
-    minUnix := min.Unix()
-    maxUnix := max.Unix()
-
-    delta := maxUnix - minUnix
-    secAleatorios := rand.Int63n(delta)
-
-    return time.Unix(minUnix+secAleatorios, 0)
-}
-
-func generateRandomString(maxLength int) string {
-    const charset = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
-    length := min(rand.Intn(maxLength)+1, maxLength)
-
-    b := make([]byte, length)
-    for i := range b {
-        b[i] = charset[rand.Intn(len(charset))]
-    }
-    return string(b)
-}
```
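Note that the helpers removed above are relocated rather than deleted: the Manzana-specific functions (`getMaxGDBArchiveOid`, `generateManzanaRows`, `generateManzanaRow`) move into the new `scripts/mssql-copy-in/seed-manzana.go` that follows, and the generic random-data helpers (`generateRandomPolygonWKB`, `generateRandomTimestamp`, `generateRandomString`) move into the shared utility file diffed further down.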
`scripts/mssql-copy-in/seed-manzana.go` (new file, 227 lines):

```go
package main

import (
    "context"
    "database/sql"
    "fmt"
    "math/rand"
    "sync"
    "time"

    "github.com/gaspardle/go-mssqlclrgeo"
    "github.com/google/uuid"
    log "github.com/sirupsen/logrus"
)

func getMaxGDBArchiveOid(ctx context.Context, db *sql.DB) (int, error) {
    var maxOid sql.NullInt64

    query := fmt.Sprintf(`
        SELECT ISNULL(MAX(GDB_ARCHIVE_OID), 0)
        FROM [%s].[%s]
    `, schema, table)

    err := db.QueryRowContext(ctx, query).Scan(&maxOid)
    if err != nil && err != sql.ErrNoRows {
        return 0, err
    }

    if !maxOid.Valid {
        return 0, nil
    }

    return int(maxOid.Int64), nil
}

func generateManzanaRows(
    ctx context.Context,
    startOid int,
    totalRows int,
    chunkSize int,
    out chan<- []UnknownRowValues,
) {
    defer close(out)

    rowsGenerated := 0
    currentChunk := make([]UnknownRowValues, 0, chunkSize)

    for i := range totalRows {
        gdbArchiveOid := startOid + i + 1
        row := generateManzanaRow(gdbArchiveOid)
        currentChunk = append(currentChunk, row)
        rowsGenerated++

        if len(currentChunk) == chunkSize {
            select {
            case out <- currentChunk:
                log.Debugf("Sent chunk with %d rows", len(currentChunk))
            case <-ctx.Done():
                log.Info("Context cancelled, stopping row generation")
                return
            }
            currentChunk = make([]UnknownRowValues, 0, chunkSize)
        }

        if rowsGenerated%100_000 == 0 {
            logManzanaSampleRow(rowsGenerated, row)
        }
    }

    if len(currentChunk) > 0 {
        select {
        case out <- currentChunk:
            log.Debugf("Sent final chunk with %d rows", len(currentChunk))
        case <-ctx.Done():
            log.Info("Context cancelled, stopping row generation")
        }
    }

    log.Infof("Finished generating %d rows", rowsGenerated)
}

func generateManzanaRow(gdbArchiveOid int) UnknownRowValues {
    dateLowerLimit, _ := time.Parse(time.RFC3339, "2020-12-31T23:59:59Z")
    dateUpperLimit, _ := time.Parse(time.RFC3339, "2025-12-31T23:59:59Z")

    rowID := gdbArchiveOid
    distrito := fmt.Sprintf("D%d", rand.Intn(100))
    nombre := generateRandomString(15)
    codigo := generateRandomString(15)
    cantidadTotal := rand.Intn(1000)
    ocupacionResidencial := rand.Intn(1000)
    ocupacionNegocio := rand.Intn(1000)
    ocupacionDepartamento := rand.Intn(1000)
    indicador := rand.Intn(10000)
    fechaAlta := generateRandomTimestamp(dateLowerLimit, dateUpperLimit)
    fechaAct := generateRandomTimestamp(dateLowerLimit, dateUpperLimit)
    shapeWKB := generateRandomPolygonWKB()
    geoData := []byte{}
    id := uuid.New()
    globalID := id[:]
    gdbFromDate := fechaAct
    gdbToDate, _ := time.Parse(time.RFC3339, "9999-12-31T23:59:59Z")
    objectID := gdbArchiveOid

    shapeMssql, err := mssqlclrgeo.WkbToUdtGeo(shapeWKB, false)
    if err != nil {
        log.Errorf("Error converting WKB to MSSQL format: %v", err)
        shapeMssql = []byte{}
    }

    return UnknownRowValues{
        gdbArchiveOid,
        rowID,
        distrito,
        nombre,
        codigo,
        cantidadTotal,
        ocupacionResidencial,
        ocupacionNegocio,
        ocupacionDepartamento,
        indicador,
        fechaAlta,
        fechaAct,
        shapeMssql,
        geoData,
        globalID,
        gdbFromDate,
        gdbToDate,
        objectID,
    }
}

func logManzanaSampleRow(id int, rowValues UnknownRowValues) {
    log.Infof(`
Sample row #%d:
GDB_ARCHIVE_OID (%T): %v
ID_MANZANA (%T): %v
ID_DISTRITO (%T): %v
NOMBRE (%T): %v
CODIGO (%T): %v
CANTIDAD_TOTAL (%T): %v
OCUPACION_RESIDENCIAL (%T): %v
OCUPACION_NEGOCIO (%T): %v
OCUPACION_DEPARTAMENTO (%T): %v
INDICADOR (%T): %v
FECHA_ALTA (%T): %v
FECHA_ACT (%T): %v
Shape (%T): %v
GDB_GEOMATTR_DATA (%T): %v
GlobalID (%T): %v
GDB_FROM_DATE (%T): %v
GDB_TO_DATE (%T): %v
OBJECTID (%T): %v
`,
        id,
        rowValues[0], rowValues[0],
        rowValues[1], rowValues[1],
        rowValues[2], rowValues[2],
        rowValues[3], rowValues[3],
        rowValues[4], rowValues[4],
        rowValues[5], rowValues[5],
        rowValues[6], rowValues[6],
        rowValues[7], rowValues[7],
        rowValues[8], rowValues[8],
        rowValues[9], rowValues[9],
        rowValues[10], rowValues[10],
        rowValues[11], rowValues[11],
        rowValues[12], rowValues[12],
        rowValues[13], rowValues[13],
        rowValues[14], rowValues[14],
        rowValues[15], rowValues[15],
        rowValues[16], rowValues[16],
        rowValues[17], rowValues[17],
    )
}

func seedManzanas(ctx context.Context, db *sql.DB) error {
    maxOid, err := getMaxGDBArchiveOid(ctx, db)
    if err != nil {
        log.Fatal("Error getting max GDB_ARCHIVE_OID: ", err)
    }

    log.Infof("Starting data generation from GDB_ARCHIVE_OID: %d", maxOid+1)

    rowsChan := make(chan []UnknownRowValues, queueSize)

    var wgRowGenerator sync.WaitGroup

    wgRowGenerator.Go(func() {
        generateManzanaRows(ctx, maxOid, totalRows, chunkSize, rowsChan)
    })

    columns := []string{
        "GDB_ARCHIVE_OID",
        "ID_MANZANA",
        "ID_DISTRITO",
        "NOMBRE",
        "CODIGO",
        "CANTIDAD_TOTAL",
        "OCUPACION_RESIDENCIAL",
        "OCUPACION_NEGOCIO",
        "OCUPACION_DEPARTAMENTO",
        "INDICADOR",
        "FECHA_ALTA",
        "FECHA_ACT",
        "Shape",
        "GDB_GEOMATTR_DATA",
        "GlobalID",
        "GDB_FROM_DATE",
        "GDB_TO_DATE",
        "OBJECTID",
    }

    job := MigrationJob{
        Schema: schema,
        Table:  table,
    }

    if err := loadRowsMssql(ctx, job, columns, db, rowsChan); err != nil {
        return fmt.Errorf("Error loading rows (MANZANA): %w", err)
    }

    log.Info("Data generation and loading completed successfully (MANZANA)")
    wgRowGenerator.Wait()

    return nil
}
```
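`generateManzanaRows`' batching skeleton is a reusable pattern: fill a chunk, hand it off, start a fresh backing array, and flush the remainder at the end. A stripped-down sketch with ints standing in for rows:

```go
package main

import "fmt"

func chunkInts(total, chunkSize int, out chan<- []int) {
    defer close(out)
    chunk := make([]int, 0, chunkSize)
    for i := 0; i < total; i++ {
        chunk = append(chunk, i)
        if len(chunk) == chunkSize {
            out <- chunk
            // fresh slice: the old backing array now belongs to the consumer
            chunk = make([]int, 0, chunkSize)
        }
    }
    if len(chunk) > 0 {
        out <- chunk // final, possibly short, chunk
    }
}

func main() {
    ch := make(chan []int)
    go chunkInts(7, 3, ch)
    for c := range ch {
        fmt.Println(c)
    }
    // Output:
    // [0 1 2]
    // [3 4 5]
    // [6]
}
```

Reallocating instead of reusing `chunk = chunk[:0]` matters here: truncating in place would let the producer overwrite data the consumer is still reading.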
```diff
@@ -4,10 +4,12 @@ import (
     "context"
     "database/sql"
     "fmt"
+    "math/rand"
     "time"
 
     "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
-    log "github.com/sirupsen/logrus"
+    "github.com/twpayne/go-geom"
+    "github.com/twpayne/go-geom/encoding/wkb"
 )
 
 func connectToSqlServer() (*sql.DB, error) {
@@ -36,46 +38,44 @@ func Map[T any, V any](input []T, mapper func(T) V) []V {
     return result
 }
 
-func logManzanaSampleRow(id int, rowValues UnknownRowValues) {
-    log.Infof(`
-Sample row #%d:
-GDB_ARCHIVE_OID (%T): %v
-ID_MANZANA (%T): %v
-ID_DISTRITO (%T): %v
-NOMBRE (%T): %v
-CODIGO (%T): %v
-CANTIDAD_TOTAL (%T): %v
-OCUPACION_RESIDENCIAL (%T): %v
-OCUPACION_NEGOCIO (%T): %v
-OCUPACION_DEPARTAMENTO (%T): %v
-INDICADOR (%T): %v
-FECHA_ALTA (%T): %v
-FECHA_ACT (%T): %v
-Shape (%T): %v
-GDB_GEOMATTR_DATA (%T): %v
-GlobalID (%T): %v
-GDB_FROM_DATE (%T): %v
-GDB_TO_DATE (%T): %v
-OBJECTID (%T): %v
-`,
-        id,
-        rowValues[0], rowValues[0],
-        rowValues[1], rowValues[1],
-        rowValues[2], rowValues[2],
-        rowValues[3], rowValues[3],
-        rowValues[4], rowValues[4],
-        rowValues[5], rowValues[5],
-        rowValues[6], rowValues[6],
-        rowValues[7], rowValues[7],
-        rowValues[8], rowValues[8],
-        rowValues[9], rowValues[9],
-        rowValues[10], rowValues[10],
-        rowValues[11], rowValues[11],
-        rowValues[12], rowValues[12],
-        rowValues[13], rowValues[13],
-        rowValues[14], rowValues[14],
-        rowValues[15], rowValues[15],
-        rowValues[16], rowValues[16],
-        rowValues[17], rowValues[17],
-    )
-}
+func generateRandomPolygonWKB() []byte {
+    minX := rand.Float64()*180 - 90
+    minY := rand.Float64()*180 - 90
+
+    size := 0.01
+
+    coords := []geom.Coord{
+        {minX, minY},
+        {minX + size, minY},
+        {minX + size, minY + size},
+        {minX, minY + size},
+        {minX, minY},
+    }
+
+    polygon := geom.NewPolygon(geom.XY).MustSetCoords([][]geom.Coord{coords})
+
+    polygonWkb, _ := wkb.Marshal(polygon, wkb.NDR)
+
+    return polygonWkb
+}
+
+func generateRandomTimestamp(min, max time.Time) time.Time {
+    minUnix := min.Unix()
+    maxUnix := max.Unix()
+
+    delta := maxUnix - minUnix
+    secAleatorios := rand.Int63n(delta)
+
+    return time.Unix(minUnix+secAleatorios, 0)
+}
+
+func generateRandomString(maxLength int) string {
+    const charset = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
+    length := min(rand.Intn(maxLength)+1, maxLength)
+
+    b := make([]byte, length)
+    for i := range b {
+        b[i] = charset[rand.Intn(len(charset))]
+    }
+    return string(b)
+}
```
```diff
@@ -9,12 +9,14 @@ import (
 )
 
 const (
-    NumExtractors int = 2
-    NumLoaders int = 4
-    ChunkSize int = 20
-    totalRecords int = 500
-    queueSize int = 10
-    recordsPerExtractor int = totalRecords / NumExtractors
+    numExtractors int = 2
+    numTransformers int = numExtractors
+    numLoaders int = 4
+    chunkSize int = 20
+    totalRecords int = 1000
+    extractorsQueueSize int = 10
+    transformersQueueSize int = 10
+    recordsPerExtractor int = totalRecords / numExtractors
 )
 
 type Record struct {
@@ -89,18 +91,18 @@ func Transformer(id int, in <-chan []Record, out chan<- []Record) {
 func Loader(id int, in <-chan []Record) {
     for chunk := range in {
         fmt.Printf("[Loader %d] Processing batch of %d records...\n", id, len(chunk))
-        time.Sleep(randomDurationMs(100, 3000))
+        time.Sleep(randomDurationMs(100, 2000))
     }
 }
 
 func main() {
-    chChunksExtract := make(chan []Record, queueSize)
-    chChunksTransform := make(chan []Record, queueSize)
+    chChunksExtract := make(chan []Record, extractorsQueueSize)
+    chChunksTransform := make(chan []Record, transformersQueueSize)
 
     var wgExtractors sync.WaitGroup
-    for i := 1; i <= NumExtractors; i++ {
+    for i := 1; i <= numExtractors; i++ {
         wgExtractors.Go(func() {
-            Extractor(i, ChunkSize, chChunksExtract)
+            Extractor(i, chunkSize, chChunksExtract)
         })
     }
 
@@ -111,7 +113,7 @@ func main() {
     }()
 
     var wgTransformers sync.WaitGroup
-    for i := 1; i <= NumExtractors; i++ {
+    for i := 1; i <= numTransformers; i++ {
         wgTransformers.Go(func() {
             Transformer(i, chChunksExtract, chChunksTransform)
         })
@@ -124,7 +126,7 @@ func main() {
     }()
 
     var wgLoaders sync.WaitGroup
-    for i := 1; i <= NumLoaders; i++ {
+    for i := 1; i <= numLoaders; i++ {
         wgLoaders.Go(func() {
             Loader(i, chChunksTransform)
         })
```