4 Commits

11 changed files with 467 additions and 300 deletions

View File

@@ -5,30 +5,36 @@ import (
     "strings"
 )

-func buildExtractQueryMssql(job MigrationJob, columns []ColumnType) string {
-    var sbColumns strings.Builder
+func buildExtractQueryMssql(job MigrationJob, columns []ColumnType, includeRange bool) string {
+    var sbQuery strings.Builder
+    sbQuery.WriteString("SELECT ")
     if len(columns) == 0 {
-        sbColumns.WriteString("*")
+        sbQuery.WriteString("*")
     } else {
         for i, col := range columns {
-            sbColumns.WriteString("[")
-            sbColumns.WriteString(col.name)
-            sbColumns.WriteString("]")
+            fmt.Fprintf(&sbQuery, "[%s]", col.name)
             if col.unifiedType == "GEOMETRY" {
-                sbColumns.WriteString(".STAsBinary() AS [")
-                sbColumns.WriteString(col.name)
-                sbColumns.WriteString("]")
+                fmt.Fprintf(&sbQuery, ".STAsBinary() AS [%s]", col.name)
             }
             if i < len(columns)-1 {
-                sbColumns.WriteString(", ")
+                sbQuery.WriteString(", ")
             }
         }
     }
-    return fmt.Sprintf(`SELECT %s FROM [%s].[%s] ORDER BY [%s] ASC`, sbColumns.String(), job.Schema, job.Table, job.PrimaryKey)
+    fmt.Fprintf(&sbQuery, " FROM [%s].[%s]", job.Schema, job.Table)
+    if includeRange {
+        fmt.Fprintf(&sbQuery, " WHERE [%s] BETWEEN @minRange AND @maxRange", job.PrimaryKey)
+    }
+    fmt.Fprintf(&sbQuery, " ORDER BY [%s] ASC", job.PrimaryKey)
+    return sbQuery.String()
 }

 func buildExtractQueryPostgres(job MigrationJob, columns []ColumnType) string {
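
Note: for a concrete sense of the output (a sketch, assuming the PUERTO job from the updated config and an empty column list), the new builder yields

    q := buildExtractQueryMssql(job, nil, true)
    // SELECT * FROM [Red].[PUERTO] WHERE [ID_PUERTO] BETWEEN @minRange AND @maxRange ORDER BY [ID_PUERTO] ASC

while includeRange == false simply drops the WHERE clause and reproduces the old full-table extract, still ordered by the primary key.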

View File

@@ -0,0 +1,91 @@
+package main
+
+import (
+    "context"
+    "database/sql"
+    "fmt"
+)
+
+type BatchRange struct {
+    LowerLimit int
+    UpperLimit int
+    validRange bool
+}
+
+func estimateTotalRowsMssql(ctx context.Context, db *sql.DB, job MigrationJob) (int, error) {
+    query := `
+    SELECT
+        SUM(p.rows) AS count
+    FROM sys.tables t
+    JOIN sys.schemas s ON t.schema_id = s.schema_id
+    JOIN sys.partitions p ON t.object_id = p.object_id
+    WHERE s.name = @schema AND t.name = @table AND p.index_id IN (0, 1)
+    GROUP BY t.name`
+
+    var rowsCount int
+    err := db.QueryRowContext(ctx, query, sql.Named("schema", job.Schema), sql.Named("table", job.Table)).Scan(&rowsCount)
+    if err != nil {
+        return 0, err
+    }
+    return rowsCount, nil
+}
+
+func calculateChunkRangesMssql(ctx context.Context, db *sql.DB, job MigrationJob, batchCount int) ([]BatchRange, error) {
+    query := fmt.Sprintf(`
+    SELECT
+        MIN([%s]) AS lower_limit,
+        MAX([%s]) AS upper_limit
+    FROM
+        (SELECT [%s], NTILE(@batchCount) OVER (ORDER BY [%s]) AS chunk_id FROM [%s].[%s]) AS T
+    GROUP BY chunk_id
+    ORDER BY chunk_id`, job.PrimaryKey, job.PrimaryKey, job.PrimaryKey, job.PrimaryKey, job.Schema, job.Table)
+
+    rows, err := db.QueryContext(ctx, query, sql.Named("batchCount", batchCount))
+    if err != nil {
+        return nil, err
+    }
+    defer rows.Close()
+
+    batchRanges := make([]BatchRange, 0, batchCount)
+    for rows.Next() {
+        var br BatchRange
+        br.validRange = true
+        if err := rows.Scan(&br.LowerLimit, &br.UpperLimit); err != nil {
+            return nil, err
+        }
+        batchRanges = append(batchRanges, br)
+    }
+    if err := rows.Err(); err != nil {
+        return nil, err
+    }
+    return batchRanges, nil
+}
+
+const estimatedRowsPerBatch = 100_000
+
+func calculateBatchMetrics(ctx context.Context, db *sql.DB, job MigrationJob) ([]BatchRange, error) {
+    rowsCount, err := estimateTotalRowsMssql(ctx, db, job)
+    if err != nil {
+        return nil, err
+    }
+
+    batchCount := 1
+    if rowsCount > estimatedRowsPerBatch {
+        batchCount = rowsCount / estimatedRowsPerBatch
+    } else {
+        return []BatchRange{{validRange: false}}, nil
+    }
+
+    chunksRange, err := calculateChunkRangesMssql(ctx, db, job, batchCount)
+    if err != nil {
+        return nil, err
+    }
+    return chunksRange, nil
+}
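
Two details worth calling out. The sys.partitions row counts are catalog metadata (index_id 0 is the heap, 1 the clustered index), so the total is an estimate rather than a transactional COUNT(*), which is what the function name signals. And the NTILE trick is easy to sanity-check with hypothetical numbers not taken from the repo: at 250_000 rows, calculateBatchMetrics picks batchCount = 250_000 / 100_000 = 2; if ID_PUERTO runs densely from 1 to 250_000, NTILE(2) OVER (ORDER BY ID_PUERTO) assigns ids 1..125_000 to chunk 1 and the rest to chunk 2, so the MIN/MAX aggregation returns ranges roughly like

    BatchRange{LowerLimit: 1, UpperLimit: 125000, validRange: true}
    BatchRange{LowerLimit: 125001, UpperLimit: 250000, validRange: true}

Gaps in the key do not break this: NTILE balances by row count, so the ranges stay contiguous and non-overlapping even when ids are sparse.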

View File

@@ -12,12 +12,20 @@ import (

 type UnknownRowValues = []any

-func extractFromMssql(ctx context.Context, job MigrationJob, columns []ColumnType, chunkSize int, db *sql.DB, out chan<- []UnknownRowValues) error {
-    query := buildExtractQueryMssql(job, columns)
+func extractFromMssql(ctx context.Context, db *sql.DB, job MigrationJob, columns []ColumnType, chunkSize int, batchRange BatchRange, out chan<- []UnknownRowValues) error {
+    query := buildExtractQueryMssql(job, columns, batchRange.validRange)
     log.Debug("Query used to extract data from mssql: ", query)

+    var queryArgs []any
+    if batchRange.validRange {
+        queryArgs = append(queryArgs,
+            sql.Named("minRange", batchRange.LowerLimit),
+            sql.Named("maxRange", batchRange.UpperLimit),
+        )
+    }
+
     queryStartTime := time.Now()
-    rows, err := db.QueryContext(ctx, query)
+    rows, err := db.QueryContext(ctx, query, queryArgs...)
     if err != nil {
         return err
     }
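
The sql.Named values bind to the @minRange/@maxRange placeholders that buildExtractQueryMssql emits, and passing a nil queryArgs slice through the variadic call is harmless when validRange is false, because that query contains no parameters. A caller needs no more than this (a sketch with a hypothetical range, not repo code):

    br := BatchRange{LowerLimit: 1, UpperLimit: 125000, validRange: true}
    err := extractFromMssql(ctx, db, job, cols, ChunkSize, br, out)

A full-table extract is the same call with a zero-value BatchRange, since validRange then stays false.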

View File

@@ -242,6 +242,10 @@ ORDER BY c.column_id;
             return nil, fmt.Errorf("Error scanning column type results: %w", err)
         }
+
+        if strings.HasPrefix(column.name, "graph_id") && column.systemType == "bigint" {
+            continue
+        }
         colTypes = append(colTypes, MapMssqlColumn(column))
     }
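
Background on the skip (not spelled out in the diff itself): SQL Server graph node and edge tables carry auto-generated internal bigint columns whose names start with graph_id (e.g. a hypothetical graph_id_1FA0); these have no counterpart in the Postgres target, so filtering them out before MapMssqlColumn keeps them out of both the extract query and the target column list.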

View File

@@ -13,5 +13,5 @@ func configureLog() {
         DisableSorting: false,
         PadLevelText:   true,
     })
-    log.SetLevel(log.InfoLevel)
+    log.SetLevel(log.DebugLevel)
 }

View File

@@ -14,17 +14,17 @@ type MigrationJob struct {

 var migrationJobs []MigrationJob = []MigrationJob{
     {
-        Schema:     "Cartografia",
-        Table:      "MANZANA",
-        PrimaryKey: "GDB_ARCHIVE_OID",
+        Schema:     "Red",
+        Table:      "PUERTO",
+        PrimaryKey: "ID_PUERTO",
     },
 }

 const (
-    NumExtractors int = 1
-    NumLoaders    int = 4
-    ChunkSize     int = 50000
-    QueueSize     int = 10
+    NumExtractors int = 4
+    NumLoaders    int = 8
+    ChunkSize     int = 25000
+    QueueSize     int = 8
 )

 func main() {
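
A quick feel for what these knobs mean downstream: each buffered channel holds at most QueueSize chunks of up to ChunkSize rows, so a stage can run at most 8 × 25_000 = 200_000 rows ahead of its consumer, and the old single-extractor configuration becomes four extractors feeding eight loaders.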

View File

@@ -24,39 +24,72 @@ func processMigrationJob(sourceDb *sql.DB, targetDb *pgxpool.Pool, job Migration
     logColumnTypes(sourceColTypes, "Source col types")
     logColumnTypes(targetColTypes, "Target col types")

-    chRowsExtract := make(chan []UnknownRowValues, QueueSize)
-    chRowsTransform := make(chan []UnknownRowValues)
     mssqlCtx := context.Background()
+    batchRanges, err := calculateBatchMetrics(mssqlCtx, sourceDb, job)
+    if err != nil {
+        log.Error("Unexpected error calculating batch ranges: ", err)
+    }

-    go func() {
-        log.Info("Starting extraction from MSSQL...")
+    chBatchRanges := make(chan BatchRange, len(batchRanges))
+    maxExtractors := min(NumExtractors, len(batchRanges))
+    chRowsExtract := make(chan []UnknownRowValues, QueueSize)
+    var wgMssqlExtractors sync.WaitGroup
+    log.Infof("Starting %d MSSQL extractors...", maxExtractors)
     extractStartTime := time.Now()
-    if err := extractFromMssql(mssqlCtx, job, sourceColTypes, ChunkSize, sourceDb, chRowsExtract); err != nil {
-        log.Error("Unexpected error extracting data from mssql: ", err)
-    }
+    for range maxExtractors {
+        wgMssqlExtractors.Go(func() {
+            for br := range chBatchRanges {
+                if err := extractFromMssql(mssqlCtx, sourceDb, job, sourceColTypes, ChunkSize, br, chRowsExtract); err != nil {
+                    log.Error("Unexpected error extracting data from mssql: ", err)
+                }
+            }
+        })
+    }
+
+    go func() {
+        for _, br := range batchRanges {
+            chBatchRanges <- br
+        }
+        close(chBatchRanges)
+    }()
+
+    go func() {
+        wgMssqlExtractors.Wait()
         close(chRowsExtract)
+        log.Infof("Extraction completed in %v", time.Since(extractStartTime))
     }()

-    go func() {
-        log.Info("Starting transformation of rows...")
+    chRowsTransform := make(chan []UnknownRowValues, QueueSize)
+    var wgMssqlTransformers sync.WaitGroup
+    log.Infof("Starting %d MSSQL transformers...", maxExtractors)
     transformStartTime := time.Now()
+    for range maxExtractors {
+        wgMssqlTransformers.Go(func() {
             transformRowsMssql(sourceColTypes, chRowsExtract, chRowsTransform)
+        })
+    }
+
+    go func() {
+        wgMssqlTransformers.Wait()
         close(chRowsTransform)
+        log.Infof("Transformation completed in %v", time.Since(transformStartTime))
     }()

     var wgPostgresLoaders sync.WaitGroup
-    postgresLoaderCtx := context.Background()
+    // postgresLoaderCtx := context.Background()
     log.Infof("Starting %d PostgreSQL loader(s)...", NumLoaders)
     loaderStartTime := time.Now()
     for range NumLoaders {
         wgPostgresLoaders.Go(func() {
-            if err := loadRowsPostgres(postgresLoaderCtx, job, sourceColTypes, targetDb, chRowsTransform); err != nil {
-                log.Error("Unexpected error loading data into postgres: ", err)
-            }
+            // if err := loadRowsPostgres(postgresLoaderCtx, job, sourceColTypes, targetDb, chRowsTransform); err != nil {
+            //     log.Error("Unexpected error loading data into postgres: ", err)
+            // }
+            fakeLoader(job, sourceColTypes, chRowsTransform)
         })
     }
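
The control flow here is the standard fan-out/fan-in shape: workers drain a shared input channel, and one extra goroutine waits on the group and closes the output channel so the next stage's range loops can terminate. Reduced to its skeleton (hypothetical in/out/process names):

    var wg sync.WaitGroup
    for range workers {
        wg.Go(func() {
            for v := range in {
                out <- process(v)
            }
        })
    }
    go func() {
        wg.Wait()
        close(out) // no more producers; lets downstream range loops finish
    }()

Note that sync.WaitGroup.Go, used throughout this change, only exists since Go 1.25; on older toolchains the equivalent is wg.Add(1) before launching the goroutine plus defer wg.Done() inside it.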

View File

@@ -4,20 +4,15 @@ import (
     "context"
     "database/sql"
     "fmt"
-    "math/rand"
     "sync"
-    "time"

-    "github.com/gaspardle/go-mssqlclrgeo"
-    "github.com/google/uuid"
     mssql "github.com/microsoft/go-mssqldb"
     log "github.com/sirupsen/logrus"
-    "github.com/twpayne/go-geom"
-    "github.com/twpayne/go-geom/encoding/wkb"
 )

 const (
-    totalRows int = 5_000_000
+    totalRows int = 1_000_000
     chunkSize int = 50_000
     schema    string = "Cartografia"
     table     string = "MANZANA"
@@ -41,53 +36,13 @@ func main() {
     ctx := context.Background()

-    maxOid, err := getMaxGDBArchiveOid(ctx, db)
-    if err != nil {
-        log.Fatal("Error getting max GDB_ARCHIVE_OID: ", err)
-    }
+    var wgSeed sync.WaitGroup

-    log.Infof("Starting data generation from GDB_ARCHIVE_OID: %d", maxOid+1)
-    rowsChan := make(chan []UnknownRowValues, queueSize)
-    var wgRowGenerator sync.WaitGroup
-    wgRowGenerator.Go(func() {
-        generateManzanaRows(ctx, maxOid, totalRows, chunkSize, rowsChan)
+    wgSeed.Go(func() {
+        seedManzanas(ctx, db)
     })
-    columns := []string{
-        "GDB_ARCHIVE_OID",
-        "ID_MANZANA",
-        "ID_DISTRITO",
-        "NOMBRE",
-        "CODIGO",
-        "CANTIDAD_TOTAL",
-        "OCUPACION_RESIDENCIAL",
-        "OCUPACION_NEGOCIO",
-        "OCUPACION_DEPARTAMENTO",
-        "INDICADOR",
-        "FECHA_ALTA",
-        "FECHA_ACT",
-        "Shape",
-        "GDB_GEOMATTR_DATA",
-        "GlobalID",
-        "GDB_FROM_DATE",
-        "GDB_TO_DATE",
-        "OBJECTID",
-    }
-    job := MigrationJob{
-        Schema: schema,
-        Table:  table,
-    }
-    if err := loadRowsMssql(ctx, job, columns, db, rowsChan); err != nil {
-        log.Fatal("Error loading rows: ", err)
-    }
-    log.Info("Data generation and loading completed successfully")
-    wgRowGenerator.Wait()
+    wgSeed.Wait()
 }

 func loadRowsMssql(ctx context.Context, job MigrationJob, colNames []string, db *sql.DB, in <-chan []UnknownRowValues) error {
@@ -151,162 +106,3 @@ func loadRowsMssql(ctx context.Context, job MigrationJob, colNames []string, db
     return nil
 }
-
-func generateRandomPolygonWKB() []byte {
-    minX := rand.Float64()*180 - 90
-    minY := rand.Float64()*180 - 90
-    size := 0.01
-    coords := []geom.Coord{
-        {minX, minY},
-        {minX + size, minY},
-        {minX + size, minY + size},
-        {minX, minY + size},
-        {minX, minY},
-    }
-    polygon := geom.NewPolygon(geom.XY).MustSetCoords([][]geom.Coord{coords})
-    polygonWkb, _ := wkb.Marshal(polygon, wkb.NDR)
-    return polygonWkb
-}
-
-func getMaxGDBArchiveOid(ctx context.Context, db *sql.DB) (int, error) {
-    var maxOid sql.NullInt64
-    query := fmt.Sprintf(`
-    SELECT ISNULL(MAX(GDB_ARCHIVE_OID), 0)
-    FROM [%s].[%s]
-    `, schema, table)
-    err := db.QueryRowContext(ctx, query).Scan(&maxOid)
-    if err != nil && err != sql.ErrNoRows {
-        return 0, err
-    }
-    if !maxOid.Valid {
-        return 0, nil
-    }
-    return int(maxOid.Int64), nil
-}
-
-func generateManzanaRows(
-    ctx context.Context,
-    startOid int,
-    totalRows int,
-    chunkSize int,
-    out chan<- []UnknownRowValues,
-) {
-    defer close(out)
-    rowsGenerated := 0
-    currentChunk := make([]UnknownRowValues, 0, chunkSize)
-    for i := range totalRows {
-        gdbArchiveOid := startOid + i + 1
-        row := generateManzanaRow(gdbArchiveOid)
-        currentChunk = append(currentChunk, row)
-        rowsGenerated++
-        if len(currentChunk) == chunkSize {
-            select {
-            case out <- currentChunk:
-                log.Debugf("Sent chunk with %d rows", len(currentChunk))
-            case <-ctx.Done():
-                log.Info("Context cancelled, stopping row generation")
-                return
-            }
-            currentChunk = make([]UnknownRowValues, 0, chunkSize)
-        }
-        if rowsGenerated%100_000 == 0 {
-            logManzanaSampleRow(rowsGenerated, row)
-        }
-    }
-    if len(currentChunk) > 0 {
-        select {
-        case out <- currentChunk:
-            log.Debugf("Sent final chunk with %d rows", len(currentChunk))
-        case <-ctx.Done():
-            log.Info("Context cancelled, stopping row generation")
-        }
-    }
-    log.Infof("Finished generating %d rows", rowsGenerated)
-}
-
-func generateManzanaRow(gdbArchiveOid int) UnknownRowValues {
-    dateLowerLimit, _ := time.Parse(time.RFC3339, "2020-12-31T23:59:59Z")
-    dateUpperLimit, _ := time.Parse(time.RFC3339, "2025-12-31T23:59:59Z")
-    rowID := gdbArchiveOid
-    distrito := fmt.Sprintf("D%d", rand.Intn(100))
-    nombre := generateRandomString(15)
-    codigo := generateRandomString(15)
-    cantidadTotal := rand.Intn(1000)
-    ocupacionResidencial := rand.Intn(1000)
-    ocupacionNegocio := rand.Intn(1000)
-    ocupacionDepartamento := rand.Intn(1000)
-    indicador := rand.Intn(10000)
-    fechaAlta := generateRandomTimestamp(dateLowerLimit, dateUpperLimit)
-    fechaAct := generateRandomTimestamp(dateLowerLimit, dateUpperLimit)
-    shapeWKB := generateRandomPolygonWKB()
-    geoData := []byte{}
-    id := uuid.New()
-    globalID := id[:]
-    gdbFromDate := fechaAct
-    gdbToDate, _ := time.Parse(time.RFC3339, "9999-12-31T23:59:59Z")
-    objectID := gdbArchiveOid
-    shapeMssql, err := mssqlclrgeo.WkbToUdtGeo(shapeWKB, false)
-    if err != nil {
-        log.Errorf("Error converting WKB to MSSQL format: %v", err)
-        shapeMssql = []byte{}
-    }
-    return UnknownRowValues{
-        gdbArchiveOid,
-        rowID,
-        distrito,
-        nombre,
-        codigo,
-        cantidadTotal,
-        ocupacionResidencial,
-        ocupacionNegocio,
-        ocupacionDepartamento,
-        indicador,
-        fechaAlta,
-        fechaAct,
-        shapeMssql,
-        geoData,
-        globalID,
-        gdbFromDate,
-        gdbToDate,
-        objectID,
-    }
-}
-
-func generateRandomTimestamp(min, max time.Time) time.Time {
-    minUnix := min.Unix()
-    maxUnix := max.Unix()
-    delta := maxUnix - minUnix
-    secAleatorios := rand.Int63n(delta)
-    return time.Unix(minUnix+secAleatorios, 0)
-}
-
-func generateRandomString(maxLength int) string {
-    const charset = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
-    length := min(rand.Intn(maxLength)+1, maxLength)
-    b := make([]byte, length)
-    for i := range b {
-        b[i] = charset[rand.Intn(len(charset))]
-    }
-    return string(b)
-}

View File

@@ -0,0 +1,227 @@
+package main
+
+import (
+    "context"
+    "database/sql"
+    "fmt"
+    "math/rand"
+    "sync"
+    "time"
+
+    "github.com/gaspardle/go-mssqlclrgeo"
+    "github.com/google/uuid"
+    log "github.com/sirupsen/logrus"
+)
+
+func getMaxGDBArchiveOid(ctx context.Context, db *sql.DB) (int, error) {
+    var maxOid sql.NullInt64
+    query := fmt.Sprintf(`
+    SELECT ISNULL(MAX(GDB_ARCHIVE_OID), 0)
+    FROM [%s].[%s]
+    `, schema, table)
+    err := db.QueryRowContext(ctx, query).Scan(&maxOid)
+    if err != nil && err != sql.ErrNoRows {
+        return 0, err
+    }
+    if !maxOid.Valid {
+        return 0, nil
+    }
+    return int(maxOid.Int64), nil
+}
+
+func generateManzanaRows(
+    ctx context.Context,
+    startOid int,
+    totalRows int,
+    chunkSize int,
+    out chan<- []UnknownRowValues,
+) {
+    defer close(out)
+    rowsGenerated := 0
+    currentChunk := make([]UnknownRowValues, 0, chunkSize)
+    for i := range totalRows {
+        gdbArchiveOid := startOid + i + 1
+        row := generateManzanaRow(gdbArchiveOid)
+        currentChunk = append(currentChunk, row)
+        rowsGenerated++
+        if len(currentChunk) == chunkSize {
+            select {
+            case out <- currentChunk:
+                log.Debugf("Sent chunk with %d rows", len(currentChunk))
+            case <-ctx.Done():
+                log.Info("Context cancelled, stopping row generation")
+                return
+            }
+            currentChunk = make([]UnknownRowValues, 0, chunkSize)
+        }
+        if rowsGenerated%100_000 == 0 {
+            logManzanaSampleRow(rowsGenerated, row)
+        }
+    }
+    if len(currentChunk) > 0 {
+        select {
+        case out <- currentChunk:
+            log.Debugf("Sent final chunk with %d rows", len(currentChunk))
+        case <-ctx.Done():
+            log.Info("Context cancelled, stopping row generation")
+        }
+    }
+    log.Infof("Finished generating %d rows", rowsGenerated)
+}
+
+func generateManzanaRow(gdbArchiveOid int) UnknownRowValues {
+    dateLowerLimit, _ := time.Parse(time.RFC3339, "2020-12-31T23:59:59Z")
+    dateUpperLimit, _ := time.Parse(time.RFC3339, "2025-12-31T23:59:59Z")
+    rowID := gdbArchiveOid
+    distrito := fmt.Sprintf("D%d", rand.Intn(100))
+    nombre := generateRandomString(15)
+    codigo := generateRandomString(15)
+    cantidadTotal := rand.Intn(1000)
+    ocupacionResidencial := rand.Intn(1000)
+    ocupacionNegocio := rand.Intn(1000)
+    ocupacionDepartamento := rand.Intn(1000)
+    indicador := rand.Intn(10000)
+    fechaAlta := generateRandomTimestamp(dateLowerLimit, dateUpperLimit)
+    fechaAct := generateRandomTimestamp(dateLowerLimit, dateUpperLimit)
+    shapeWKB := generateRandomPolygonWKB()
+    geoData := []byte{}
+    id := uuid.New()
+    globalID := id[:]
+    gdbFromDate := fechaAct
+    gdbToDate, _ := time.Parse(time.RFC3339, "9999-12-31T23:59:59Z")
+    objectID := gdbArchiveOid
+    shapeMssql, err := mssqlclrgeo.WkbToUdtGeo(shapeWKB, false)
+    if err != nil {
+        log.Errorf("Error converting WKB to MSSQL format: %v", err)
+        shapeMssql = []byte{}
+    }
+    return UnknownRowValues{
+        gdbArchiveOid,
+        rowID,
+        distrito,
+        nombre,
+        codigo,
+        cantidadTotal,
+        ocupacionResidencial,
+        ocupacionNegocio,
+        ocupacionDepartamento,
+        indicador,
+        fechaAlta,
+        fechaAct,
+        shapeMssql,
+        geoData,
+        globalID,
+        gdbFromDate,
+        gdbToDate,
+        objectID,
+    }
+}
+
+func logManzanaSampleRow(id int, rowValues UnknownRowValues) {
+    log.Infof(`
+    Sample row #%d:
+    GDB_ARCHIVE_OID (%T): %v
+    ID_MANZANA (%T): %v
+    ID_DISTRITO (%T): %v
+    NOMBRE (%T): %v
+    CODIGO (%T): %v
+    CANTIDAD_TOTAL (%T): %v
+    OCUPACION_RESIDENCIAL (%T): %v
+    OCUPACION_NEGOCIO (%T): %v
+    OCUPACION_DEPARTAMENTO (%T): %v
+    INDICADOR (%T): %v
+    FECHA_ALTA (%T): %v
+    FECHA_ACT (%T): %v
+    Shape (%T): %v
+    GDB_GEOMATTR_DATA (%T): %v
+    GlobalID (%T): %v
+    GDB_FROM_DATE (%T): %v
+    GDB_TO_DATE (%T): %v
+    OBJECTID (%T): %v
+    `,
+        id,
+        rowValues[0], rowValues[0],
+        rowValues[1], rowValues[1],
+        rowValues[2], rowValues[2],
+        rowValues[3], rowValues[3],
+        rowValues[4], rowValues[4],
+        rowValues[5], rowValues[5],
+        rowValues[6], rowValues[6],
+        rowValues[7], rowValues[7],
+        rowValues[8], rowValues[8],
+        rowValues[9], rowValues[9],
+        rowValues[10], rowValues[10],
+        rowValues[11], rowValues[11],
+        rowValues[12], rowValues[12],
+        rowValues[13], rowValues[13],
+        rowValues[14], rowValues[14],
+        rowValues[15], rowValues[15],
+        rowValues[16], rowValues[16],
+        rowValues[17], rowValues[17],
+    )
+}
+
+func seedManzanas(ctx context.Context, db *sql.DB) error {
+    maxOid, err := getMaxGDBArchiveOid(ctx, db)
+    if err != nil {
+        log.Fatal("Error getting max GDB_ARCHIVE_OID: ", err)
+    }
+
+    log.Infof("Starting data generation from GDB_ARCHIVE_OID: %d", maxOid+1)
+    rowsChan := make(chan []UnknownRowValues, queueSize)
+    var wgRowGenerator sync.WaitGroup
+    wgRowGenerator.Go(func() {
+        generateManzanaRows(ctx, maxOid, totalRows, chunkSize, rowsChan)
+    })
+
+    columns := []string{
+        "GDB_ARCHIVE_OID",
+        "ID_MANZANA",
+        "ID_DISTRITO",
+        "NOMBRE",
+        "CODIGO",
+        "CANTIDAD_TOTAL",
+        "OCUPACION_RESIDENCIAL",
+        "OCUPACION_NEGOCIO",
+        "OCUPACION_DEPARTAMENTO",
+        "INDICADOR",
+        "FECHA_ALTA",
+        "FECHA_ACT",
+        "Shape",
+        "GDB_GEOMATTR_DATA",
+        "GlobalID",
+        "GDB_FROM_DATE",
+        "GDB_TO_DATE",
+        "OBJECTID",
+    }
+    job := MigrationJob{
+        Schema: schema,
+        Table:  table,
+    }
+    if err := loadRowsMssql(ctx, job, columns, db, rowsChan); err != nil {
+        return fmt.Errorf("Error loading rows (MANZANA): %w", err)
+    }
+    log.Info("Data generation and loading completed successfully (MANZANA)")
+    wgRowGenerator.Wait()
+    return nil
+}
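
Worth noting how the pieces line up: generateManzanaRows is the producer, and its defer close(out) is what ends the consumer's range loop; loadRowsMssql drains rowsChan on seedManzanas's own goroutine; and wgRowGenerator.Wait() only returns once the generator has exited, so the function cannot return while rows are still being produced. The id[:] expression converts the uuid.UUID (a [16]byte) into the byte slice passed for the GlobalID column.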

View File

@@ -4,10 +4,12 @@ import (
     "context"
     "database/sql"
     "fmt"
+    "math/rand"
+    "time"

     "git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
     log "github.com/sirupsen/logrus"
+    "github.com/twpayne/go-geom"
+    "github.com/twpayne/go-geom/encoding/wkb"
 )

 func connectToSqlServer() (*sql.DB, error) {
@@ -36,46 +38,44 @@ func Map[T any, V any](input []T, mapper func(T) V) []V {
     return result
 }

-func logManzanaSampleRow(id int, rowValues UnknownRowValues) {
-    log.Infof(`
-    Sample row #%d:
-    GDB_ARCHIVE_OID (%T): %v
-    ID_MANZANA (%T): %v
-    ID_DISTRITO (%T): %v
-    NOMBRE (%T): %v
-    CODIGO (%T): %v
-    CANTIDAD_TOTAL (%T): %v
-    OCUPACION_RESIDENCIAL (%T): %v
-    OCUPACION_NEGOCIO (%T): %v
-    OCUPACION_DEPARTAMENTO (%T): %v
-    INDICADOR (%T): %v
-    FECHA_ALTA (%T): %v
-    FECHA_ACT (%T): %v
-    Shape (%T): %v
-    GDB_GEOMATTR_DATA (%T): %v
-    GlobalID (%T): %v
-    GDB_FROM_DATE (%T): %v
-    GDB_TO_DATE (%T): %v
-    OBJECTID (%T): %v
-    `,
-        id,
-        rowValues[0], rowValues[0],
-        rowValues[1], rowValues[1],
-        rowValues[2], rowValues[2],
-        rowValues[3], rowValues[3],
-        rowValues[4], rowValues[4],
-        rowValues[5], rowValues[5],
-        rowValues[6], rowValues[6],
-        rowValues[7], rowValues[7],
-        rowValues[8], rowValues[8],
-        rowValues[9], rowValues[9],
-        rowValues[10], rowValues[10],
-        rowValues[11], rowValues[11],
-        rowValues[12], rowValues[12],
-        rowValues[13], rowValues[13],
-        rowValues[14], rowValues[14],
-        rowValues[15], rowValues[15],
-        rowValues[16], rowValues[16],
-        rowValues[17], rowValues[17],
-    )
-}
+func generateRandomPolygonWKB() []byte {
+    minX := rand.Float64()*180 - 90
+    minY := rand.Float64()*180 - 90
+    size := 0.01
+    coords := []geom.Coord{
+        {minX, minY},
+        {minX + size, minY},
+        {minX + size, minY + size},
+        {minX, minY + size},
+        {minX, minY},
+    }
+    polygon := geom.NewPolygon(geom.XY).MustSetCoords([][]geom.Coord{coords})
+    polygonWkb, _ := wkb.Marshal(polygon, wkb.NDR)
+    return polygonWkb
+}
+
+func generateRandomTimestamp(min, max time.Time) time.Time {
+    minUnix := min.Unix()
+    maxUnix := max.Unix()
+    delta := maxUnix - minUnix
+    secAleatorios := rand.Int63n(delta)
+    return time.Unix(minUnix+secAleatorios, 0)
+}
+
+func generateRandomString(maxLength int) string {
+    const charset = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
+    length := min(rand.Intn(maxLength)+1, maxLength)
+    b := make([]byte, length)
+    for i := range b {
+        b[i] = charset[rand.Intn(len(charset))]
+    }
+    return string(b)
+}
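
A few facts these helpers rely on: WKB polygon rings must be closed, which is why the fifth coordinate repeats the first; wkb.NDR selects little-endian WKB encoding; and WkbToUdtGeo then rewraps that WKB into the CLR UDT blob format that a SQL Server geometry column stores. generateRandomString picks a length between 1 and maxLength, so even short columns always get at least one character.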

View File

@@ -9,12 +9,14 @@ import (
 )

 const (
-    NumExtractors       int = 2
-    NumLoaders          int = 4
-    ChunkSize           int = 20
-    totalRecords        int = 500
-    queueSize           int = 10
-    recordsPerExtractor int = totalRecords / NumExtractors
+    numExtractors         int = 2
+    numTransformers       int = numExtractors
+    numLoaders            int = 4
+    chunkSize             int = 20
+    totalRecords          int = 1000
+    extractorsQueueSize   int = 10
+    transformersQueueSize int = 10
+    recordsPerExtractor   int = totalRecords / numExtractors
 )

 type Record struct {
@@ -89,18 +91,18 @@ func Transformer(id int, in <-chan []Record, out chan<- []Record) {

 func Loader(id int, in <-chan []Record) {
     for chunk := range in {
         fmt.Printf("[Loader %d] Processing batch of %d records...\n", id, len(chunk))
-        time.Sleep(randomDurationMs(100, 3000))
+        time.Sleep(randomDurationMs(100, 2000))
     }
 }

 func main() {
-    chChunksExtract := make(chan []Record, queueSize)
-    chChunksTransform := make(chan []Record, queueSize)
+    chChunksExtract := make(chan []Record, extractorsQueueSize)
+    chChunksTransform := make(chan []Record, transformersQueueSize)

     var wgExtractors sync.WaitGroup
-    for i := 1; i <= NumExtractors; i++ {
+    for i := 1; i <= numExtractors; i++ {
         wgExtractors.Go(func() {
-            Extractor(i, ChunkSize, chChunksExtract)
+            Extractor(i, chunkSize, chChunksExtract)
         })
     }
@@ -111,7 +113,7 @@
     }()

     var wgTransformers sync.WaitGroup
-    for i := 1; i <= NumExtractors; i++ {
+    for i := 1; i <= numTransformers; i++ {
         wgTransformers.Go(func() {
             Transformer(i, chChunksExtract, chChunksTransform)
         })
@@ -124,7 +126,7 @@
     }()

     var wgLoaders sync.WaitGroup
-    for i := 1; i <= NumLoaders; i++ {
+    for i := 1; i <= numLoaders; i++ {
         wgLoaders.Go(func() {
             Loader(i, chChunksTransform)
         })
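
The demo's arithmetic stays simple on purpose: recordsPerExtractor = totalRecords / numExtractors = 1000 / 2 = 500 records per extractor which, at chunkSize = 20, is 25 chunks apiece; the four loaders, each sleeping 100 to 2000 ms per batch, drain the 50 total chunks concurrently.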