Compare commits
10 Commits: f589664320 ... c318428563

- c318428563
- 8738ef92b7
- 270a66dbbf
- 7bb67ddfcf
- 6da321c4bb
- 2f8263d332
- aea310a3dd
- f305ddec0b
- 8de4838e00
- d4786ed920
```diff
@@ -3,24 +3,31 @@ package main
 import (
 	"context"
 	"database/sql"
+	"time"
 
+	"github.com/jackc/pgx/v5/pgxpool"
 	_ "github.com/microsoft/go-mssqldb"
 	log "github.com/sirupsen/logrus"
 )
 
-type UnknownRowValues []any
+type UnknownRowValues = []any
 
 func extractFromMssql(ctx context.Context, job MigrationJob, columns []ColumnType, chunkSize int, db *sql.DB, out chan<- []UnknownRowValues) error {
 	query := buildExtractQueryMssql(job, columns)
 	log.Debug("Query used to extract data from mssql: ", query)
 
+	queryStartTime := time.Now()
 	rows, err := db.QueryContext(ctx, query)
 	if err != nil {
 		return err
 	}
 	defer rows.Close()
+	log.Debugf("Query executed in %v", time.Since(queryStartTime))
 
 	rowsChunk := make([]UnknownRowValues, 0, chunkSize)
+	totalRowsExtracted := 0
+	chunkCount := 0
+	chunkStartTime := time.Now()
 
 	for rows.Next() {
 		values := make([]any, len(columns))
@@ -34,6 +41,50 @@ func extractFromMssql(ctx context.Context, job MigrationJob, columns []ColumnTyp
 			return err
 		}
 
+		rowsChunk = append(rowsChunk, values)
+		totalRowsExtracted++
+
+		if len(rowsChunk) >= chunkSize {
+			chunkCount++
+			chunkDuration := time.Since(chunkStartTime)
+			rowsPerSec := float64(chunkSize) / chunkDuration.Seconds()
+			log.Infof("Extracted chunk #%d: %d rows in %v (%.0f rows/sec) - Total: %d rows", chunkCount, len(rowsChunk), chunkDuration, rowsPerSec, totalRowsExtracted)
+			out <- rowsChunk
+			rowsChunk = make([]UnknownRowValues, 0, chunkSize)
+			chunkStartTime = time.Now()
+		}
+	}
+
+	if len(rowsChunk) > 0 {
+		chunkCount++
+		chunkDuration := time.Since(chunkStartTime)
+		rowsPerSec := float64(len(rowsChunk)) / chunkDuration.Seconds()
+		log.Infof("Extracted final chunk #%d: %d rows in %v (%.0f rows/sec) - Total: %d rows",
+			chunkCount, len(rowsChunk), chunkDuration, rowsPerSec, totalRowsExtracted)
+		out <- rowsChunk
+	}
+
+	return rows.Err()
+}
+
+func extractFromPostgres(ctx context.Context, job MigrationJob, columns []ColumnType, chunkSize int, db *pgxpool.Pool, out chan<- []UnknownRowValues) error {
+	query := buildExtractQueryPostgres(job, columns)
+	log.Debug("Query used to extract data from postgres: ", query)
+
+	rows, err := db.Query(ctx, query)
+	if err != nil {
+		return err
+	}
+	defer rows.Close()
+
+	rowsChunk := make([]UnknownRowValues, 0, chunkSize)
+
+	for rows.Next() {
+		values, err := rows.Values()
+		if err != nil {
+			return err
+		}
+
 		rowsChunk = append(rowsChunk, values)
 
 		if len(rowsChunk) >= chunkSize {
```
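One subtle change in this file is `type UnknownRowValues []any` becoming a type alias, `type UnknownRowValues = []any`. The practical difference: with a defined type, `[]UnknownRowValues` is a distinct type from `[][]any` and cannot be passed where `[][]any` is expected (for example to `pgx.CopyFromRows`, used by the new loader below), while with the alias the two types are identical. A minimal sketch of the distinction; the `demo` helper is illustrative only, not part of the diff:

```go
package main

import "github.com/jackc/pgx/v5"

// With a defined type, []defined is a different type from [][]any.
type defined []any

// With an alias, []aliased is exactly [][]any.
type aliased = []any

// demo compiles because aliased is an alias for []any; swapping in the
// defined type would make pgx.CopyFromRows(rows) a compile error.
func demo(rows []aliased) pgx.CopyFromSource {
	return pgx.CopyFromRows(rows)
}
```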
cmd/go_migrate/loader.go (new file, 136 lines)

```go
package main

import (
	"context"
	"database/sql"
	"fmt"
	"time"

	"github.com/jackc/pgx/v5"
	"github.com/jackc/pgx/v5/pgxpool"
	mssql "github.com/microsoft/go-mssqldb"
	log "github.com/sirupsen/logrus"
)

func fakeLoader(job MigrationJob, columns []ColumnType, in <-chan [][]any) {
	for rows := range in {
		log.Debugf("Chunk received, loading data into...")

		for i, rowValues := range rows {
			if i%100 == 0 {
				logSampleRow(job, columns, rowValues, fmt.Sprintf("row %d", i))
			}
		}
	}
}

func loadRowsPostgres(ctx context.Context, job MigrationJob, columns []ColumnType, db *pgxpool.Pool, in <-chan []UnknownRowValues) error {
	chunkCount := 0
	totalRowsLoaded := 0

	for rows := range in {
		chunkStartTime := time.Now()
		identifier := pgx.Identifier{job.Schema, job.Table}
		colNames := Map(columns, func(col ColumnType) string {
			return col.name
		})

		copyStartTime := time.Now()
		_, err := db.CopyFrom(
			ctx,
			identifier,
			colNames,
			pgx.CopyFromRows(rows),
		)

		if err != nil {
			return err
		}

		chunkCount++
		totalRowsLoaded += len(rows)
		copyDuration := time.Since(copyStartTime)
		chunkDuration := time.Since(chunkStartTime)
		rowsPerSec := float64(len(rows)) / chunkDuration.Seconds()

		log.Infof("Loaded chunk #%d: %d rows in %v (copy: %v, %.0f rows/sec) - Total: %d rows", chunkCount, len(rows), chunkDuration, copyDuration, rowsPerSec, totalRowsLoaded)
	}

	return nil
}

func loadRowsMssql(ctx context.Context, job MigrationJob, columns []ColumnType, db *sql.DB, in <-chan []UnknownRowValues) error {
	chunkCount := 0
	totalRowsLoaded := 0

	for rows := range in {
		chunkStartTime := time.Now()

		tx, err := db.BeginTx(ctx, nil)
		if err != nil {
			return fmt.Errorf("error starting transaction: %w", err)
		}

		fullTableName := fmt.Sprintf("[%s].[%s]", job.Schema, job.Table)
		colNames := Map(columns, func(col ColumnType) string {
			return col.name
		})

		stmt, err := tx.PrepareContext(ctx, mssql.CopyIn(fullTableName, mssql.BulkOptions{}, colNames...))
		if err != nil {
			tx.Rollback()
			return fmt.Errorf("error preparing bulk copy statement: %w", err)
		}

		copyStartTime := time.Now()

		for _, row := range rows {
			_, err = stmt.ExecContext(ctx, row...)
			if err != nil {
				stmt.Close()
				tx.Rollback()
				return fmt.Errorf("error executing row insert: %w", err)
			}
		}

		result, err := stmt.ExecContext(ctx)
		if err != nil {
			stmt.Close()
			tx.Rollback()
			return fmt.Errorf("error flushing bulk data: %w", err)
		}

		err = stmt.Close()
		if err != nil {
			tx.Rollback()
			return fmt.Errorf("error closing statement: %w", err)
		}

		if err := tx.Commit(); err != nil {
			return fmt.Errorf("error committing transaction: %w", err)
		}

		rowsAffected, _ := result.RowsAffected()
		chunkCount++
		totalRowsLoaded += int(rowsAffected)

		copyDuration := time.Since(copyStartTime)
		chunkDuration := time.Since(chunkStartTime)
		rowsPerSec := float64(len(rows)) / chunkDuration.Seconds()

		log.Infof("Loaded chunk #%d (MSSQL): %d rows in %v (copy: %v, %.0f rows/sec) - Total: %d rows", chunkCount, len(rows), chunkDuration, copyDuration, rowsPerSec, totalRowsLoaded)
	}

	return nil
}

func Map[T any, V any](input []T, mapper func(T) V) []V {
	result := make([]V, len(input))

	for i, v := range input {
		result[i] = mapper(v)
	}

	return result
}
```
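For readers unfamiliar with go-mssqldb's bulk-copy API used in `loadRowsMssql`: `mssql.CopyIn` returns a marker statement that the driver turns into a bulk insert, each `ExecContext(ctx, row...)` queues one row into the bulk operation, and a final `ExecContext(ctx)` with no arguments flushes the batch and reports the row count, which is why the function calls `ExecContext` twice. A minimal, self-contained sketch of the same flow; the connection string and the `[dbo].[Items]` table are hypothetical:

```go
package main

import (
	"database/sql"
	"fmt"
	"log"

	mssql "github.com/microsoft/go-mssqldb"
)

func main() {
	// Hypothetical connection string and table; only the CopyIn flow matters here.
	db, err := sql.Open("sqlserver", "sqlserver://user:pass@localhost:1433?database=demo")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	tx, err := db.Begin()
	if err != nil {
		log.Fatal(err)
	}

	// CopyIn returns a marker statement understood by the driver.
	stmt, err := tx.Prepare(mssql.CopyIn("[dbo].[Items]", mssql.BulkOptions{}, "ID", "Name"))
	if err != nil {
		log.Fatal(err)
	}

	for i := 0; i < 3; i++ {
		// Each Exec with arguments queues one row into the bulk operation.
		if _, err := stmt.Exec(i, fmt.Sprintf("item-%d", i)); err != nil {
			log.Fatal(err)
		}
	}

	// A final Exec with no arguments flushes the batch and reports the row count.
	res, err := stmt.Exec()
	if err != nil {
		log.Fatal(err)
	}
	n, _ := res.RowsAffected()
	stmt.Close()

	if err := tx.Commit(); err != nil {
		log.Fatal(err)
	}
	log.Printf("bulk-copied %d rows", n)
}
```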
```diff
@@ -10,6 +10,8 @@ func configureLog() {
 	log.SetFormatter(&log.TextFormatter{
 		FullTimestamp:   true,
 		TimestampFormat: time.StampMilli,
+		DisableSorting:  false,
+		PadLevelText:    true,
 	})
-	log.SetLevel(log.DebugLevel)
+	log.SetLevel(log.InfoLevel)
 }
```
```diff
@@ -1,6 +1,8 @@
 package main
 
 import (
+	"time"
+
 	log "github.com/sirupsen/logrus"
 )
 
@@ -19,15 +21,17 @@ var migrationJobs []MigrationJob = []MigrationJob{
 }
 
 const (
-	NumExtractors int = 2
-	ChunkSize int = 20
+	NumExtractors int = 1
+	NumLoaders int = 4
+	ChunkSize int = 50000
 	QueueSize int = 10
 )
 
 func main() {
 	configureLog()
-	log.Info("Starting migration...")
-	// log.Debugf("Migration jobs: %+v", migrationJobs)
+	startTime := time.Now()
+	log.Info("=== Starting migration ===")
+	log.Infof("Number of loaders: %d, Chunk size: %d", NumLoaders, ChunkSize)
 
 	sourceDb, targetDb, connError := connectToDatabases()
 	if connError != nil {
@@ -38,9 +42,11 @@ func main() {
 	defer targetDb.Close()
 
 	for _, job := range migrationJobs {
-		log.Infof("Processing job: %+v", job)
+		log.Infof(">>> Processing job: %s.%s <<<", job.Schema, job.Table)
 		processMigrationJob(sourceDb, targetDb, job)
 	}
 
-	log.Info("Migration completed successfully!")
+	totalDuration := time.Since(startTime)
+	log.Infof("=== Migration completed successfully! ===")
+	log.Infof("Total migration time: %v", totalDuration)
 }
```
cmd/go_migrate/mssql-transform.go (new file, 61 lines)

```go
package main

import (
	"encoding/binary"
	"time"
)

func mssqlUuidToBigEndian(mssqlUuid []byte) []byte {
	if len(mssqlUuid) != 16 {
		return mssqlUuid
	}
	pgUuid := make([]byte, 16)
	pgUuid[0], pgUuid[1], pgUuid[2], pgUuid[3] = mssqlUuid[3], mssqlUuid[2], mssqlUuid[1], mssqlUuid[0]
	pgUuid[4], pgUuid[5] = mssqlUuid[5], mssqlUuid[4]
	pgUuid[6], pgUuid[7] = mssqlUuid[7], mssqlUuid[6]
	copy(pgUuid[8:], mssqlUuid[8:])

	return pgUuid
}

const sridFlag = 0x20000000

func wkbToEwkbWithSrid(geometry []byte, srid int) []byte {
	if len(geometry) < 5 {
		return geometry
	}

	var byteOrder binary.ByteOrder
	if geometry[0] == 0 {
		byteOrder = binary.BigEndian
	} else {
		byteOrder = binary.LittleEndian
	}

	wkbType := byteOrder.Uint32(geometry[1:5])
	if wkbType&sridFlag != 0 {
		return geometry
	}

	ewkbType := wkbType | sridFlag

	result := make([]byte, len(geometry)+4)

	result[0] = geometry[0]

	byteOrder.PutUint32(result[1:5], ewkbType)

	byteOrder.PutUint32(result[5:9], uint32(srid))

	copy(result[9:], geometry[5:])

	return result
}

func ensureUTC(t time.Time) time.Time {
	if t.Location() == time.UTC {
		return t
	}

	return time.Date(t.Year(), t.Month(), t.Day(), t.Hour(), t.Minute(), t.Second(), t.Nanosecond(), time.UTC)
}
```
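As the byte swap in `mssqlUuidToBigEndian` implies, SQL Server's `uniqueidentifier` comes back with its first three GUID fields (4, 2 and 2 bytes) little-endian, while the final 8 bytes are already in network order; only the first three groups need reversing to reach the big-endian RFC 4122 layout that PostgreSQL's `uuid` type expects. A unit-test sketch, not part of the diff, that pins this down:

```go
package main

import (
	"bytes"
	"testing"
)

// Sketch only: verifies the field-by-field byte reversal of a SQL Server GUID.
func TestMssqlUuidToBigEndian(t *testing.T) {
	in := []byte{
		0x33, 0x22, 0x11, 0x00, // first field, little-endian
		0x55, 0x44, // second field, little-endian
		0x77, 0x66, // third field, little-endian
		0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, // copied as-is
	}
	want := []byte{
		0x00, 0x11, 0x22, 0x33,
		0x44, 0x55,
		0x66, 0x77,
		0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF,
	}
	if got := mssqlUuidToBigEndian(in); !bytes.Equal(got, want) {
		t.Fatalf("got % X, want % X", got, want)
	}
}
```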
```diff
@@ -3,7 +3,8 @@ package main
 import (
 	"context"
 	"database/sql"
-	"fmt"
+	"sync"
+	"time"
 
 	"github.com/jackc/pgx/v5/pgxpool"
 
@@ -12,6 +13,9 @@ import (
 )
 
 func processMigrationJob(sourceDb *sql.DB, targetDb *pgxpool.Pool, job MigrationJob) {
+	jobStartTime := time.Now()
+	log.Infof("Starting migration job: %s.%s [PK: %s]", job.Schema, job.Table, job.PrimaryKey)
+
 	sourceColTypes, targetColTypes, err := GetColumnTypes(sourceDb, targetDb, job)
 	if err != nil {
 		log.Fatal("Unexpected error: ", err)
@@ -21,40 +25,59 @@ func processMigrationJob(sourceDb *sql.DB, targetDb *pgxpool.Pool, job Migration
 	logColumnTypes(targetColTypes, "Target col types")
 
 	chRowsExtract := make(chan []UnknownRowValues, QueueSize)
+	chRowsTransform := make(chan []UnknownRowValues)
+	mssqlCtx := context.Background()
 
-	mssqlContext := context.Background()
-	if err := extractFromMssql(mssqlContext, job, sourceColTypes, ChunkSize, sourceDb, chRowsExtract); err != nil {
-		log.Fatal("Unexpected error extrating data from mssql: ", err)
-	}
-	close(chRowsExtract)
-
-	transformRowsMssql(job, sourceColTypes, chRowsExtract)
+	go func() {
+		log.Info("Starting extraction from MSSQL...")
+		extractStartTime := time.Now()
+		if err := extractFromMssql(mssqlCtx, job, sourceColTypes, ChunkSize, sourceDb, chRowsExtract); err != nil {
+			log.Error("Unexpected error extracting data from mssql: ", err)
+		}
+		close(chRowsExtract)
+		log.Infof("Extraction completed in %v", time.Since(extractStartTime))
+	}()
+
+	go func() {
+		log.Info("Starting transformation of rows...")
+		transformStartTime := time.Now()
+		transformRowsMssql(sourceColTypes, chRowsExtract, chRowsTransform)
+		close(chRowsTransform)
+		log.Infof("Transformation completed in %v", time.Since(transformStartTime))
+	}()
+
+	var wgPostgresLoaders sync.WaitGroup
+	postgresLoaderCtx := context.Background()
+
+	log.Infof("Starting %d PostgreSQL loader(s)...", NumLoaders)
+	loaderStartTime := time.Now()
+
+	for range NumLoaders {
+		wgPostgresLoaders.Go(func() {
+			if err := loadRowsPostgres(postgresLoaderCtx, job, sourceColTypes, targetDb, chRowsTransform); err != nil {
+				log.Error("Unexpected error loading data into postgres: ", err)
+			}
+		})
+	}
+
+	wgPostgresLoaders.Wait()
+	log.Infof("Loading completed in %v", time.Since(loaderStartTime))
+
+	totalDuration := time.Since(jobStartTime)
+	log.Infof("Migration job completed successfully! Total time: %v", totalDuration)
 }
 
 func logColumnTypes(columnTypes []ColumnType, label string) {
-	log.Info(label)
+	log.Debug(label)
 
 	for _, col := range columnTypes {
-		log.Infof("%+v", col)
-	}
-}
-
-func transformRowsMssql(job MigrationJob, columns []ColumnType, in <-chan []UnknownRowValues) {
-	for rows := range in {
-		log.Debug("Chunk received, transforming...")
-
-		for i, rowValues := range rows {
-			if i%100 == 0 {
-				logSampleRow(job, columns, rowValues, fmt.Sprintf("row %d", i))
-			}
-		}
+		log.Debugf("%+v", col)
 	}
 }
 
 func logSampleRow(job MigrationJob, columns []ColumnType, rowValues UnknownRowValues, tag string) {
 	log.Infof("[%s.%s] Sample row: (%s)", job.Schema, job.Table, tag)
 	for i, col := range columns {
-		log.Infof("%s: %v", col.Name(), rowValues[i])
+		log.Infof("%s (%T): %v", col.Name(), rowValues[i], rowValues[i])
 	}
 }
```
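Two fairly recent Go features carry the loader fan-out above: ranging over an integer (`for range NumLoaders`, Go 1.22+) and `sync.WaitGroup.Go` (Go 1.25+), which lines up with the `go 1.25.7` directive in go.mod below. A hypothetical equivalent for older toolchains, written as if it lived in the same package and reused the project's own types and `loadRowsPostgres`:

```go
package main

import (
	"context"
	"sync"

	"github.com/jackc/pgx/v5/pgxpool"
	log "github.com/sirupsen/logrus"
)

// startLoadersCompat is a sketch of the loader loop above for toolchains that
// predate sync.WaitGroup.Go and range-over-int; it is not part of the diff.
func startLoadersCompat(ctx context.Context, job MigrationJob, cols []ColumnType, db *pgxpool.Pool, in <-chan []UnknownRowValues) {
	var wg sync.WaitGroup
	for i := 0; i < NumLoaders; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			if err := loadRowsPostgres(ctx, job, cols, db, in); err != nil {
				log.Error("Unexpected error loading data into postgres: ", err)
			}
		}()
	}
	wg.Wait()
}
```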
cmd/go_migrate/transformer.go (new file, 45 lines)

```go
package main

import (
	"time"

	log "github.com/sirupsen/logrus"
)

func transformRowsMssql(columns []ColumnType, in <-chan []UnknownRowValues, out chan<- []UnknownRowValues) {
	chunkCount := 0
	totalRowsTransformed := 0

	for rows := range in {
		chunkStartTime := time.Now()
		log.Debugf("Chunk #%d received, transforming %d rows...", chunkCount+1, len(rows))

		for _, rowValues := range rows {
			for i, col := range columns {
				value := rowValues[i]
				if col.SystemType() == "uniqueidentifier" {
					if b, ok := value.([]byte); ok {
						rowValues[i] = mssqlUuidToBigEndian(b)
					}
				} else if col.SystemType() == "geometry" || col.SystemType() == "geography" {
					if b, ok := value.([]byte); ok {
						rowValues[i] = wkbToEwkbWithSrid(b, 4326)
					}
				} else if col.SystemType() == "datetime" || col.SystemType() == "datetime2" {
					if t, ok := value.(time.Time); ok {
						rowValues[i] = ensureUTC(t)
					}
				}
			}
		}

		chunkCount++
		totalRowsTransformed += len(rows)
		chunkDuration := time.Since(chunkStartTime)
		rowsPerSec := float64(len(rows)) / chunkDuration.Seconds()
		log.Infof("Transformed chunk #%d: %d rows in %v (%.0f rows/sec) - Total: %d rows",
			chunkCount, len(rows), chunkDuration, rowsPerSec, totalRowsTransformed)

		out <- rows
	}
}
```
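The transformer mutates each row in place, keyed on the source column's SQL Server system type, before forwarding the chunk to the PostgreSQL loaders. A small sketch, not part of the diff, of driving it through its channels; it assumes the `ColumnType` in this package has the same unexported `name`/`systemType` fields as the one added under scripts/mssql-copy-in/types.go:

```go
package main

import (
	"fmt"
	"time"
)

// demoTransformOneChunk pushes a single hand-built chunk through
// transformRowsMssql and prints the converted values.
func demoTransformOneChunk() {
	cols := []ColumnType{
		{name: "GlobalID", systemType: "uniqueidentifier"},
		{name: "FECHA_ALTA", systemType: "datetime2"},
	}

	in := make(chan []UnknownRowValues, 1)
	out := make(chan []UnknownRowValues, 1)

	guid := []byte{0x33, 0x22, 0x11, 0x00, 0x55, 0x44, 0x77, 0x66, 8, 9, 10, 11, 12, 13, 14, 15}
	local := time.FixedZone("UTC-4", -4*60*60)
	in <- []UnknownRowValues{{guid, time.Date(2024, 1, 1, 12, 0, 0, 0, local)}}
	close(in)

	transformRowsMssql(cols, in, out) // returns once `in` is drained

	row := (<-out)[0]
	fmt.Println(row[0]) // GUID bytes reordered into big-endian RFC 4122 order
	fmt.Println(row[1]) // wall-clock time re-labelled as UTC by ensureUTC
}
```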
go.mod (4 changed lines)

```diff
@@ -3,16 +3,18 @@ module git.ksdemosapps.com/kylesoda/go-migrate
 go 1.25.7
 
 require (
+	github.com/gaspardle/go-mssqlclrgeo v0.0.0-20160129143314-97ceabf987a4
+	github.com/google/uuid v1.6.0
 	github.com/jackc/pgx/v5 v5.9.1
 	github.com/joho/godotenv v1.5.1
 	github.com/microsoft/go-mssqldb v1.9.8
 	github.com/sirupsen/logrus v1.9.4
+	github.com/twpayne/go-geom v1.6.1
 )
 
 require (
 	github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9 // indirect
 	github.com/golang-sql/sqlexp v0.1.0 // indirect
-	github.com/google/uuid v1.6.0 // indirect
 	github.com/jackc/pgpassfile v1.0.0 // indirect
 	github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
 	github.com/jackc/puddle/v2 v2.2.2 // indirect
```
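The new direct dependencies line up with the new code further down in this compare: github.com/gaspardle/go-mssqlclrgeo converts WKB into SQL Server's CLR geometry UDT (`WkbToUdtGeo` in scripts/mssql-copy-in/main.go), github.com/twpayne/go-geom builds the random test polygons that are marshalled to WKB, and github.com/google/uuid generates the GlobalID values for the synthetic rows, which is also why it moves from an indirect to a direct requirement.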
go.sum (12 changed lines)

```diff
@@ -10,9 +10,17 @@ github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/internal v1.2.0 h1:nCYfg
 github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/internal v1.2.0/go.mod h1:ucUjca2JtSZboY8IoUqyQyuuXvwbMBVwFOm0vdQPNhA=
 github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0 h1:XRzhVemXdgvJqCH0sFfrBUTnUJSBrBf7++ypk+twtRs=
 github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0/go.mod h1:HKpQxkWaGLJ+D/5H8QRpyQXA1eKjxkFlOMwck5+33Jk=
+github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7OputlJIzU=
+github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ+JlNzwDqpNU=
+github.com/alecthomas/assert/v2 v2.10.0 h1:jjRCHsj6hBJhkmhznrCzoNpbA3zqy0fYiUcYZP/GkPY=
+github.com/alecthomas/assert/v2 v2.10.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k=
+github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc=
+github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/gaspardle/go-mssqlclrgeo v0.0.0-20160129143314-97ceabf987a4 h1:4vH4+3zfwZTqoJEFw7DsTaH1V8jgVwnyeDvNi2TxzAc=
+github.com/gaspardle/go-mssqlclrgeo v0.0.0-20160129143314-97ceabf987a4/go.mod h1:jlB0I5BIfcJBGdV6rRGPthSBfeY86RGkSAwcsldbHJc=
 github.com/golang-jwt/jwt/v5 v5.3.1 h1:kYf81DTWFe7t+1VvL7eS+jKFVWaUnK9cB1qbwn63YCY=
 github.com/golang-jwt/jwt/v5 v5.3.1/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE=
 github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9 h1:au07oEsX2xN0ktxqI+Sida1w446QrXBRJ0nee3SNZlA=
@@ -21,6 +29,8 @@ github.com/golang-sql/sqlexp v0.1.0 h1:ZCD6MBpcuOVfGVqsEmY5/4FtYiKz6tSyUv9LPEDei
 github.com/golang-sql/sqlexp v0.1.0/go.mod h1:J4ad9Vo8ZCWQ2GMrC4UCQy1JpCbwU9m3EOqtpKwwwHI=
 github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
 github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
+github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
 github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
 github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
 github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo=
@@ -48,6 +58,8 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV
 github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
 github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
+github.com/twpayne/go-geom v1.6.1 h1:iLE+Opv0Ihm/ABIcvQFGIiFBXd76oBIar9drAwHFhR4=
+github.com/twpayne/go-geom v1.6.1/go.mod h1:Kr+Nly6BswFsKM5sd31YaoWS5PeDDH2NftJTK7Gd028=
 golang.org/x/crypto v0.48.0 h1:/VRzVqiRSggnhY7gNRxPauEQ5Drw9haKdM0jqfcCFts=
 golang.org/x/crypto v0.48.0/go.mod h1:r0kV5h3qnFPlQnBSrULhlsRfryS2pmewsg+XfMgkVos=
 golang.org/x/net v0.51.0 h1:94R/GTO7mt3/4wIKpcR5gkGmRLOuE/2hNGeWq/GBIFo=
```
scripts/mssql-copy-in/main.go (new file, 312 lines)

```go
package main

import (
	"context"
	"database/sql"
	"fmt"
	"math/rand"
	"sync"
	"time"

	"github.com/gaspardle/go-mssqlclrgeo"
	"github.com/google/uuid"
	mssql "github.com/microsoft/go-mssqldb"
	log "github.com/sirupsen/logrus"
	"github.com/twpayne/go-geom"
	"github.com/twpayne/go-geom/encoding/wkb"
)

const (
	totalRows int    = 5_000_000
	chunkSize int    = 50_000
	schema    string = "Cartografia"
	table     string = "MANZANA"
	queueSize int    = 4
)

func main() {
	log.SetFormatter(&log.TextFormatter{
		FullTimestamp:   true,
		TimestampFormat: time.StampMilli,
		DisableSorting:  false,
		PadLevelText:    true,
	})
	log.SetLevel(log.DebugLevel)

	db, connError := connectToSqlServer()
	if connError != nil {
		log.Fatal("Connection error: ", connError)
	}
	defer db.Close()

	ctx := context.Background()

	maxOid, err := getMaxGDBArchiveOid(ctx, db)
	if err != nil {
		log.Fatal("Error getting max GDB_ARCHIVE_OID: ", err)
	}

	log.Infof("Starting data generation from GDB_ARCHIVE_OID: %d", maxOid+1)

	rowsChan := make(chan []UnknownRowValues, queueSize)

	var wgRowGenerator sync.WaitGroup

	wgRowGenerator.Go(func() {
		generateManzanaRows(ctx, maxOid, totalRows, chunkSize, rowsChan)
	})

	columns := []string{
		"GDB_ARCHIVE_OID",
		"ID_MANZANA",
		"ID_DISTRITO",
		"NOMBRE",
		"CODIGO",
		"CANTIDAD_TOTAL",
		"OCUPACION_RESIDENCIAL",
		"OCUPACION_NEGOCIO",
		"OCUPACION_DEPARTAMENTO",
		"INDICADOR",
		"FECHA_ALTA",
		"FECHA_ACT",
		"Shape",
		"GDB_GEOMATTR_DATA",
		"GlobalID",
		"GDB_FROM_DATE",
		"GDB_TO_DATE",
		"OBJECTID",
	}

	job := MigrationJob{
		Schema: schema,
		Table:  table,
	}

	if err := loadRowsMssql(ctx, job, columns, db, rowsChan); err != nil {
		log.Fatal("Error loading rows: ", err)
	}

	log.Info("Data generation and loading completed successfully")
	wgRowGenerator.Wait()
}

func loadRowsMssql(ctx context.Context, job MigrationJob, colNames []string, db *sql.DB, in <-chan []UnknownRowValues) error {
	chunkCount := 0
	totalRowsLoaded := 0

	for rows := range in {
		chunkStartTime := time.Now()

		tx, err := db.BeginTx(ctx, nil)
		if err != nil {
			return fmt.Errorf("error starting transaction: %w", err)
		}

		fullTableName := fmt.Sprintf("[%s].[%s]", job.Schema, job.Table)

		stmt, err := tx.PrepareContext(ctx, mssql.CopyIn(fullTableName, mssql.BulkOptions{}, colNames...))
		if err != nil {
			tx.Rollback()
			return fmt.Errorf("error preparing bulk copy statement: %w", err)
		}

		copyStartTime := time.Now()

		for _, row := range rows {
			_, err = stmt.ExecContext(ctx, row...)
			if err != nil {
				stmt.Close()
				tx.Rollback()
				return fmt.Errorf("error executing row insert: %w", err)
			}
		}

		result, err := stmt.ExecContext(ctx)
		if err != nil {
			stmt.Close()
			tx.Rollback()
			return fmt.Errorf("error flushing bulk data: %w", err)
		}

		err = stmt.Close()
		if err != nil {
			tx.Rollback()
			return fmt.Errorf("error closing statement: %w", err)
		}

		if err := tx.Commit(); err != nil {
			return fmt.Errorf("error committing transaction: %w", err)
		}

		rowsAffected, _ := result.RowsAffected()
		chunkCount++
		totalRowsLoaded += int(rowsAffected)

		copyDuration := time.Since(copyStartTime)
		chunkDuration := time.Since(chunkStartTime)
		rowsPerSec := float64(len(rows)) / chunkDuration.Seconds()

		log.Infof("Loaded chunk #%d (MSSQL): %d rows in %v (copy: %v, %.0f rows/sec) - Total: %d rows", chunkCount, len(rows), chunkDuration, copyDuration, rowsPerSec, totalRowsLoaded)
	}

	return nil
}

func generateRandomPolygonWKB() []byte {
	minX := rand.Float64()*180 - 90
	minY := rand.Float64()*180 - 90

	size := 0.01

	coords := []geom.Coord{
		{minX, minY},
		{minX + size, minY},
		{minX + size, minY + size},
		{minX, minY + size},
		{minX, minY},
	}

	polygon := geom.NewPolygon(geom.XY).MustSetCoords([][]geom.Coord{coords})

	polygonWkb, _ := wkb.Marshal(polygon, wkb.NDR)

	return polygonWkb
}

func getMaxGDBArchiveOid(ctx context.Context, db *sql.DB) (int, error) {
	var maxOid sql.NullInt64

	query := fmt.Sprintf(`
		SELECT ISNULL(MAX(GDB_ARCHIVE_OID), 0)
		FROM [%s].[%s]
	`, schema, table)

	err := db.QueryRowContext(ctx, query).Scan(&maxOid)
	if err != nil && err != sql.ErrNoRows {
		return 0, err
	}

	if !maxOid.Valid {
		return 0, nil
	}

	return int(maxOid.Int64), nil
}

func generateManzanaRows(
	ctx context.Context,
	startOid int,
	totalRows int,
	chunkSize int,
	out chan<- []UnknownRowValues,
) {
	defer close(out)

	rowsGenerated := 0
	currentChunk := make([]UnknownRowValues, 0, chunkSize)

	for i := range totalRows {
		gdbArchiveOid := startOid + i + 1
		row := generateManzanaRow(gdbArchiveOid)
		currentChunk = append(currentChunk, row)
		rowsGenerated++

		if len(currentChunk) == chunkSize {
			select {
			case out <- currentChunk:
				log.Debugf("Sent chunk with %d rows", len(currentChunk))
			case <-ctx.Done():
				log.Info("Context cancelled, stopping row generation")
				return
			}
			currentChunk = make([]UnknownRowValues, 0, chunkSize)
		}

		if rowsGenerated%100_000 == 0 {
			logManzanaSampleRow(rowsGenerated, row)
		}
	}

	if len(currentChunk) > 0 {
		select {
		case out <- currentChunk:
			log.Debugf("Sent final chunk with %d rows", len(currentChunk))
		case <-ctx.Done():
			log.Info("Context cancelled, stopping row generation")
		}
	}

	log.Infof("Finished generating %d rows", rowsGenerated)
}

func generateManzanaRow(gdbArchiveOid int) UnknownRowValues {
	dateLowerLimit, _ := time.Parse(time.RFC3339, "2020-12-31T23:59:59Z")
	dateUpperLimit, _ := time.Parse(time.RFC3339, "2025-12-31T23:59:59Z")

	rowID := gdbArchiveOid
	distrito := fmt.Sprintf("D%d", rand.Intn(100))
	nombre := generateRandomString(15)
	codigo := generateRandomString(15)
	cantidadTotal := rand.Intn(1000)
	ocupacionResidencial := rand.Intn(1000)
	ocupacionNegocio := rand.Intn(1000)
	ocupacionDepartamento := rand.Intn(1000)
	indicador := rand.Intn(10000)
	fechaAlta := generateRandomTimestamp(dateLowerLimit, dateUpperLimit)
	fechaAct := generateRandomTimestamp(dateLowerLimit, dateUpperLimit)
	shapeWKB := generateRandomPolygonWKB()
	geoData := []byte{}
	id := uuid.New()
	globalID := id[:]
	gdbFromDate := fechaAct
	gdbToDate, _ := time.Parse(time.RFC3339, "9999-12-31T23:59:59Z")
	objectID := gdbArchiveOid

	shapeMssql, err := mssqlclrgeo.WkbToUdtGeo(shapeWKB, false)
	if err != nil {
		log.Errorf("Error convirtiendo WKB a formato MSSQL: %v", err)
		shapeMssql = []byte{}
	}

	return UnknownRowValues{
		gdbArchiveOid,
		rowID,
		distrito,
		nombre,
		codigo,
		cantidadTotal,
		ocupacionResidencial,
		ocupacionNegocio,
		ocupacionDepartamento,
		indicador,
		fechaAlta,
		fechaAct,
		shapeMssql,
		geoData,
		globalID,
		gdbFromDate,
		gdbToDate,
		objectID,
	}
}

func generateRandomTimestamp(min, max time.Time) time.Time {
	minUnix := min.Unix()
	maxUnix := max.Unix()

	delta := maxUnix - minUnix
	secAleatorios := rand.Int63n(delta)

	return time.Unix(minUnix+secAleatorios, 0)
}

func generateRandomString(maxLength int) string {
	const charset = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
	length := min(rand.Intn(maxLength)+1, maxLength)

	b := make([]byte, length)
	for i := range b {
		b[i] = charset[rand.Intn(len(charset))]
	}
	return string(b)
}
```
scripts/mssql-copy-in/types.go (new file, 52 lines)

```go
package main

type ColumnType struct {
	name string

	hasMaxLength      bool
	hasPrecisionScale bool

	userType    string
	systemType  string
	unifiedType string
	nullable    bool
	maxLength   int64
	precision   int64
	scale       int64
}

func (c *ColumnType) Name() string {
	return c.name
}

func (c *ColumnType) UserType() string {
	return c.userType
}

func (c *ColumnType) SystemType() string {
	return c.systemType
}

func (c *ColumnType) Length() (length int64, ok bool) {
	return c.maxLength, c.hasMaxLength
}

func (c *ColumnType) DecimalSize() (precision, scale int64, ok bool) {
	return c.precision, c.scale, c.hasPrecisionScale
}

func (c *ColumnType) Nullable() bool {
	return c.nullable
}

func (c *ColumnType) Type() string {
	return c.unifiedType
}

type MigrationJob struct {
	Schema     string
	Table      string
	PrimaryKey string
}

type UnknownRowValues = []any
```
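This `ColumnType` mirrors the accessor set of database/sql's `ColumnType` (Name, Length, DecimalSize, Nullable), which makes it straightforward to populate from a live result set. A hypothetical helper, not part of the diff, showing one way such values could be filled in; mapping `systemType` to `DatabaseTypeName()` is an assumption here:

```go
package main

import "database/sql"

// columnTypesFromRows sketches how the struct above could be populated from a
// query result via database/sql's ColumnType accessors, which it mirrors.
func columnTypesFromRows(rows *sql.Rows) ([]ColumnType, error) {
	sqlCols, err := rows.ColumnTypes()
	if err != nil {
		return nil, err
	}

	cols := make([]ColumnType, 0, len(sqlCols))
	for _, c := range sqlCols {
		col := ColumnType{name: c.Name(), systemType: c.DatabaseTypeName()}
		col.maxLength, col.hasMaxLength = c.Length()
		col.precision, col.scale, col.hasPrecisionScale = c.DecimalSize()
		if nullable, ok := c.Nullable(); ok {
			col.nullable = nullable
		}
		cols = append(cols, col)
	}
	return cols, nil
}
```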
scripts/mssql-copy-in/utils.go (new file, 81 lines)

```go
package main

import (
	"context"
	"database/sql"
	"fmt"
	"time"

	"git.ksdemosapps.com/kylesoda/go-migrate/internal/app/config"
	log "github.com/sirupsen/logrus"
)

func connectToSqlServer() (*sql.DB, error) {
	db, err := sql.Open("sqlserver", config.App.SourceDbUrl)
	if err != nil {
		return nil, fmt.Errorf("Unable to connect to sqlserver: %w", err)
	}

	ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
	defer cancel()

	if err := db.PingContext(ctx); err != nil {
		return nil, fmt.Errorf("Unable to ping sqlserver: %w", err)
	}

	return db, nil
}

func Map[T any, V any](input []T, mapper func(T) V) []V {
	result := make([]V, len(input))

	for i, v := range input {
		result[i] = mapper(v)
	}

	return result
}

func logManzanaSampleRow(id int, rowValues UnknownRowValues) {
	log.Infof(`
Sample row #%d:
GDB_ARCHIVE_OID (%T): %v
ID_MANZANA (%T): %v
ID_DISTRITO (%T): %v
NOMBRE (%T): %v
CODIGO (%T): %v
CANTIDAD_TOTAL (%T): %v
OCUPACION_RESIDENCIAL (%T): %v
OCUPACION_NEGOCIO (%T): %v
OCUPACION_DEPARTAMENTO (%T): %v
INDICADOR (%T): %v
FECHA_ALTA (%T): %v
FECHA_ACT (%T): %v
Shape (%T): %v
GDB_GEOMATTR_DATA (%T): %v
GlobalID (%T): %v
GDB_FROM_DATE (%T): %v
GDB_TO_DATE (%T): %v
OBJECTID (%T): %v
`,
		id,
		rowValues[0], rowValues[0],
		rowValues[1], rowValues[1],
		rowValues[2], rowValues[2],
		rowValues[3], rowValues[3],
		rowValues[4], rowValues[4],
		rowValues[5], rowValues[5],
		rowValues[6], rowValues[6],
		rowValues[7], rowValues[7],
		rowValues[8], rowValues[8],
		rowValues[9], rowValues[9],
		rowValues[10], rowValues[10],
		rowValues[11], rowValues[11],
		rowValues[12], rowValues[12],
		rowValues[13], rowValues[13],
		rowValues[14], rowValues[14],
		rowValues[15], rowValues[15],
		rowValues[16], rowValues[16],
		rowValues[17], rowValues[17],
	)
}
```
scripts/wkb-to-ewkb/main.go (new file, 59 lines)

```go
package main

import (
	"encoding/binary"
	"fmt"
)

const sridFlag = 0x20000000

func wkbToEwkbWithSrid(geometry []byte, srid int) []byte {
	if len(geometry) < 5 {
		return geometry
	}

	var byteOrder binary.ByteOrder
	if geometry[0] == 0 {
		byteOrder = binary.BigEndian
	} else {
		byteOrder = binary.LittleEndian
	}

	wkbType := byteOrder.Uint32(geometry[1:5])
	if wkbType&sridFlag != 0 {
		return geometry
	}

	ewkbType := wkbType | sridFlag

	result := make([]byte, len(geometry)+4)

	result[0] = geometry[0]

	byteOrder.PutUint32(result[1:5], ewkbType)

	byteOrder.PutUint32(result[5:9], uint32(srid))

	copy(result[9:], geometry[5:])

	return result
}

func main() {
	shape := []byte{
		1, 3, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 217, 61, 121, 88, 168, 57, 83, 192,
		60, 78, 209, 145, 92, 222, 39, 192, 232, 106, 43, 246, 151, 57, 83, 192,
		60, 78, 209, 145, 92, 222, 39, 192, 232, 106, 43, 246, 151, 57, 83, 192,
		174, 182, 98, 127, 217, 221, 39, 192, 217, 61, 121, 88, 168, 57, 83, 192,
		174, 182, 98, 127, 217, 221, 39, 192, 217, 61, 121, 88, 168, 57, 83, 192,
		60, 78, 209, 145, 92, 222, 39, 192,
	}

	srid := 4326
	result := wkbToEwkbWithSrid(shape, srid)

	fmt.Printf("WKB Original (len): %d\n", len(shape))
	fmt.Printf("EWKB Result (len): %d\n", len(result))
	fmt.Printf("Primeros bytes (original): %v\n", shape[:10])
	fmt.Printf("Primeros bytes (resultado): %v\n", result[:10])
}
```
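Assuming the byte listing above survived extraction intact, the sample shape is 93 bytes (a 13-byte little-endian WKB polygon header plus five 16-byte points), so the script would print something like:

```
WKB Original (len): 93
EWKB Result (len): 97
Primeros bytes (original): [1 3 0 0 0 1 0 0 0 5]
Primeros bytes (resultado): [1 3 0 0 32 230 16 0 0 1]
```

That is, the little-endian type word `3 0 0 0` becomes `3 0 0 32` (the 0x20000000 SRID flag set) and the four SRID bytes `230 16 0 0` (4326) are spliced in before the rest of the geometry.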