diff --git a/scripts/mssql-copy-in/main.go b/scripts/mssql-copy-in/main.go
index a913904..eafaa3e 100644
--- a/scripts/mssql-copy-in/main.go
+++ b/scripts/mssql-copy-in/main.go
@@ -12,9 +12,10 @@ import (
 )
 
 const (
-	totalRows int = 1_000_000
-	chunkSize int = 50_000
-	queueSize int = 4
+	// totalRows int = 1_000_000
+	totalRows int = 10_000
+	chunkSize int = 50_000
+	queueSize int = 4
 )
 
 func main() {
@@ -40,5 +41,13 @@ func main() {
 		seedManzanas(ctx, db)
 	})
 
+	wgSeed.Go(func() {
+		seedPuertos(ctx, db)
+	})
+
+	wgSeed.Go(func() {
+		seedSiteHolderAttach(ctx, db)
+	})
+
 	wgSeed.Wait()
 }
diff --git a/scripts/mssql-copy-in/site-holder-attach.go b/scripts/mssql-copy-in/site-holder-attach.go
new file mode 100644
index 0000000..1283f64
--- /dev/null
+++ b/scripts/mssql-copy-in/site-holder-attach.go
@@ -0,0 +1,304 @@
+package main
+
+import (
+	"bytes"
+	"context"
+	"database/sql"
+	"errors"
+	"fmt"
+	"math/rand"
+	"sync"
+	"time"
+
+	"github.com/google/uuid"
+	log "github.com/sirupsen/logrus"
+)
+
+// siteHolderAttachJob names the schema and table that receive the generated rows.
+var siteHolderAttachJob = MigrationJob{
+	Schema: "Infraestructura",
+	Table:  "SITE_HOLDER__ATTACH",
+}
+
+// Date values shared by every generated row. They are built once with
+// time.Date rather than re-parsed (with the parse error discarded) per row.
+var (
+	siteHolderAttachFromDateMin = time.Date(2020, time.December, 31, 23, 59, 59, 0, time.UTC)
+	siteHolderAttachFromDateMax = time.Date(2025, time.December, 31, 23, 59, 59, 0, time.UTC)
+	siteHolderAttachToDate      = time.Date(9999, time.December, 31, 23, 59, 59, 0, time.UTC)
+)
+
+// siteHolderAttachFileKinds pairs each MIME type with its file extension so a
+// generated row can carry a CONTENT_TYPE that agrees with its ATT_NAME.
+var siteHolderAttachFileKinds = []struct {
+	contentType string
+	extension   string
+}{
+	{"text/plain", ".txt"},
+	{"application/pdf", ".pdf"},
+	{"image/jpeg", ".jpg"},
+	{"image/png", ".png"},
+	{"application/msword", ".doc"},
+	{"application/vnd.openxmlformats-officedocument.wordprocessingml.document", ".docx"},
+	{"text/csv", ".csv"},
+	{"application/json", ".json"},
+}
+
+// seedSiteHolderAttach generates totalRows synthetic rows and loads them into
+// SITE_HOLDER__ATTACH through loadRowsMssql, numbering GDB_ARCHIVE_OID from the
+// table's current maximum plus one.
+//
+// Failing to read the current maximum ends the process via log.Fatal; a load
+// failure is returned wrapped, after the generator goroutine has been stopped.
+//
+// NOTE(review): the call site added in main.go discards the returned error, so
+// a load failure is currently silent there.
+func seedSiteHolderAttach(ctx context.Context, db *sql.DB) error {
+	maxOid, err := getMaxGDBArchiveOidForAttach(ctx, db)
+	if err != nil {
+		log.Fatal("Error getting max GDB_ARCHIVE_OID: ", err)
+	}
+
+	log.Infof("Starting SITE_HOLDER__ATTACH data generation from GDB_ARCHIVE_OID: %d", maxOid+1)
+
+	// The generator runs under its own cancellable context so it can be
+	// released if the loader fails while the generator is blocked on a send.
+	genCtx, cancelGen := context.WithCancel(ctx)
+	defer cancelGen()
+
+	rowsChan := make(chan []UnknownRowValues, queueSize)
+
+	var wgRowGenerator sync.WaitGroup
+
+	wgRowGenerator.Go(func() {
+		generateSiteHolderAttachRows(genCtx, maxOid, totalRows, chunkSize, rowsChan)
+	})
+
+	columns := []string{
+		"GDB_ARCHIVE_OID",
+		"REL_GLOBALID",
+		"CONTENT_TYPE",
+		"ATT_NAME",
+		"DATA_SIZE",
+		"DATA",
+		"GLOBALID",
+		"GDB_FROM_DATE",
+		"GDB_TO_DATE",
+		"ATTACHMENTID",
+	}
+
+	if err := loadRowsMssql(ctx, siteHolderAttachJob, columns, db, rowsChan); err != nil {
+		// Stop the producer before returning; otherwise it can stay blocked
+		// on rowsChan once nothing is receiving.
+		cancelGen()
+		wgRowGenerator.Wait()
+		return fmt.Errorf("Error loading rows (SITE_HOLDER__ATTACH): %w", err)
+	}
+
+	wgRowGenerator.Wait()
+	log.Info("Data generation and loading completed successfully (SITE_HOLDER__ATTACH)")
+
+	return nil
+}
+
+// getMaxGDBArchiveOidForAttach returns the largest GDB_ARCHIVE_OID stored in
+// SITE_HOLDER__ATTACH. It returns 0 when the scan yields NULL or no row.
+func getMaxGDBArchiveOidForAttach(ctx context.Context, db *sql.DB) (int, error) {
+	var maxOid sql.NullInt64
+
+	query := fmt.Sprintf(`
+		SELECT ISNULL(MAX(GDB_ARCHIVE_OID), 0)
+		FROM [%s].[%s]
+	`, siteHolderAttachJob.Schema, siteHolderAttachJob.Table)
+
+	err := db.QueryRowContext(ctx, query).Scan(&maxOid)
+	// errors.Is also matches a wrapped sql.ErrNoRows.
+	if err != nil && !errors.Is(err, sql.ErrNoRows) {
+		return 0, err
+	}
+
+	if !maxOid.Valid {
+		return 0, nil
+	}
+
+	return int(maxOid.Int64), nil
+}
+
+// generateSiteHolderAttachRows builds totalRows rows with GDB_ARCHIVE_OID
+// values startOid+1 .. startOid+totalRows and sends them on out in slices of
+// at most chunkSize rows. out is closed when the function returns.
+//
+// Generation stops early once ctx is done.
+//
+// NOTE(review): DATA payloads are 100 B to 100 kB each, so a full chunk of
+// chunkSize rows can be large; confirm the memory budget for queued chunks.
+func generateSiteHolderAttachRows(
+	ctx context.Context,
+	startOid int,
+	totalRows int,
+	chunkSize int,
+	out chan<- []UnknownRowValues,
+) {
+	defer close(out)
+
+	rowsGenerated := 0
+	currentChunk := make([]UnknownRowValues, 0, chunkSize)
+
+	for i := range totalRows {
+		// Check per row as well as per send, so cancellation does not have to
+		// wait for a whole chunk of payloads to be generated first.
+		if ctx.Err() != nil {
+			log.Info("Context cancelled, stopping SITE_HOLDER__ATTACH row generation")
+			return
+		}
+
+		gdbArchiveOid := startOid + i + 1
+		row := generateSiteHolderAttachRow(gdbArchiveOid)
+		currentChunk = append(currentChunk, row)
+		rowsGenerated++
+
+		if len(currentChunk) == chunkSize {
+			select {
+			case out <- currentChunk:
+				log.Debugf("Sent SITE_HOLDER__ATTACH chunk with %d rows", len(currentChunk))
+			case <-ctx.Done():
+				log.Info("Context cancelled, stopping SITE_HOLDER__ATTACH row generation")
+				return
+			}
+			// The sent slice now belongs to the receiver; start a fresh one.
+			currentChunk = make([]UnknownRowValues, 0, chunkSize)
+		}
+
+		if rowsGenerated%100_000 == 0 {
+			logSiteHolderAttachSampleRow(rowsGenerated, row)
+		}
+	}
+
+	if len(currentChunk) > 0 {
+		select {
+		case out <- currentChunk:
+			log.Debugf("Sent final SITE_HOLDER__ATTACH chunk with %d rows", len(currentChunk))
+		case <-ctx.Done():
+			log.Info("Context cancelled, stopping SITE_HOLDER__ATTACH row generation")
+			// Do not fall through to the "Finished" message for an aborted run.
+			return
+		}
+	}
+
+	log.Infof("Finished generating %d SITE_HOLDER__ATTACH rows", rowsGenerated)
+}
+
+// generateSiteHolderAttachRow builds one row whose values are ordered like the
+// column list in seedSiteHolderAttach. CONTENT_TYPE and the ATT_NAME extension
+// are taken from the same file kind so that they agree with each other.
+func generateSiteHolderAttachRow(gdbArchiveOid int) UnknownRowValues {
+	relGlobalID := newSiteHolderAttachUUIDBytes()
+	kind := siteHolderAttachFileKinds[rand.Intn(len(siteHolderAttachFileKinds))]
+	attName := generateRandomString(20) + kind.extension
+	binaryData := generateRandomBinaryContent()
+	globalID := newSiteHolderAttachUUIDBytes()
+	gdbFromDate := generateRandomTimestamp(siteHolderAttachFromDateMin, siteHolderAttachFromDateMax)
+	attachmentID := rand.Intn(10000) + 1
+
+	return UnknownRowValues{
+		gdbArchiveOid,
+		relGlobalID,
+		kind.contentType,
+		attName,
+		len(binaryData), // DATA_SIZE
+		binaryData,
+		globalID,
+		gdbFromDate,
+		siteHolderAttachToDate,
+		attachmentID,
+	}
+}
+
+// newSiteHolderAttachUUIDBytes returns the bytes of a new UUID from uuid.New.
+func newSiteHolderAttachUUIDBytes() []byte {
+	id := uuid.New()
+	return id[:]
+}
+
+// generateRandomContentType returns one of the known MIME types at random.
+func generateRandomContentType() string {
+	return siteHolderAttachFileKinds[rand.Intn(len(siteHolderAttachFileKinds))].contentType
+}
+
+// generateRandomAttachmentName returns a random base name from
+// generateRandomString(20) followed by one of the known file extensions.
+func generateRandomAttachmentName() string {
+	baseName := generateRandomString(20)
+	kind := siteHolderAttachFileKinds[rand.Intn(len(siteHolderAttachFileKinds))]
+	return baseName + kind.extension
+}
+
+// generateRandomBinaryContent returns a payload of newline-separated random
+// strings whose length is exactly one of the sizes in sizeOptions.
+func generateRandomBinaryContent() []byte {
+	sizeOptions := []int{100, 500, 1000, 5000, 10000, 50000, 100000}
+	size := sizeOptions[rand.Intn(len(sizeOptions))]
+
+	var buf bytes.Buffer
+	// Reserve the target size plus slack for the last line up front.
+	buf.Grow(size + 150)
+
+	// Open with a random number of lines, stopping once the target size is
+	// reached so no bytes are generated only to be cut off.
+	lineCount := rand.Intn(size/50) + 1
+	for range lineCount {
+		if buf.Len() >= size {
+			break
+		}
+		buf.WriteString(generateRandomString(rand.Intn(80) + 20))
+		buf.WriteString("\n")
+	}
+
+	// Pad with further lines until the target size is reached.
+	for buf.Len() < size {
+		buf.WriteString(generateRandomString(rand.Intn(100) + 50))
+		buf.WriteString("\n")
+	}
+
+	// The last line may overshoot; trim to the exact size.
+	buf.Truncate(size)
+
+	return buf.Bytes()
+}
+
+// logSiteHolderAttachSampleRow logs a readable summary of one generated row,
+// reporting the binary columns by placeholder or size rather than content.
+func logSiteHolderAttachSampleRow(id int, rowValues UnknownRowValues) {
+	if len(rowValues) < 10 {
+		log.Warnf("Sample SITE_HOLDER__ATTACH row #%d has %d values, expected 10", id, len(rowValues))
+		return
+	}
+
+	// Comma-ok assertion: a DATA value that is not []byte is reported as
+	// 0 bytes instead of panicking inside a logging helper.
+	dataBytes, _ := rowValues[5].([]byte)
+
+	log.Infof(`
+Sample SITE_HOLDER__ATTACH row #%d:
+GDB_ARCHIVE_OID: %v
+REL_GLOBALID: [binary UUID]
+CONTENT_TYPE: %v
+ATT_NAME: %v
+DATA_SIZE: %v
+DATA: [%d bytes of binary content]
+GLOBALID: [binary UUID]
+GDB_FROM_DATE: %v
+GDB_TO_DATE: %v
+ATTACHMENTID: %v
+`,
+		id,
+		rowValues[0],
+		rowValues[2],
+		rowValues[3],
+		rowValues[4],
+		len(dataBytes),
+		rowValues[7],
+		rowValues[8],
+		rowValues[9],
+	)
+}