refactor: update totalRows constant and add siteHolderAttach data generation logic; enhance row generation and loading process
This commit is contained in:
227
scripts/mssql-copy-in/site-holder-attach.go
Normal file
227
scripts/mssql-copy-in/site-holder-attach.go
Normal file
@@ -0,0 +1,227 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
var siteHolderAttachJob = MigrationJob{
|
||||
Schema: "Infraestructura",
|
||||
Table: "SITE_HOLDER__ATTACH",
|
||||
}
|
||||
|
||||
func seedSiteHolderAttach(ctx context.Context, db *sql.DB) error {
|
||||
maxOid, err := getMaxGDBArchiveOidForAttach(ctx, db)
|
||||
if err != nil {
|
||||
log.Fatal("Error getting max GDB_ARCHIVE_OID: ", err)
|
||||
}
|
||||
|
||||
log.Infof("Starting SITE_HOLDER__ATTACH data generation from GDB_ARCHIVE_OID: %d", maxOid+1)
|
||||
|
||||
rowsChan := make(chan []UnknownRowValues, queueSize)
|
||||
|
||||
var wgRowGenerator sync.WaitGroup
|
||||
|
||||
wgRowGenerator.Go(func() {
|
||||
generateSiteHolderAttachRows(ctx, maxOid, totalRows, chunkSize, rowsChan)
|
||||
})
|
||||
|
||||
columns := []string{
|
||||
"GDB_ARCHIVE_OID",
|
||||
"REL_GLOBALID",
|
||||
"CONTENT_TYPE",
|
||||
"ATT_NAME",
|
||||
"DATA_SIZE",
|
||||
"DATA",
|
||||
"GLOBALID",
|
||||
"GDB_FROM_DATE",
|
||||
"GDB_TO_DATE",
|
||||
"ATTACHMENTID",
|
||||
}
|
||||
|
||||
if err := loadRowsMssql(ctx, siteHolderAttachJob, columns, db, rowsChan); err != nil {
|
||||
return fmt.Errorf("Error loading rows (SITE_HOLDER__ATTACH): %w", err)
|
||||
}
|
||||
|
||||
log.Info("Data generation and loading completed successfully (SITE_HOLDER__ATTACH)")
|
||||
wgRowGenerator.Wait()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func getMaxGDBArchiveOidForAttach(ctx context.Context, db *sql.DB) (int, error) {
|
||||
var maxOid sql.NullInt64
|
||||
|
||||
query := fmt.Sprintf(`
|
||||
SELECT ISNULL(MAX(GDB_ARCHIVE_OID), 0)
|
||||
FROM [%s].[%s]
|
||||
`, siteHolderAttachJob.Schema, siteHolderAttachJob.Table)
|
||||
|
||||
err := db.QueryRowContext(ctx, query).Scan(&maxOid)
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if !maxOid.Valid {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
return int(maxOid.Int64), nil
|
||||
}
|
||||
|
||||
func generateSiteHolderAttachRows(
|
||||
ctx context.Context,
|
||||
startOid int,
|
||||
totalRows int,
|
||||
chunkSize int,
|
||||
out chan<- []UnknownRowValues,
|
||||
) {
|
||||
defer close(out)
|
||||
|
||||
rowsGenerated := 0
|
||||
currentChunk := make([]UnknownRowValues, 0, chunkSize)
|
||||
|
||||
for i := range totalRows {
|
||||
gdbArchiveOid := startOid + i + 1
|
||||
row := generateSiteHolderAttachRow(gdbArchiveOid)
|
||||
currentChunk = append(currentChunk, row)
|
||||
rowsGenerated++
|
||||
|
||||
if len(currentChunk) == chunkSize {
|
||||
select {
|
||||
case out <- currentChunk:
|
||||
log.Debugf("Sent SITE_HOLDER__ATTACH chunk with %d rows", len(currentChunk))
|
||||
case <-ctx.Done():
|
||||
log.Info("Context cancelled, stopping SITE_HOLDER__ATTACH row generation")
|
||||
return
|
||||
}
|
||||
currentChunk = make([]UnknownRowValues, 0, chunkSize)
|
||||
}
|
||||
|
||||
if rowsGenerated%100_000 == 0 {
|
||||
logSiteHolderAttachSampleRow(rowsGenerated, row)
|
||||
}
|
||||
}
|
||||
|
||||
if len(currentChunk) > 0 {
|
||||
select {
|
||||
case out <- currentChunk:
|
||||
log.Debugf("Sent final SITE_HOLDER__ATTACH chunk with %d rows", len(currentChunk))
|
||||
case <-ctx.Done():
|
||||
log.Info("Context cancelled, stopping SITE_HOLDER__ATTACH row generation")
|
||||
}
|
||||
}
|
||||
|
||||
log.Infof("Finished generating %d SITE_HOLDER__ATTACH rows", rowsGenerated)
|
||||
}
|
||||
|
||||
func generateSiteHolderAttachRow(gdbArchiveOid int) UnknownRowValues {
|
||||
dateLowerLimit, _ := time.Parse(time.RFC3339, "2020-12-31T23:59:59Z")
|
||||
dateUpperLimit, _ := time.Parse(time.RFC3339, "2025-12-31T23:59:59Z")
|
||||
|
||||
relGlobalID, _ := uuid.New().MarshalBinary()
|
||||
contentType := generateRandomContentType()
|
||||
attName := generateRandomAttachmentName()
|
||||
binaryData := generateRandomBinaryContent()
|
||||
dataSize := len(binaryData)
|
||||
globalID, _ := uuid.New().MarshalBinary()
|
||||
gdbFromDate := generateRandomTimestamp(dateLowerLimit, dateUpperLimit)
|
||||
gdbToDate, _ := time.Parse(time.RFC3339, "9999-12-31T23:59:59Z")
|
||||
attachmentID := rand.Intn(10000) + 1
|
||||
|
||||
return UnknownRowValues{
|
||||
gdbArchiveOid,
|
||||
relGlobalID,
|
||||
contentType,
|
||||
attName,
|
||||
dataSize,
|
||||
binaryData,
|
||||
globalID,
|
||||
gdbFromDate,
|
||||
gdbToDate,
|
||||
attachmentID,
|
||||
}
|
||||
}
|
||||
|
||||
// generateRandomContentType picks one MIME type at random from a fixed list
// of eight text, document, image and data types.
func generateRandomContentType() string {
	mimeTypes := [...]string{
		"text/plain",
		"application/pdf",
		"image/jpeg",
		"image/png",
		"application/msword",
		"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
		"text/csv",
		"application/json",
	}
	pick := rand.Intn(len(mimeTypes))
	return mimeTypes[pick]
}
|
||||
|
||||
func generateRandomAttachmentName() string {
|
||||
extensions := []string{".txt", ".pdf", ".jpg", ".png", ".doc", ".docx", ".csv", ".json"}
|
||||
baseName := generateRandomString(20)
|
||||
extension := extensions[rand.Intn(len(extensions))]
|
||||
return baseName + extension
|
||||
}
|
||||
|
||||
func generateRandomBinaryContent() []byte {
|
||||
sizeOptions := []int{100, 500, 1000, 5000, 10000, 50000, 100000}
|
||||
size := sizeOptions[rand.Intn(len(sizeOptions))]
|
||||
|
||||
var buf bytes.Buffer
|
||||
|
||||
lineCount := rand.Intn(size/50) + 1
|
||||
for range lineCount {
|
||||
line := generateRandomString(rand.Intn(80) + 20)
|
||||
buf.WriteString(line)
|
||||
buf.WriteString("\n")
|
||||
}
|
||||
|
||||
for buf.Len() < size {
|
||||
randomText := generateRandomString(rand.Intn(100) + 50)
|
||||
buf.WriteString(randomText)
|
||||
buf.WriteString("\n")
|
||||
}
|
||||
|
||||
result := buf.Bytes()
|
||||
if len(result) > size {
|
||||
result = result[:size]
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
func logSiteHolderAttachSampleRow(id int, rowValues UnknownRowValues) {
|
||||
dataBytes := rowValues[5].([]byte)
|
||||
log.Infof(`
|
||||
Sample SITE_HOLDER__ATTACH row #%d:
|
||||
GDB_ARCHIVE_OID: %v
|
||||
REL_GLOBALID: [binary UUID]
|
||||
CONTENT_TYPE: %v
|
||||
ATT_NAME: %v
|
||||
DATA_SIZE: %v
|
||||
DATA: [%d bytes of binary content]
|
||||
GLOBALID: [binary UUID]
|
||||
GDB_FROM_DATE: %v
|
||||
GDB_TO_DATE: %v
|
||||
ATTACHMENTID: %v
|
||||
`,
|
||||
id,
|
||||
rowValues[0],
|
||||
rowValues[2],
|
||||
rowValues[3],
|
||||
rowValues[4],
|
||||
len(dataBytes),
|
||||
rowValues[7],
|
||||
rowValues[8],
|
||||
rowValues[9],
|
||||
)
|
||||
}
|
||||
Reference in New Issue
Block a user