Skip to content

Commit 9bee340

Browse files
craig[bot]nameisbhaskar
andcommitted
Merge #152427
152427: workload_generator: minor refactoring r=shailendra-patel a=nameisbhaskar A minor refactoring to avoid passing variables to the function and editing the same there. Also, the processDDLRecord function can be reused later to support DDL provided as a flag, rather than relying on debug to always have the "create" statements. Epic: None Release: None Co-authored-by: Bhaskarjyoti Bora <[email protected]>
2 parents 07233e1 + 566ff47 commit 9bee340

File tree

1 file changed

+32
-42
lines changed

1 file changed

+32
-42
lines changed

pkg/workload/workload_generator/schema_generator.go

Lines changed: 32 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,11 @@ func generateDDLFromReader(
142142
// collect raw statements
143143
tableStatements := make(map[string]string)
144144
order := make([]string, 0)
145-
seen := map[string]bool{}
145+
// seen is maintained to ensure that a table is processed only once.
146+
// There is a possibility that the table was re-created multiple times.
147+
seen := make(map[string]struct{})
148+
// schemaReCache is maintained to avoid recompiling regexes for each schema name
149+
// encountered in the TSV file.
146150
schemaReCache := map[string]*regexp.Regexp{}
147151

148152
for {
@@ -159,16 +163,30 @@ func generateDDLFromReader(
159163
if len(rec) == 0 {
160164
break
161165
}
166+
// 1) Quick filter
167+
if rec[colIndex[databaseName]] != dbName ||
168+
rec[colIndex[descriptorType]] != "table" ||
169+
rec[colIndex[constSchemaName]] != "public" {
170+
continue
171+
}
162172

163-
processDDLRecord(
164-
rec,
165-
colIndex,
166-
dbName,
167-
schemaReCache,
168-
seen,
169-
&order,
170-
tableStatements,
171-
)
173+
schemaName := rec[colIndex[constSchemaName]]
174+
_, ok := schemaReCache[schemaName]
175+
if !ok {
176+
schemaReCache[schemaName] = regexp.MustCompile(`\b` + regexp.QuoteMeta(schemaName) + `\.`)
177+
}
178+
179+
// 2) Build identifiers
180+
fullTableName, statement := processDDLRecord(dbName, schemaName,
181+
rec[colIndex[descriptorName]], // table name
182+
rec[colIndex[createStatement]], // statement
183+
schemaReCache[schemaName])
184+
if _, ok := seen[fullTableName]; !ok && fullTableName != "" {
185+
// 5) Record ordering & statement
186+
tableStatements[fullTableName] = statement
187+
order = append(order, fullTableName)
188+
seen[fullTableName] = struct{}{}
189+
}
172190
}
173191
return buildSchemas(order, tableStatements), buildCreateStmts(tableStatements), nil
174192
}
@@ -486,47 +504,19 @@ func openCreateStatementsTSV(zipDir string) (*os.File, error) {
486504

487505
// processDDLRecord inspects one TSV row and, if it represents a public table
488506
// in dbName, normalizes its CREATE TABLE stmt and appends it to order/statements.
507+
// It returns the fully qualified table name and table statements.
489508
func processDDLRecord(
490-
rec []string,
491-
colIndex map[string]int,
492-
dbName string,
493-
schemaReCache map[string]*regexp.Regexp,
494-
seen map[string]bool,
495-
order *[]string,
496-
tableStatements map[string]string,
497-
) {
498-
// 1) Quick filter
499-
if rec[colIndex[databaseName]] != dbName ||
500-
rec[colIndex[descriptorType]] != "table" ||
501-
rec[colIndex[constSchemaName]] != "public" {
502-
return
503-
}
504-
505-
// 2) Build identifiers
506-
schemaName := rec[colIndex[constSchemaName]]
507-
stmt := rec[colIndex[createStatement]]
508-
tableName := rec[colIndex[descriptorName]]
509-
fullTable := fmt.Sprintf("%s.%s.%s", dbName, schemaName, tableName)
510-
509+
dbName, schemaName, tableName, stmt string, pattern *regexp.Regexp,
510+
) (string, string) {
511511
// 3) Normalize schema-qualified references
512-
pattern, ok := schemaReCache[schemaName]
513-
if !ok {
514-
pattern = regexp.MustCompile(`\b` + regexp.QuoteMeta(schemaName) + `\.`)
515-
schemaReCache[schemaName] = pattern
516-
}
517512
stmt = pattern.ReplaceAllString(stmt, dbName+"."+schemaName+".")
518513

519514
// 4) Ensure IF NOT EXISTS
520515
if !ifNotExistsRe.MatchString(stmt) {
521516
stmt = createTableRe.ReplaceAllString(stmt, "${1}IF NOT EXISTS ")
522517
}
523518

524-
// 5) Record ordering & statement
525-
if !seen[fullTable] {
526-
*order = append(*order, fullTable)
527-
seen[fullTable] = true
528-
}
529-
tableStatements[fullTable] = stmt
519+
return fmt.Sprintf("%s.%s.%s", dbName, schemaName, tableName), stmt
530520
}
531521

532522
// buildWorkloadSchema constructs the complete workload schema used for data generation.

0 commit comments

Comments
 (0)