@@ -142,7 +142,11 @@ func generateDDLFromReader(
142
142
// collect raw statements
143
143
tableStatements := make (map [string ]string )
144
144
order := make ([]string , 0 )
145
- seen := map [string ]bool {}
145
+ // seen is maintained to ensure that a table is processed only once.
146
+ // There is a possibility that the table was re-created multiple times.
147
+ seen := make (map [string ]struct {})
148
+ // schemaReCache is maintained to avoid recompiling regexes for each schema name
149
+ // encountered in the TSV file.
146
150
schemaReCache := map [string ]* regexp.Regexp {}
147
151
148
152
for {
@@ -159,16 +163,30 @@ func generateDDLFromReader(
159
163
if len (rec ) == 0 {
160
164
break
161
165
}
166
+ // 1) Quick filter
167
+ if rec [colIndex [databaseName ]] != dbName ||
168
+ rec [colIndex [descriptorType ]] != "table" ||
169
+ rec [colIndex [constSchemaName ]] != "public" {
170
+ continue
171
+ }
162
172
163
- processDDLRecord (
164
- rec ,
165
- colIndex ,
166
- dbName ,
167
- schemaReCache ,
168
- seen ,
169
- & order ,
170
- tableStatements ,
171
- )
173
+ schemaName := rec [colIndex [constSchemaName ]]
174
+ _ , ok := schemaReCache [schemaName ]
175
+ if ! ok {
176
+ schemaReCache [schemaName ] = regexp .MustCompile (`\b` + regexp .QuoteMeta (schemaName ) + `\.` )
177
+ }
178
+
179
+ // 2) Build identifiers
180
+ fullTableName , statement := processDDLRecord (dbName , schemaName ,
181
+ rec [colIndex [descriptorName ]], // table name
182
+ rec [colIndex [createStatement ]], // statement
183
+ schemaReCache [schemaName ])
184
+ if _ , ok := seen [fullTableName ]; ! ok && fullTableName != "" {
185
+ // 5) Record ordering & statement
186
+ tableStatements [fullTableName ] = statement
187
+ order = append (order , fullTableName )
188
+ seen [fullTableName ] = struct {}{}
189
+ }
172
190
}
173
191
return buildSchemas (order , tableStatements ), buildCreateStmts (tableStatements ), nil
174
192
}
@@ -486,47 +504,19 @@ func openCreateStatementsTSV(zipDir string) (*os.File, error) {
486
504
487
505
// processDDLRecord inspects one TSV row and, if it represents a public table
488
506
// in dbName, normalizes its CREATE TABLE stmt and appends it to order/statements.
507
+ // It returns the fully qualified table name and table statements.
489
508
func processDDLRecord (
490
- rec []string ,
491
- colIndex map [string ]int ,
492
- dbName string ,
493
- schemaReCache map [string ]* regexp.Regexp ,
494
- seen map [string ]bool ,
495
- order * []string ,
496
- tableStatements map [string ]string ,
497
- ) {
498
- // 1) Quick filter
499
- if rec [colIndex [databaseName ]] != dbName ||
500
- rec [colIndex [descriptorType ]] != "table" ||
501
- rec [colIndex [constSchemaName ]] != "public" {
502
- return
503
- }
504
-
505
- // 2) Build identifiers
506
- schemaName := rec [colIndex [constSchemaName ]]
507
- stmt := rec [colIndex [createStatement ]]
508
- tableName := rec [colIndex [descriptorName ]]
509
- fullTable := fmt .Sprintf ("%s.%s.%s" , dbName , schemaName , tableName )
510
-
509
+ dbName , schemaName , tableName , stmt string , pattern * regexp.Regexp ,
510
+ ) (string , string ) {
511
511
// 3) Normalize schema-qualified references
512
- pattern , ok := schemaReCache [schemaName ]
513
- if ! ok {
514
- pattern = regexp .MustCompile (`\b` + regexp .QuoteMeta (schemaName ) + `\.` )
515
- schemaReCache [schemaName ] = pattern
516
- }
517
512
stmt = pattern .ReplaceAllString (stmt , dbName + "." + schemaName + "." )
518
513
519
514
// 4) Ensure IF NOT EXISTS
520
515
if ! ifNotExistsRe .MatchString (stmt ) {
521
516
stmt = createTableRe .ReplaceAllString (stmt , "${1}IF NOT EXISTS " )
522
517
}
523
518
524
- // 5) Record ordering & statement
525
- if ! seen [fullTable ] {
526
- * order = append (* order , fullTable )
527
- seen [fullTable ] = true
528
- }
529
- tableStatements [fullTable ] = stmt
519
+ return fmt .Sprintf ("%s.%s.%s" , dbName , schemaName , tableName ), stmt
530
520
}
531
521
532
522
// buildWorkloadSchema constructs the complete workload schema used for data generation.
0 commit comments