Skip to content

Commit b5252fb

Browse files
craig[bot]Pradyum-Git
and committed
Merge #149513
149513: Ddl and yaml r=nameisbhaskar a=Pradyum-Git Changed the names of ddl_generator.go and its test file to schema_generator.go and schema_generator_test.go to better represent what they do. Also added the code which takes the output from GenerateDDLs and converts it into a schema format that has all the necessary information to be used later by data generators. The code declares all the required structs, adds the buildWorkloadSchema function to the schema_generator.go file, which is the entry point for the new features, and adds a utils.go file, which has all the remaining functions that fill out the structs with enough and proper data for constraint-aware data generation for the initial bulk load of data into tables. Release note: None Epic: None Co-authored-by: Pradyum <[email protected]>
2 parents 2067865 + 37406d3 commit b5252fb

File tree

6 files changed

+792
-6
lines changed

6 files changed

+792
-6
lines changed

pkg/workload/workload_generator/BUILD.bazel

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,24 @@ go_library(
44
name = "workload_generator",
55
srcs = [
66
"constants.go",
7-
"ddl_generator.go",
87
"schema_designs.go",
8+
"schema_generator.go",
9+
"utils.go",
910
],
1011
importpath = "github.com/cockroachdb/cockroach/pkg/workload/workload_generator",
1112
visibility = ["//visibility:public"],
12-
deps = ["@com_github_cockroachdb_errors//:errors"],
13+
deps = [
14+
"//pkg/util/timeutil",
15+
"@com_github_cockroachdb_errors//:errors",
16+
],
1317
)
1418

1519
go_test(
1620
name = "workload_generator_test",
17-
srcs = ["ddl_generator_test.go"],
21+
srcs = [
22+
"schema_generator_test.go",
23+
"utils_test.go",
24+
],
1825
embed = [":workload_generator"],
1926
embedsrcs = ["test_data/debug/crdb_internal.create_statements.txt"],
2027
deps = ["@com_github_stretchr_testify//assert"],

pkg/workload/workload_generator/schema_designs.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ func (ts *TableSchema) SetPrimaryKeys(pks []string) {
147147
// ColumnMeta is the per-column metadata (type, args, FK info, etc.) that
148148
// drives our per batch generators.
149149
type ColumnMeta struct {
150-
Type string `yaml:"type"`
150+
Type GeneratorType `yaml:"type"`
151151
Args map[string]interface{} `yaml:"args"`
152152
IsPrimaryKey bool `yaml:"isPrimaryKey"`
153153
IsUnique bool `yaml:"isUnique"`

pkg/workload/workload_generator/ddl_generator.go renamed to pkg/workload/workload_generator/schema_generator.go

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,13 @@ import (
1010
"encoding/csv"
1111
"fmt"
1212
"io"
13+
"math/rand"
1314
"os"
1415
"path/filepath"
1516
"regexp"
1617
"strings"
1718

19+
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
1820
"github.com/cockroachdb/errors"
1921
)
2022

@@ -524,3 +526,52 @@ func processDDLRecord(
524526
}
525527
tableStatements[fullTable] = stmt
526528
}
529+
530+
// buildWorkloadSchema constructs the complete workload schema used for data generation.
531+
// It takes each parsed TableSchema and produces one or more TableBlock entries per table,
532+
// wiring up foreign‐key relationships and scaling row counts appropriately.
533+
//
534+
// The steps are as follows:
535+
// 1. buildInitialBlocks:
536+
// • Creates a TableBlock for each table with a baseline row count (baseRowCount).
537+
// • Converts each Column into ColumnMeta (type, null probability, default, etc.).
538+
// • Collects “seeds” for foreign‐key columns to enable parent→child linkage.
539+
// 2. wireForeignKeys:
540+
// • Scans each TableSchema’s ForeignKeys and populates the corresponding ColumnMeta.FK,
541+
// FKMode, Fanout, CompositeID, and ParentSeed entries.
542+
// 3. adjustFanoutForPureFKPKs:
543+
// • If a table’s every primary‐key column is also a foreign key, drops its fan-out to 1,
544+
// ensuring exactly one child per parent in that “pure FK–PK” scenario.
545+
// 4. computeRowCounts:
546+
// • For each table, computes the total row count by multiplying baseRowCount
547+
// by the smallest product of FK fan-outs, ensuring referential integrity.
548+
//
549+
// Parameters:
550+
// - allSchemas: map of simple table name → *TableSchema, parsed from the DDL.
551+
// - dbName: the database name (used for qualifying schema references).
552+
// - baseRowCount: the initial number of rows per table before applying FK scaling.
553+
//
554+
// Returns:
555+
// - Schema: the finalized workload schema, mapping each table name to a slice of one
556+
// or more TableBlock, each of which drives per-batch generators with the
557+
// correct column metadata and row counts.
558+
func buildWorkloadSchema(
559+
allSchemas map[string]*TableSchema, dbName string, baseRowCount int,
560+
) Schema {
561+
// Initialize RNG for seeding and composite IDs
562+
rng := rand.New(rand.NewSource(timeutil.Now().UnixNano()))
563+
564+
// 1) Build initial blocks and capture FK seeds
565+
blocks, fkSeed := buildInitialBlocks(allSchemas, dbName, rng, baseRowCount)
566+
567+
// 2) Wire up foreign-key relationships in the blocks
568+
wireForeignKeys(blocks, allSchemas, fkSeed, rng)
569+
570+
// 3) If a table's PK cols are all FKs, drop its fanout to 1
571+
adjustFanoutForPureFKPKs(blocks)
572+
573+
// 4) Recompute each block's row count based on FK fanouts
574+
computeRowCounts(blocks, baseRowCount)
575+
576+
return blocks
577+
}

pkg/workload/workload_generator/ddl_generator_test.go renamed to pkg/workload/workload_generator/schema_generator_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
// Use of this software is governed by the CockroachDB Software License
44
// included in the /LICENSE file.
55

6-
// ddl_generator_test.go
7-
// Unit tests for ddl_generator.go
6+
// schema_generator_test.go
7+
// Unit tests for schema_generator.go
88
package workload_generator
99

1010
import (

0 commit comments

Comments
 (0)