Skip to content

Commit 37406d3

Browse files
committed
workload_generator: convert parsed DDL schemas into generator-ready YAML format
Previously, the workload_generator only parsed CREATE TABLE DDLs to extract schema metadata and built anonymization mappings for future use. This was inadequate because there was no structured representation of generator parameters per column or table, nor any built-in support for marshaling that information into YAML for downstream workload generation. To address this, this patch extends the parsed TableSchema into TableBlock and ColumnMeta types containing all necessary generator arguments (fanout, nullability, defaults, FK seeds, etc.), centralizes the buildWorkloadSchema orchestration, and adds easy YAML marshalling for future generator pipelines. Fixes: CRDB-51752 Release note (cli change): The workload_generator now supports parsing DDL schemas into a structured YAML format, enabling more flexible and detailed workload generation configurations.
1 parent 85c9415 commit 37406d3

File tree

6 files changed

+792
-6
lines changed

6 files changed

+792
-6
lines changed

pkg/workload/workload_generator/BUILD.bazel

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,24 @@ go_library(
44
name = "workload_generator",
55
srcs = [
66
"constants.go",
7-
"ddl_generator.go",
87
"schema_designs.go",
8+
"schema_generator.go",
9+
"utils.go",
910
],
1011
importpath = "github.com/cockroachdb/cockroach/pkg/workload/workload_generator",
1112
visibility = ["//visibility:public"],
12-
deps = ["@com_github_cockroachdb_errors//:errors"],
13+
deps = [
14+
"//pkg/util/timeutil",
15+
"@com_github_cockroachdb_errors//:errors",
16+
],
1317
)
1418

1519
go_test(
1620
name = "workload_generator_test",
17-
srcs = ["ddl_generator_test.go"],
21+
srcs = [
22+
"schema_generator_test.go",
23+
"utils_test.go",
24+
],
1825
embed = [":workload_generator"],
1926
embedsrcs = ["test_data/debug/crdb_internal.create_statements.txt"],
2027
deps = ["@com_github_stretchr_testify//assert"],

pkg/workload/workload_generator/schema_designs.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ func (ts *TableSchema) SetPrimaryKeys(pks []string) {
147147
// ColumnMeta is the per-column metadata (type, args, FK info, etc.) that
148148
// drives our per batch generators.
149149
type ColumnMeta struct {
150-
Type string `yaml:"type"`
150+
Type GeneratorType `yaml:"type"`
151151
Args map[string]interface{} `yaml:"args"`
152152
IsPrimaryKey bool `yaml:"isPrimaryKey"`
153153
IsUnique bool `yaml:"isUnique"`

pkg/workload/workload_generator/ddl_generator.go renamed to pkg/workload/workload_generator/schema_generator.go

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,13 @@ import (
1010
"encoding/csv"
1111
"fmt"
1212
"io"
13+
"math/rand"
1314
"os"
1415
"path/filepath"
1516
"regexp"
1617
"strings"
1718

19+
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
1820
"github.com/cockroachdb/errors"
1921
)
2022

@@ -524,3 +526,52 @@ func processDDLRecord(
524526
}
525527
tableStatements[fullTable] = stmt
526528
}
529+
530+
// buildWorkloadSchema constructs the complete workload schema used for data generation.
531+
// It takes each parsed TableSchema and produces one or more TableBlock entries per table,
532+
// wiring up foreign‐key relationships and scaling row counts appropriately.
533+
//
534+
// The steps are as follows:
535+
// 1. buildInitialBlocks:
536+
// • Creates a TableBlock for each table with a baseline row count (baseRowCount).
537+
// • Converts each Column into ColumnMeta (type, null probability, default, etc.).
538+
// • Collects “seeds” for foreign‐key columns to enable parent→child linkage.
539+
// 2. wireForeignKeys:
540+
// • Scans each TableSchema’s ForeignKeys and populates the corresponding ColumnMeta’s FK,
541+
// FKMode, Fanout, CompositeID, and ParentSeed entries.
542+
// 3. adjustFanoutForPureFKPKs:
543+
// • If every primary‐key column of a table is also a foreign key, drops its fan-out to 1,
544+
// ensuring exactly one child per parent in that “pure FK–PK” scenario.
545+
// 4. computeRowCounts:
546+
// • For each table, computes the total row count by multiplying baseRowCount
547+
// by the smallest product of FK fan-outs, ensuring referential integrity.
548+
//
549+
// Parameters:
550+
// - allSchemas: map of simple table name → *TableSchema, parsed from the DDL.
551+
// - dbName: the database name (used for qualifying schema references).
552+
// - baseRowCount: the initial number of rows per table before applying FK scaling.
553+
//
554+
// Returns:
555+
// - Schema: the finalized workload schema, mapping each table name to a slice of one
556+
// or more TableBlock, each of which drives per-batch generators with the
557+
// correct column metadata and row counts.
558+
func buildWorkloadSchema(
559+
allSchemas map[string]*TableSchema, dbName string, baseRowCount int,
560+
) Schema {
561+
// Initialize RNG for seeding and composite IDs
562+
rng := rand.New(rand.NewSource(timeutil.Now().UnixNano()))
563+
564+
// 1) Build initial blocks and capture FK seeds
565+
blocks, fkSeed := buildInitialBlocks(allSchemas, dbName, rng, baseRowCount)
566+
567+
// 2) Wire up foreign-key relationships in the blocks
568+
wireForeignKeys(blocks, allSchemas, fkSeed, rng)
569+
570+
// 3) If a table's PK cols are all FKs, drop its fanout to 1
571+
adjustFanoutForPureFKPKs(blocks)
572+
573+
// 4) Recompute each block's row count based on FK fanouts
574+
computeRowCounts(blocks, baseRowCount)
575+
576+
return blocks
577+
}

pkg/workload/workload_generator/ddl_generator_test.go renamed to pkg/workload/workload_generator/schema_generator_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
// Use of this software is governed by the CockroachDB Software License
44
// included in the /LICENSE file.
55

6-
// ddl_generator_test.go
7-
// Unit tests for ddl_generator.go
6+
// schema_generator_test.go
7+
// Unit tests for schema_generator.go
88
package workload_generator
99

1010
import (

0 commit comments

Comments
 (0)