Skip to content

Commit 5bd693d

Browse files
committed
workload_generator: introduce Generator interface and concrete generator types
Previously, workload_generator had only the features for parsing the input DDL to create all required schema and make proper structs with enough information for data generation. The implementation of actual generators to use that information to generate data was not there. This patch defines a Generator interface and provides a suite of concrete generator implementations—SequenceGen, IntegerGen, FloatGen, StringGen, TimestampGen, UUIDGen, BoolGen, and JsonGen—each producing strings (or "" for SQL NULL). It also adds wrapper types (DefaultWrapper, UniqueWrapper, FkWrapper) to layer default literals, uniqueness constraints, and foreign-key fanout behavior. Finally, makeGenerator is refactored into a clean factory that picks the right base generator from GenType metadata, seeds per-batch RNG streams, and composes wrappers based on ColumnMeta. Fixes: CRDB-51752 Release note (cli change): The workload_generator now exposes a pluggable Generator interface with built-in generators and wrappers for various SQL types—enabling modular, extensible, and reusable workload data generation.
1 parent 5128b47 commit 5bd693d

File tree

7 files changed

+715
-22
lines changed

7 files changed

+715
-22
lines changed

pkg/workload/workload_generator/BUILD.bazel

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
33
go_library(
44
name = "workload_generator",
55
srcs = [
6+
"column_generator.go",
67
"constants.go",
78
"schema_designs.go",
89
"schema_generator.go",
10+
"types.go",
911
"utils.go",
1012
],
1113
importpath = "github.com/cockroachdb/cockroach/pkg/workload/workload_generator",
@@ -19,6 +21,7 @@ go_library(
1921
go_test(
2022
name = "workload_generator_test",
2123
srcs = [
24+
"column_generator_test.go",
2225
"schema_generator_test.go",
2326
"utils_test.go",
2427
],
Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
// Copyright 2025 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the CockroachDB Software License
4+
// included in the /LICENSE file.
5+
6+
package workload_generator
7+
8+
import (
9+
"fmt"
10+
"math/rand"
11+
"strconv"
12+
"strings"
13+
"time"
14+
15+
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
16+
)
17+
18+
// buildGenerator builds a Generator for one column given
19+
// - col: metadata for this column
20+
// - batchIdx: which batch we’re in (0,1,2…)
21+
// - baseBatchSize: how many rows per batch (for sequences)
22+
// - schema: full YAML schema, so we can recurse on FKs
23+
func buildGenerator(col ColumnMeta, batchIdx, batchSize int, schema Schema) Generator {
24+
// seed the per-batch RNG
25+
origSeed := getIntArg(col.Args, "seed", 0)
26+
seed64 := buildBatchSeed(origSeed, batchIdx)
27+
rng := rand.New(rand.NewSource(seed64))
28+
29+
// pick the base generator by type
30+
var base Generator
31+
switch col.Type {
32+
case GenTypeSequence:
33+
// jump start by batchIdx*batchSize
34+
base = buildSequenceGenerator(col, batchIdx, batchSize)
35+
case GenTypeInteger:
36+
base = buildIntegerGenerator(col, rng)
37+
case GenTypeFloat:
38+
base = buildFloatGenerator(col, rng)
39+
case GenTypeString:
40+
base = buildStringGenerator(col, rng)
41+
case GenTypeTimestamp:
42+
base = buildTimestampGenerator(col, rng)
43+
case GenTypeUUID:
44+
base = buildUuidGenerator(col, rng)
45+
case GenTypeBool:
46+
base = buildBooleanGenerator(col, rng)
47+
case GenTypeJson: //missed json type ig, will check
48+
base = buildJsonGenerator(col, rng)
49+
50+
default:
51+
panic("type not supported: " + col.Type)
52+
}
53+
54+
// layer on DefaultWrapper if required
55+
if col.Default != "" && col.DefaultProb > 0 {
56+
// use a distinct sub-seed so the literal probability RNG
57+
// doesn’t collide with the main RNG
58+
wrapperSeed := buildBatchSeed(origSeed, batchIdx^0xdeadbeef)
59+
base = NewDefaultWrapper(base, col.DefaultProb, col.Default, wrapperSeed)
60+
}
61+
62+
// layer on UniqueWrapper if required
63+
if col.IsUnique && !col.HasForeignKey && col.Type != "sequence" {
64+
base = NewUniqueWrapper(base, defaultUniqueCap) // or configurable capacity
65+
}
66+
67+
// layer on FKWrapper if this column has a foreign key
68+
if col.HasForeignKey {
69+
// col.FK might be "tpcc__public__district.d_id"
70+
parts := strings.SplitN(col.FK, ".", 2)
71+
if len(parts) != 2 {
72+
panic(fmt.Sprintf("invalid FK spec %q", col.FK))
73+
}
74+
fqTable, childCol := parts[0], parts[1] // "tpcc__public__district", "d_id"
75+
pathSegments := strings.Split(fqTable, "__") // ["tpcc","public","district"]
76+
parentTable := pathSegments[len(pathSegments)-1] // "district"
77+
78+
// now look up in your schema map:
79+
parentMeta := schema[parentTable][0].Columns[childCol]
80+
parentGen := buildGenerator(parentMeta, batchIdx, batchSize, schema)
81+
base = NewFkWrapper(parentGen, col.Fanout)
82+
}
83+
84+
return base
85+
}
86+
87+
// getIntArg reads m[key] as an int. It accepts int, int64, float64
// (truncated toward zero) and decimal strings. defaultVal is returned when
// the key is absent, the value has any other type, or a string value cannot
// be parsed as an integer.
func getIntArg(m map[string]interface{}, key string, defaultVal int) int {
	switch v := m[key].(type) {
	case int:
		return v
	case int64:
		return int(v)
	case float64:
		return int(v)
	case string:
		i, err := strconv.Atoi(v)
		if err != nil {
			// A malformed value falls back to the default rather than
			// silently becoming 0.
			return defaultVal
		}
		return i
	default:
		return defaultVal
	}
}
103+
104+
// getFloatArg reads m[key] as a float64. It accepts float64, int, int64 and
// strings parseable by strconv.ParseFloat. defaultVal is returned when the
// key is absent, the value has any other type, or a string value cannot be
// parsed as a float.
func getFloatArg(m map[string]interface{}, key string, defaultVal float64) float64 {
	switch v := m[key].(type) {
	case float64:
		return v
	case int:
		return float64(v)
	case int64:
		return float64(v)
	case string:
		f, err := strconv.ParseFloat(v, 64)
		if err != nil {
			// A malformed value falls back to the default rather than
			// silently becoming 0.
			return defaultVal
		}
		return f
	default:
		return defaultVal
	}
}
120+
121+
// getStringArg returns m[key] when it is present and holds a string;
// otherwise it returns defaultVal.
func getStringArg(m map[string]interface{}, key string, defaultVal string) string {
	// A missing key yields a nil interface, for which the assertion fails
	// just as it does for a non-string value.
	str, isString := m[key].(string)
	if !isString {
		return defaultVal
	}
	return str
}
130+
131+
// ─── Factory ──────────────────────────────────────────────────────────
132+
133+
// splitmix64 mixes a 64-bit key into a 64-bit value: it adds a fixed
// increment to the input and then applies two xor-shift/multiply rounds
// followed by a final xor-shift.
func splitmix64(x uint64) uint64 {
	z := x + 0x9e3779b97f4a7c15
	z ^= z >> 30
	z *= 0xbf58476d1ce4e5b9
	z ^= z >> 27
	z *= 0x94d049bb133111eb
	z ^= z >> 31
	return z
}
140+
141+
// buildBatchSeed deterministically scrambles the original YAML seed
142+
// with the batch index so each batch uses a distinct RNG stream.
143+
func buildBatchSeed(origSeed, batchIdx int) int64 {
144+
key := (uint64(origSeed) << 32) | uint64(batchIdx)
145+
return int64(splitmix64(key))
146+
}
147+
148+
// buildSequenceGenerator creates a SequenceGen that generates sequential integers
149+
func buildSequenceGenerator(col ColumnMeta, batchIdx int, batchSize int) Generator {
150+
baseStart := getIntArg(col.Args, "start", 0)
151+
return &SequenceGen{cur: baseStart + batchIdx*batchSize}
152+
}
153+
154+
// buildIntegerGenerator creates an IntegerGen that generates random integers
155+
func buildIntegerGenerator(col ColumnMeta, rng *rand.Rand) Generator {
156+
minArg := getIntArg(col.Args, "min", 0)
157+
maxArg := getIntArg(col.Args, "max", 0)
158+
nullPct := getFloatArg(col.Args, "null_pct", 0.0)
159+
return &IntegerGen{r: rng, min: minArg, max: maxArg, nullPct: nullPct}
160+
}
161+
162+
// buildFloatGenerator creates a FloatGen that generates random floats
163+
func buildFloatGenerator(col ColumnMeta, rng *rand.Rand) Generator {
164+
minArg := getFloatArg(col.Args, "min", 0.0)
165+
maxArg := getFloatArg(col.Args, "max", 0.0)
166+
round := getIntArg(col.Args, "round", 2)
167+
nullPct := getFloatArg(col.Args, "null_pct", 0.0)
168+
return &FloatGen{r: rng, min: minArg, max: maxArg, round: round, nullPct: nullPct}
169+
}
170+
171+
// buildStringGenerator creates a StringGen that generates random strings
172+
func buildStringGenerator(col ColumnMeta, rng *rand.Rand) Generator {
173+
minArg := getIntArg(col.Args, "min", 0)
174+
maxArg := getIntArg(col.Args, "max", 0)
175+
nullPct := getFloatArg(col.Args, "null_pct", 0.0)
176+
return &StringGen{r: rng, min: minArg, max: maxArg, nullPct: nullPct}
177+
}
178+
179+
// buildTimestampGenerator creates a TimestampGen that generates random timestamps
180+
func buildTimestampGenerator(col ColumnMeta, rng *rand.Rand) Generator {
181+
// parse Python-style format → Go layout
182+
startStr := getStringArg(col.Args, "start", "2000-01-01")
183+
endStr := getStringArg(col.Args, "end", timeutil.Now().Format("2006-01-02"))
184+
pyFmt := getStringArg(col.Args, "format", "%Y-%m-%d %H:%M:%S.%f")
185+
var layout string
186+
switch pyFmt {
187+
case "%Y-%m-%d %H:%M:%S.%f":
188+
layout = "2006-01-02 15:04:05.000000"
189+
case "%Y-%m-%d":
190+
layout = "2006-01-02"
191+
default:
192+
layout = time.RFC3339Nano
193+
}
194+
st, err1 := time.Parse(layout, startStr)
195+
et, err2 := time.Parse(layout, endStr)
196+
if err1 != nil || err2 != nil {
197+
st, _ = time.Parse(time.RFC3339, startStr)
198+
et, _ = time.Parse(time.RFC3339, endStr)
199+
layout = time.RFC3339
200+
}
201+
span := et.UnixNano() - st.UnixNano()
202+
if span <= 0 {
203+
span = 1
204+
}
205+
nullPct := getFloatArg(col.Args, "null_pct", 0.0)
206+
return &TimestampGen{r: rng, startNS: st.UnixNano(), spanNS: span, layout: layout, nullPct: nullPct}
207+
}
208+
209+
// buildUuidGenerator creates a UUIDGen that generates random UUIDs
210+
func buildUuidGenerator(_ ColumnMeta, rng *rand.Rand) Generator {
211+
return &UUIDGen{r: rng}
212+
}
213+
214+
// buildBooleanGenerator creates a BoolGen that generates random booleans
215+
func buildBooleanGenerator(col ColumnMeta, rng *rand.Rand) Generator {
216+
nullPct := getFloatArg(col.Args, "null_pct", 0.0)
217+
return &BoolGen{r: rng, nullPct: nullPct}
218+
}
219+
220+
// buildJsonGenerator creates a JsonGen that generates random JSON strings
221+
func buildJsonGenerator(col ColumnMeta, rng *rand.Rand) Generator {
222+
// JSON is just a StringGen plus a wrapper
223+
minArg := getIntArg(col.Args, "min", defaultJSONMinLen)
224+
maxArg := getIntArg(col.Args, "max", defaultJSONMaxLen)
225+
nullPct := getFloatArg(col.Args, "null_pct", 0.0)
226+
sg := &StringGen{r: rng, min: minArg, max: maxArg, nullPct: nullPct}
227+
return &JsonGen{strGen: sg}
228+
}

0 commit comments

Comments
 (0)