Skip to content

Commit 9aaa4fe

Browse files
Pradyum-Git authored and
nameisbhaskar committed
workload_generator: Fixes and enhancements
This is a continuation of the workload generator effort, which started as an intern project. This PR addresses multiple issues and enhancements, as described below. Columns were not identified properly when a statement contained joins, because only one table name was considered per statement. e.g. SELECT o.id, r.c2, r.c3 FROM orders o JOIN ref_data r ON o.acc_no = r.acc_no JOIN ref_data r2 ON o.acc_no = r2.acc_no WHERE r.c2 = _ Columns written as table_alias.column_name were not handled, because table aliases were not maintained. e.g. SELECT o.id, r.c2 FROM orders o, ref_data r WHERE o.acc_no = r.acc_no AND o.amount > _ AND r.acc_no = _ OR o.acc_no = _; Table names prefixed with a database name were not parsed correctly. In this case, the database_name column in the crdb_internal.node_statement_statistics.txt file can hold a different database name and not the actual database name. e.g. select a,b,c from db_1.table_name. If a query is run from the current database scope, it is marked with the current database even if it is run against another database. e.g. SQL> use db1; SQL> select * from db2.table_in_db2; The query against db2 is recorded under the database name db1 in the crdb_internal.node_statement_statistics.txt file. This causes an issue, as unsupported queries also get added to the run. Currently, the workload cannot be re-run, as the last value of a sequence column is not maintained from the previous run. So, we had to freshly import the data before each run. Now, the last_value is stored in the YAML so that it can be reused in the next run. There are some additional cleanup changes: Use references instead of pass by value. The block for each table was a list, which was unnecessary. checkIfAllPkAreFk had a bug where the function returned true if there was no primary key in the query. Ensure that we use import instead of insert during dataload. 
This is fixed by initializing the workload schema in the Tables call, which ensures that the "support fixtures" check is set to true. Fixes: CRDB-51572 Epic: None
1 parent 8275999 commit 9aaa4fe

File tree

10 files changed

+606
-430
lines changed

10 files changed

+606
-430
lines changed

pkg/workload/workload_generator/column_generator.go

Lines changed: 21 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@ import (
2020
// - batchIdx: which batch we’re in (0,1,2…)
2121
// - batchSize: how many rows per batch (for sequences)
2222
// - schema: full YAML schema, so we can recurse on FKs
23-
func buildGenerator(col ColumnMeta, batchIdx, batchSize int, schema Schema) Generator {
23+
func buildGenerator(col *ColumnMeta, batchIdx, batchSize int, schema Schema) Generator {
2424
// seed the per-batch RNG
2525
origSeed := getIntArg(col.Args, "seed", 0)
2626
seed64 := buildBatchSeed(origSeed, batchIdx)
@@ -80,7 +80,7 @@ func buildGenerator(col ColumnMeta, batchIdx, batchSize int, schema Schema) Gene
8080
parentTable := pathSegments[len(pathSegments)-1] // "district"
8181

8282
// now look up in your schema map:
83-
parentMeta := schema[parentTable][0].Columns[childCol]
83+
parentMeta := schema[parentTable].Columns[childCol]
8484
parentGen := buildGenerator(parentMeta, batchIdx, batchSize, schema)
8585
base = NewFkWrapper(parentGen, col.Fanout)
8686
}
@@ -150,21 +150,28 @@ func buildBatchSeed(origSeed, batchIdx int) int64 {
150150
}
151151

152152
// buildSequenceGenerator creates a SequenceGen that generates sequential integers
153-
func buildSequenceGenerator(col ColumnMeta, batchIdx int, batchSize int) Generator {
154-
baseStart := getIntArg(col.Args, "start", 0)
155-
return &SequenceGen{cur: baseStart + batchIdx*batchSize}
153+
func buildSequenceGenerator(col *ColumnMeta, batchIdx int, batchSize int) Generator {
154+
current := 0
155+
// LastValue should always be present in the YAML. Just in case this value is not set, the default behavior
156+
// is to start from the "start" argument (which defaults to 0) plus batchIdx*batchSize.
157+
if col.LastValue != "" {
158+
current, _ = strconv.Atoi(col.LastValue)
159+
} else {
160+
current = getIntArg(col.Args, "start", 0) + batchIdx*batchSize
161+
}
162+
return &SequenceGen{cur: current}
156163
}
157164

158165
// buildIntegerGenerator creates an IntegerGen that generates random integers
159-
func buildIntegerGenerator(col ColumnMeta, rng *rand.Rand) Generator {
166+
func buildIntegerGenerator(col *ColumnMeta, rng *rand.Rand) Generator {
160167
minArg := getIntArg(col.Args, "min", 0)
161168
maxArg := getIntArg(col.Args, "max", 0)
162169
nullPct := getFloatArg(col.Args, "null_pct", 0.0)
163170
return &IntegerGen{r: rng, min: minArg, max: maxArg, nullPct: nullPct}
164171
}
165172

166173
// buildFloatGenerator creates a FloatGen that generates random floats
167-
func buildFloatGenerator(col ColumnMeta, rng *rand.Rand) Generator {
174+
func buildFloatGenerator(col *ColumnMeta, rng *rand.Rand) Generator {
168175
minArg := getFloatArg(col.Args, "min", 0.0)
169176
maxArg := getFloatArg(col.Args, "max", 0.0)
170177
round := getIntArg(col.Args, "round", 2)
@@ -173,15 +180,15 @@ func buildFloatGenerator(col ColumnMeta, rng *rand.Rand) Generator {
173180
}
174181

175182
// buildStringGenerator creates a StringGen that generates random strings
176-
func buildStringGenerator(col ColumnMeta, rng *rand.Rand) Generator {
183+
func buildStringGenerator(col *ColumnMeta, rng *rand.Rand) Generator {
177184
minArg := getIntArg(col.Args, "min", 0)
178185
maxArg := getIntArg(col.Args, "max", 0)
179186
nullPct := getFloatArg(col.Args, "null_pct", 0.0)
180187
return &StringGen{r: rng, min: minArg, max: maxArg, nullPct: nullPct}
181188
}
182189

183190
// buildTimestampGenerator creates a TimestampGen that generates random timestamps
184-
func buildTimestampGenerator(col ColumnMeta, rng *rand.Rand) Generator {
191+
func buildTimestampGenerator(col *ColumnMeta, rng *rand.Rand) Generator {
185192
// parse Python-style format → Go layout
186193
startStr := getStringArg(col.Args, "start", "2000-01-01")
187194
endStr := getStringArg(col.Args, "end", timeutil.Now().Format("2006-01-02"))
@@ -211,18 +218,18 @@ func buildTimestampGenerator(col ColumnMeta, rng *rand.Rand) Generator {
211218
}
212219

213220
// buildUuidGenerator creates a UUIDGen that generates random UUIDs
214-
func buildUuidGenerator(_ ColumnMeta, rng *rand.Rand) Generator {
221+
func buildUuidGenerator(_ *ColumnMeta, rng *rand.Rand) Generator {
215222
return &UUIDGen{r: rng}
216223
}
217224

218225
// buildBooleanGenerator creates a BoolGen that generates random booleans
219-
func buildBooleanGenerator(col ColumnMeta, rng *rand.Rand) Generator {
226+
func buildBooleanGenerator(col *ColumnMeta, rng *rand.Rand) Generator {
220227
nullPct := getFloatArg(col.Args, "null_pct", 0.0)
221228
return &BoolGen{r: rng, nullPct: nullPct}
222229
}
223230

224231
// buildJsonGenerator creates a JsonGen that generates random JSON strings
225-
func buildJsonGenerator(col ColumnMeta, rng *rand.Rand) Generator {
232+
func buildJsonGenerator(col *ColumnMeta, rng *rand.Rand) Generator {
226233
// JSON is just a StringGen plus a wrapper
227234
minArg := getIntArg(col.Args, "min", defaultJSONMinLen)
228235
maxArg := getIntArg(col.Args, "max", defaultJSONMaxLen)
@@ -232,15 +239,15 @@ func buildJsonGenerator(col ColumnMeta, rng *rand.Rand) Generator {
232239
}
233240

234241
// buildBitGenerator produces random BIT(n) values as strings of '0'/'1'.
235-
func buildBitGenerator(col ColumnMeta, rng *rand.Rand) Generator {
242+
func buildBitGenerator(col *ColumnMeta, rng *rand.Rand) Generator {
236243
// size comes from mapBitType → args["size"]
237244
size := getIntArg(col.Args, "size", 1)
238245
nullPct := getFloatArg(col.Args, "null_pct", 0.0)
239246
return &BitGen{r: rng, size: size, nullPct: nullPct}
240247
}
241248

242249
// buildBytesGenerator produces random []byte for BYTEA/BYTES columns.
243-
func buildBytesGenerator(col ColumnMeta, rng *rand.Rand) Generator {
250+
func buildBytesGenerator(col *ColumnMeta, rng *rand.Rand) Generator {
244251
size := getIntArg(col.Args, "size", 1)
245252
nullPct := getFloatArg(col.Args, "null_pct", 0.0)
246253
return &BytesGen{r: rng, min: size, max: size, nullPct: nullPct}

pkg/workload/workload_generator/column_generator_test.go

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -62,15 +62,15 @@ func TestSplitmix64AndBatchSeed(t *testing.T) {
6262
}
6363

6464
func TestSequenceGenerator(t *testing.T) {
65-
col := ColumnMeta{Args: map[string]interface{}{"start": 5}}
65+
col := &ColumnMeta{Args: map[string]interface{}{"start": 5}}
6666
gen := buildSequenceGenerator(col, 2, 10)
6767
assert.Equal(t, "25", gen.Next(), "first value")
6868
assert.Equal(t, "26", gen.Next(), "second value")
6969
}
7070

7171
func TestIntegerAndFloatGenerators(t *testing.T) {
7272
rng := rand.New(rand.NewSource(0))
73-
icol := ColumnMeta{Args: map[string]interface{}{"min": 1, "max": 3, "null_pct": 0.0}}
73+
icol := &ColumnMeta{Args: map[string]interface{}{"min": 1, "max": 3, "null_pct": 0.0}}
7474
igen := buildIntegerGenerator(icol, rng)
7575
for i := 0; i < 10; i++ {
7676
v := igen.Next()
@@ -79,7 +79,7 @@ func TestIntegerAndFloatGenerators(t *testing.T) {
7979
assert.GreaterOrEqual(t, n, 1)
8080
assert.LessOrEqual(t, n, 3)
8181
}
82-
fcol := ColumnMeta{Args: map[string]interface{}{"min": 1.0, "max": 2.0, "round": 1, "null_pct": 0.0}}
82+
fcol := &ColumnMeta{Args: map[string]interface{}{"min": 1.0, "max": 2.0, "round": 1, "null_pct": 0.0}}
8383
fgen := buildFloatGenerator(fcol, rng)
8484
for i := 0; i < 10; i++ {
8585
v := fgen.Next()
@@ -88,7 +88,7 @@ func TestIntegerAndFloatGenerators(t *testing.T) {
8888
}
8989

9090
func TestTimestampGenerator(t *testing.T) {
91-
col := ColumnMeta{Args: map[string]interface{}{"start": "2000-01-02", "end": "2000-01-03", "format": "%Y-%m-%d"}}
91+
col := &ColumnMeta{Args: map[string]interface{}{"start": "2000-01-02", "end": "2000-01-03", "format": "%Y-%m-%d"}}
9292
rng := rand.New(rand.NewSource(0))
9393
gen := buildTimestampGenerator(col, rng)
9494
v := gen.Next()
@@ -98,7 +98,7 @@ func TestTimestampGenerator(t *testing.T) {
9898

9999
func TestUuidGenerator(t *testing.T) {
100100
rng := rand.New(rand.NewSource(0))
101-
col := ColumnMeta{}
101+
col := &ColumnMeta{}
102102
gen := buildUuidGenerator(col, rng)
103103
v1 := gen.Next()
104104
v2 := gen.Next()
@@ -110,20 +110,20 @@ func TestUuidGenerator(t *testing.T) {
110110

111111
func TestStringBoolJsonGenerators(t *testing.T) {
112112
rng := rand.New(rand.NewSource(1))
113-
scol := ColumnMeta{Args: map[string]interface{}{"min": 2, "max": 4, "null_pct": 0.0}}
113+
scol := &ColumnMeta{Args: map[string]interface{}{"min": 2, "max": 4, "null_pct": 0.0}}
114114
sgen := buildStringGenerator(scol, rng)
115115
for i := 0; i < 5; i++ {
116116
v := sgen.Next()
117117
assert.GreaterOrEqual(t, len(v), 2)
118118
assert.LessOrEqual(t, len(v), 4)
119119
}
120-
bcol := ColumnMeta{Args: map[string]interface{}{"null_pct": 0.0}}
120+
bcol := &ColumnMeta{Args: map[string]interface{}{"null_pct": 0.0}}
121121
bgen := buildBooleanGenerator(bcol, rng)
122122
for i := 0; i < 5; i++ {
123123
v := bgen.Next()
124124
assert.Contains(t, []string{"0", "1"}, v)
125125
}
126-
jcol := ColumnMeta{Args: map[string]interface{}{"min": 1, "max": 1, "null_pct": 0.0}}
126+
jcol := &ColumnMeta{Args: map[string]interface{}{"min": 1, "max": 1, "null_pct": 0.0}}
127127
jgen := buildJsonGenerator(jcol, rng)
128128
v := jgen.Next()
129129
assert.True(t, strings.HasPrefix(v, "{\"k\":\"") && strings.HasSuffix(v, "\"}"), "JSON object")
@@ -177,7 +177,7 @@ func TestMakeGeneratorAllTypes(t *testing.T) {
177177
}},
178178
}
179179
for _, tt := range types {
180-
gen := buildGenerator(ColumnMeta{Type: tt.typ, Args: tt.args}, 0, 1, schema)
180+
gen := buildGenerator(&ColumnMeta{Type: tt.typ, Args: tt.args}, 0, 1, schema)
181181
v := gen.Next()
182182
assert.True(t, tt.checker(v), "%s generator produced %q", tt.typ, v)
183183
}

0 commit comments

Comments
 (0)