Skip to content

Commit 40e7706

Browse files
committed
workload: add simple CHECK constraint support and bit/bytes generators
This patch enhances the workload_generator command by: • Parsing simple comparison CHECK constraints (>, >=, <, <=) from DDL and applying them to the generated schema (setting min/max or start/end args). • Introducing GenTypeBit and GenTypeBytes to handle BIT(n) and BYTES/BYTEA columns with dedicated generators. • Fixing minor bugs in SQL/YAML generation and type mapping. Fixes: CRDB-51752 Epic: None Release note (cli change): Adds support for simple CHECK constraints and bit/bytes column generators in workload_generator.
1 parent 5e66aff commit 40e7706

File tree

5 files changed

+227
-13
lines changed

5 files changed

+227
-13
lines changed

pkg/workload/workload_generator/column_generator.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@ func buildGenerator(col ColumnMeta, batchIdx, batchSize int, schema Schema) Gene
4646
base = buildBooleanGenerator(col, rng)
4747
case GenTypeJson: //missed json type ig, will check
4848
base = buildJsonGenerator(col, rng)
49+
case GenTypeBit:
50+
base = buildBitGenerator(col, rng)
51+
case GenTypeBytes:
52+
base = buildBytesGenerator(col, rng)
4953

5054
default:
5155
panic("type not supported: " + col.Type)
@@ -226,3 +230,18 @@ func buildJsonGenerator(col ColumnMeta, rng *rand.Rand) Generator {
226230
sg := &StringGen{r: rng, min: minArg, max: maxArg, nullPct: nullPct}
227231
return &JsonGen{strGen: sg}
228232
}
233+
234+
// buildBitGenerator produces random BIT(n) values as strings of '0'/'1'.
235+
func buildBitGenerator(col ColumnMeta, rng *rand.Rand) Generator {
236+
// size comes from mapBitType → args["size"]
237+
size := getIntArg(col.Args, "size", 1)
238+
nullPct := getFloatArg(col.Args, "null_pct", 0.0)
239+
return &BitGen{r: rng, size: size, nullPct: nullPct}
240+
}
241+
242+
// buildBytesGenerator produces random []byte for BYTEA/BYTES columns.
243+
func buildBytesGenerator(col ColumnMeta, rng *rand.Rand) Generator {
244+
size := getIntArg(col.Args, "size", 1)
245+
nullPct := getFloatArg(col.Args, "null_pct", 0.0)
246+
return &BytesGen{r: rng, min: size, max: size, nullPct: nullPct}
247+
}

pkg/workload/workload_generator/schema_generator.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -288,8 +288,9 @@ func processColumnDefs(table *TableSchema, columnDefs []string) {
288288
defaultVal := colMatch[4] // DEFAULT value
289289
foreignKeyTable := colMatch[5] // Referenced table for foreign keys
290290
foreignKeyColumn := colMatch[6] // Referenced column for foreign keys
291-
292-
table.ColumnOrder = append(table.ColumnOrder, name)
291+
// While adding columns to the order, surrounding quotes are stripped if any.
292+
// This is to ensure that column names here match with the column names used as keys in schema maps.
293+
table.ColumnOrder = append(table.ColumnOrder, strings.Trim(name, `"`))
293294
// Extract CHECK constraint if present (requires special handling for nested parentheses)
294295
inlineCheck := ""
295296
checkIdx := checkInlineRe.FindStringIndex(columnDef)
@@ -565,6 +566,8 @@ func buildWorkloadSchema(
565566
// 1) Build initial blocks and capture FK seeds
566567
blocks, fkSeed := buildInitialBlocks(allSchemas, dbName, rng, baseRowCount)
567568

569+
applyCheckConstraints(blocks, allSchemas)
570+
568571
// 2) Wire up foreign-key relationships in the blocks
569572
wireForeignKeys(blocks, allSchemas, fkSeed, rng)
570573

pkg/workload/workload_generator/schema_utils.go

Lines changed: 161 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"regexp"
1313
"strconv"
1414
"strings"
15+
"time"
1516
)
1617

1718
const (
@@ -23,6 +24,9 @@ const (
2324
seedKeyDelimiter = "__"
2425
// nullPct is the key for nullability percentage in args maps
2526
nullPct = "null_pct"
27+
// maxArg and minArg are keys for range limits in args maps
28+
maxArg = "max"
29+
minArg = "min"
2630
)
2731

2832
// GeneratorType is an enum for all the data generator types.
@@ -55,6 +59,12 @@ var (
5559
simpleNumberRe = regexp.MustCompile(`^[+-]?\d+(?:\.\d+)?$`)
5660
quotedStrRe = regexp.MustCompile(`^'.*'$`)
5761
booleanLiteralRe = regexp.MustCompile(`^(?i:true|false)$`)
62+
63+
// Regexes for simple comparison constraints.
64+
gtRe = regexp.MustCompile(`(?i)^([A-Za-z_]\w*)\s*>\s*([^\s]+)$`)
65+
gteRe = regexp.MustCompile(`(?i)^([A-Za-z_]\w*)\s*>=\s*([^\s]+)$`)
66+
ltRe = regexp.MustCompile(`(?i)^([A-Za-z_]\w*)\s*<\s*([^\s]+)$`)
67+
lteRe = regexp.MustCompile(`(?i)^([A-Za-z_]\w*)\s*<=\s*([^\s]+)$`)
5868
)
5969

6070
// Schema is the map of TableBlocks, one per table, which is used by all data generators
@@ -285,13 +295,154 @@ func atoi(s string) int {
285295

286296
// setArgsRange sets the "min" and "max" keys in the args map to the specified range.
287297
func setArgsRange(args map[string]any, min, max int) {
288-
args["min"] = min
289-
args["max"] = max
298+
args[minArg] = min
299+
args[maxArg] = max
290300
}
291301

292302
// canonical rewrites a dotted name into the legacy YAML form by turning
// every "." into "__" (for example "db.public.users" becomes
// "db__public__users"). Names without dots are returned unchanged.
func canonical(name string) string {
	const (
		dot        = "."
		legacyJoin = "__"
	)
	return strings.ReplaceAll(name, dot, legacyJoin)
}
294304

305+
// bumpTimestampISO parses s as an RFC3339Nano timestamp and returns it
// advanced by one millisecond, formatted again as RFC3339Nano. If s cannot
// be parsed (for example because it is still wrapped in SQL quotes), s is
// returned unchanged.
//
// It is used to turn an exclusive lower bound ("col > val") into an
// inclusive one. The one-millisecond step matches the step that
// applyLessThanCheck subtracts for exclusive upper bounds.
func bumpTimestampISO(s string) string {
	t, err := time.Parse(time.RFC3339Nano, s)
	if err != nil {
		// Not an RFC3339Nano timestamp: hand the input back untouched.
		return s
	}
	return t.Add(time.Millisecond).Format(time.RFC3339Nano)
}
314+
315+
// applyCheckConstraints updates each ColumnMeta in the Schema
316+
// to reflect simple comparison CHECK constraints (>, >=, <, <=)
317+
// for integer, float, and timestamp column types. It reads the
318+
// raw CheckConstraints from allSchemas and sets min/max/start/end
319+
// arguments in-place before data generation
320+
func applyCheckConstraints(blocks Schema, allSchemas map[string]*TableSchema) {
321+
for tbl, blks := range blocks {
322+
schema := allSchemas[tbl]
323+
for i := range blks {
324+
block := &blks[i]
325+
// Iterating over each column in the current TableBlock.
326+
for colName, cm := range block.Columns {
327+
// Columns that are not integer, float, or timestamp are skipped.
328+
switch cm.Type {
329+
case GenTypeInteger, GenTypeFloat, GenTypeTimestamp:
330+
default:
331+
continue
332+
}
333+
// Examining each raw CHECK expression for supported patterns.
334+
for _, chk := range schema.CheckConstraints {
335+
chk = strings.TrimSpace(chk)
336+
// col > val
337+
applyGreaterThanCheck(chk, colName, cm)
338+
// col >= val
339+
applyGreaterThanEqualsCheck(chk, colName, cm)
340+
// col < val
341+
applyLessThanCheck(chk, colName, cm)
342+
// col <= val
343+
applyLessThanEqualsCheck(chk, colName, cm)
344+
}
345+
// The updated ColumnMeta is written back.
346+
block.Columns[colName] = cm
347+
}
348+
}
349+
}
350+
}
351+
352+
// applyLessThanEqualsCheck checks if the given check constraint
353+
// is a "col <= val" expression and updates the ColumnMeta accordingly.
354+
func applyLessThanEqualsCheck(chk string, colName string, cm ColumnMeta) {
355+
if m := lteRe.FindStringSubmatch(chk); m != nil && m[1] == colName {
356+
lit := stripCast(m[2])
357+
switch cm.Type {
358+
case GenTypeInteger:
359+
if v, err := strconv.Atoi(lit); err == nil {
360+
cm.Args[maxArg] = v
361+
}
362+
case GenTypeFloat:
363+
if f, err := strconv.ParseFloat(lit, 64); err == nil {
364+
cm.Args[maxArg] = f
365+
}
366+
case GenTypeTimestamp:
367+
cm.Args["end"] = lit
368+
}
369+
}
370+
}
371+
372+
// applyLessThanCheck checks if the given check constraint
373+
// is a "col < val" expression and updates the ColumnMeta accordingly.
374+
func applyLessThanCheck(chk string, colName string, cm ColumnMeta) {
375+
if m := ltRe.FindStringSubmatch(chk); m != nil && m[1] == colName {
376+
lit := stripCast(m[2])
377+
switch cm.Type {
378+
case GenTypeInteger:
379+
if v, err := strconv.Atoi(lit); err == nil {
380+
cm.Args[maxArg] = v - 1
381+
}
382+
case GenTypeFloat:
383+
if f, err := strconv.ParseFloat(lit, 64); err == nil {
384+
cm.Args[maxArg] = math.Nextafter(f, math.Inf(-1))
385+
}
386+
case GenTypeTimestamp:
387+
// Subtract one millisecond
388+
if t, err := time.Parse(time.RFC3339Nano, lit); err == nil {
389+
cm.Args["end"] = t.Add(-time.Millisecond).Format(time.RFC3339Nano)
390+
}
391+
}
392+
}
393+
}
394+
395+
// applyLessThanEqualsCheck checks if the given check constraint
396+
// is a "col <= val" expression and updates the ColumnMeta accordingly.
397+
func applyGreaterThanEqualsCheck(chk string, colName string, cm ColumnMeta) {
398+
if m := gteRe.FindStringSubmatch(chk); m != nil && m[1] == colName {
399+
lit := stripCast(m[2])
400+
switch cm.Type {
401+
case GenTypeInteger:
402+
if v, err := strconv.Atoi(lit); err == nil {
403+
cm.Args[minArg] = v
404+
}
405+
case GenTypeFloat:
406+
if f, err := strconv.ParseFloat(lit, 64); err == nil {
407+
cm.Args[minArg] = f
408+
}
409+
case GenTypeTimestamp:
410+
cm.Args["start"] = lit
411+
}
412+
}
413+
}
414+
415+
// applyGreaterThanCheck checks if the given check constraint
416+
// is a "col > val" expression and updates the ColumnMeta accordingly.
417+
func applyGreaterThanCheck(chk string, colName string, cm ColumnMeta) {
418+
if m := gtRe.FindStringSubmatch(chk); m != nil && m[1] == colName {
419+
lit := stripCast(m[2])
420+
switch cm.Type {
421+
case GenTypeInteger:
422+
if v, err := strconv.Atoi(lit); err == nil {
423+
cm.Args[minArg] = v + 1
424+
}
425+
case GenTypeFloat:
426+
if f, err := strconv.ParseFloat(lit, 64); err == nil {
427+
cm.Args[minArg] = math.Nextafter(f, math.Inf(1))
428+
}
429+
case GenTypeTimestamp:
430+
cm.Args["start"] = bumpTimestampISO(lit)
431+
}
432+
}
433+
}
434+
435+
// stripCast drops a Cockroach/Postgres cast suffix from a literal by
// cutting it at the first "::" (which also covers the ":::" form):
//
//	"123:::INT8"         → "123"
//	"'2021-01-01'::DATE" → "'2021-01-01'"
//
// Literals without a cast are returned unchanged. Surrounding quotes are
// not removed.
func stripCast(lit string) string {
	value, _, _ := strings.Cut(lit, "::")
	return value
}
445+
295446
// mapSQLType maps a SQL column type to the workload generator type and
296447
// argument set expected by cockroach workloads. The returned map may
297448
// include bounds, formatting information or other hints used by the
@@ -331,15 +482,16 @@ func mapSQLType(sql string, col *Column, rng *rand.Rand) (GeneratorType, map[str
331482

332483
case sql == "date":
333484
return mapDateType(sql, col, args)
334-
335-
case sql == "timestamp" || sql == "timestamptz":
485+
// We use hasPrefix(timestamp) to match both "timestamp" and "timestamptz"
486+
// as well as any other variations like "timestamp(6)".
487+
case strings.HasPrefix(sql, "timestamp"):
336488
return mapTimestampType(sql, col, args)
337489

338490
case sql == "bool" || sql == "boolean":
339491
return GenTypeBool, args
340492

341493
case sql == "json" || sql == "jsonb":
342-
mapJsonType(sql, col, args)
494+
return mapJsonType(sql, col, args)
343495
}
344496
setArgsRange(args, 5, 30)
345497
return GenTypeString, args
@@ -404,13 +556,13 @@ func mapDecimalType(sql string, _ *Column, args map[string]any) (GeneratorType,
404556
maxVal = 1.0 - fracUnit
405557
minVal = -maxVal
406558
}
407-
args["min"] = minVal
408-
args["max"] = maxVal
559+
args[minArg] = minVal
560+
args[maxArg] = maxVal
409561
args["round"] = scale
410562
} else {
411563
// fallback for DECIMAL without precision
412-
args["min"] = 0.0
413-
args["max"] = 1.0
564+
args[minArg] = 0.0
565+
args[maxArg] = 1.0
414566
args["round"] = 2
415567
}
416568
return GenTypeFloat, args

pkg/workload/workload_generator/sql_utils.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -516,10 +516,10 @@ func getFieldCol(
516516
numParts := 1
517517
parts := make([]string, numParts)
518518
var placeholder string
519-
for _, schema := range allSchemas {
519+
for tn, schema := range allSchemas {
520520
matched := false
521521
for _, column := range schema.Columns {
522-
if column.Name == col {
522+
if column.Name == col && tn == tableName {
523523
placeholder = column.String()
524524
placeholder = placeholder[1 : len(placeholder)-1]
525525
matched = true

pkg/workload/workload_generator/types.go

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"fmt"
1111
"math/rand"
1212
"strconv"
13+
"strings"
1314

1415
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
1516
)
@@ -157,6 +158,45 @@ func (g *JsonGen) Next() string {
157158
return fmt.Sprintf(`{"k":"%s"}`, v)
158159
}
159160

161+
// BitGen generates values for BIT(n) columns: strings of exactly size
// characters, each either '0' or '1' (for example "101001").
//
// Next returns a string, so a NULL draw is reported as the empty string
// rather than nil.
type BitGen struct {
	r       *rand.Rand // source of randomness
	size    int        // number of bits per generated value
	nullPct float64    // probability, in [0, 1], of emitting the NULL marker
}

// Next returns the next value: "" with probability nullPct, otherwise a
// string of size random '0'/'1' characters. A non-positive size yields "".
func (b *BitGen) Next() string {
	if b.r.Float64() < b.nullPct {
		return ""
	}
	if b.size <= 0 {
		// Nothing to emit; also keeps Grow from panicking on a negative size.
		return ""
	}
	var sb strings.Builder
	sb.Grow(b.size) // the final length is known, so allocate once
	for i := 0; i < b.size; i++ {
		// Intn(2) is 0 or 1, which maps directly onto '0' or '1'.
		sb.WriteByte('0' + byte(b.r.Intn(2)))
	}
	return sb.String()
}
182+
183+
// BytesGen generates values for BYTES/BYTEA columns: random bytes carried
// in a Go string (the result is raw binary and need not be valid UTF-8).
//
// Next returns a string, so a NULL draw is reported as the empty string and
// cannot be told apart from a zero-length value.
type BytesGen struct {
	r        *rand.Rand // source of randomness
	min, max int        // inclusive bounds on the generated length, in bytes
	nullPct  float64    // probability, in [0, 1], of emitting the NULL marker
}

// Next returns the next value: "" with probability nullPct, otherwise a
// string of random bytes whose length lies in [min, max]. A negative min is
// treated as 0, and when max does not exceed min the length is exactly min.
func (b *BytesGen) Next() string {
	if b.r.Float64() < b.nullPct {
		return ""
	}

	// Clamp the lower bound so make never sees a negative length.
	length := b.min
	if length < 0 {
		length = 0
	}
	// Only draw a length when there is a real range; with min == max no
	// extra random number is consumed.
	if b.max > length {
		length += b.r.Intn(b.max - length + 1)
	}
	if length == 0 {
		return ""
	}

	buf := make([]byte, length)
	_, _ = b.r.Read(buf) // (*rand.Rand).Read always returns a nil error
	return string(buf)
}
199+
160200
// ─── Wrappers ──────────────────────────────────────────────────────────
161201

162202
// DefaultWrapper wraps a base Generator and emits a fixed literal

0 commit comments

Comments
 (0)