|
| 1 | +// Copyright 2025 The Cockroach Authors. |
| 2 | +// |
| 3 | +// Use of this software is governed by the CockroachDB Software License |
| 4 | +// included in the /LICENSE file. |
| 5 | + |
| 6 | +package workload_generator |
| 7 | + |
| 8 | +import ( |
| 9 | + "fmt" |
| 10 | + "math/rand" |
| 11 | + "strconv" |
| 12 | + "strings" |
| 13 | + "time" |
| 14 | + |
| 15 | + "github.com/cockroachdb/cockroach/pkg/util/timeutil" |
| 16 | +) |
| 17 | + |
| 18 | +// buildGenerator builds a Generator for one column given |
| 19 | +// - col: metadata for this column |
| 20 | +// - batchIdx: which batch we’re in (0,1,2…) |
| 21 | +// - baseBatchSize: how many rows per batch (for sequences) |
| 22 | +// - schema: full YAML schema, so we can recurse on FKs |
| 23 | +func buildGenerator(col ColumnMeta, batchIdx, batchSize int, schema Schema) Generator { |
| 24 | + // seed the per-batch RNG |
| 25 | + origSeed := getIntArg(col.Args, "seed", 0) |
| 26 | + seed64 := buildBatchSeed(origSeed, batchIdx) |
| 27 | + rng := rand.New(rand.NewSource(seed64)) |
| 28 | + |
| 29 | + // pick the base generator by type |
| 30 | + var base Generator |
| 31 | + switch col.Type { |
| 32 | + case GenTypeSequence: |
| 33 | + // jump start by batchIdx*batchSize |
| 34 | + base = buildSequenceGenerator(col, batchIdx, batchSize) |
| 35 | + case GenTypeInteger: |
| 36 | + base = buildIntegerGenerator(col, rng) |
| 37 | + case GenTypeFloat: |
| 38 | + base = buildFloatGenerator(col, rng) |
| 39 | + case GenTypeString: |
| 40 | + base = buildStringGenerator(col, rng) |
| 41 | + case GenTypeTimestamp: |
| 42 | + base = buildTimestampGenerator(col, rng) |
| 43 | + case GenTypeUUID: |
| 44 | + base = buildUuidGenerator(col, rng) |
| 45 | + case GenTypeBool: |
| 46 | + base = buildBooleanGenerator(col, rng) |
| 47 | + case GenTypeJson: //missed json type ig, will check |
| 48 | + base = buildJsonGenerator(col, rng) |
| 49 | + |
| 50 | + default: |
| 51 | + panic("type not supported: " + col.Type) |
| 52 | + } |
| 53 | + |
| 54 | + // layer on DefaultWrapper if required |
| 55 | + if col.Default != "" && col.DefaultProb > 0 { |
| 56 | + // use a distinct sub-seed so the literal probability RNG |
| 57 | + // doesn’t collide with the main RNG |
| 58 | + wrapperSeed := buildBatchSeed(origSeed, batchIdx^0xdeadbeef) |
| 59 | + base = NewDefaultWrapper(base, col.DefaultProb, col.Default, wrapperSeed) |
| 60 | + } |
| 61 | + |
| 62 | + // layer on UniqueWrapper if required |
| 63 | + if col.IsUnique && !col.HasForeignKey && col.Type != "sequence" { |
| 64 | + base = NewUniqueWrapper(base, defaultUniqueCap) // or configurable capacity |
| 65 | + } |
| 66 | + |
| 67 | + // layer on FKWrapper if this column has a foreign key |
| 68 | + if col.HasForeignKey { |
| 69 | + // col.FK might be "tpcc__public__district.d_id" |
| 70 | + parts := strings.SplitN(col.FK, ".", 2) |
| 71 | + if len(parts) != 2 { |
| 72 | + panic(fmt.Sprintf("invalid FK spec %q", col.FK)) |
| 73 | + } |
| 74 | + fqTable, childCol := parts[0], parts[1] // "tpcc__public__district", "d_id" |
| 75 | + pathSegments := strings.Split(fqTable, "__") // ["tpcc","public","district"] |
| 76 | + parentTable := pathSegments[len(pathSegments)-1] // "district" |
| 77 | + |
| 78 | + // now look up in your schema map: |
| 79 | + parentMeta := schema[parentTable][0].Columns[childCol] |
| 80 | + parentGen := buildGenerator(parentMeta, batchIdx, batchSize, schema) |
| 81 | + base = NewFkWrapper(parentGen, col.Fanout) |
| 82 | + } |
| 83 | + |
| 84 | + return base |
| 85 | +} |
| 86 | + |
// getIntArg reads m[key] as an int. YAML decoding can surface a number as an
// int, int64, float64 or string, so all four are accepted: floats are
// truncated by Go's float-to-int conversion and strings are parsed as base-10
// integers.
//
// defaultVal is returned when the key is absent, when the value has any other
// type, or when a string value cannot be parsed as an integer.
func getIntArg(m map[string]interface{}, key string, defaultVal int) int {
	switch v := m[key].(type) {
	case int:
		return v
	case int64:
		return int(v)
	case float64:
		return int(v)
	case string:
		i, err := strconv.Atoi(v)
		if err != nil {
			// An unparsable string is treated like a missing value instead
			// of silently becoming 0.
			return defaultVal
		}
		return i
	default:
		return defaultVal
	}
}
| 103 | + |
// getFloatArg reads m[key] as a float64. The value may be stored as a
// float64, int, int64 or string; strings are parsed with strconv.ParseFloat.
//
// defaultVal is returned when the key is absent, when the value has any other
// type, or when a string value cannot be parsed (including out-of-range
// input).
func getFloatArg(m map[string]interface{}, key string, defaultVal float64) float64 {
	switch v := m[key].(type) {
	case float64:
		return v
	case int:
		return float64(v)
	case int64:
		return float64(v)
	case string:
		f, err := strconv.ParseFloat(v, 64)
		if err != nil {
			// An unparsable string is treated like a missing value instead
			// of silently becoming 0 (or ±Inf on a range error).
			return defaultVal
		}
		return f
	default:
		return defaultVal
	}
}
| 120 | + |
// getStringArg returns m[key] when it is present and holds a string.
// In every other case (missing key, nil map, non-string value) it returns
// defaultVal.
func getStringArg(m map[string]interface{}, key string, defaultVal string) string {
	// Asserting on the lookup result directly covers the missing-key case
	// too: an absent key yields a nil interface, which is not a string.
	str, isString := m[key].(string)
	if !isString {
		return defaultVal
	}
	return str
}
| 130 | + |
| 131 | +// ─── Factory ────────────────────────────────────────────────────────── |
| 132 | + |
// splitmix64 applies one SplitMix64 step to x: it adds the golden-ratio
// increment and then runs the xor-shift/multiply finalizer, returning a
// well-mixed 64-bit value. All arithmetic wraps modulo 2^64.
func splitmix64(x uint64) uint64 {
	const (
		increment = 0x9e3779b97f4a7c15
		mulFirst  = 0xbf58476d1ce4e5b9
		mulSecond = 0x94d049bb133111eb
	)
	z := x + increment
	z ^= z >> 30
	z *= mulFirst
	z ^= z >> 27
	z *= mulSecond
	z ^= z >> 31
	return z
}
| 140 | + |
| 141 | +// buildBatchSeed deterministically scrambles the original YAML seed |
| 142 | +// with the batch index so each batch uses a distinct RNG stream. |
| 143 | +func buildBatchSeed(origSeed, batchIdx int) int64 { |
| 144 | + key := (uint64(origSeed) << 32) | uint64(batchIdx) |
| 145 | + return int64(splitmix64(key)) |
| 146 | +} |
| 147 | + |
| 148 | +// buildSequenceGenerator creates a SequenceGen that generates sequential integers |
| 149 | +func buildSequenceGenerator(col ColumnMeta, batchIdx int, batchSize int) Generator { |
| 150 | + baseStart := getIntArg(col.Args, "start", 0) |
| 151 | + return &SequenceGen{cur: baseStart + batchIdx*batchSize} |
| 152 | +} |
| 153 | + |
| 154 | +// buildIntegerGenerator creates an IntegerGen that generates random integers |
| 155 | +func buildIntegerGenerator(col ColumnMeta, rng *rand.Rand) Generator { |
| 156 | + minArg := getIntArg(col.Args, "min", 0) |
| 157 | + maxArg := getIntArg(col.Args, "max", 0) |
| 158 | + nullPct := getFloatArg(col.Args, "null_pct", 0.0) |
| 159 | + return &IntegerGen{r: rng, min: minArg, max: maxArg, nullPct: nullPct} |
| 160 | +} |
| 161 | + |
| 162 | +// buildFloatGenerator creates a FloatGen that generates random floats |
| 163 | +func buildFloatGenerator(col ColumnMeta, rng *rand.Rand) Generator { |
| 164 | + minArg := getFloatArg(col.Args, "min", 0.0) |
| 165 | + maxArg := getFloatArg(col.Args, "max", 0.0) |
| 166 | + round := getIntArg(col.Args, "round", 2) |
| 167 | + nullPct := getFloatArg(col.Args, "null_pct", 0.0) |
| 168 | + return &FloatGen{r: rng, min: minArg, max: maxArg, round: round, nullPct: nullPct} |
| 169 | +} |
| 170 | + |
| 171 | +// buildStringGenerator creates a StringGen that generates random strings |
| 172 | +func buildStringGenerator(col ColumnMeta, rng *rand.Rand) Generator { |
| 173 | + minArg := getIntArg(col.Args, "min", 0) |
| 174 | + maxArg := getIntArg(col.Args, "max", 0) |
| 175 | + nullPct := getFloatArg(col.Args, "null_pct", 0.0) |
| 176 | + return &StringGen{r: rng, min: minArg, max: maxArg, nullPct: nullPct} |
| 177 | +} |
| 178 | + |
| 179 | +// buildTimestampGenerator creates a TimestampGen that generates random timestamps |
| 180 | +func buildTimestampGenerator(col ColumnMeta, rng *rand.Rand) Generator { |
| 181 | + // parse Python-style format → Go layout |
| 182 | + startStr := getStringArg(col.Args, "start", "2000-01-01") |
| 183 | + endStr := getStringArg(col.Args, "end", timeutil.Now().Format("2006-01-02")) |
| 184 | + pyFmt := getStringArg(col.Args, "format", "%Y-%m-%d %H:%M:%S.%f") |
| 185 | + var layout string |
| 186 | + switch pyFmt { |
| 187 | + case "%Y-%m-%d %H:%M:%S.%f": |
| 188 | + layout = "2006-01-02 15:04:05.000000" |
| 189 | + case "%Y-%m-%d": |
| 190 | + layout = "2006-01-02" |
| 191 | + default: |
| 192 | + layout = time.RFC3339Nano |
| 193 | + } |
| 194 | + st, err1 := time.Parse(layout, startStr) |
| 195 | + et, err2 := time.Parse(layout, endStr) |
| 196 | + if err1 != nil || err2 != nil { |
| 197 | + st, _ = time.Parse(time.RFC3339, startStr) |
| 198 | + et, _ = time.Parse(time.RFC3339, endStr) |
| 199 | + layout = time.RFC3339 |
| 200 | + } |
| 201 | + span := et.UnixNano() - st.UnixNano() |
| 202 | + if span <= 0 { |
| 203 | + span = 1 |
| 204 | + } |
| 205 | + nullPct := getFloatArg(col.Args, "null_pct", 0.0) |
| 206 | + return &TimestampGen{r: rng, startNS: st.UnixNano(), spanNS: span, layout: layout, nullPct: nullPct} |
| 207 | +} |
| 208 | + |
| 209 | +// buildUuidGenerator creates a UUIDGen that generates random UUIDs |
| 210 | +func buildUuidGenerator(_ ColumnMeta, rng *rand.Rand) Generator { |
| 211 | + return &UUIDGen{r: rng} |
| 212 | +} |
| 213 | + |
| 214 | +// buildBooleanGenerator creates a BoolGen that generates random booleans |
| 215 | +func buildBooleanGenerator(col ColumnMeta, rng *rand.Rand) Generator { |
| 216 | + nullPct := getFloatArg(col.Args, "null_pct", 0.0) |
| 217 | + return &BoolGen{r: rng, nullPct: nullPct} |
| 218 | +} |
| 219 | + |
| 220 | +// buildJsonGenerator creates a JsonGen that generates random JSON strings |
| 221 | +func buildJsonGenerator(col ColumnMeta, rng *rand.Rand) Generator { |
| 222 | + // JSON is just a StringGen plus a wrapper |
| 223 | + minArg := getIntArg(col.Args, "min", defaultJSONMinLen) |
| 224 | + maxArg := getIntArg(col.Args, "max", defaultJSONMaxLen) |
| 225 | + nullPct := getFloatArg(col.Args, "null_pct", 0.0) |
| 226 | + sg := &StringGen{r: rng, min: minArg, max: maxArg, nullPct: nullPct} |
| 227 | + return &JsonGen{strGen: sg} |
| 228 | +} |
0 commit comments