Skip to content

Commit 19fcdb6

Browse files
craig[bot] and Pradyum-Git committed
Merge #150614
150614: workload: add SQL generation and placeholder‐rewriting utilities r=nameisbhaskar a=Pradyum-Git This patch introduces two new files to support end-to-end SQL workload extraction and rewriting: • sql_utils.go: low-level helpers for parsing statement-statistics TSV headers, classifying transactions as read vs write, extracting unaliased table names, and emitting BEGIN/COMMIT blocks in read/write SQL files • generate_sql.go: core workload generator that scans debug-log TSVs, filters by database, unquotes raw SQL, parses statements into an AST, and rewrites all “_”/“__more__” placeholders via a placeholderRewriter (handling LIMIT, INSERT, UPDATE, BETWEEN, IN, CASE, COALESCE, etc.), before grouping statements into `<db>_read.sql` and `<db>_write.sql` Key components: - `replacePlaceholders`: parses raw SQL, applies `buildPlaceholderRewriter`, and formats rewritten statements - `placeholderRewriter` visitor: comprehensive AST pass for tagging placeholders with column metadata - Specialized handlers (`handleSelectLimit`, `handleInsert`, `handleUpdateSet`, `handleRangeCondition`, `handleInOperator`, etc.) for different SQL constructs - Updated `GenerateWorkload` entrypoint: drives TSV scanning, placeholder rewriting, and file output Fixes: CRDB-51752 Release note (cli change): Adds SQL workload extraction and rewriting support, enabling placeholder‐driven data-generation workflows from CockroachDB debug logs. Co-authored-by: Pradyum <[email protected]>
2 parents b8c2405 + a2ab1ad commit 19fcdb6

File tree

11 files changed

+1377
-29
lines changed

11 files changed

+1377
-29
lines changed

pkg/workload/workload_generator/BUILD.bazel

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,20 @@ go_library(
77
"constants.go",
88
"schema_designs.go",
99
"schema_generator.go",
10+
"schema_utils.go",
11+
"sql_generator.go",
12+
"sql_utils.go",
1013
"types.go",
11-
"utils.go",
1214
"workload.go",
1315
],
1416
importpath = "github.com/cockroachdb/cockroach/pkg/workload/workload_generator",
1517
visibility = ["//visibility:public"],
1618
deps = [
1719
"//pkg/col/coldata",
20+
"//pkg/sql/parser",
21+
"//pkg/sql/parser/statements",
22+
"//pkg/sql/sem/tree",
23+
"//pkg/sql/sem/tree/treecmp",
1824
"//pkg/sql/types",
1925
"//pkg/util/bufalloc",
2026
"//pkg/util/timeutil",
@@ -31,9 +37,13 @@ go_test(
3137
srcs = [
3238
"column_generator_test.go",
3339
"schema_generator_test.go",
34-
"utils_test.go",
40+
"schema_utils_test.go",
41+
"sql_test.go",
3542
],
3643
embed = [":workload_generator"],
3744
embedsrcs = ["test_data/debug/crdb_internal.create_statements.txt"],
38-
deps = ["@com_github_stretchr_testify//assert"],
45+
deps = [
46+
"@com_github_stretchr_testify//assert",
47+
"@com_github_stretchr_testify//require",
48+
],
3949
)

pkg/workload/workload_generator/constants.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,6 @@ const (
1616
sqlDefault = "DEFAULT" // sqlDefault is a constant string used to represent default value expressions in column definitions
1717
sqlCheck = "CHECK" // sqlCheck is a constant string used to represent CHECK constraints in column definitions
1818
sqlForeignKey = "FOREIGN KEY" // sqlForeignKey is a constant string used to represent foreign key constraints in column definitions
19+
sqlFamily = "FAMILY" // sqlFamily is a constant string used to represent family definitions in column definitions
20+
sqlConstraint = "CONSTRAINT" // sqlConstraint is a constant string used to represent constraint definitions in column definitions
1921
)

pkg/workload/workload_generator/schema_designs.go

Lines changed: 36 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -31,28 +31,55 @@ type Column struct {
3131

3232
// String function converts the Column schema details into a parsable placeholder.
3333
func (c *Column) String() string {
34-
parts := []string{c.Name, c.ColType}
34+
// 1. An 8-element slice containing the information stored in the column meta is built.
35+
parts := make([]string, 8)
36+
parts[0] = c.Name
37+
parts[1] = c.ColType
38+
3539
if c.IsNullable {
36-
parts = append(parts, sqlNull)
40+
parts[2] = "NULL"
3741
} else {
38-
parts = append(parts, sqlNotNull)
42+
parts[2] = "NOT NULL"
3943
}
44+
4045
if c.IsPrimaryKey {
41-
parts = append(parts, sqlPrimaryKey)
46+
parts[3] = "PRIMARY KEY"
47+
} else {
48+
parts[3] = ""
4249
}
50+
4351
if c.Default != "" {
44-
parts = append(parts, fmt.Sprintf("%s %s", sqlDefault, c.Default))
52+
parts[4] = "DEFAULT " + c.Default
53+
} else {
54+
parts[4] = ""
4555
}
56+
4657
if c.IsUnique {
47-
parts = append(parts, sqlUnique)
58+
parts[5] = "UNIQUE"
59+
} else {
60+
parts[5] = ""
4861
}
62+
4963
if c.FKTable != "" && c.FKColumn != "" {
50-
parts = append(parts, fmt.Sprintf("%s→%s.%s", sqlForeignKey, c.FKTable, c.FKColumn))
64+
parts[6] = fmt.Sprintf("FK→%s.%s", c.FKTable, c.FKColumn)
65+
} else {
66+
parts[6] = ""
5167
}
68+
5269
if c.InlineCheck != "" {
53-
parts = append(parts, fmt.Sprintf("%s(%s)", sqlCheck, c.InlineCheck))
70+
parts[7] = fmt.Sprintf("CHECK(%s)", c.InlineCheck)
71+
} else {
72+
parts[7] = ""
5473
}
55-
return strings.Join(parts, " ")
74+
75+
// 2. Each part (empty → "''") is quoted, escaping any internal apostrophes.
76+
for i, p := range parts {
77+
escaped := strings.ReplaceAll(p, "'", "\\'")
78+
parts[i] = fmt.Sprintf("'%s'", escaped)
79+
}
80+
81+
// 3. The parts are joined with commas.
82+
return strings.Join(parts, ",")
5683
}
5784

5885
// TableSchema stores table level schema information based on input ddl.

pkg/workload/workload_generator/schema_generator.go

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ var (
8080
createTableRe = regexp.MustCompile(`(?i)^(CREATE\s+TABLE\s+)`)
8181
)
8282

83-
// GenerateDDLs extracts and processes DDL statements from a CockroachDB debug zip file.
83+
// generateDDLs extracts and processes DDL statements from a CockroachDB debug zip file.
8484
// It reads the create_statements.txt file from the zip directory, filters statements
8585
// for the specified database, and writes them to an output file. It also parses each
8686
// DDL statement into a TableSchema object and returns a map of table names to their schemas.
@@ -96,7 +96,7 @@ var (
9696
// - error: Any error encountered during processing
9797
//
9898
// TODO: The "anonymize" parameter is unused for now.
99-
func GenerateDDLs(
99+
func generateDDLs(
100100
zipDir,
101101
dbName string, anonymize bool,
102102
) (allSchemas map[string]*TableSchema, createStmts map[string]string, retErr error) {
@@ -111,14 +111,14 @@ func GenerateDDLs(
111111
}
112112
}()
113113

114-
return generateDDLs(bufio.NewReader(f), dbName, anonymize)
114+
return generateDDLFromReader(bufio.NewReader(f), dbName, anonymize)
115115
}
116116

117-
// generateDDLs takes a reader for a TSV file containing DDL statements,
117+
// generateDDLFromReader takes a reader for a TSV file containing DDL statements,
118118
// parses the statements, and returns a map of table names to their schemas
119119
// and a map of short table names to their CREATE TABLE statements.
120120
// It has been designed this way to make it unit-testable.
121-
func generateDDLs(
121+
func generateDDLFromReader(
122122
r io.Reader, dbName string, anonymize bool,
123123
) (map[string]*TableSchema, map[string]string, error) {
124124
reader := csv.NewReader(r)
@@ -255,12 +255,13 @@ func splitColumnDefsAndTableConstraints(body string) (colDefs, tableConstraints
255255
func hasConstrainingPrefix(up string) bool {
256256
// You could make this a global var if you like, to avoid reallocating the slice.
257257
prefixes := []string{
258-
"CONSTRAINT",
259-
"PRIMARY KEY",
260-
"UNIQUE",
261-
"FOREIGN KEY",
262-
"CHECK",
263-
"INDEX",
258+
sqlConstraint,
259+
sqlPrimaryKey,
260+
sqlUnique,
261+
sqlForeignKey,
262+
sqlCheck,
263+
sqlIndex,
264+
sqlFamily,
264265
}
265266
for _, p := range prefixes {
266267
if strings.HasPrefix(up, p) {

pkg/workload/workload_generator/schema_generator_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,13 +115,13 @@ var data string
115115

116116
func TestGenerateDDLsIntegration(t *testing.T) {
117117
t.Run("expect success", func(t *testing.T) {
118-
schemas, stmts, err := generateDDLs(strings.NewReader(data), testDBName, false)
118+
schemas, stmts, err := generateDDLFromReader(strings.NewReader(data), testDBName, false)
119119
assert.NoError(t, err)
120120
assert.NotEmpty(t, schemas)
121121
assert.NotEmpty(t, stmts)
122122
})
123123
t.Run("expect failure due to invalid file location", func(t *testing.T) {
124-
_, _, err := GenerateDDLs("wrong_file_location", testDBName, false)
124+
_, _, err := generateDDLs("wrong_file_location", testDBName, false)
125125
assert.NotNil(t, err)
126126
})
127127
}
File renamed without changes.

pkg/workload/workload_generator/utils_test.go renamed to pkg/workload/workload_generator/schema_utils_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
// Use of this software is governed by the CockroachDB Software License
44
// included in the /LICENSE file.
55

6-
// utils_test.go
6+
// schema_utils_test.go
77
package workload_generator
88

99
import (

0 commit comments

Comments
 (0)