Skip to content

Commit 152ab3f

Browse files
craig[bot]mgartneraa-joshi
committed
147125: bench/tpcc: refactor tpcc benchmark r=mgartner a=mgartner #### bench/tpcc: add wait timeouts The parent/child process synchronization is now more robust with timeouts for some of the waits. Release note: None #### bench/tpcc: make client verify TPCC database exists Release note: None #### bench/tpcc: remove unused setupStmtOption type Release note: None #### bench/tpcc: simplify server instantiation Release note: None #### bench/tpcc: remove unnecessary benchmark closers Release note: None #### bench/tpcc: remove unused serverArgs and setupServerOption types Release note: None #### bench/tpcc: simplify workload flags Release note: None #### bench/tpcc: clean up contexts Release note: None #### bench/tpcc: use testfixtures.ReuseOrGenerate for store directory We now use `testfixtures.ReuseOrGenerate` as a mechanism for reusing a store directory between benchmark runs. The TPC-C data is automatically generated if the store directory does not exist, and an existing store directory is automatically reused if it exists. A user no longer needs to perform a 2-step process and provide `--generate-store-dir` and `--store-dir` flags. The name of the store directory includes `storage.MinimumSupportedFormatVersion` so that TPC-C data will be regenerated if the storage format changes. Release note: None #### bench/tpcc: consolidate utilities in the same file Release note: None #### bench/tpcc: rename files Release note: None #### bench/tpcc: remove non-test file Release note: None #### bench/tpcc: enable synchronization of the raft log This makes the benchmark more representative of a real-world workload. It also didn't make a meaningful improvement for the generation of TPC-C schema and data, so it's been removed from `TestInternalGenerateStoreDir` too. 
Release note: None #### bench/tpcc: clean up benchmark names Release note: None #### bench/tpcc: simplify closers Release note: None 147314: cli: improve tsdump upload time r=aa-joshi a=aa-joshi Previously, tsdump upload sub command had 10 workers which were uploading time series data to Datadog. The upload request had retry configuration with `100` max retries with max backoff of `2s`. This was resulting in high upload time for tsdump uploads. This patch updates worker count to `20` with max backoff of `100ms`. Epic: None Fixes: #146089 Release note: None ---- file size: 3.2GB upload time before changes <img width="994" alt="Screenshot 2025-05-27 at 11 04 01 AM" src="https://github.com/user-attachments/assets/e601d919-b082-4248-b409-eb2986ab2f55" /> upload time after changes (iteration 1) <img width="979" alt="Screenshot 2025-05-27 at 11 02 57 AM" src="https://github.com/user-attachments/assets/de7d3302-0362-459c-b452-b3f1f5f6f4ec" /> upload time after changes (iteration 2) <img width="991" alt="Screenshot 2025-05-27 at 11 03 20 AM" src="https://github.com/user-attachments/assets/e3a4f258-b83e-4b4e-b4ce-60c1a085bdaf" /> Co-authored-by: Marcus Gartner <[email protected]> Co-authored-by: Akshay Joshi <[email protected]>
3 parents 898b77e + c2e5095 + e089b5c commit 152ab3f

11 files changed

+283
-463
lines changed

pkg/BUILD.bazel

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -879,7 +879,6 @@ GO_TARGETS = [
879879
"//pkg/bench/hashbench:hashbench_test",
880880
"//pkg/bench/rttanalysis:rttanalysis",
881881
"//pkg/bench/rttanalysis:rttanalysis_test",
882-
"//pkg/bench/tpcc:tpcc",
883882
"//pkg/bench/tpcc:tpcc_test",
884883
"//pkg/bench:bench",
885884
"//pkg/bench:bench_test",

pkg/bench/tpcc/BUILD.bazel

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,45 +1,38 @@
1-
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
2-
3-
go_library(
4-
name = "tpcc",
5-
srcs = ["tpcc_bench.go"],
6-
importpath = "github.com/cockroachdb/cockroach/pkg/bench/tpcc",
7-
visibility = ["//visibility:public"],
8-
)
1+
load("@io_bazel_rules_go//go:def.bzl", "go_test")
92

103
go_test(
114
name = "tpcc_test",
125
srcs = [
6+
"bench_test.go",
7+
"internal_test.go",
138
"main_test.go",
14-
"subprocess_commands_test.go",
15-
"subprocess_utils_test.go",
16-
"tpcc_bench_generate_data_test.go",
17-
"tpcc_bench_options_test.go",
18-
"tpcc_bench_test.go",
9+
"utils_test.go",
1910
],
20-
embed = [":tpcc"],
11+
data = glob(["testdata/**"]),
2112
deps = [
2213
"//pkg/base",
23-
"//pkg/kv/kvserver/logstore",
2414
"//pkg/security/securityassets",
2515
"//pkg/security/securitytest",
2616
"//pkg/server",
27-
"//pkg/testutils",
17+
"//pkg/storage",
2818
"//pkg/testutils/pgurlutils",
2919
"//pkg/testutils/serverutils",
3020
"//pkg/testutils/skip",
3121
"//pkg/testutils/sqlutils",
3222
"//pkg/testutils/testcluster",
23+
"//pkg/testutils/testfixtures",
3324
"//pkg/util/envutil",
3425
"//pkg/util/leaktest",
3526
"//pkg/util/log",
3627
"//pkg/util/randutil",
28+
"//pkg/util/stop",
3729
"//pkg/util/syncutil",
3830
"//pkg/workload",
3931
"//pkg/workload/histogram",
4032
"//pkg/workload/tpcc",
4133
"//pkg/workload/workloadsql",
4234
"@com_github_cockroachdb_pebble//vfs",
35+
"@com_github_jackc_pgx_v5//:pgx",
4336
"@com_github_stretchr_testify//require",
4437
],
4538
)

pkg/bench/tpcc/bench_test.go

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
// Copyright 2022 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the CockroachDB Software License
4+
// included in the /LICENSE file.
5+
6+
package tpcc
7+
8+
import (
9+
"context"
10+
"net/url"
11+
"os/exec"
12+
"testing"
13+
14+
"github.com/cockroachdb/cockroach/pkg/base"
15+
"github.com/cockroachdb/cockroach/pkg/storage"
16+
"github.com/cockroachdb/cockroach/pkg/testutils/pgurlutils"
17+
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
18+
"github.com/cockroachdb/cockroach/pkg/testutils/testfixtures"
19+
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
20+
"github.com/cockroachdb/cockroach/pkg/util/log"
21+
"github.com/cockroachdb/cockroach/pkg/util/stop"
22+
_ "github.com/cockroachdb/cockroach/pkg/workload/tpcc"
23+
)
24+
25+
// BenchmarkTPCC runs TPC-C transactions against a single warehouse. It runs the
26+
// client side of the workload in a subprocess so that the client overhead is
27+
// not included in CPU and heap profiles.
28+
//
29+
// The benchmark will generate the schema and table data for a single warehouse,
30+
// using a reusable store directory. In future runs the cockroach server will
31+
// clone and use the store directory, rather than regenerating the schema and
32+
// data. This enables faster iteration when re-running the benchmark.
33+
func BenchmarkTPCC(b *testing.B) {
34+
defer leaktest.AfterTest(b)()
35+
defer log.Scope(b).Close(b)
36+
37+
// Reuse or generate TPCC data.
38+
storeName := "bench_tpcc_store_" + storage.MinimumSupportedFormatVersion.String()
39+
storeDir := testfixtures.ReuseOrGenerate(b, storeName, func(dir string) {
40+
c, output := generateStoreDir.withEnv(storeDirEnvVar, dir).exec()
41+
if err := c.Run(); err != nil {
42+
b.Fatalf("failed to generate store dir: %s\n%s", err, output.String())
43+
}
44+
})
45+
46+
for _, impl := range []struct{ name, flag string }{
47+
{"literal", "--literal-implementation=true"},
48+
{"optimized", "--literal-implementation=false"},
49+
} {
50+
b.Run(impl.name, func(b *testing.B) {
51+
for _, mix := range []struct{ name, flag string }{
52+
{"new_order", "--mix=newOrder=1"},
53+
{"payment", "--mix=payment=1"},
54+
{"order_status", "--mix=orderStatus=1"},
55+
{"delivery", "--mix=delivery=1"},
56+
{"stock_level", "--mix=stockLevel=1"},
57+
{"default", "--mix=newOrder=10,payment=10,orderStatus=1,delivery=1,stockLevel=1"},
58+
} {
59+
b.Run(mix.name, func(b *testing.B) {
60+
run(b, storeDir, []string{impl.flag, mix.flag})
61+
})
62+
}
63+
})
64+
65+
}
66+
}
67+
68+
func run(b *testing.B, storeDir string, workloadFlags []string) {
69+
server, pgURL := startCockroach(b, storeDir)
70+
defer server.Stopper().Stop(context.Background())
71+
c, output := startClient(b, pgURL, workloadFlags)
72+
73+
var s synchronizer
74+
s.init(c.Process.Pid)
75+
76+
// Reset the timer when the client starts running queries.
77+
if timedOut := s.waitWithTimeout(); timedOut {
78+
b.Fatalf("waiting on client timed-out:\n%s", output.String())
79+
}
80+
b.ResetTimer()
81+
s.notify(b)
82+
83+
// Stop the timer when the client stops running queries.
84+
s.wait()
85+
b.StopTimer()
86+
87+
if err := c.Wait(); err != nil {
88+
b.Fatalf("client failed: %s\n%s\n%s", err, output.String(), output.String())
89+
}
90+
}
91+
92+
func startCockroach(
93+
b testing.TB, storeDir string,
94+
) (server serverutils.TestServerInterface, pgURL string) {
95+
// Clone the store dir.
96+
td := b.TempDir()
97+
c, output := cloneEngine.
98+
withEnv(srcEngineEnvVar, storeDir).
99+
withEnv(dstEngineEnvVar, td).
100+
exec()
101+
if err := c.Run(); err != nil {
102+
b.Fatalf("failed to clone engine: %s\n%s", err, output.String())
103+
}
104+
105+
// Start the server.
106+
s := serverutils.StartServerOnly(b, base.TestServerArgs{
107+
StoreSpecs: []base.StoreSpec{{Path: td}},
108+
})
109+
110+
// Generate a PG URL.
111+
u, urlCleanup, err := pgurlutils.PGUrlE(
112+
s.AdvSQLAddr(), b.TempDir(), url.User("root"),
113+
)
114+
if err != nil {
115+
b.Fatalf("failed to create pgurl: %s", err)
116+
}
117+
u.Path = databaseName
118+
s.Stopper().AddCloser(stop.CloserFn(urlCleanup))
119+
120+
return s, u.String()
121+
}
122+
123+
func startClient(
124+
b *testing.B, pgURL string, workloadFlags []string,
125+
) (c *exec.Cmd, output *synchronizedBuffer) {
126+
c, output = runClient.
127+
withEnv(nEnvVar, b.N).
128+
withEnv(pgurlEnvVar, pgURL).
129+
exec(workloadFlags...)
130+
if err := c.Start(); err != nil {
131+
b.Fatalf("failed to start client: %s\n%s", err, output.String())
132+
}
133+
return c, output
134+
}

pkg/bench/tpcc/subprocess_commands_test.go renamed to pkg/bench/tpcc/internal_test.go

Lines changed: 37 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ import (
1313
"time"
1414

1515
"github.com/cockroachdb/cockroach/pkg/base"
16-
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/logstore"
1716
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
1817
"github.com/cockroachdb/cockroach/pkg/testutils/skip"
1918
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
@@ -22,9 +21,16 @@ import (
2221
"github.com/cockroachdb/cockroach/pkg/workload/histogram"
2322
"github.com/cockroachdb/cockroach/pkg/workload/workloadsql"
2423
"github.com/cockroachdb/pebble/vfs"
24+
"github.com/jackc/pgx/v5"
2525
"github.com/stretchr/testify/require"
2626
)
2727

28+
// This file contains "internal tests" that are run by BenchmarkTPCC in a
29+
// subprocess. They are not real tests at all, and they are skipped if the
30+
// COCKROACH_INTERNAL_TEST environment variable is not set. These tests are run
31+
// in a subprocess so that profiles collected while running the benchmark do not
32+
// include the overhead of the client code.
33+
2834
// databaseName is the name of the database used by this test.
2935
const databaseName = "tpcc"
3036

@@ -54,11 +60,16 @@ func TestInternalCloneEngine(t *testing.T) {
5460
}
5561

5662
src, ok := envutil.EnvString(srcEngineEnvVar, 0)
57-
require.True(t, ok)
63+
if !ok {
64+
t.Fatal("missing src engine env var")
65+
}
5866
dst, ok := envutil.EnvString(dstEngineEnvVar, 0)
59-
require.True(t, ok)
60-
_, err := vfs.Clone(vfs.Default, vfs.Default, src, dst)
61-
require.NoError(t, err)
67+
if !ok {
68+
t.Fatal("missing dst engine env var")
69+
}
70+
if _, err := vfs.Clone(vfs.Default, vfs.Default, src, dst); err != nil {
71+
t.Fatal(err)
72+
}
6273
}
6374

6475
func TestInternalRunClient(t *testing.T) {
@@ -67,21 +78,36 @@ func TestInternalRunClient(t *testing.T) {
6778
}
6879

6980
require.Positive(t, benchmarkN)
81+
ctx := context.Background()
7082

7183
pgURL, ok := envutil.EnvString(pgurlEnvVar, 0)
7284
require.True(t, ok)
7385
ql := makeQueryLoad(t, pgURL)
74-
defer func() { _ = ql.Close(context.Background()) }()
86+
defer func() { _ = ql.Close(ctx) }()
7587
require.True(t, ok)
7688

89+
conn, err := pgx.Connect(ctx, pgURL)
90+
if err != nil {
91+
t.Fatal(err)
92+
}
93+
defer func() { _ = conn.Close(ctx) }()
94+
95+
// Verify the TPC-C database exists.
96+
if _, err := conn.Exec(ctx, "USE "+databaseName); err != nil {
97+
t.Fatal(databaseName + " database does not exist")
98+
}
99+
77100
// Send a signal to the parent process and wait for an ack before
78101
// running queries.
79102
var s synchronizer
80103
s.init(os.Getppid())
81-
s.notifyAndWait(t)
104+
s.notify(t)
105+
if timedOut := s.waitWithTimeout(); timedOut {
106+
t.Fatalf("waiting on parent process timed-out")
107+
}
82108

83109
for i := 0; i < benchmarkN; i++ {
84-
require.NoError(t, ql.WorkerFns[0](context.Background()))
110+
require.NoError(t, ql.WorkerFns[0](ctx))
85111
}
86112

87113
// Notify the parent process that the benchmark has completed.
@@ -95,16 +121,15 @@ func TestInternalGenerateStoreDir(t *testing.T) {
95121

96122
ctx := context.Background()
97123
storeDir, ok := envutil.EnvString(storeDirEnvVar, 0)
98-
require.True(t, ok)
124+
if !ok {
125+
t.Fatal("missing store dir env var")
126+
}
99127

100128
srv, db, _ := serverutils.StartServer(t, base.TestServerArgs{
101129
StoreSpecs: []base.StoreSpec{{Path: storeDir}},
102130
})
103131
defer srv.Stopper().Stop(ctx)
104132

105-
// Make the generation faster.
106-
logstore.DisableSyncRaftLog.Override(context.Background(), &srv.SystemLayer().ClusterSettings().SV, true)
107-
108133
tdb := sqlutils.MakeSQLRunner(db)
109134
tdb.Exec(t, "CREATE DATABASE "+databaseName)
110135
tdb.Exec(t, "USE "+databaseName)

pkg/bench/tpcc/subprocess_utils_test.go

Lines changed: 0 additions & 65 deletions
This file was deleted.

pkg/bench/tpcc/tpcc_bench.go

Lines changed: 0 additions & 6 deletions
This file was deleted.

0 commit comments

Comments
 (0)