bench/rttanalysis: shard TestBenchmarkExpectation to avoid timeouts

spilchen · spilchen · commit 5f61f75dabb6 · 2025-09-19T13:17:27.000Z
The TestBenchmarkExpectation benchmark has been frequently timing out after 15 minutes. This appears to be caused by slow CI machines rather than by issues with the test logic itself. To address this, the test is now split into four shards. Each shard is executed separately and receives the full 15-minute timeout budget. This should reduce the likelihood of timeout failures.

Fixes #148384

Release note: none
Epic: none
diff --git a/pkg/bench/rttanalysis/BUILD.bazel b/pkg/bench/rttanalysis/BUILD.bazel
@@ -13,6 +13,8 @@ go_library(
     visibility = ["//visibility:public"],
     deps = [
         "//pkg/base",
+        "//pkg/jobs",
+        "//pkg/jobs/jobspb",
         "//pkg/kv/kvclient/kvcoord",
         "//pkg/sql",
         "//pkg/sql/parser",
@@ -56,9 +58,9 @@ go_test(
     data = glob(["testdata/**"]),
     embed = [":rttanalysis"],
     exec_properties = {"test.Pool": "large"},
+    shard_count = 4,
     deps = [
         "//pkg/base",
-        "//pkg/jobs",
         "//pkg/jobs/jobspb",
         "//pkg/security/securityassets",
         "//pkg/security/securitytest",
@@ -68,6 +70,7 @@ go_test(
         "//pkg/testutils/pgurlutils",
         "//pkg/testutils/serverutils",
         "//pkg/testutils/testcluster",
+        "//pkg/util/envutil",
         "//pkg/util/protoutil",
         "//pkg/util/randutil",
     ],
diff --git a/pkg/bench/rttanalysis/registry.go b/pkg/bench/rttanalysis/registry.go
@@ -9,6 +9,8 @@ import (
 	"strings"
 	"testing"
 
+	"github.com/cockroachdb/cockroach/pkg/jobs"
+	"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
 	"github.com/cockroachdb/cockroach/pkg/testutils/skip"
 	"github.com/cockroachdb/errors"
 	"github.com/stretchr/testify/require"
@@ -51,15 +53,66 @@ func (r *Registry) Run(b *testing.B) {
 // benchmarks can be filtered by passing the usual test filters underneath
 // this test's name.
 //
-// It takes a long time and thus is skipped under stress, race
-// and short.
+// It takes a long time and thus is skipped under duress and short.
 func (r *Registry) RunExpectations(t *testing.T) {
-	skip.UnderStress(t)
-	skip.UnderRace(t)
+	r.RunExpectationsSharded(t, 1, 1)
+}
+
+// RunExpectationsSharded runs the registered benchmarks (or one shard of
+// them) for one iteration and validates that the number of RPCs meets the
+// expectation. If run with the --rewrite flag, it will rewrite the run
+// benchmarks. The benchmarks can be filtered by passing the usual test
+// filters underneath this test's name.
+//
+// It takes a long time and thus is skipped under duress and short.
+//
+// shard is 1-based. When totalShards > 1, group names are sorted and dealt
+// out round-robin, and only the groups landing on shard are run; a shard
+// outside [1, totalShards] matches no group. Otherwise every group is run.
+func (r *Registry) RunExpectationsSharded(t *testing.T, shard, totalShards int) {
+	defer jobs.TestingSetIDsToIgnore(map[jobspb.JobID]struct{}{3001: {}, 3002: {}})()
+	skip.UnderDuress(t)
 	skip.UnderShort(t)
-	skip.UnderDeadlock(t)
 
-	runBenchmarkExpectationTests(t, r)
+	// With totalShards <= 1 there is nothing to split; otherwise build a per-shard registry.
+	var registryToUse *Registry
+	if totalShards <= 1 {
+		// Unsharded: use the receiver's registry as-is.
+		registryToUse = r
+	} else {
+		// New registry for this shard only; numNodes and cc are copied from r, r starts empty.
+		shardRegistry := &Registry{
+			numNodes: r.numNodes,
+			cc:       r.cc,
+			r:        make(map[string][]RoundTripBenchTestCase),
+		}
+
+		// Go map iteration order is not stable, so collect the group names first
+		// and sort them below; every shard must compute the same ordering.
+		groupNames := make([]string, 0, len(r.r))
+		for groupName := range r.r {
+			groupNames = append(groupNames, groupName)
+		}
+		// Ascending exchange sort, O(n^2). NOTE(review): sort.Strings(groupNames) looks equivalent.
+		for i := 0; i < len(groupNames); i++ {
+			for j := i + 1; j < len(groupNames); j++ {
+				if groupNames[i] > groupNames[j] {
+					groupNames[i], groupNames[j] = groupNames[j], groupNames[i]
+				}
+			}
+		}
+
+		// The group at sorted index i belongs to shard (i % totalShards) + 1; keep only ours.
+		for i, groupName := range groupNames {
+			assignedShard := (i % totalShards) + 1
+			if assignedShard == shard {
+				shardRegistry.r[groupName] = r.r[groupName]
+			}
+		}
+		registryToUse = shardRegistry
+	}
+
+	runBenchmarkExpectationTests(t, registryToUse)
 }
 
 // Register registers a set of test cases to a given benchmark name. It is
diff --git a/pkg/bench/rttanalysis/validate_benchmark_data_test.go b/pkg/bench/rttanalysis/validate_benchmark_data_test.go
@@ -6,13 +6,44 @@
 package rttanalysis
 
 import (
+	"strconv"
 	"testing"
 
-	"github.com/cockroachdb/cockroach/pkg/jobs"
-	"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
+	"github.com/cockroachdb/cockroach/pkg/util/envutil"
 )
 
-func TestBenchmarkExpectation(t *testing.T) {
-	defer jobs.TestingSetIDsToIgnore(map[jobspb.JobID]struct{}{3001: {}, 3002: {}})()
-	reg.RunExpectations(t)
+// shardCount is the number of shards; each needs its own TestBenchmarkExpectationShardN.
+// NOTE: If you change it, you must also update shard_count in BUILD.bazel to match.
+const shardCount = 4
+
+// At package init, panic if TEST_TOTAL_SHARDS is set to an integer other than shardCount.
+var _ = func() int {
+	totalShardsStr, found := envutil.ExternalEnvString("TEST_TOTAL_SHARDS", 1)
+	if totalShardsStr == "" || !found {
+		return 0 // variable empty or not found: nothing to validate
+	}
+	totalShards, err := strconv.Atoi(totalShardsStr)
+	if err != nil {
+		return 0 // NOTE(review): a non-integer value is silently accepted; confirm this is intended
+	}
+	if totalShards != shardCount {
+		panic("shardCount mismatch: update shard_count in pkg/bench/rttanalysis/BUILD.bazel to match shardCount constant")
+	}
+	return 0
+}()
+
+func TestBenchmarkExpectationShard1(t *testing.T) {
+	reg.RunExpectationsSharded(t, 1, shardCount) // first of shardCount shards (shard numbers are 1-based)
+}
+
+func TestBenchmarkExpectationShard2(t *testing.T) {
+	reg.RunExpectationsSharded(t, 2, shardCount) // second of shardCount shards
+}
+
+func TestBenchmarkExpectationShard3(t *testing.T) {
+	reg.RunExpectationsSharded(t, 3, shardCount) // third of shardCount shards
+}
+
+func TestBenchmarkExpectationShard4(t *testing.T) {
+	reg.RunExpectationsSharded(t, 4, shardCount) // last shard; a larger shardCount needs another test like this one
 }