Skip to content

Commit 1c2a2fe

Browse files
committed
roachprod-microbench: add confidence interval oracle test
This test compares the result of the benchseries method with the newly added internal method. Since there is randomization in the bootstrap, a small tolerance is used to compare the results.
1 parent 24b785a commit 1c2a2fe

File tree

3 files changed

+144
-1
lines changed

3 files changed

+144
-1
lines changed

pkg/BUILD.bazel

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ ALL_TESTS = [
151151
"//pkg/cmd/reduce/reduce:reduce_test",
152152
"//pkg/cmd/release:release_test",
153153
"//pkg/cmd/roachprod-microbench/cluster:cluster_test",
154+
"//pkg/cmd/roachprod-microbench/model:model_test",
154155
"//pkg/cmd/roachprod-microbench/util:util_test",
155156
"//pkg/cmd/roachprod-microbench:roachprod-microbench_test",
156157
"//pkg/cmd/roachtest/clusterstats:clusterstats_test",
@@ -1216,6 +1217,7 @@ GO_TARGETS = [
12161217
"//pkg/cmd/roachprod-microbench/cluster:cluster_test",
12171218
"//pkg/cmd/roachprod-microbench/google:google",
12181219
"//pkg/cmd/roachprod-microbench/model:model",
1220+
"//pkg/cmd/roachprod-microbench/model:model_test",
12191221
"//pkg/cmd/roachprod-microbench/parser:parser",
12201222
"//pkg/cmd/roachprod-microbench/util:util",
12211223
"//pkg/cmd/roachprod-microbench/util:util_test",

pkg/cmd/roachprod-microbench/model/BUILD.bazel

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
load("@io_bazel_rules_go//go:def.bzl", "go_library")
1+
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
22

33
go_library(
44
name = "model",
@@ -15,3 +15,14 @@ go_library(
1515
"@org_golang_x_perf//benchmath",
1616
],
1717
)
18+
19+
go_test(
20+
name = "model_test",
21+
srcs = ["math_test.go"],
22+
embed = [":model"],
23+
deps = [
24+
"@com_github_stretchr_testify//require",
25+
"@org_golang_x_perf//benchfmt",
26+
"@org_golang_x_perf//benchseries",
27+
],
28+
)
pkg/cmd/roachprod-microbench/model/math_test.go

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
// Copyright 2025 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the CockroachDB Software License
4+
// included in the /LICENSE file.
5+
//
6+
7+
package model
8+
9+
import (
10+
"testing"
11+
12+
"github.com/stretchr/testify/require"
13+
"golang.org/x/perf/benchfmt"
14+
"golang.org/x/perf/benchseries"
15+
)
16+
17+
// TestCalculateConfidenceInterval tests the `calculateConfidenceInterval`
18+
// function, which. The test compares the result of the benchseries method with
19+
// the internal method.
20+
func TestCalculateConfidenceInterval(t *testing.T) {
21+
testCases := []struct {
22+
name string
23+
oldValues []float64
24+
newValues []float64
25+
}{
26+
{
27+
name: "identical values",
28+
oldValues: []float64{100, 100, 100, 100, 100},
29+
newValues: []float64{100, 100, 100, 100, 100},
30+
},
31+
{
32+
name: "ten percent increase",
33+
oldValues: []float64{100, 100, 100, 100, 100},
34+
newValues: []float64{110, 110, 110, 110, 110},
35+
},
36+
{
37+
name: "variable values",
38+
oldValues: []float64{95, 98, 100, 102, 105},
39+
newValues: []float64{105, 108, 110, 112, 115},
40+
},
41+
{
42+
name: "high variance",
43+
oldValues: []float64{80, 90, 100, 110, 120},
44+
newValues: []float64{90, 100, 110, 120, 130},
45+
},
46+
{
47+
name: "large dataset",
48+
oldValues: []float64{95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105},
49+
newValues: []float64{105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115},
50+
},
51+
{
52+
name: "high variance large dataset",
53+
oldValues: []float64{80, 85, 90, 95, 100, 105, 110, 115, 120, 125, 130},
54+
newValues: []float64{90, 95, 100, 105, 110, 115, 120, 125, 130, 135, 140},
55+
},
56+
{
57+
name: "small differences",
58+
oldValues: []float64{100, 100.1, 99.9, 100.2, 99.8, 100.3, 99.7, 100.4, 99.6},
59+
newValues: []float64{100.5, 100.6, 100.4, 100.7, 100.3, 100.8, 100.2, 100.9, 100.1},
60+
},
61+
{
62+
name: "negative values",
63+
oldValues: []float64{-100, -90, -80, -70, -60, -50, -40, -30},
64+
newValues: []float64{-90, -80, -70, -60, -50, -40, -30, -20},
65+
},
66+
}
67+
68+
for _, tc := range testCases {
69+
t.Run(tc.name, func(t *testing.T) {
70+
// Calculate confidence interval using the benchseries method
71+
opts := benchseries.DefaultBuilderOptions()
72+
opts.Experiment = "run-stamp"
73+
opts.Compare = "cockroach"
74+
opts.Numerator = "experiment"
75+
opts.Denominator = "baseline"
76+
builder, err := benchseries.NewBuilder(opts)
77+
require.NoError(t, err)
78+
79+
const testTimestamp = "2006-01-02T15:04:05.999Z"
80+
oldValues := make([]benchfmt.Value, len(tc.oldValues))
81+
for i, v := range tc.oldValues {
82+
oldValues[i] = benchfmt.Value{Value: v, Unit: "ns/op"}
83+
}
84+
oldResult := &benchfmt.Result{
85+
Name: benchfmt.Name("Test"),
86+
Values: oldValues,
87+
Config: []benchfmt.Config{
88+
{Key: "run-stamp", Value: []byte(testTimestamp)},
89+
{Key: "cockroach", Value: []byte("baseline")},
90+
},
91+
}
92+
newValues := make([]benchfmt.Value, len(tc.newValues))
93+
for i, v := range tc.newValues {
94+
newValues[i] = benchfmt.Value{Value: v, Unit: "ns/op"}
95+
}
96+
newResult := &benchfmt.Result{
97+
Name: benchfmt.Name("Test"),
98+
Values: newValues,
99+
Config: []benchfmt.Config{
100+
{Key: "run-stamp", Value: []byte(testTimestamp)},
101+
{Key: "experiment-commit-time", Value: []byte(testTimestamp)},
102+
{Key: "cockroach", Value: []byte("experiment")},
103+
},
104+
}
105+
builder.Add(oldResult)
106+
builder.Add(newResult)
107+
comparisons, err := builder.AllComparisonSeries(nil, benchseries.DUPE_REPLACE)
108+
if err != nil {
109+
t.Fatal(err)
110+
}
111+
var oldCI *benchseries.ComparisonSummary
112+
for _, cs := range comparisons {
113+
cs.AddSummaries(0.95, 1000)
114+
for idx := range cs.Benchmarks {
115+
oldCI = cs.Summaries[0][idx]
116+
break
117+
}
118+
}
119+
120+
// Calculate confidence interval using the internal method.
121+
newCI := calculateConfidenceInterval(tc.newValues, tc.oldValues)
122+
123+
// Assert that the two methods give the same result, within a small tolerance.
124+
// The tolerance is due to the randomization in the bootstrap.
125+
const tolerance = 0.05
126+
require.InDelta(t, oldCI.Low, newCI.Low, tolerance, "confidence interval lows differ")
127+
require.InDelta(t, oldCI.High, newCI.High, tolerance, "confidence interval highs differ")
128+
})
129+
}
130+
}

0 commit comments

Comments
 (0)