Skip to content

Commit 6996186

Browse files
committed
microbench-ci: per metric retry
Previously, the retry confirmation logic took all metrics of the current benchmark into account. This is problematic because we could possibly trigger a regression if 3 different metrics regressed on 3 different retries for the same benchmark. This logic has been updated to ensure the same metric has to regress on all retries for the benchmark to be considered a regression. Epic: None Release note: None
1 parent e3eb5f0 commit 6996186

File tree

3 files changed

+253
-25
lines changed

3 files changed

+253
-25
lines changed

pkg/cmd/microbench-ci/run.go

Lines changed: 54 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,12 @@ func (b *Benchmark) run() error {
7777
return err
7878
}
7979
}
80-
status := NoChange
80+
81+
status := make(map[string]Status)
82+
for _, metric := range b.Metrics {
83+
status[metric.Name] = NoChange
84+
}
85+
8186
tries := 0
8287
for retries := 0; retries < b.Retries; retries++ {
8388
log.Printf("Running benchmark %q for %d iterations (attempt %d)", b.Name, b.Count, retries+1)
@@ -98,35 +103,60 @@ func (b *Benchmark) run() error {
98103
if err != nil {
99104
return err
100105
}
101-
currentStatus := compareResult.top()
102-
// If the benchmark shows no change, we can stop running additional retry
103-
// iterations. Retries are run once a benchmark has regressed or improved,
104-
// on the first try, to ensure that the change is not a fluke.
105-
if currentStatus == NoChange {
106-
status = currentStatus
107-
break
106+
107+
// Loop through all metrics and check if they have a consistent status.
108+
// As long as one metric maintains a consistent status (other than
109+
// NoChange), we continue running additional retry iterations.
110+
keepRunning := false
111+
for _, metric := range b.Metrics {
112+
currentStatus := compareResult.status(metric.Name)
113+
// If this is the first run, we set the status to the current
114+
// status. Only continue running if a metric has regressed or
115+
// improved.
116+
if retries == 0 {
117+
status[metric.Name] = currentStatus
118+
if currentStatus != NoChange {
119+
keepRunning = true
120+
}
121+
continue
122+
}
123+
124+
// On a retry, if any metric continues having a consistent state (other
125+
// than NoChange), we run additional retries.
126+
if status[metric.Name] != NoChange {
127+
if status[metric.Name] == currentStatus {
128+
keepRunning = true
129+
} else {
130+
// The metric previously indicated a change, but now
131+
// indicates a change in the opposite direction. Reset the
132+
// status to NoChange to indicate that the benchmark did not
133+
// change.
134+
status[metric.Name] = NoChange
135+
}
136+
}
108137
}
109138

110-
// If this is the first run, we set the status to the current status.
111-
// Otherwise, we check if the regression or improvement is persistent and
112-
// continue running until all retries are exhausted. If the status flips
113-
// from a regression to an improvement or vice versa, we set the status to
114-
// NoChange and stop running, as the results are inconclusive.
115-
if status == NoChange {
116-
status = currentStatus
117-
} else if status != currentStatus {
118-
// If the benchmark shows a different change, the results are inconclusive.
119-
status = NoChange
139+
if !keepRunning {
140+
// Reset all metrics to NoChange to indicate that the benchmark did
141+
// not change, and we can stop running additional retries.
142+
for _, metric := range b.Metrics {
143+
status[metric.Name] = NoChange
144+
}
120145
break
121146
}
122147
}
123148

124-
// Write change marker file if the benchmark changed.
125-
if status != NoChange {
126-
marker := strings.ToUpper(status.String())
127-
err := os.WriteFile(path.Join(suite.artifactsDir(New), b.sanitizedName()+"."+marker), nil, 0644)
128-
if err != nil {
129-
return err
149+
// Write change marker file (per metric) if the benchmark changed. It's
150+
// possible that both a regression and an improvement occurred, for
151+
// different metrics of the same benchmark, in which case both markers will
152+
// be written.
153+
for _, metric := range b.Metrics {
154+
if status[metric.Name] != NoChange {
155+
marker := strings.ToUpper(status[metric.Name].String())
156+
err := os.WriteFile(path.Join(suite.artifactsDir(New), b.sanitizedName()+"."+marker), nil, 0644)
157+
if err != nil {
158+
return err
159+
}
130160
}
131161
}
132162

pkg/cmd/microbench-ci/testdata/regression.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Check if summary is generated correctly
1+
# Check if a regression is detected correctly
22
config old=abcdef123 new=qwerty456
33
benchmarks:
44
- display_name: Sysbench
Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
# B/op has a regression here but it's not a tracked metric
2+
config old=abcdef123 new=qwerty456
3+
benchmarks:
4+
- display_name: Sysbench
5+
labels: ["KV", "3node", "oltp_write_only"]
6+
name: "BenchmarkSysbench/KV/3node/oltp_write_only"
7+
package: "pkg/sql/tests"
8+
runner_group: 1
9+
count: 15
10+
iterations: 3000
11+
compare_alpha: 0.025
12+
retries: 3
13+
metrics:
14+
- name: "sec/op"
15+
threshold: .4
16+
- name: "allocs/op"
17+
threshold: .1
18+
19+
----
20+
21+
logs name=BenchmarkSysbench/KV/3node/oltp_write_only path=/abcdef123/bin/pkg_sql_tests
22+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 2010583 ns/op 0 errs/op 717706 B/op 3919 allocs/op
23+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 2005436 ns/op 0 errs/op 716050 B/op 3911 allocs/op
24+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1981209 ns/op 0 errs/op 715962 B/op 3909 allocs/op
25+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1987300 ns/op 0 errs/op 715946 B/op 3910 allocs/op
26+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1974348 ns/op 0 errs/op 716478 B/op 3906 allocs/op
27+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1983581 ns/op 0 errs/op 717322 B/op 3910 allocs/op
28+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1994232 ns/op 0 errs/op 716922 B/op 3908 allocs/op
29+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1983097 ns/op 0 errs/op 715641 B/op 3906 allocs/op
30+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1982270 ns/op 0 errs/op 716610 B/op 3908 allocs/op
31+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1980803 ns/op 0 errs/op 715987 B/op 3909 allocs/op
32+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1992155 ns/op 0 errs/op 717364 B/op 3919 allocs/op
33+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1972700 ns/op 0 errs/op 717226 B/op 3918 allocs/op
34+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1998160 ns/op 0 errs/op 716602 B/op 3914 allocs/op
35+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1996845 ns/op 0 errs/op 717028 B/op 3920 allocs/op
36+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1968723 ns/op 0 errs/op 717026 B/op 3912 allocs/op
37+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1981820 ns/op 0 errs/op 717480 B/op 3912 allocs/op
38+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1969319 ns/op 0 errs/op 716572 B/op 3905 allocs/op
39+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1977508 ns/op 0 errs/op 716006 B/op 3912 allocs/op
40+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1964868 ns/op 0 errs/op 716440 B/op 3909 allocs/op
41+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1980378 ns/op 0 errs/op 716154 B/op 3910 allocs/op
42+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1989629 ns/op 0 errs/op 716370 B/op 3913 allocs/op
43+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1983806 ns/op 0 errs/op 716039 B/op 3914 allocs/op
44+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1979153 ns/op 0 errs/op 717207 B/op 3914 allocs/op
45+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1986316 ns/op 0 errs/op 716790 B/op 3915 allocs/op
46+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1986306 ns/op 0 errs/op 716253 B/op 3910 allocs/op
47+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1985084 ns/op 0 errs/op 715918 B/op 3909 allocs/op
48+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1979732 ns/op 0 errs/op 716557 B/op 3912 allocs/op
49+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1977564 ns/op 0 errs/op 715952 B/op 3909 allocs/op
50+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1961340 ns/op 0 errs/op 716488 B/op 3908 allocs/op
51+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1976378 ns/op 0 errs/op 716275 B/op 3913 allocs/op
52+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 2011667 ns/op 0 errs/op 717283 B/op 3916 allocs/op
53+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1990182 ns/op 0 errs/op 716692 B/op 3911 allocs/op
54+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1972881 ns/op 0 errs/op 716424 B/op 3910 allocs/op
55+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1978773 ns/op 0 errs/op 715860 B/op 3912 allocs/op
56+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 2002254 ns/op 0 errs/op 716303 B/op 3917 allocs/op
57+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1963199 ns/op 0 errs/op 716159 B/op 3911 allocs/op
58+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1999707 ns/op 0 errs/op 716553 B/op 3906 allocs/op
59+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1990646 ns/op 0 errs/op 716526 B/op 3909 allocs/op
60+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1972422 ns/op 0 errs/op 715956 B/op 3909 allocs/op
61+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1978369 ns/op 0 errs/op 716951 B/op 3912 allocs/op
62+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1991775 ns/op 0 errs/op 716621 B/op 3905 allocs/op
63+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1987556 ns/op 0 errs/op 716506 B/op 3910 allocs/op
64+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1990526 ns/op 0 errs/op 717152 B/op 3915 allocs/op
65+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1991222 ns/op 0 errs/op 716889 B/op 3907 allocs/op
66+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1980741 ns/op 0 errs/op 717213 B/op 3911 allocs/op
67+
68+
----
69+
70+
logs name=BenchmarkSysbench/KV/3node/oltp_write_only path=/qwerty456/bin/pkg_sql_tests
71+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1986895 ns/op 0 errs/op 724798 B/op 3909 allocs/op
72+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1995839 ns/op 0 errs/op 724829 B/op 3912 allocs/op
73+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1987508 ns/op 0 errs/op 724196 B/op 3905 allocs/op
74+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1984248 ns/op 0 errs/op 724730 B/op 3912 allocs/op
75+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1991809 ns/op 0 errs/op 724011 B/op 3906 allocs/op
76+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1984626 ns/op 0 errs/op 724361 B/op 3912 allocs/op
77+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1993185 ns/op 0 errs/op 723414 B/op 3903 allocs/op
78+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1980299 ns/op 0 errs/op 724307 B/op 3908 allocs/op
79+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1993854 ns/op 0 errs/op 724590 B/op 3912 allocs/op
80+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1995498 ns/op 0 errs/op 723605 B/op 3910 allocs/op
81+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1977474 ns/op 0 errs/op 723423 B/op 3909 allocs/op
82+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1985457 ns/op 0 errs/op 725141 B/op 3911 allocs/op
83+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1988712 ns/op 0 errs/op 724529 B/op 3913 allocs/op
84+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1990460 ns/op 0 errs/op 724658 B/op 3910 allocs/op
85+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1978531 ns/op 0 errs/op 723692 B/op 3903 allocs/op
86+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1982988 ns/op 0 errs/op 724105 B/op 3907 allocs/op
87+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1979725 ns/op 0 errs/op 724580 B/op 3909 allocs/op
88+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1969740 ns/op 0 errs/op 725165 B/op 3907 allocs/op
89+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1978476 ns/op 0 errs/op 723907 B/op 3907 allocs/op
90+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1986046 ns/op 0 errs/op 724690 B/op 3908 allocs/op
91+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1983005 ns/op 0 errs/op 723630 B/op 3903 allocs/op
92+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1985877 ns/op 0 errs/op 725046 B/op 3913 allocs/op
93+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1985969 ns/op 0 errs/op 724280 B/op 3908 allocs/op
94+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1990456 ns/op 0 errs/op 724112 B/op 3914 allocs/op
95+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1974094 ns/op 0 errs/op 724271 B/op 3911 allocs/op
96+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1970782 ns/op 0 errs/op 724034 B/op 3913 allocs/op
97+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1978738 ns/op 0 errs/op 724649 B/op 3910 allocs/op
98+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1979672 ns/op 0 errs/op 724220 B/op 3913 allocs/op
99+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1984406 ns/op 0 errs/op 724720 B/op 3906 allocs/op
100+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1978842 ns/op 0 errs/op 725047 B/op 3917 allocs/op
101+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1968689 ns/op 0 errs/op 724048 B/op 3905 allocs/op
102+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1993203 ns/op 0 errs/op 724026 B/op 3908 allocs/op
103+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1999129 ns/op 0 errs/op 725372 B/op 3911 allocs/op
104+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1979229 ns/op 0 errs/op 724394 B/op 3906 allocs/op
105+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1994094 ns/op 0 errs/op 724728 B/op 3910 allocs/op
106+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1978132 ns/op 0 errs/op 725382 B/op 3906 allocs/op
107+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1992907 ns/op 0 errs/op 724435 B/op 3912 allocs/op
108+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1987388 ns/op 0 errs/op 723936 B/op 3902 allocs/op
109+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1985245 ns/op 0 errs/op 723979 B/op 3908 allocs/op
110+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1972526 ns/op 0 errs/op 725111 B/op 3908 allocs/op
111+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1985051 ns/op 0 errs/op 724535 B/op 3909 allocs/op
112+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1969862 ns/op 0 errs/op 724386 B/op 3905 allocs/op
113+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1969139 ns/op 0 errs/op 724111 B/op 3905 allocs/op
114+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1986218 ns/op 0 errs/op 724252 B/op 3909 allocs/op
115+
BenchmarkSysbench/KV/3node/oltp_write_only-4 3000 1984661 ns/op 0 errs/op 724847 B/op 3909 allocs/op
116+
117+
----
118+
119+
run group=1
120+
----
121+
----
122+
123+
<details><summary><strong>⚪ Sysbench</strong> [KV, 3node, oltp_write_only]</summary>
124+
125+
| Metric | Old Commit | New Commit | Delta | Note |
126+
|-----------------------------|----------------|----------------|------------|--------------|
127+
| ⚪ **sec/op** | 1.984m ±1% | 1.988m ±0% | ~ | p=0.806 n=15 |
128+
| ⚪ **allocs/op** | 3.910k ±0% | 3.910k ±0% | ~ | p=0.251 n=15 |
129+
130+
<details><summary>Reproduce</summary>
131+
132+
**benchdiff binaries**:
133+
```shell
134+
mkdir -p benchdiff/qwerty4/bin/1058449141
135+
gcloud storage cp gs://cockroach-microbench-ci/builds/qwerty456/bin/pkg_sql_tests benchdiff/qwerty4/bin/1058449141/cockroachdb_cockroach_pkg_sql_tests
136+
chmod +x benchdiff/qwerty4/bin/1058449141/cockroachdb_cockroach_pkg_sql_tests
137+
mkdir -p benchdiff/abcdef1/bin/1058449141
138+
gcloud storage cp gs://cockroach-microbench-ci/builds/abcdef123/bin/pkg_sql_tests benchdiff/abcdef1/bin/1058449141/cockroachdb_cockroach_pkg_sql_tests
139+
chmod +x benchdiff/abcdef1/bin/1058449141/cockroachdb_cockroach_pkg_sql_tests
140+
```
141+
**benchdiff command**:
142+
```shell
143+
benchdiff --run=^BenchmarkSysbench/KV/3node/oltp_write_only$ --old=abcdef1 --new=qwerty4 ./pkg/sql/tests
144+
```
145+
146+
</details>
147+
148+
</details>
149+
150+
<details><summary>Artifacts</summary>
151+
152+
**download**:
153+
```shell
154+
mkdir -p new
155+
gcloud storage cp gs://cockroach-microbench-ci/artifacts/qwerty456//\* new/
156+
mkdir -p old
157+
gcloud storage cp gs://cockroach-microbench-ci/artifacts/abcdef123//\* old/
158+
```
159+
160+
</details>
161+
162+
<details><summary>Legend</summary>
163+
164+
- ⚪ **Neutral:** No significant performance change.
165+
- 🔴 **Regression:** Likely performance regression, requiring investigation.
166+
- 🟢 **Improvement:** Likely performance gain.
167+
168+
</details>
169+
170+
No regressions detected!
171+
172+
_built with commit: [qwerty456](https://github.com/cockroachdb/cockroach/commit/qwerty456)_
173+
----
174+
----
175+
176+
tree
177+
----
178+
----
179+
180+
/abcdef123
181+
/abcdef123/artifacts
182+
/abcdef123/artifacts/cleaned_Sysbench_KV_3node_oltp_write_only.log
183+
/abcdef123/artifacts/cpu_Sysbench_KV_3node_oltp_write_only_merged.prof
184+
/abcdef123/artifacts/memory_Sysbench_KV_3node_oltp_write_only_merged.prof
185+
/abcdef123/artifacts/mutex_Sysbench_KV_3node_oltp_write_only_merged.prof
186+
/abcdef123/artifacts/raw_Sysbench_KV_3node_oltp_write_only.log
187+
/github-summary.md
188+
/qwerty456
189+
/qwerty456/artifacts
190+
/qwerty456/artifacts/cleaned_Sysbench_KV_3node_oltp_write_only.log
191+
/qwerty456/artifacts/cpu_Sysbench_KV_3node_oltp_write_only_merged.prof
192+
/qwerty456/artifacts/memory_Sysbench_KV_3node_oltp_write_only_merged.prof
193+
/qwerty456/artifacts/mutex_Sysbench_KV_3node_oltp_write_only_merged.prof
194+
/qwerty456/artifacts/raw_Sysbench_KV_3node_oltp_write_only.log
195+
/suite.yml
196+
/summary.json
197+
----
198+
----

0 commit comments

Comments
 (0)