Skip to content

Commit b4ac112

Browse files
committed
microbench-ci: compiler induced variance threshold
Previously, thresholds were removed in favor of better p-value tuning. However, this did not account for reproducible compiler-induced variance. In these situations, the binaries reliably reproduce a regression even though the code change is unrelated. To account for this, we now compare the delta against a per-metric threshold: if the absolute delta is less than the threshold, we consider the metric to have no change. The thresholds were determined heuristically by running the benchmarks with and without the change and looking at the results.
1 parent 1e81f05 commit b4ac112

File tree

6 files changed

+45
-12
lines changed

6 files changed

+45
-12
lines changed

pkg/cmd/microbench-ci/benchmark.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,17 @@ type (
2828
Iterations int `yaml:"iterations"`
2929
CompareAlpha float64 `yaml:"compare_alpha"`
3030
Retries int `yaml:"retries"`
31-
Metrics []string `yaml:"metrics"`
31+
Metrics []Metric `yaml:"metrics"`
3232
}
3333
Benchmarks []Benchmark
3434
ProfileType string
3535
)
3636

37+
type Metric struct {
38+
Name string `yaml:"name"`
39+
Threshold float64 `yaml:"threshold"`
40+
}
41+
3742
const (
3843
ProfileCPU ProfileType = "cpu"
3944
ProfileMemory ProfileType = "memory"

pkg/cmd/microbench-ci/compare.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"bufio"
1010
"bytes"
1111
"fmt"
12+
"math"
1213
"os"
1314
"path"
1415

@@ -66,6 +67,22 @@ func (c *CompareResult) status(metricName string) Status {
6667
} else if cc.Delta*float64(entry.Better) < 0 {
6768
status = Regressed
6869
}
70+
71+
// Check if the metric has a delta cutoff threshold.
72+
threshold := 0.0
73+
for _, metric := range c.Benchmark.Metrics {
74+
if metric.Name == metricName {
75+
threshold = metric.Threshold
76+
break
77+
}
78+
}
79+
// If the threshold is set and the delta is less than the threshold, we
80+
// consider the metric to have no change. This accounts for compiler induced
81+
// variance, where the regression might be reproducible, but the change is
82+
// unrelated to the changes in the code.
83+
if math.Abs(cc.Delta) < threshold {
84+
status = NoChange
85+
}
6986
return status
7087
}
7188

pkg/cmd/microbench-ci/config/pull-request-suite.yml

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,10 @@ benchmarks:
99
compare_alpha: 0.025
1010
retries: 3
1111
metrics:
12-
- "sec/op"
13-
- "allocs/op"
12+
- name: "sec/op"
13+
threshold: .3
14+
- name: "allocs/op"
15+
threshold: .1
1416

1517
- display_name: Sysbench
1618
labels: ["KV", "3node", "oltp_read_only"]
@@ -22,8 +24,10 @@ benchmarks:
2224
compare_alpha: 0.025
2325
retries: 3
2426
metrics:
25-
- "sec/op"
26-
- "allocs/op"
27+
- name: "sec/op"
28+
threshold: .4
29+
- name: "allocs/op"
30+
threshold: .1
2731

2832
- display_name: Sysbench
2933
labels: ["KV", "3node", "oltp_write_only"]
@@ -35,5 +39,7 @@ benchmarks:
3539
compare_alpha: 0.025
3640
retries: 3
3741
metrics:
38-
- "sec/op"
39-
- "allocs/op"
42+
- name: "sec/op"
43+
threshold: .4
44+
- name: "allocs/op"
45+
threshold: .1

pkg/cmd/microbench-ci/report.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,8 @@ func (c *CompareResult) generateSummaryData(
5959
statusTemplateFunc func(status Status) string,
6060
) []SummaryData {
6161
summaryData := make([]SummaryData, 0, len(c.MetricMap))
62-
for _, metricName := range c.Benchmark.Metrics {
62+
for _, metric := range c.Benchmark.Metrics {
63+
metricName := metric.Name
6364
entry := c.MetricMap[metricName]
6465
if entry == nil {
6566
log.Printf("WARN: no metric found for benchmark metric %q", metricName)

pkg/cmd/microbench-ci/testdata/regression.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,10 @@ benchmarks:
1111
compare_alpha: 0.05
1212
retries: 3
1313
metrics:
14-
- "sec/op"
15-
- "allocs/op"
14+
- name: "sec/op"
15+
threshold: 0.005
16+
- name: "allocs/op"
17+
threshold: 0.002
1618

1719
----
1820

pkg/cmd/microbench-ci/testdata/summary.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,10 @@ benchmarks:
1111
compare_alpha: 0.05
1212
retries: 3
1313
metrics:
14-
- "sec/op"
15-
- "allocs/op"
14+
- name: "sec/op"
15+
threshold: 0.001
16+
- name: "allocs/op"
17+
threshold: 0.001
1618

1719
----
1820

0 commit comments

Comments
 (0)