Skip to content

Commit 3647a54

Browse files
craig[bot]herkolategan
andcommitted
Merge #141990
141990: roachprod-microbench: confidence interval calculation r=srosenberg a=herkolategan This PR replaces the existing `benchseries` implementation with a new metrics model for populating the microbenchmark dashboards. The previous approach was difficult to maintain, prone to panics due to missing metrics, and required excessive boilerplate code to format the metrics for `benchseries`. The updated implementation now uses an internal confidence interval calculation integrated within the metrics builder, eliminating the need for additional computation steps through `benchseries`. Additionally, this change introduces the bootstrap confidence interval calculation for two sets of microbenchmark results. This method allows for better estimation of performance uncertainty by resampling benchmark results. It provides a distribution of outcomes that accounts for performance variability and system fluctuations without assuming normality. Epic: None Release note: None Co-authored-by: Herko Lategan <[email protected]>
2 parents 8d3d95c + 1c2a2fe commit 3647a54

File tree

16 files changed

+399
-216
lines changed

16 files changed

+399
-216
lines changed

pkg/BUILD.bazel

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ ALL_TESTS = [
151151
"//pkg/cmd/reduce/reduce:reduce_test",
152152
"//pkg/cmd/release:release_test",
153153
"//pkg/cmd/roachprod-microbench/cluster:cluster_test",
154+
"//pkg/cmd/roachprod-microbench/model:model_test",
154155
"//pkg/cmd/roachprod-microbench/util:util_test",
155156
"//pkg/cmd/roachprod-microbench:roachprod-microbench_test",
156157
"//pkg/cmd/roachtest/clusterstats:clusterstats_test",
@@ -1218,6 +1219,7 @@ GO_TARGETS = [
12181219
"//pkg/cmd/roachprod-microbench/cluster:cluster_test",
12191220
"//pkg/cmd/roachprod-microbench/google:google",
12201221
"//pkg/cmd/roachprod-microbench/model:model",
1222+
"//pkg/cmd/roachprod-microbench/model:model_test",
12211223
"//pkg/cmd/roachprod-microbench/parser:parser",
12221224
"//pkg/cmd/roachprod-microbench/util:util",
12231225
"//pkg/cmd/roachprod-microbench/util:util_test",

pkg/cmd/roachprod-microbench/compare.go

Lines changed: 38 additions & 144 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
package main
77

88
import (
9-
"bytes"
109
"context"
1110
"fmt"
1211
"log"
@@ -350,159 +349,54 @@ func (c *compare) compareUsingThreshold(comparisonResultsMap model.ComparisonRes
350349
return nil
351350
}
352351

353-
func (c *compare) createBenchSeries() ([]*benchseries.ComparisonSeries, error) {
354-
opts := benchseries.DefaultBuilderOptions()
355-
opts.Experiment = "run-time"
356-
opts.Compare = "cockroach"
357-
builder, err := benchseries.NewBuilder(opts)
358-
if err != nil {
359-
return nil, err
360-
}
361-
362-
var benchBuf bytes.Buffer
363-
readFileFn := func(filePath string, required bool) error {
364-
data, err := os.ReadFile(filePath)
365-
if err != nil {
366-
if !required && oserror.IsNotExist(err) {
367-
return nil
368-
}
369-
return errors.Wrapf(err, "failed to read file %s", filePath)
370-
}
371-
benchBuf.Write(data)
372-
benchBuf.WriteString("\n")
373-
return nil
374-
}
375-
376-
for k, v := range c.influxConfig.metadata {
377-
benchBuf.WriteString(fmt.Sprintf("%s: %s\n", k, v))
378-
}
379-
380-
logPaths := map[string]string{
381-
"experiment": c.experimentDir,
382-
"baseline": c.baselineDir,
383-
}
384-
for logType, dir := range logPaths {
385-
benchBuf.WriteString(fmt.Sprintf("cockroach: %s\n", logType))
386-
logPath := filepath.Join(dir, "metadata.log")
387-
if err = readFileFn(logPath, true); err != nil {
388-
return nil, err
389-
}
390-
for _, pkg := range c.packages {
391-
benchBuf.WriteString(fmt.Sprintf("pkg: %s\n", pkg))
392-
logPath = filepath.Join(dir, getReportLogName(reportLogName, pkg))
393-
if err = readFileFn(logPath, false); err != nil {
394-
return nil, err
395-
}
396-
}
397-
}
398-
399-
benchReader := benchfmt.NewReader(bytes.NewReader(benchBuf.Bytes()), "buffer")
400-
recordsMap := make(map[string][]*benchfmt.Result)
401-
seen := make(map[string]map[string]struct{})
402-
for benchReader.Scan() {
403-
switch rec := benchReader.Result().(type) {
404-
case *benchfmt.SyntaxError:
405-
// In case the benchmark log is corrupted or contains a syntax error, we
406-
// want to return an error to the caller.
407-
return nil, fmt.Errorf("syntax error: %v", rec)
408-
case *benchfmt.Result:
409-
var cmp, pkg string
410-
for _, config := range rec.Config {
411-
if config.Key == "pkg" {
412-
pkg = string(config.Value)
413-
}
414-
if config.Key == opts.Compare {
415-
cmp = string(config.Value)
416-
}
417-
}
418-
key := pkg + util.PackageSeparator + string(rec.Name)
419-
// Update the name to include the package name. This is a workaround for
420-
// `benchseries`, that currently does not support package names.
421-
rec.Name = benchfmt.Name(key)
422-
recordsMap[key] = append(recordsMap[key], rec.Clone())
423-
// Determine if we've seen this package/benchmark combination for both
424-
// the baseline and experiment run.
425-
if _, ok := seen[key]; !ok {
426-
seen[key] = make(map[string]struct{})
427-
}
428-
seen[key][cmp] = struct{}{}
429-
}
430-
}
431-
432-
// Add only the benchmarks that have been seen in both the baseline and
433-
// experiment run.
434-
for key, records := range recordsMap {
435-
if len(seen[key]) != 2 {
436-
continue
437-
}
438-
for _, rec := range records {
439-
builder.Add(rec)
440-
}
441-
}
442-
443-
comparisons, err := builder.AllComparisonSeries(nil, benchseries.DUPE_REPLACE)
444-
if err != nil {
445-
return nil, errors.Wrap(err, "failed to create comparison series")
446-
}
447-
return comparisons, nil
448-
}
449-
450-
func (c *compare) pushToInfluxDB() error {
352+
func (c *compare) pushToInfluxDB(comparisonResultsMap model.ComparisonResultsMap) error {
451353
client := influxdb2.NewClient(c.influxConfig.host, c.influxConfig.token)
452354
defer client.Close()
453355
writeAPI := client.WriteAPI("cockroach", "microbench")
454356
errorChan := writeAPI.Errors()
455357

456-
comparisons, err := c.createBenchSeries()
358+
metadata, err := loadMetadata(filepath.Join(c.experimentDir, "metadata.log"))
457359
if err != nil {
458360
return err
459361
}
362+
experimentTime := metadata.ExperimentCommitTime
363+
normalizedDateString, err := benchseries.NormalizeDateString(experimentTime)
364+
if err != nil {
365+
return errors.Wrap(err, "error normalizing experiment commit date")
366+
}
367+
ts, err := benchseries.ParseNormalizedDateString(normalizedDateString)
368+
if err != nil {
369+
return errors.Wrap(err, "error parsing experiment commit date")
370+
}
460371

461-
for _, cs := range comparisons {
462-
cs.AddSummaries(0.95, 1000)
463-
residues := make(map[string]string)
464-
for _, r := range cs.Residues {
465-
residues[r.S] = r.Slice[0]
466-
}
467-
468-
for idx, benchmarkName := range cs.Benchmarks {
469-
if len(cs.Summaries) == 0 {
470-
log.Printf("WARN: no summaries for %s", benchmarkName)
471-
continue
472-
}
473-
sum := cs.Summaries[0][idx]
474-
if !sum.Defined() {
475-
continue
476-
}
477-
478-
experimentTime := cs.Series[0]
479-
ts, err := benchseries.ParseNormalizedDateString(experimentTime)
480-
if err != nil {
481-
return errors.Wrap(err, "error parsing experiment commit date")
482-
}
483-
fields := map[string]interface{}{
484-
"low": sum.Low,
485-
"center": sum.Center,
486-
"high": sum.High,
487-
"upload-time": residues["upload-time"],
488-
"baseline-commit": cs.HashPairs[experimentTime].DenHash,
489-
"experiment-commit": cs.HashPairs[experimentTime].NumHash,
490-
"benchmarks-commit": residues["benchmarks-commit"],
491-
}
492-
pkg := strings.Split(benchmarkName, util.PackageSeparator)[0]
493-
benchmarkName = strings.Split(benchmarkName, util.PackageSeparator)[1]
494-
tags := map[string]string{
495-
"name": benchmarkName,
496-
"unit": cs.Unit,
497-
"pkg": pkg,
498-
"repository": "cockroach",
499-
"branch": residues["branch"],
500-
"goarch": residues["goarch"],
501-
"goos": residues["goos"],
502-
"machine-type": residues["machine-type"],
372+
for _, group := range comparisonResultsMap {
373+
for _, result := range group {
374+
for _, detail := range result.Comparisons {
375+
ci := detail.Comparison.ConfidenceInterval
376+
fields := map[string]interface{}{
377+
"low": ci.Low,
378+
"center": ci.Center,
379+
"high": ci.High,
380+
"upload-time": metadata.RunTime,
381+
"baseline-commit": metadata.BaselineCommit,
382+
"experiment-commit": metadata.ExperimentCommit,
383+
"benchmarks-commit": metadata.BenchmarksCommit,
384+
}
385+
pkg := strings.Split(detail.BenchmarkName, util.PackageSeparator)[0]
386+
benchmarkName := strings.Split(detail.BenchmarkName, util.PackageSeparator)[1]
387+
tags := map[string]string{
388+
"name": benchmarkName,
389+
"unit": result.Metric.Unit,
390+
"pkg": pkg,
391+
"repository": "cockroach",
392+
"branch": "master",
393+
"goarch": metadata.GoArch,
394+
"goos": metadata.GoOS,
395+
"machine-type": metadata.Machine,
396+
}
397+
p := influxdb2.NewPoint("benchmark-result", tags, fields, ts)
398+
writeAPI.WritePoint(p)
503399
}
504-
p := influxdb2.NewPoint("benchmark-result", tags, fields, ts)
505-
writeAPI.WritePoint(p)
506400
}
507401
}
508402
done := make(chan struct{})

pkg/cmd/roachprod-microbench/compare_test.go

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,15 +38,19 @@ func metricsToText(metricMaps map[string]*model.MetricMap) string {
3838

3939
for _, entryKey := range entryKeys {
4040
entry := metric.BenchmarkEntries[entryKey]
41-
centers := make([]float64, len(entry.Summaries))
41+
centers := make([]string, len(entry.Summaries))
4242
summaryKeys := maps.Keys(entry.Summaries)
4343
sort.Strings(summaryKeys)
4444
for i, key := range summaryKeys {
45-
centers[i] = entry.Summaries[key].Center
45+
centers[i] = fmt.Sprintf("%4f", entry.Summaries[key].Center)
4646
}
4747
comparison := metric.ComputeComparison(entryKey, "baseline", "experiment")
48-
fmt.Fprintf(buf, "BenchmarkEntry %s %s %v %s\n",
49-
entryKey, comparison.FormattedDelta, centers, comparison.Distribution.String(),
48+
confidenceStr := fmt.Sprintf("%4f %4f %4f",
49+
comparison.ConfidenceInterval.Low,
50+
comparison.ConfidenceInterval.Center,
51+
comparison.ConfidenceInterval.High)
52+
fmt.Fprintf(buf, "BenchmarkEntry %s %s %v %s %v\n",
53+
entryKey, comparison.FormattedDelta, centers, comparison.Distribution.String(), confidenceStr,
5054
)
5155
}
5256
}
@@ -55,7 +59,7 @@ func metricsToText(metricMaps map[string]*model.MetricMap) string {
5559
}
5660

5761
func TestCompareBenchmarks(t *testing.T) {
58-
ddFilePath := path.Join(datapathutils.TestDataPath(t), "compare")
62+
ddFilePath := path.Join(datapathutils.TestDataPath(t), "compare.txt")
5963
datadriven.RunTest(t, ddFilePath, func(t *testing.T, d *datadriven.TestData) string {
6064
if d.Cmd != "compare" {
6165
d.Fatalf(t, "unknown command %s", d.Cmd)

pkg/cmd/roachprod-microbench/executor_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ import (
1616
)
1717

1818
func TestExtractBenchmarkResultsDataDriven(t *testing.T) {
19-
ddFilePath := path.Join(datapathutils.TestDataPath(t), "benchmark")
19+
ddFilePath := path.Join(datapathutils.TestDataPath(t), "benchmark.txt")
2020
datadriven.RunTest(t, ddFilePath, func(t *testing.T, d *datadriven.TestData) string {
2121
if d.Cmd != "benchmark" {
2222
d.Fatalf(t, "unknown command %s", d.Cmd)

pkg/cmd/roachprod-microbench/export_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ func TestExport(t *testing.T) {
2323
testLabels := make(map[string]string)
2424
testLabels["some"] = "42test"
2525
testLabels["abc/def"] = "good/label?"
26-
ddFilePath := path.Join(datapathutils.TestDataPath(t), "export")
26+
ddFilePath := path.Join(datapathutils.TestDataPath(t), "export.txt")
2727
datadriven.RunTest(t, ddFilePath, func(t *testing.T, d *datadriven.TestData) string {
2828
if d.Cmd != "export" {
2929
d.Fatalf(t, "unknown command %s", d.Cmd)

pkg/cmd/roachprod-microbench/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ func makeCompareCommand() *cobra.Command {
173173
}
174174

175175
if c.influxConfig.token != "" {
176-
err = c.pushToInfluxDB()
176+
err = c.pushToInfluxDB(comparisonResult)
177177
if err != nil {
178178
return err
179179
}

pkg/cmd/roachprod-microbench/metadata.go

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,24 @@
55

66
package main
77

8-
import "os"
8+
import (
9+
"bufio"
10+
"os"
11+
"reflect"
12+
"strings"
13+
)
14+
15+
type Metadata struct {
16+
ExperimentCommitTime string `field:"experiment-commit-time"`
17+
Repository string `field:"repository"`
18+
BaselineCommit string `field:"baseline-commit"`
19+
GoOS string `field:"goos"`
20+
ExperimentCommit string `field:"experiment-commit"`
21+
RunTime string `field:"run-time"`
22+
BenchmarksCommit string `field:"benchmarks-commit"`
23+
Machine string `field:"machine"`
24+
GoArch string `field:"goarch"`
25+
}
926

1027
// getPackagesFromLogs scans a directory for benchmark report logs and
1128
// creates a list of packages that were used to generate the logs.
@@ -26,3 +43,42 @@ func getPackagesFromLogs(dir string) ([]string, error) {
2643
}
2744
return packages, nil
2845
}
46+
47+
// loadMetadata reads a Go benchmark metadata file and returns a Metadata
48+
// struct.
49+
func loadMetadata(logFile string) (Metadata, error) {
50+
metadata := Metadata{}
51+
file, err := os.Open(logFile)
52+
if err != nil {
53+
return metadata, err
54+
}
55+
defer file.Close()
56+
57+
metadataMap := make(map[string]string)
58+
scanner := bufio.NewScanner(file)
59+
for scanner.Scan() {
60+
line := scanner.Text()
61+
parts := strings.SplitN(line, ":", 2)
62+
if len(parts) != 2 {
63+
continue
64+
}
65+
key := strings.TrimSpace(parts[0])
66+
value := strings.TrimSpace(parts[1])
67+
metadataMap[key] = value
68+
}
69+
70+
if err = scanner.Err(); err != nil {
71+
return metadata, err
72+
}
73+
74+
v := reflect.ValueOf(&metadata).Elem()
75+
for i := 0; i < v.NumField(); i++ {
76+
field := v.Type().Field(i)
77+
fieldName := field.Tag.Get("field")
78+
if value, ok := metadataMap[fieldName]; ok {
79+
v.Field(i).SetString(value)
80+
}
81+
}
82+
83+
return metadata, nil
84+
}

pkg/cmd/roachprod-microbench/model/BUILD.bazel

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1-
load("@io_bazel_rules_go//go:def.bzl", "go_library")
1+
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
22

33
go_library(
44
name = "model",
55
srcs = [
66
"builder.go",
7+
"math.go",
78
"metric.go",
89
"options.go",
910
],
@@ -14,3 +15,14 @@ go_library(
1415
"@org_golang_x_perf//benchmath",
1516
],
1617
)
18+
19+
go_test(
20+
name = "model_test",
21+
srcs = ["math_test.go"],
22+
embed = [":model"],
23+
deps = [
24+
"@com_github_stretchr_testify//require",
25+
"@org_golang_x_perf//benchfmt",
26+
"@org_golang_x_perf//benchseries",
27+
],
28+
)

pkg/cmd/roachprod-microbench/model/builder.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ func (m *Metric) ComputeComparison(benchmarkName, oldID, newID string) *Comparis
9595
return nil
9696
}
9797
}
98-
// Compute the comparison and delta.
98+
// Compute the comparison, confidence interval and delta.
9999
comparison := Comparison{}
100100
oldSample, newSample := benchmarkEntry.Samples[oldID], benchmarkEntry.Samples[newID]
101101
comparison.Distribution = m.Assumption.Compare(oldSample, newSample)
@@ -106,6 +106,7 @@ func (m *Metric) ComputeComparison(benchmarkName, oldID, newID string) *Comparis
106106
} else {
107107
comparison.Delta = ((newSummary.Center / oldSummary.Center) - 1.0) * 100
108108
}
109+
comparison.ConfidenceInterval = calculateConfidenceInterval(newSample.Values, oldSample.Values)
109110
return &comparison
110111
}
111112

0 commit comments

Comments
 (0)