roachprod-microbench: confidence interval calculation

herkolategan · herkolategan · commit c7ac7d853f7c · 2025-04-01T14:56:07.000+01:00
This change adds the ability to calculate the bootstrap confidence interval given two sets
of microbenchmark results.

Applied to microbenchmarks, the bootstrap confidence interval method helps
estimate the uncertainty in performance measurements. By resampling
benchmark results with replacement and computing a statistic (here, the ratio
of the new and old medians) over many iterations, we obtain a distribution of
possible outcomes. The confidence interval derived from this distribution
provides a robust estimate of performance variability, accounting for noise
and system fluctuations without assuming normality.

Epic: None
Release note: None
diff --git a/pkg/cmd/roachprod-microbench/model/BUILD.bazel b/pkg/cmd/roachprod-microbench/model/BUILD.bazel
@@ -4,6 +4,7 @@ go_library(
     name = "model",
     srcs = [
         "builder.go",
+        "math.go",
         "metric.go",
         "options.go",
     ],
diff --git a/pkg/cmd/roachprod-microbench/model/builder.go b/pkg/cmd/roachprod-microbench/model/builder.go
@@ -95,7 +95,7 @@ func (m *Metric) ComputeComparison(benchmarkName, oldID, newID string) *Comparis
 			return nil
 		}
 	}
-	// Compute the comparison and delta.
+	// Compute the comparison, confidence interval and delta.
 	comparison := Comparison{}
 	oldSample, newSample := benchmarkEntry.Samples[oldID], benchmarkEntry.Samples[newID]
 	comparison.Distribution = m.Assumption.Compare(oldSample, newSample)
@@ -106,6 +106,7 @@ func (m *Metric) ComputeComparison(benchmarkName, oldID, newID string) *Comparis
 	} else {
 		comparison.Delta = ((newSummary.Center / oldSummary.Center) - 1.0) * 100
 	}
+	comparison.ConfidenceInterval = calculateConfidenceInterval(newSample.Values, oldSample.Values)
 	return &comparison
 }
 
diff --git a/pkg/cmd/roachprod-microbench/model/math.go b/pkg/cmd/roachprod-microbench/model/math.go
@@ -0,0 +1,76 @@
+// Copyright 2025 The Cockroach Authors.
+//
+// Use of this software is governed by the CockroachDB Software License
+// included in the /LICENSE file.
+//
+
+package model
+
+import (
+	"math"
+	"math/rand"
+	"sort"
+)
+
+// resampleCount is the number of bootstrap resampling iterations performed
+// for each confidence interval calculation.
+const resampleCount = 1000
+// confidence is the confidence level of the interval; each tail excluded
+// from the interval covers (1 - confidence) / 2 of the bootstrap ratios.
+const confidence = 0.95
+
+// calculateConfidenceInterval estimates a bootstrap confidence interval for
+// the ratio of the median of newValues to the median of oldValues.
+//
+// Both samples are resampled with replacement resampleCount times. Each
+// iteration contributes the ratio median(new)/median(old); iterations whose
+// old median is 0 are dropped. Low and High are the ratios found at the alpha
+// and 1-alpha positions of the sorted ratios, where alpha is
+// (1-confidence)/2, and Center is the median of all collected ratios.
+//
+// The random source is seeded from the input values, so identical inputs
+// always produce the same interval. The zero ConfidenceInterval is returned
+// when either input is empty or when every iteration was dropped.
+func calculateConfidenceInterval(newValues, oldValues []float64) ConfidenceInterval {
+	// A median is undefined for an empty sample, so there is nothing to
+	// estimate. Bail out before any median is taken over an empty buffer.
+	if len(newValues) == 0 || len(oldValues) == 0 {
+		return ConfidenceInterval{}
+	}
+	// Seed deterministically from the data to keep results reproducible.
+	rng := rand.New(rand.NewSource(hash(newValues) + hash(oldValues)))
+	ratios := make([]float64, 0, resampleCount)
+	// Scratch buffers reused across iterations to avoid reallocating.
+	resNew := make([]float64, len(newValues))
+	resOld := make([]float64, len(oldValues))
+	for range resampleCount {
+		// median requires sorted input, so each resample is sorted in place.
+		resample(rng, newValues, resNew)
+		sort.Float64s(resNew)
+		resample(rng, oldValues, resOld)
+		sort.Float64s(resOld)
+
+		medOld := median(resOld)
+		// Skip if the old median is 0 to avoid division by zero.
+		if medOld != 0 {
+			ratios = append(ratios, median(resNew)/medOld)
+		}
+	}
+	if len(ratios) == 0 {
+		return ConfidenceInterval{}
+	}
+	sort.Float64s(ratios)
+	alpha := (1.0 - confidence) / 2.0
+	// Both factors are below 1, so the floored indexes stay within ratios.
+	lowerIndex := int(math.Floor(float64(len(ratios)) * alpha))
+	upperIndex := int(math.Floor(float64(len(ratios)) * (1.0 - alpha)))
+	return ConfidenceInterval{
+		Low:    ratios[lowerIndex],
+		High:   ratios[upperIndex],
+		Center: median(ratios),
+	}
+}
+
+// resample fills every slot of dest with an element of src picked at random
+// (with replacement), drawing the indexes from r.
+func resample(r *rand.Rand, src, dest []float64) {
+	n := len(src)
+	for idx := 0; idx < len(dest); idx++ {
+		dest[idx] = src[r.Intn(n)]
+	}
+}
+
+// hash folds the given values into a single int64 by summing the IEEE 754
+// bit pattern of each element. The result does not depend on element order.
+func hash(data []float64) int64 {
+	var sum int64
+	for i := range data {
+		sum += int64(math.Float64bits(data[i]))
+	}
+	return sum
+}
+
+// median returns the median of a sorted slice of values: the middle element
+// for an odd length, or the mean of the two middle elements for an even
+// length. The slice must already be sorted; it is not modified. An empty
+// slice has no median, so NaN is returned for it.
+func median(values []float64) float64 {
+	length := len(values)
+	// Guard the empty case, which would otherwise index out of range below.
+	if length == 0 {
+		return math.NaN()
+	}
+	if length%2 == 0 {
+		return (values[length/2] + values[length/2-1]) / 2
+	}
+	return values[length/2]
+}
diff --git a/pkg/cmd/roachprod-microbench/model/metric.go b/pkg/cmd/roachprod-microbench/model/metric.go
@@ -39,9 +39,17 @@ type BenchmarkEntry struct {
 
 // Comparison contains the results of comparing two microbenchmarks.
 type Comparison struct {
-	Distribution   benchmath.Comparison
-	Delta          float64
-	FormattedDelta string
+	Distribution       benchmath.Comparison
+	ConfidenceInterval ConfidenceInterval
+	Delta              float64
+	FormattedDelta     string
+}
+
+// ConfidenceInterval holds the low and high bounds of a confidence interval,
+// together with its central estimate.
+type ConfidenceInterval struct {
+	// Low is the lower bound of the interval.
+	Low    float64
+	// High is the upper bound of the interval.
+	High   float64
+	// Center is the central estimate; calculateConfidenceInterval sets it to
+	// the median of the bootstrap ratios.
+	Center float64
 }
 
 // ComparisonResult holds the comparison results for a specific metric.

Original file line number	Diff line number	Diff line change
`@@ -95,7 +95,7 @@ func (m Metric) ComputeComparison(benchmarkName, oldID, newID string) Comparis`
`95`	`95`	`return nil`
`96`	`96`	`}`
`97`	`97`	`}`
`98`		`- // Compute the comparison and delta.`
	`98`	`+ // Compute the comparison, confidence interval and delta.`
`99`	`99`	`comparison := Comparison{}`
`100`	`100`	`oldSample, newSample := benchmarkEntry.Samples[oldID], benchmarkEntry.Samples[newID]`
`101`	`101`	`comparison.Distribution = m.Assumption.Compare(oldSample, newSample)`
`@@ -106,6 +106,7 @@ func (m Metric) ComputeComparison(benchmarkName, oldID, newID string) Comparis`
`106`	`106`	`} else {`
`107`	`107`	`comparison.Delta = ((newSummary.Center / oldSummary.Center) - 1.0) * 100`
`108`	`108`	`}`
	`109`	`+ comparison.ConfidenceInterval = calculateConfidenceInterval(newSample.Values, oldSample.Values)`
`109`	`110`	`return &comparison`
`110`	`111`	`}`
`111`	`112`