cockroachdb
diff --git a/‎pkg/BUILD.bazel‎
Lines changed: 2 additions & 0 deletions b/‎pkg/BUILD.bazel‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎pkg/cmd/roachprod-microbench/compare.go‎
Lines changed: 38 additions & 144 deletions b/‎pkg/cmd/roachprod-microbench/compare.go‎
Lines changed: 38 additions & 144 deletions
diff --git a/‎pkg/cmd/roachprod-microbench/compare_test.go‎
Lines changed: 9 additions & 5 deletions b/‎pkg/cmd/roachprod-microbench/compare_test.go‎
Lines changed: 9 additions & 5 deletions
diff --git a/‎pkg/cmd/roachprod-microbench/executor_test.go‎
Lines changed: 1 addition & 1 deletion b/‎pkg/cmd/roachprod-microbench/executor_test.go‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pkg/cmd/roachprod-microbench/export_test.go‎
Lines changed: 1 addition & 1 deletion b/‎pkg/cmd/roachprod-microbench/export_test.go‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pkg/cmd/roachprod-microbench/main.go‎
Lines changed: 1 addition & 1 deletion b/‎pkg/cmd/roachprod-microbench/main.go‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pkg/cmd/roachprod-microbench/metadata.go‎
Lines changed: 57 additions & 1 deletion b/‎pkg/cmd/roachprod-microbench/metadata.go‎
Lines changed: 57 additions & 1 deletion
diff --git a/‎pkg/cmd/roachprod-microbench/model/BUILD.bazel‎
Lines changed: 13 additions & 1 deletion b/‎pkg/cmd/roachprod-microbench/model/BUILD.bazel‎
Lines changed: 13 additions & 1 deletion
diff --git a/‎pkg/cmd/roachprod-microbench/model/builder.go‎
Lines changed: 2 additions & 1 deletion b/‎pkg/cmd/roachprod-microbench/model/builder.go‎
Lines changed: 2 additions & 1 deletion
@@ -151,6 +151,7 @@ ALL_TESTS = [
     "//pkg/cmd/reduce/reduce:reduce_test",
     "//pkg/cmd/release:release_test",
     "//pkg/cmd/roachprod-microbench/cluster:cluster_test",
+    "//pkg/cmd/roachprod-microbench/model:model_test",
     "//pkg/cmd/roachprod-microbench/util:util_test",
     "//pkg/cmd/roachprod-microbench:roachprod-microbench_test",
     "//pkg/cmd/roachtest/clusterstats:clusterstats_test",
@@ -1218,6 +1219,7 @@ GO_TARGETS = [
     "//pkg/cmd/roachprod-microbench/cluster:cluster_test",
     "//pkg/cmd/roachprod-microbench/google:google",
     "//pkg/cmd/roachprod-microbench/model:model",
+    "//pkg/cmd/roachprod-microbench/model:model_test",
     "//pkg/cmd/roachprod-microbench/parser:parser",
     "//pkg/cmd/roachprod-microbench/util:util",
     "//pkg/cmd/roachprod-microbench/util:util_test",
 
@@ -6,7 +6,6 @@
 package main
 
 import (
-	"bytes"
 	"context"
 	"fmt"
 	"log"
@@ -350,159 +349,54 @@ func (c *compare) compareUsingThreshold(comparisonResultsMap model.ComparisonRes
 	return nil
 }
 
-func (c *compare) createBenchSeries() ([]*benchseries.ComparisonSeries, error) {
-	opts := benchseries.DefaultBuilderOptions()
-	opts.Experiment = "run-time"
-	opts.Compare = "cockroach"
-	builder, err := benchseries.NewBuilder(opts)
-	if err != nil {
-		return nil, err
-	}
-
-	var benchBuf bytes.Buffer
-	readFileFn := func(filePath string, required bool) error {
-		data, err := os.ReadFile(filePath)
-		if err != nil {
-			if !required && oserror.IsNotExist(err) {
-				return nil
-			}
-			return errors.Wrapf(err, "failed to read file %s", filePath)
-		}
-		benchBuf.Write(data)
-		benchBuf.WriteString("\n")
-		return nil
-	}
-
-	for k, v := range c.influxConfig.metadata {
-		benchBuf.WriteString(fmt.Sprintf("%s: %s\n", k, v))
-	}
-
-	logPaths := map[string]string{
-		"experiment": c.experimentDir,
-		"baseline":   c.baselineDir,
-	}
-	for logType, dir := range logPaths {
-		benchBuf.WriteString(fmt.Sprintf("cockroach: %s\n", logType))
-		logPath := filepath.Join(dir, "metadata.log")
-		if err = readFileFn(logPath, true); err != nil {
-			return nil, err
-		}
-		for _, pkg := range c.packages {
-			benchBuf.WriteString(fmt.Sprintf("pkg: %s\n", pkg))
-			logPath = filepath.Join(dir, getReportLogName(reportLogName, pkg))
-			if err = readFileFn(logPath, false); err != nil {
-				return nil, err
-			}
-		}
-	}
-
-	benchReader := benchfmt.NewReader(bytes.NewReader(benchBuf.Bytes()), "buffer")
-	recordsMap := make(map[string][]*benchfmt.Result)
-	seen := make(map[string]map[string]struct{})
-	for benchReader.Scan() {
-		switch rec := benchReader.Result().(type) {
-		case *benchfmt.SyntaxError:
-			// In case the benchmark log is corrupted or contains a syntax error, we
-			// want to return an error to the caller.
-			return nil, fmt.Errorf("syntax error: %v", rec)
-		case *benchfmt.Result:
-			var cmp, pkg string
-			for _, config := range rec.Config {
-				if config.Key == "pkg" {
-					pkg = string(config.Value)
-				}
-				if config.Key == opts.Compare {
-					cmp = string(config.Value)
-				}
-			}
-			key := pkg + util.PackageSeparator + string(rec.Name)
-			// Update the name to include the package name. This is a workaround for
-			// `benchseries`, that currently does not support package names.
-			rec.Name = benchfmt.Name(key)
-			recordsMap[key] = append(recordsMap[key], rec.Clone())
-			// Determine if we've seen this package/benchmark combination for both
-			// the baseline and experiment run.
-			if _, ok := seen[key]; !ok {
-				seen[key] = make(map[string]struct{})
-			}
-			seen[key][cmp] = struct{}{}
-		}
-	}
-
-	// Add only the benchmarks that have been seen in both the baseline and
-	// experiment run.
-	for key, records := range recordsMap {
-		if len(seen[key]) != 2 {
-			continue
-		}
-		for _, rec := range records {
-			builder.Add(rec)
-		}
-	}
-
-	comparisons, err := builder.AllComparisonSeries(nil, benchseries.DUPE_REPLACE)
-	if err != nil {
-		return nil, errors.Wrap(err, "failed to create comparison series")
-	}
-	return comparisons, nil
-}
-
-func (c *compare) pushToInfluxDB() error {
+func (c *compare) pushToInfluxDB(comparisonResultsMap model.ComparisonResultsMap) error {
 	client := influxdb2.NewClient(c.influxConfig.host, c.influxConfig.token)
 	defer client.Close()
 	writeAPI := client.WriteAPI("cockroach", "microbench")
 	errorChan := writeAPI.Errors()
 
-	comparisons, err := c.createBenchSeries()
+	metadata, err := loadMetadata(filepath.Join(c.experimentDir, "metadata.log"))
 	if err != nil {
 		return err
 	}
+	experimentTime := metadata.ExperimentCommitTime
+	normalizedDateString, err := benchseries.NormalizeDateString(experimentTime)
+	if err != nil {
+		return errors.Wrap(err, "error normalizing experiment commit date")
+	}
+	ts, err := benchseries.ParseNormalizedDateString(normalizedDateString)
+	if err != nil {
+		return errors.Wrap(err, "error parsing experiment commit date")
+	}
 
-	for _, cs := range comparisons {
-		cs.AddSummaries(0.95, 1000)
-		residues := make(map[string]string)
-		for _, r := range cs.Residues {
-			residues[r.S] = r.Slice[0]
-		}
-
-		for idx, benchmarkName := range cs.Benchmarks {
-			if len(cs.Summaries) == 0 {
-				log.Printf("WARN: no summaries for %s", benchmarkName)
-				continue
-			}
-			sum := cs.Summaries[0][idx]
-			if !sum.Defined() {
-				continue
-			}
-
-			experimentTime := cs.Series[0]
-			ts, err := benchseries.ParseNormalizedDateString(experimentTime)
-			if err != nil {
-				return errors.Wrap(err, "error parsing experiment commit date")
-			}
-			fields := map[string]interface{}{
-				"low":               sum.Low,
-				"center":            sum.Center,
-				"high":              sum.High,
-				"upload-time":       residues["upload-time"],
-				"baseline-commit":   cs.HashPairs[experimentTime].DenHash,
-				"experiment-commit": cs.HashPairs[experimentTime].NumHash,
-				"benchmarks-commit": residues["benchmarks-commit"],
-			}
-			pkg := strings.Split(benchmarkName, util.PackageSeparator)[0]
-			benchmarkName = strings.Split(benchmarkName, util.PackageSeparator)[1]
-			tags := map[string]string{
-				"name":         benchmarkName,
-				"unit":         cs.Unit,
-				"pkg":          pkg,
-				"repository":   "cockroach",
-				"branch":       residues["branch"],
-				"goarch":       residues["goarch"],
-				"goos":         residues["goos"],
-				"machine-type": residues["machine-type"],
+	for _, group := range comparisonResultsMap {
+		for _, result := range group {
+			for _, detail := range result.Comparisons {
+				ci := detail.Comparison.ConfidenceInterval
+				fields := map[string]interface{}{
+					"low":               ci.Low,
+					"center":            ci.Center,
+					"high":              ci.High,
+					"upload-time":       metadata.RunTime,
+					"baseline-commit":   metadata.BaselineCommit,
+					"experiment-commit": metadata.ExperimentCommit,
+					"benchmarks-commit": metadata.BenchmarksCommit,
+				}
+				pkg := strings.Split(detail.BenchmarkName, util.PackageSeparator)[0]
+				benchmarkName := strings.Split(detail.BenchmarkName, util.PackageSeparator)[1]
+				tags := map[string]string{
+					"name":         benchmarkName,
+					"unit":         result.Metric.Unit,
+					"pkg":          pkg,
+					"repository":   "cockroach",
+					"branch":       "master",
+					"goarch":       metadata.GoArch,
+					"goos":         metadata.GoOS,
+					"machine-type": metadata.Machine,
+				}
+				p := influxdb2.NewPoint("benchmark-result", tags, fields, ts)
+				writeAPI.WritePoint(p)
 			}
-			p := influxdb2.NewPoint("benchmark-result", tags, fields, ts)
-			writeAPI.WritePoint(p)
 		}
 	}
 	done := make(chan struct{})
 
@@ -38,15 +38,19 @@ func metricsToText(metricMaps map[string]*model.MetricMap) string {
 
 			for _, entryKey := range entryKeys {
 				entry := metric.BenchmarkEntries[entryKey]
-				centers := make([]float64, len(entry.Summaries))
+				centers := make([]string, len(entry.Summaries))
 				summaryKeys := maps.Keys(entry.Summaries)
 				sort.Strings(summaryKeys)
 				for i, key := range summaryKeys {
-					centers[i] = entry.Summaries[key].Center
+					centers[i] = fmt.Sprintf("%4f", entry.Summaries[key].Center)
 				}
 				comparison := metric.ComputeComparison(entryKey, "baseline", "experiment")
-				fmt.Fprintf(buf, "BenchmarkEntry %s %s %v %s\n",
-					entryKey, comparison.FormattedDelta, centers, comparison.Distribution.String(),
+				confidenceStr := fmt.Sprintf("%4f %4f %4f",
+					comparison.ConfidenceInterval.Low,
+					comparison.ConfidenceInterval.Center,
+					comparison.ConfidenceInterval.High)
+				fmt.Fprintf(buf, "BenchmarkEntry %s %s %v %s %v\n",
+					entryKey, comparison.FormattedDelta, centers, comparison.Distribution.String(), confidenceStr,
 				)
 			}
 		}
@@ -55,7 +59,7 @@ func metricsToText(metricMaps map[string]*model.MetricMap) string {
 }
 
 func TestCompareBenchmarks(t *testing.T) {
-	ddFilePath := path.Join(datapathutils.TestDataPath(t), "compare")
+	ddFilePath := path.Join(datapathutils.TestDataPath(t), "compare.txt")
 	datadriven.RunTest(t, ddFilePath, func(t *testing.T, d *datadriven.TestData) string {
 		if d.Cmd != "compare" {
 			d.Fatalf(t, "unknown command %s", d.Cmd)
 
@@ -16,7 +16,7 @@ import (
 )
 
 func TestExtractBenchmarkResultsDataDriven(t *testing.T) {
-	ddFilePath := path.Join(datapathutils.TestDataPath(t), "benchmark")
+	ddFilePath := path.Join(datapathutils.TestDataPath(t), "benchmark.txt")
 	datadriven.RunTest(t, ddFilePath, func(t *testing.T, d *datadriven.TestData) string {
 		if d.Cmd != "benchmark" {
 			d.Fatalf(t, "unknown command %s", d.Cmd)
 
@@ -23,7 +23,7 @@ func TestExport(t *testing.T) {
 	testLabels := make(map[string]string)
 	testLabels["some"] = "42test"
 	testLabels["abc/def"] = "good/label?"
-	ddFilePath := path.Join(datapathutils.TestDataPath(t), "export")
+	ddFilePath := path.Join(datapathutils.TestDataPath(t), "export.txt")
 	datadriven.RunTest(t, ddFilePath, func(t *testing.T, d *datadriven.TestData) string {
 		if d.Cmd != "export" {
 			d.Fatalf(t, "unknown command %s", d.Cmd)
 
@@ -173,7 +173,7 @@ func makeCompareCommand() *cobra.Command {
 		}
 
 		if c.influxConfig.token != "" {
-			err = c.pushToInfluxDB()
+			err = c.pushToInfluxDB(comparisonResult)
 			if err != nil {
 				return err
 			}
 
@@ -5,7 +5,24 @@
 
 package main
 
-import "os"
+import (
+	"bufio"
+	"os"
+	"reflect"
+	"strings"
+)
+
+// Metadata holds the string values read from a benchmark run's metadata
+// file. Each field's `field` tag names the key in that file whose value
+// loadMetadata copies into the field; a key that is absent from the file
+// leaves the corresponding field as the empty string.
+type Metadata struct {
+	ExperimentCommitTime string `field:"experiment-commit-time"`
+	Repository           string `field:"repository"`
+	BaselineCommit       string `field:"baseline-commit"`
+	GoOS                 string `field:"goos"`
+	ExperimentCommit     string `field:"experiment-commit"`
+	RunTime              string `field:"run-time"`
+	BenchmarksCommit     string `field:"benchmarks-commit"`
+	Machine              string `field:"machine"`
+	GoArch               string `field:"goarch"`
+}
 
 // getPackagesFromLogs scans a directory for benchmark report logs and
 // creates a list of packages that were used to generate the logs.
@@ -26,3 +43,42 @@ func getPackagesFromLogs(dir string) ([]string, error) {
 	}
 	return packages, nil
 }
+
+// loadMetadata reads the Go benchmark metadata file at logFile and returns
+// its contents as a Metadata struct.
+//
+// The file is scanned line by line. Each line is split at its first ":" into
+// a key and a value, both trimmed of surrounding whitespace; lines without a
+// ":" are ignored, and a key that appears more than once keeps its last
+// value. Every Metadata field whose `field` tag equals a parsed key is set to
+// that key's value; fields with no matching key are left empty.
+//
+// An error is returned if the file cannot be opened or if scanning fails. In
+// both cases the returned Metadata is the zero value.
+func loadMetadata(logFile string) (Metadata, error) {
+	metadata := Metadata{}
+	file, err := os.Open(logFile)
+	if err != nil {
+		return metadata, err
+	}
+	defer file.Close()
+
+	metadataMap := make(map[string]string)
+	scanner := bufio.NewScanner(file)
+	for scanner.Scan() {
+		line := scanner.Text()
+		// Split on the first ":" only, so a value may itself contain ":".
+		parts := strings.SplitN(line, ":", 2)
+		if len(parts) != 2 {
+			continue
+		}
+		key := strings.TrimSpace(parts[0])
+		value := strings.TrimSpace(parts[1])
+		metadataMap[key] = value
+	}
+
+	if err = scanner.Err(); err != nil {
+		return metadata, err
+	}
+
+	// Copy each parsed value into the struct field whose `field` tag equals
+	// its key. SetString panics on non-string fields, so this relies on every
+	// Metadata field being a string.
+	v := reflect.ValueOf(&metadata).Elem()
+	for i := 0; i < v.NumField(); i++ {
+		field := v.Type().Field(i)
+		fieldName := field.Tag.Get("field")
+		if value, ok := metadataMap[fieldName]; ok {
+			v.Field(i).SetString(value)
+		}
+	}
+
+	return metadata, nil
+}
@@ -1,9 +1,10 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_library")
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
 
 go_library(
     name = "model",
     srcs = [
         "builder.go",
+        "math.go",
         "metric.go",
         "options.go",
     ],
@@ -14,3 +15,14 @@ go_library(
         "@org_golang_x_perf//benchmath",
     ],
 )
+
+go_test(
+    name = "model_test",
+    srcs = ["math_test.go"],
+    embed = [":model"],
+    deps = [
+        "@com_github_stretchr_testify//require",
+        "@org_golang_x_perf//benchfmt",
+        "@org_golang_x_perf//benchseries",
+    ],
+)
@@ -95,7 +95,7 @@ func (m *Metric) ComputeComparison(benchmarkName, oldID, newID string) *Comparis
 			return nil
 		}
 	}
-	// Compute the comparison and delta.
+	// Compute the comparison, confidence interval and delta.
 	comparison := Comparison{}
 	oldSample, newSample := benchmarkEntry.Samples[oldID], benchmarkEntry.Samples[newID]
 	comparison.Distribution = m.Assumption.Compare(oldSample, newSample)
@@ -106,6 +106,7 @@ func (m *Metric) ComputeComparison(benchmarkName, oldID, newID string) *Comparis
 	} else {
 		comparison.Delta = ((newSummary.Center / oldSummary.Center) - 1.0) * 100
 	}
+	comparison.ConfidenceInterval = calculateConfidenceInterval(newSample.Values, oldSample.Values)
 	return &comparison
 }
Original file line number	Diff line number	Diff line change
`@@ -16,7 +16,7 @@ import (`
`16`	`16`	`)`
`17`	`17`
`18`	`18`	`func TestExtractBenchmarkResultsDataDriven(t *testing.T) {`
`19`		`- ddFilePath := path.Join(datapathutils.TestDataPath(t), "benchmark")`
	`19`	`+ ddFilePath := path.Join(datapathutils.TestDataPath(t), "benchmark.txt")`
`20`	`20`	`datadriven.RunTest(t, ddFilePath, func(t *testing.T, d *datadriven.TestData) string {`
`21`	`21`	`if d.Cmd != "benchmark" {`
`22`	`22`	`d.Fatalf(t, "unknown command %s", d.Cmd)`
Original file line number	Diff line number	Diff line change
`@@ -173,7 +173,7 @@ func makeCompareCommand() *cobra.Command {`
`173`	`173`	`}`
`174`	`174`
`175`	`175`	`if c.influxConfig.token != "" {`
`176`		`- err = c.pushToInfluxDB()`
	`176`	`+ err = c.pushToInfluxDB(comparisonResult)`
`177`	`177`	`if err != nil {`
`178`	`178`	`return err`
`179`	`179`	`}`
Original file line number	Diff line number	Diff line change
`@@ -95,7 +95,7 @@ func (m *Metric) ComputeComparison(benchmarkName, oldID, newID string) *Comparis`
`95`	`95`	`return nil`
`96`	`96`	`}`
`97`	`97`	`}`
`98`		`- // Compute the comparison and delta.`
	`98`	`+ // Compute the comparison, confidence interval and delta.`
`99`	`99`	`comparison := Comparison{}`
`100`	`100`	`oldSample, newSample := benchmarkEntry.Samples[oldID], benchmarkEntry.Samples[newID]`
`101`	`101`	`comparison.Distribution = m.Assumption.Compare(oldSample, newSample)`
`@@ -106,6 +106,7 @@ func (m *Metric) ComputeComparison(benchmarkName, oldID, newID string) *Comparis`
`106`	`106`	`} else {`
`107`	`107`	`comparison.Delta = ((newSummary.Center / oldSummary.Center) - 1.0) * 100`
`108`	`108`	`}`
	`109`	`+ comparison.ConfidenceInterval = calculateConfidenceInterval(newSample.Values, oldSample.Values)`
`109`	`110`	`return &comparison`
`110`	`111`	`}`
`111`	`112`