diff --git a/.evergreen/perfcomp/README.md b/.evergreen/perfcomp/README.md new file mode 100644 index 00000000..81e3b3af --- /dev/null +++ b/.evergreen/perfcomp/README.md @@ -0,0 +1,87 @@ +# perfcomp + +**perfcomp** is a performance analyzer that runs on a per-PR-commit basis. + +## 📦 Installation + +To install the latest version: + +```bash +go install github.com/mongodb-labs/drivers-evergreen-tools/perfcomp/cmd/perfcomp@latest +``` + +Or build it locally in `bin/perfcomp`: + +```bash +bash build.sh +``` + +## 🔧 Usage + +### Parameters + +To use `perfcomp`, you must set an environment variable called `PERF_URI_PRIVATE_ENDPOINT` that holds the analytics node URI. You can request it from the DevProd performance team. + +To run in your project repository, you need to create a [performance context](https://performance-monitoring-and-analysis.server-tig.prod.corp.mongodb.com/contexts) that captures all benchmarks in your project. This needs to be a triage context. Feel free to refer to the [Go Driver context](https://performance-monitoring-and-analysis.server-tig.prod.corp.mongodb.com/context/name/GoDriver%20perf%20task) as a template. + +> _If you are creating a triage context for the first time, it may take a few hours for your project's data to be tagged._ + +You also need the name of the performance task and variant specific to your project. You can run a query against the analytics node `raw_results` collection: + +``` +db.raw_results.find({ + "info.project": "", + "info.version": "" +}) +``` + +and look for the `variant` and `task_name` properties. 
+ +### perfcomp CLI + +```bash +perfcomp is a cli that reports stat-sig results between evergreen patches with the mainline commit + +Usage: + perfcomp [command] + +Available Commands: + compare compare evergreen patch to mainline commit + mdreport generates markdown output after run +``` + +### Commands + +#### compare + +```bash +compare evergreen patch to mainline commit + +Usage: + perfcomp compare [version_id] [flags] + +Flags: + --perf-context string specify the performance triage context, ex. "GoDriver perf task" (required) + --project string specify the name of an existing Evergreen project, ex. "mongo-go-driver" (required) + --task string specify the evergreen perf task name, ex. "perf" (required) + --variant string specify the perf task variant, ex. "perf" (required) +``` + +#### mdreport + +```bash +generates markdown output after compare run (must be run after `compare`) + +Usage: + perfcomp mdreport +``` + +### Run via shell script + +Alternatively, you can run the perfcomp shell script. This script will run build and then run `compare`. From the root directory, + +```bash +PERF_URI_PRIVATE_ENDPOINT="" VERSION_ID="" PROJECT="" CONTEXT="" TASK="" VARIANT="" .evergreen/run-perf-comp.sh +``` + +If you would like to see a markdown preview of the report, you can also pass in `HEAD_SHA=""`. This will generate `.evergreen/perfcomp/perf-report.md`. 
diff --git a/.evergreen/perfcomp/build.sh b/.evergreen/perfcomp/build.sh new file mode 100755 index 00000000..847b6141 --- /dev/null +++ b/.evergreen/perfcomp/build.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash +set -euo pipefail + +BIN_DIR="bin" +mkdir -p $BIN_DIR +go build -o $BIN_DIR/perfcomp ./cmd/perfcomp/ diff --git a/.evergreen/perfcomp/cmd/perfcomp/compare.go b/.evergreen/perfcomp/cmd/perfcomp/compare.go new file mode 100644 index 00000000..59c28878 --- /dev/null +++ b/.evergreen/perfcomp/cmd/perfcomp/compare.go @@ -0,0 +1,145 @@ +package main + +import ( + "context" + "fmt" + "log" + "math" + "os" + "sort" + "strings" + "text/tabwriter" + "time" + + "github.com/mongodb-labs/drivers-evergreen-tools/perfcomp" + "github.com/spf13/cobra" +) + +func newCompareCommand() *cobra.Command { + cmd := &cobra.Command{ + Use: "compare", + Short: "compare evergreen patch to mainline commit", + // Version id is a required argument + Args: func(cmd *cobra.Command, args []string) error { + if len(args) < 1 { + return fmt.Errorf("this command requires an evergreen patch version ID") + } + return nil + }, + } + + var project, task, variant, perfcontext string + cmd.Flags().StringVar(&project, "project", "", `specify the name of an existing Evergreen project, ex. "mongo-go-driver"`) + cmd.Flags().StringVar(&perfcontext, "perf-context", "", `specify the performance triage context, ex. "GoDriver perf task"`) + // TODO(DRIVERS-3264): Use first task / variant of the project by default for perf filtering + cmd.Flags().StringVar(&task, "task", "", `specify the evergreen performance task name, ex. "perf"`) + cmd.Flags().StringVar(&variant, "variant", "", `specify the performance variant, ex. 
"perf"`) + + for _, flag := range []string{"project", "task", "variant", "context"} { + cmd.MarkFlagRequired(flag) + } + + cmd.Run = func(cmd *cobra.Command, args []string) { + // Check for variables + uri := os.Getenv("PERF_URI_PRIVATE_ENDPOINT") + if uri == "" { + log.Fatal("PERF_URI_PRIVATE_ENDPOINT env variable is not set") + } + + // Validate all flags + for _, flag := range []string{"project", "task", "variant", "perf-context"} { + if flag == "" { + log.Fatalf("must provide %s", flag) + } + } + + // Run compare function + err := runCompare(cmd, args, + perfcomp.WithProject(project), + perfcomp.WithTask(task), + perfcomp.WithVariant(variant), + perfcomp.WithContext(perfcontext), + ) + if err != nil { + log.Fatalf("failed to compare: %v", err) + } + } + + return cmd +} + +func createComment(result perfcomp.CompareResult) string { + var comment strings.Builder + + if len(result.SigEnergyStats) == 0 { + comment.Reset() + fmt.Fprintf(&comment, "There were no significant changes to the performance to report for version %s.\n", result.Version) + } else { + fmt.Fprintf(&comment, "The following benchmark tests for version %s had statistically significant changes (i.e., |z-score| > 1.96):\n\n", result.Version) + + w := tabwriter.NewWriter(&comment, 0, 0, 1, ' ', 0) + + fmt.Fprintln(w, "| Benchmark\t| Measurement\t| % Change\t| Patch Value\t| Stable Region\t| H-Score\t| Z-Score\t| ") + fmt.Fprintln(w, "| ---------\t| -----------\t| --------\t| -----------\t| -------------\t| -------\t| -------\t|") + + sort.Slice(result.SigEnergyStats, func(i, j int) bool { + return math.Abs(result.SigEnergyStats[i].PercentChange) > math.Abs(result.SigEnergyStats[j].PercentChange) + }) + for _, es := range result.SigEnergyStats { + fmt.Fprintf(w, "| %s\t| %s\t| %.4f\t| %.4f\t| Avg: %.4f, Med: %.4f, Stdev: %.4f\t| %.4f\t| %.4f\t|\n", + es.Benchmark, + es.Measurement, + es.PercentChange, + es.MeasurementVal, + es.StableRegion.Mean, + es.StableRegion.Median, + es.StableRegion.Std, + 
es.HScore, + es.ZScore, + ) + } + + w.Flush() + } + + comment.WriteString("\n*For a comprehensive view of all microbenchmark results for this PR's commit, please check out the Evergreen perf task for this patch.*") + return comment.String() + +} + +func runCompare(cmd *cobra.Command, args []string, opts ...perfcomp.CompareOption) error { + perfAnalyticsConnString := os.Getenv("PERF_URI_PRIVATE_ENDPOINT") + version := args[len(args)-1] + opts = append(opts, perfcomp.WithVersion(version)) + + ctx, cancel := context.WithTimeout(cmd.Context(), 5*time.Second) + defer cancel() + + res, err := perfcomp.Compare(ctx, perfAnalyticsConnString, opts...) + if err != nil { + log.Fatalf("failed to compare: %v", err) + } + + res.CommitSHA = os.Getenv("HEAD_SHA") + res.MainlineCommit = os.Getenv("BASE_SHA") + + prComment := createComment(*res) + log.Println("🧪 Performance Results") + log.Println(prComment) + + if res.CommitSHA != "" { + // Write results to .txt file to parse into markdown comment + fWrite, err := os.Create(perfReportFileTxt) + if err != nil { + log.Fatalf("Could not create %s: %v", perfReportFileTxt, err) + } + defer fWrite.Close() + + fmt.Fprintf(fWrite, "Version ID: %s\n", version) + fmt.Fprintf(fWrite, "Commit SHA: %s\n", res.CommitSHA) + fmt.Fprintln(fWrite, prComment) + log.Printf("PR commit %s: saved to %s for markdown comment.\n", res.CommitSHA, perfReportFileTxt) + } + + return nil +} diff --git a/.evergreen/perfcomp/cmd/perfcomp/main.go b/.evergreen/perfcomp/cmd/perfcomp/main.go new file mode 100644 index 00000000..377abf8f --- /dev/null +++ b/.evergreen/perfcomp/cmd/perfcomp/main.go @@ -0,0 +1,22 @@ +package main + +import ( + "log" + + "github.com/spf13/cobra" +) + +func main() { + cmd := &cobra.Command{ + Use: "perfcomp", + Short: "perfcomp is a cli that reports stat-sig results between evergreen patches with the mainline commit", + Version: "0.0.0-alpha", + } + + cmd.AddCommand(newCompareCommand()) + cmd.AddCommand(newMdCommand()) + + if err := 
cmd.Execute(); err != nil { + log.Fatalf("error: %v", err) + } +} diff --git a/.evergreen/perfcomp/cmd/perfcomp/mdreport.go b/.evergreen/perfcomp/cmd/perfcomp/mdreport.go new file mode 100644 index 00000000..9179a04a --- /dev/null +++ b/.evergreen/perfcomp/cmd/perfcomp/mdreport.go @@ -0,0 +1,144 @@ +package main + +import ( + "bufio" + "fmt" + "log" + "net/url" + "os" + "strings" + + "github.com/spf13/cobra" +) + +const perfReportFileTxt = "perf-report.txt" +const perfReportFileMd = "perf-report.md" +const perfVariant = "^perf$" +const hscoreDefLink = "https://en.wikipedia.org/wiki/Energy_distance#:~:text=E%2Dcoefficient%20of%20inhomogeneity" +const zscoreDefLink = "https://en.wikipedia.org/wiki/Standard_score#Calculation" + +func newMdCommand() *cobra.Command { + cmd := &cobra.Command{ + Use: "mdreport", + Short: "generates markdown output after run", + } + + cmd.Run = func(cmd *cobra.Command, args []string) { + if err := runMdCommand(cmd, args); err != nil { + log.Fatalf("failed to generate md: %v", err) + } + } + + return cmd +} + +func runMdCommand(cmd *cobra.Command, args []string) error { + var line string + + // open file to read + fRead, err := os.Open(perfReportFileTxt) + if err != nil { + return fmt.Errorf("failed to open perf report file %s: %v", perfReportFileTxt, err) + } + defer fRead.Close() + + // open file to write + fWrite, err := os.Create(perfReportFileMd) + if err != nil { + return fmt.Errorf("failed to create perf report file %s: %v", perfReportFileMd, err) + } + defer fWrite.Close() + + fmt.Fprintf(fWrite, "## 🧪 Performance Results\n") + + // read the file line by line using scanner + scanner := bufio.NewScanner(fRead) + + var version string + var evgLink string + + for scanner.Scan() { + line = scanner.Text() + if strings.Contains(line, "Version ID:") { + // parse version + version = strings.Split(line, " ")[2] + } else if strings.Contains(line, "Commit SHA:") { + // parse commit SHA and write header + fmt.Fprintf(fWrite, "\n
\n%s\n\t
\n\n", line) + } else if strings.Contains(line, "version "+version) { + // dynamic Evergreen perf task link + evgLink, err = generateEvgLink(version, perfVariant) + if err != nil { + log.Println(err) + fmt.Fprintf(fWrite, "%s\n", line) + } else { + printUrlToLine(fWrite, line, evgLink, "version", -1) + } + } else if strings.Contains(line, "For a comprehensive view of all microbenchmark results for this PR's commit, please check out the Evergreen perf task for this patch.") { + // last line of comment + evgLink, err = generateEvgLink(version, "") + if err != nil { + log.Println(err) + fmt.Fprintf(fWrite, "%s\n", line) + } else { + printUrlToLine(fWrite, line, evgLink, "Evergreen", 0) + } + } else if strings.Contains(line, ", ") { + line = strings.ReplaceAll(line, ", ", "
") + fmt.Fprintf(fWrite, "%s\n", line) + } else if strings.Contains(line, "H-Score") { + linkedWord := "[H-Score](" + hscoreDefLink + ")" + line = strings.ReplaceAll(line, "H-Score", linkedWord) + linkedWord = "[Z-Score](" + zscoreDefLink + ")" + line = strings.ReplaceAll(line, "Z-Score", linkedWord) + fmt.Fprintf(fWrite, "%s\n", line) + } else { + // all other regular lines + fmt.Fprintf(fWrite, "%s\n", line) + } + } + + fmt.Fprintf(fWrite, "
\n") + return nil +} + +func generateEvgLink(version string, variant string) (string, error) { + baseUrl := "https://spruce.mongodb.com" + page := "0" + sorts := "STATUS:ASC;BASE_STATUS:DESC" + + u, err := url.Parse(baseUrl) + if err != nil { + return "", fmt.Errorf("error parsing URL: %v", err) + } + + u.Path = fmt.Sprintf("version/%s/tasks", version) + + // construct query parameters + queryParams := url.Values{} + queryParams.Add("page", page) + queryParams.Add("sorts", sorts) + if variant != "" { + queryParams.Add("variant", variant) + } + + u.RawQuery = queryParams.Encode() + return u.String(), nil +} + +func printUrlToLine(fWrite *os.File, line string, link string, targetWord string, shift int) { + words := strings.Split(line, " ") + for i, w := range words { + if i > 0 && i+shift < len(words) && words[i+shift] == targetWord { + fmt.Fprintf(fWrite, "[%s](%s)", w, link) + } else { + fmt.Fprint(fWrite, w) + } + + if i < len(words)-1 { + fmt.Fprint(fWrite, " ") + } else { + fmt.Fprint(fWrite, "\n") + } + } +} diff --git a/.evergreen/perfcomp/compare.go b/.evergreen/perfcomp/compare.go new file mode 100644 index 00000000..4b78702f --- /dev/null +++ b/.evergreen/perfcomp/compare.go @@ -0,0 +1,482 @@ +package perfcomp + +import ( + "context" + "fmt" + "log" + "math" + "time" + + "go.mongodb.org/mongo-driver/v2/bson" + "go.mongodb.org/mongo-driver/v2/mongo" + "go.mongodb.org/mongo-driver/v2/mongo/options" + "gonum.org/v1/gonum/mat" +) + +// RawData defines the shape of the data in the raw_results collection. +// raw_results stores results by benchmark, which holds the values of all its measurements. +// A single measurement from a single benchmark is called a microbenchmark. 
type RawData struct {
	// Info identifies the run that produced this document. The tag is spelled
	// out to match the "info.*" keys used when querying raw_results.
	Info                 Info    `bson:"info"`
	CreatedAt            any     `bson:"created_at"`
	CompletedAt          any     `bson:"completed_at"`
	Rollups              Rollups // List of all measurement results
	FailedRollupAttempts int64   `bson:"failed_rollup_attempts"`
}

// Info describes the Evergreen run (project, version, variant, task) and the
// benchmark that a raw_results document belongs to.
type Info struct {
	Project   string `bson:"project"`
	Version   string `bson:"version"` // Evergreen version that produced the results
	Variant   string `bson:"variant"`
	Order     int64  `bson:"order"`
	TaskName  string `bson:"task_name"`
	TaskID    string `bson:"task_id"`
	Execution int64  `bson:"execution"`
	Mainline  bool   `bson:"mainline"`
	// NOTE(review): without a tag this field decodes from the key
	// "overrideinfo"; the sibling keys are snake_case, so confirm whether the
	// stored key is actually "override_info".
	OverrideInfo OverrideInfo
	TestName     string         `bson:"test_name"` // Benchmark name
	Args         map[string]any `bson:"args"`
}

// OverrideInfo holds the mainline-override fields stored alongside Info.
type OverrideInfo struct {
	OverrideMainline bool `bson:"override_mainline"`
	BaseOrder        any  `bson:"base_order"`
	Reason           any  `bson:"reason"`
	User             any  `bson:"user"`
}

// Rollups wraps the list of measurement results recorded for one benchmark.
type Rollups struct {
	Stats []Stat
}

// Stat is a single measurement result (one microbenchmark value).
type Stat struct {
	Name     string  `bson:"name"` // Measurement name
	Val      float64 `bson:"val"`  // Microbenchmark result
	Metadata any     `bson:"metadata"`
}

// StableRegion defines the shape of the data in the stable_regions collection.
// A stable region is a group of consecutive microbenchmark values between two change points.
type StableRegion struct {
	// TimeSeriesInfo must carry an explicit tag: the stable_regions queries
	// address this sub-document as "time_series_info", while the untagged
	// default key would be "timeseriesinfo" and leave the field empty.
	TimeSeriesInfo         TimeSeriesInfo `bson:"time_series_info"`
	Start                  any            `bson:"start"`
	End                    any            `bson:"end"`
	Values                 []float64      `bson:"values"` // All microbenchmark values that make up the stable region
	StartOrder             int64          `bson:"start_order"`
	EndOrder               int64          `bson:"end_order"`
	Mean                   float64        `bson:"mean"`
	Std                    float64        `bson:"std"`
	Median                 float64        `bson:"median"`
	Max                    float64        `bson:"max"`
	Min                    float64        `bson:"min"`
	CoefficientOfVariation float64        `bson:"coefficient_of_variation"`
	LastSuccessfulUpdate   any            `bson:"last_successful_update"`
	Last                   bool           `bson:"last"`
	Contexts               []any          `bson:"contexts"` // Performance context (e.g. "Go Driver perf comp")
}

// TimeSeriesInfo identifies the microbenchmark time series (project, variant,
// task, benchmark and measurement) that a stable region belongs to.
type TimeSeriesInfo struct {
	Project     string         `bson:"project"`
	Variant     string         `bson:"variant"`
	Task        string         `bson:"task"`
	Test        string         `bson:"test"`        // Benchmark name
	Measurement string         `bson:"measurement"` // Measurement name
	Args        map[string]any `bson:"args"`
}

// EnergyStats stores the calculated energy statistics for a patch version's specific
// microbenchmark compared to the mainline's stable region for that same microbenchmark.
type EnergyStats struct {
	Project         string
	Benchmark       string
	Measurement     string
	PatchVersion    string       // Evergreen version that produced the results
	StableRegion    StableRegion // Latest stable region from the mainline this patch is comparing against
	MeasurementVal  float64      // Microbenchmark result of the patch version
	PercentChange   float64
	EnergyStatistic float64
	TestStatistic   float64
	HScore          float64
	ZScore          float64
}

// CompareResult is the collection of the energy statistics of all microbenchmarks with
// statistically significant changes for this patch.
type CompareResult struct {
	CommitSHA      string // Head commit SHA
	MainlineCommit string // Base commit SHA
	Version        string // Evergreen patch version
	SigEnergyStats []EnergyStats
}

// Performance analytics node db and collection names.
const (
	expandedMetricsDB = "expanded_metrics"
	rawResultsColl    = "raw_results"
	stableRegionsColl = "stable_regions"
)

// CompareOptions stores the information for each project to use as filters.
type CompareOptions struct {
	Project     string // Required
	PerfContext string // Required
	Task        string // Required
	Variant     string // Required
	Version     string // Required
}

// CompareOption mutates a CompareOptions value; options are applied in the
// order they are passed.
type CompareOption func(*CompareOptions)

// WithProject sets the evergreen project on the CompareOptions, for example "mongo-go-driver".
+func WithProject(project string) CompareOption { + return func(opts *CompareOptions) { + opts.Project = project + } +} + +// WithContext sets the performance triage context on the CompareOptions, for example "GoDriver perf task". +func WithContext(context string) CompareOption { + return func(opts *CompareOptions) { + opts.PerfContext = context + } +} + +// WithTask sets the evergreen performance task on the CompareOptions, for example "perf". +func WithTask(task string) CompareOption { + return func(opts *CompareOptions) { + opts.Task = task + } +} + +// WithTask sets the performance task variant on the CompareOptions, for example "perf". +func WithVariant(variant string) CompareOption { + return func(opts *CompareOptions) { + opts.Variant = variant + } +} + +// WithVersion sets the evergreen version on the CompareOptions, for example "688a39d27d916e0007cf8723". +func WithVersion(version string) CompareOption { + return func(opts *CompareOptions) { + opts.Version = version + } +} + +func validateOptions(copts CompareOptions) error { + if copts.Project == "" { + return fmt.Errorf("project is required") + } + if copts.PerfContext == "" { + return fmt.Errorf("context is required") + } + if copts.Task == "" { + return fmt.Errorf("task is required") + } + if copts.Variant == "" { + return fmt.Errorf("variant is required") + } + if copts.Version == "" { + return fmt.Errorf("version is required") + } + return nil +} + +// Compare will return statistical results for a patch version using the +// stable region defined by the performance analytics cluster. 
+func Compare(ctx context.Context, perfAnalyticsConnString string, opts ...CompareOption) (*CompareResult, error) { + copts := &CompareOptions{} + for _, fn := range opts { + fn(copts) + } + + if err := validateOptions(*copts); err != nil { + return nil, fmt.Errorf("invalid configuration: %w", err) + } + + // Connect to analytics node + client, err := mongo.Connect(options.Client().ApplyURI(perfAnalyticsConnString)) + if err != nil { + return nil, fmt.Errorf("error connecting client: %v", err) + } + + defer func() { // Defer disconnect client + err = client.Disconnect(context.Background()) + if err != nil { + log.Fatalf("failed to disconnect client: %v", err) + } + }() + + err = client.Ping(ctx, nil) + if err != nil { + return nil, fmt.Errorf("error pinging MongoDB Analytics: %v", err) + } + log.Println("Successfully connected to MongoDB Analytics node.") + + db := client.Database(expandedMetricsDB) + + // Get raw data, most recent stable region, and calculate energy stats + findCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + patchRawData, err := findRawData(findCtx, db.Collection(rawResultsColl), copts) + if err != nil { + return nil, fmt.Errorf("error getting raw data: %v", err) + } + + allEnergyStats, err := getEnergyStatsForAllBenchmarks(findCtx, patchRawData, db.Collection(stableRegionsColl), copts) + if err != nil { + return nil, fmt.Errorf("error getting energy statistics: %v", err) + } + + // Get statistically significant benchmarks + statSigBenchmarks := getStatSigBenchmarks(allEnergyStats) + compareResult := CompareResult{ + Version: copts.Version, + SigEnergyStats: statSigBenchmarks, + } + + return &compareResult, nil +} + +// Gets all raw benchmark data for a specific Evergreen version. 
+func findRawData(ctx context.Context, coll *mongo.Collection, copts *CompareOptions) ([]RawData, error) { + filter := bson.D{ + {"info.project", copts.Project}, + {"info.version", copts.Version}, + {"info.variant", copts.Variant}, + {"info.task_name", copts.Task}, + } + + cursor, err := coll.Find(ctx, filter) + if err != nil { + return nil, fmt.Errorf( + "error retrieving raw data for version %q: %v", + copts.Version, + err, + ) + } + defer func() { + err = cursor.Close(ctx) + if err != nil { + log.Fatalf("error closing cursor while retrieving raw data for version %q: %v", copts.Version, err) + } + }() + + var rawData []RawData + err = cursor.All(ctx, &rawData) + if err != nil { + return nil, fmt.Errorf( + "error decoding raw data from version %q: %v", + copts.Version, + err, + ) + } + log.Printf("Successfully retrieved %d docs from version %s.\n", len(rawData), copts.Version) + return rawData, err +} + +// Finds the most recent stable region of the mainline version for a specific microbenchmark. +func findLastStableRegion(ctx context.Context, testname string, measurement string, coll *mongo.Collection, copts *CompareOptions) (*StableRegion, error) { + filter := bson.D{ + {"time_series_info.project", copts.Project}, + {"time_series_info.variant", copts.Variant}, + {"time_series_info.task", copts.Task}, + {"time_series_info.test", testname}, + {"time_series_info.measurement", measurement}, + {"last", true}, + {"contexts", bson.D{{"$in", bson.A{copts.PerfContext}}}}, + } + + findOptions := options.FindOne().SetSort(bson.D{{"end", -1}}) + + var sr StableRegion + err := coll.FindOne(ctx, filter, findOptions).Decode(&sr) + if err != nil { + return nil, err + } + return &sr, nil +} + +// Calculate the energy statistics for all measurements in a benchmark. 
+func getEnergyStatsForOneBenchmark(ctx context.Context, rd RawData, coll *mongo.Collection, copts *CompareOptions) ([]*EnergyStats, error) { + testname := rd.Info.TestName + var energyStats []*EnergyStats + + for i := range rd.Rollups.Stats { + project := rd.Info.Project + measName := rd.Rollups.Stats[i].Name + measVal := rd.Rollups.Stats[i].Val + + stableRegion, err := findLastStableRegion(ctx, testname, measName, coll, copts) + if err != nil { + return nil, fmt.Errorf( + "error finding last stable region for test %q, measurement %q: %v", + testname, + measName, + err, + ) + } + + // The performance analyzer compares the measurement value from the patch to a stable region that succeeds the latest change point. + // For example, if there were 5 measurements since the last change point, then the stable region is the 5 latest values for the measurement. + stableRegionVec := mat.NewDense(len(stableRegion.Values), 1, stableRegion.Values) + measValVec := mat.NewDense(1, 1, []float64{measVal}) // singleton + + estat, tstat, hscore, err := calcEnergyStatistics(stableRegionVec, measValVec) + if err != nil { + return nil, fmt.Errorf( + "could not calculate energy stats for test %q, measurement %q: %v", + testname, + measName, + err, + ) + } + + zscore := calcZScore(measVal, stableRegion.Mean, stableRegion.Std) + pChange := calcPercentChange(measVal, stableRegion.Mean) + + es := EnergyStats{ + Project: project, + Benchmark: testname, + Measurement: measName, + PatchVersion: rd.Info.Version, + StableRegion: *stableRegion, + MeasurementVal: measVal, + PercentChange: pChange, + EnergyStatistic: estat, + TestStatistic: tstat, + HScore: hscore, + ZScore: zscore, + } + energyStats = append(energyStats, &es) + } + + return energyStats, nil +} + +func getEnergyStatsForAllBenchmarks(ctx context.Context, patchRawData []RawData, coll *mongo.Collection, copts *CompareOptions) ([]*EnergyStats, error) { + var allEnergyStats []*EnergyStats + for _, rd := range patchRawData { + 
energyStats, err := getEnergyStatsForOneBenchmark(ctx, rd, coll, copts) + if err != nil { + return nil, fmt.Errorf( + "could not get energy stats for %q: %v", + rd.Info.TestName, + err, + ) + } else { + allEnergyStats = append(allEnergyStats, energyStats...) + } + } + return allEnergyStats, nil +} + +func getStatSigBenchmarks(energyStats []*EnergyStats) []EnergyStats { + + var significantEnergyStats []EnergyStats + for _, es := range energyStats { + // The "iterations" measurement is the number of iterations that the Go + // benchmark suite had to run to converge on a benchmark measurement. It + // is not comparable between benchmark runs, so is not a useful + // measurement to print here. Omit it. + if es.Measurement != "iterations" && math.Abs(es.ZScore) > 1.96 { + significantEnergyStats = append(significantEnergyStats, *es) + } + } + + return significantEnergyStats +} + +// Given two matrices, this function returns +// (e, t, h) = (E-statistic, test statistic, e-coefficient of inhomogeneity) +func calcEnergyStatistics(x, y *mat.Dense) (float64, float64, float64, error) { + xrows, xcols := x.Dims() + yrows, ycols := y.Dims() + + if xcols != ycols { + return 0, 0, 0, fmt.Errorf("both inputs must have the same number of columns") + } + if xrows == 0 || yrows == 0 { + return 0, 0, 0, fmt.Errorf("inputs cannot be empty") + } + + xrowsf := float64(xrows) + yrowsf := float64(yrows) + + var A float64 // E|X-Y| + if xrowsf > 0 && yrowsf > 0 { + dist, err := calcDistance(x, y) + if err != nil { + return 0, 0, 0, err + } + A = dist / (xrowsf * yrowsf) + } else { + A = 0 + } + + var B float64 // E|X-X'| + if xrowsf > 0 { + dist, err := calcDistance(x, x) + if err != nil { + return 0, 0, 0, err + } + B = dist / (xrowsf * xrowsf) + } else { + B = 0 + } + + var C float64 // E|Y-Y'| + if yrowsf > 0 { + dist, err := calcDistance(y, y) + if err != nil { + return 0, 0, 0, err + } + C = dist / (yrowsf * yrowsf) + } else { + C = 0 + } + + E := 2*A - B - C // D^2(F_x, F_y) + T := 
((xrowsf * yrowsf) / (xrowsf + yrowsf)) * E + var H float64 + if A > 0 { + H = E / (2 * A) + } else { + H = 0 + } + return E, T, H, nil +} + +// Given two vectors (expected 1 col), +// this function returns the sum of distances between each pair. +func calcDistance(x, y *mat.Dense) (float64, error) { + xrows, xcols := x.Dims() + yrows, ycols := y.Dims() + + if xcols != 1 || ycols != 1 { + return 0, fmt.Errorf("both inputs must be column vectors") + } + + var sum float64 + + for i := 0; i < xrows; i++ { + for j := 0; j < yrows; j++ { + sum += math.Abs(x.At(i, 0) - y.At(j, 0)) + } + } + return sum, nil +} + +// Calculate the Z score for result x, compared to mean mu and st dev sigma. +func calcZScore(x, mu, sigma float64) float64 { + if sigma == 0 { + return math.NaN() + } + return (x - mu) / sigma +} + +// Calculate the percentage change for result x compared to mean mu. +func calcPercentChange(x, mu float64) float64 { + if mu == 0 { + return math.NaN() + } + return ((x - mu) / mu) * 100 +} diff --git a/.evergreen/perfcomp/compare_test.go b/.evergreen/perfcomp/compare_test.go new file mode 100644 index 00000000..b1711b0b --- /dev/null +++ b/.evergreen/perfcomp/compare_test.go @@ -0,0 +1,110 @@ +package perfcomp + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "gonum.org/v1/gonum/mat" +) + +func createTestVectors(start1 int, stop1 int, step1 int, start2 int, stop2 int, step2 int) (*mat.Dense, *mat.Dense) { + xData := []float64{} + yData := []float64{} + + for i := start1; i < stop1; i += step1 { + xData = append(xData, float64(i)) + } + for j := start2; j < stop2; j += step2 { + yData = append(yData, float64(j)) + } + + x := mat.NewDense(len(xData), 1, xData) + y := mat.NewDense(len(yData), 1, yData) + + return x, y +} + +// TestCalcEnergyStatistics verifies that the energy calculation algorithms are correct. 
+func TestCalcEnergyStatistics(t *testing.T) { + t.Run("similar distributions should have small e,t,h values ", func(t *testing.T) { + x, y := createTestVectors(1, 100, 1, 1, 105, 1) + e, tstat, h, _ := calcEnergyStatistics(x, y) + + del := 1e-3 + // Limit precision of comparison to 3 digits after the decimal. + assert.InDelta(t, 0.160, e, del) // |0.160 - e| < 0.001 + assert.InDelta(t, 8.136, tstat, del) + assert.InDelta(t, 0.002, h, del) + }) + + t.Run("different distributions should have large e,t,h values", func(t *testing.T) { + x, y := createTestVectors(1, 100, 1, 10000, 13000, 14) + e, tstat, h, _ := calcEnergyStatistics(x, y) + del := 1e-3 + + assert.InDelta(t, 21859.691, e, del) + assert.InDelta(t, 1481794.709, tstat, del) + assert.InDelta(t, 0.954, h, del) + }) + + t.Run("uni-variate distributions", func(t *testing.T) { + x, y := createTestVectors(1, 300, 1, 1000, 5000, 10) + e, tstat, h, _ := calcEnergyStatistics(x, y) + del := 1e-3 + + assert.InDelta(t, 4257.009, e, del) + assert.InDelta(t, 728381.015, tstat, del) + assert.InDelta(t, 0.748, h, del) + }) + + t.Run("equal distributions should have all 0 values", func(t *testing.T) { + x := mat.NewDense(10, 1, []float64{1, 1, 1, 1, 1, 1, 1, 1, 1, 1}) + y := mat.NewDense(1, 1, []float64{1}) + + e, tstat, h, _ := calcEnergyStatistics(x, y) + + assert.Equal(t, 0.0, e) + assert.Equal(t, 0.0, tstat) + assert.Equal(t, 0.0, h) + }) + + t.Run("energy stats returns errors on malformed input", func(t *testing.T) { + x := mat.NewDense(2, 2, make([]float64, 4)) + y := mat.NewDense(2, 3, make([]float64, 6)) + + _, _, _, err := calcEnergyStatistics(x, y) + assert.NotEqual(t, nil, err) + assert.ErrorContains(t, err, "both inputs must have the same number of columns") + + x.Reset() + y = &mat.Dense{} + + _, _, _, err = calcEnergyStatistics(x, y) + assert.NotEqual(t, nil, err) + assert.ErrorContains(t, err, "inputs cannot be empty") + + x = mat.NewDense(2, 2, make([]float64, 4)) + y = mat.NewDense(3, 2, make([]float64, 6)) 
+ + _, _, _, err = calcEnergyStatistics(x, y) + assert.NotEqual(t, nil, err) + assert.ErrorContains(t, err, "both inputs must be column vectors") + }) +} + +// TestFindSigBenchmarks tests that statistically significant benchmarks are correctly flagged. +func TestFindSigBenchmarks(t *testing.T) { + var dummyEnergyStats []*EnergyStats + assert.Equal(t, 0, len(getStatSigBenchmarks(dummyEnergyStats))) + + for i := -2.5; i < 3; i += 0.5 { + es := EnergyStats{ + ZScore: i, + } + dummyEnergyStats = append(dummyEnergyStats, &es) + } + assert.Equal(t, 11, len(dummyEnergyStats)) + + sigBenchmarks := getStatSigBenchmarks(dummyEnergyStats) + assert.Equal(t, 4, len(sigBenchmarks)) +} diff --git a/.evergreen/perfcomp/go.mod b/.evergreen/perfcomp/go.mod new file mode 100644 index 00000000..4d2f9e18 --- /dev/null +++ b/.evergreen/perfcomp/go.mod @@ -0,0 +1,28 @@ +module github.com/mongodb-labs/drivers-evergreen-tools/perfcomp + +go 1.24.0 + +require github.com/spf13/cobra v1.9.1 + +require ( + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/golang/snappy v1.0.0 // indirect + github.com/klauspost/compress v1.16.7 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/xdg-go/pbkdf2 v1.0.0 // indirect + github.com/xdg-go/scram v1.1.2 // indirect + github.com/xdg-go/stringprep v1.0.4 // indirect + github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78 // indirect + golang.org/x/crypto v0.33.0 // indirect + golang.org/x/sync v0.12.0 // indirect + golang.org/x/text v0.23.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) + +require ( + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/spf13/pflag v1.0.6 // indirect + github.com/stretchr/testify v1.10.0 + go.mongodb.org/mongo-driver/v2 v2.2.3 + gonum.org/v1/gonum v0.16.0 +) diff --git a/.evergreen/perfcomp/go.sum b/.evergreen/perfcomp/go.sum new file mode 100644 index 00000000..d215447a --- /dev/null +++ b/.evergreen/perfcomp/go.sum @@ -0,0 +1,65 @@ 
+github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/golang/snappy v1.0.0 h1:Oy607GVXHs7RtbggtPBnr2RmDArIsAefDwvrdWvRhGs= +github.com/golang/snappy v1.0.0/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I= +github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo= +github.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0= +github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= +github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c= +github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= +github.com/xdg-go/scram v1.1.2 
h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY= +github.com/xdg-go/scram v1.1.2/go.mod h1:RT/sEzTbU5y00aCK8UOx6R7YryM0iF1N2MOmC3kKLN4= +github.com/xdg-go/stringprep v1.0.4 h1:XLI/Ng3O1Atzq0oBs3TWm+5ZVgkq2aqdlvP9JtoZ6c8= +github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM= +github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78 h1:ilQV1hzziu+LLM3zUTJ0trRztfwgjqKnBWNtSRkbmwM= +github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78/go.mod h1:aL8wCCfTfSfmXjznFBSZNN13rSJjlIOI1fUNAtF7rmI= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +go.mongodb.org/mongo-driver/v2 v2.2.3 h1:72uiGYXeSnUEQk37xvV9r067xzFQod4SOeAoOuq3+GM= +go.mongodb.org/mongo-driver/v2 v2.2.3/go.mod h1:qQkDMhCGWl3FN509DfdPd4GRBLU/41zqF/k8eTRceps= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.33.0 h1:IOBPskki6Lysi0lo9qQvbxiQ+FvsCC/YWOecCHAixus= +golang.org/x/crypto v0.33.0/go.mod h1:bVdXmD7IV/4GdElGPozy6U7lWdRXA4qyRVGJV57uQ5M= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw= +golang.org/x/sync v0.12.0/go.mod 
h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= +golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY= +golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= +gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1 
h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/.evergreen/run-perf-comp.sh b/.evergreen/run-perf-comp.sh
new file mode 100755
index 00000000..4d54bd66
--- /dev/null
+++ b/.evergreen/run-perf-comp.sh
@@ -0,0 +1,82 @@
+#!/usr/bin/env bash
+#
+# Builds the perfcomp binary and runs `perfcomp compare` for an Evergreen patch.
+# When HEAD_SHA is set, also runs `perfcomp mdreport` and removes perf-report.txt.
+# Requires PERF_URI_PRIVATE_ENDPOINT, VERSION_ID, PROJECT, CONTEXT, TASK, VARIANT.
+# Note: pipefail must be passed via -o; as a bare word it would only become $1.
+set -eux -o pipefail
+
+GOVERSION="${GOVERSION:-1.24}"
+GOPATH="${GOPATH:-$HOME/go}"
+
+# Detect OS
+OS="$(uname -s)"
+
+# If GOROOT is not set, determine it based on the OS and user-provided
+# GOVERSION.
+if [[ -z "${GOROOT:-}" ]]; then
+  case "$OS" in
+  Darwin)
+    if [[ -d "/usr/local/go" ]]; then
+      GOROOT="/usr/local/go" # likely place for local development
+    else
+      GOROOT="/opt/golang/go${GOVERSION}" # for spawn host
+    fi
+    ;;
+  Linux)
+    GOROOT="/opt/golang/go${GOVERSION}"
+    ;;
+  MINGW* | MSYS* | CYGWIN*)
+    GOROOT="C:\\golang\\go${GOVERSION}"
+    ;;
+  *)
+    echo "unsupported OS: $OS"
+    exit 1
+    ;;
+  esac
+fi
+
+PATH="${GOROOT}/bin:${GOPATH}/bin:${PATH}"
+export GOROOT PATH
+
+echo "Using Go SDK at: $GOROOT (version: $GOVERSION)"
+
+test -x "${GOROOT}/bin/go" || {
+  echo "Go SDK not found at: $GOROOT"
+  exit 1
+}
+
+# Resolve this script's dir, then go up one level
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
+PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
+
+# Enter the perfcomp sub-directory
+cd "$PROJECT_ROOT/.evergreen/perfcomp"
+
+# Build the perfcomp binary.
+bash build.sh
+
+if [[ ! -x "./bin/perfcomp" ]]; then
+  echo "Error: ./bin/perfcomp not found or not executable. Please run 'bash build.sh' first." >&2
+  exit 1
+else
+  echo "Found ./bin/perfcomp"
+fi
+
+# Check required inputs with xtrace off: tracing these expansions would print
+# the value of PERF_URI_PRIVATE_ENDPOINT to the log.
+set +x
+: "${PERF_URI_PRIVATE_ENDPOINT:?Error: PERF_URI_PRIVATE_ENDPOINT must be set}"
+: "${VERSION_ID:?Error: VERSION_ID must be set}"
+: "${PROJECT:?Error: PROJECT must be set}"
+: "${CONTEXT:?Error: CONTEXT must be set}"
+: "${TASK:?Error: TASK must be set}"
+: "${VARIANT:?Error: VARIANT must be set}"
+set -x
+
+./bin/perfcomp compare --project "${PROJECT}" --perf-context "${CONTEXT}" --task "${TASK}" --variant "${VARIANT}" "${VERSION_ID}"
+
+if [[ -n "${HEAD_SHA+set}" ]]; then
+  ./bin/perfcomp mdreport
+  rm perf-report.txt
+fi
diff --git a/.gitignore b/.gitignore
index c7c4d930..e71f1a76 100644
--- a/.gitignore
+++ b/.gitignore
@@ -133,6 +133,9 @@ mo-expansion.sh
 .dockerignore
 .local
 container_id.txt
+bin
+perf-report.md
+perf-report.txt
 
 # Azure functions.
 .python_packages/