From 817285fd8f1529315bea973bf7dfad8db953eb1c Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Thu, 7 Aug 2025 10:35:05 -0400 Subject: [PATCH 01/25] Set up for perfcomp build from DET --- .evergreen/perfcomp/build.sh | 6 ++++ .evergreen/perfcomp/go.mod | 24 ++++++++++++++ .evergreen/perfcomp/go.sum | 60 ++++++++++++++++++++++++++++++++++ .evergreen/run-perf-comp.sh | 62 ++++++++++++++++++++++++++++++++++++ 4 files changed, 152 insertions(+) create mode 100755 .evergreen/perfcomp/build.sh create mode 100644 .evergreen/perfcomp/go.mod create mode 100644 .evergreen/perfcomp/go.sum create mode 100755 .evergreen/run-perf-comp.sh diff --git a/.evergreen/perfcomp/build.sh b/.evergreen/perfcomp/build.sh new file mode 100755 index 00000000..847b6141 --- /dev/null +++ b/.evergreen/perfcomp/build.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash +set -euo pipefail + +BIN_DIR="bin" +mkdir -p $BIN_DIR +go build -o $BIN_DIR/perfcomp ./cmd/perfcomp/ diff --git a/.evergreen/perfcomp/go.mod b/.evergreen/perfcomp/go.mod new file mode 100644 index 00000000..ceade597 --- /dev/null +++ b/.evergreen/perfcomp/go.mod @@ -0,0 +1,24 @@ +module github.com/mongodb-labs/drivers-evergreen-tools/perfcomp + +go 1.24.4 + +require github.com/spf13/cobra v1.9.1 + +require ( + github.com/golang/snappy v1.0.0 // indirect + github.com/klauspost/compress v1.16.7 // indirect + github.com/xdg-go/pbkdf2 v1.0.0 // indirect + github.com/xdg-go/scram v1.1.2 // indirect + github.com/xdg-go/stringprep v1.0.4 // indirect + github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78 // indirect + golang.org/x/crypto v0.33.0 // indirect + golang.org/x/sync v0.12.0 // indirect + golang.org/x/text v0.23.0 // indirect +) + +require ( + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/spf13/pflag v1.0.6 // indirect + go.mongodb.org/mongo-driver/v2 v2.2.3 + gonum.org/v1/gonum v0.16.0 +) diff --git a/.evergreen/perfcomp/go.sum b/.evergreen/perfcomp/go.sum new file mode 100644 index 00000000..f0b8d0c3 --- 
/dev/null +++ b/.evergreen/perfcomp/go.sum @@ -0,0 +1,60 @@ +github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/golang/snappy v1.0.0 h1:Oy607GVXHs7RtbggtPBnr2RmDArIsAefDwvrdWvRhGs= +github.com/golang/snappy v1.0.0/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I= +github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo= +github.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0= +github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= +github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c= +github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= +github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY= +github.com/xdg-go/scram v1.1.2/go.mod h1:RT/sEzTbU5y00aCK8UOx6R7YryM0iF1N2MOmC3kKLN4= +github.com/xdg-go/stringprep v1.0.4 h1:XLI/Ng3O1Atzq0oBs3TWm+5ZVgkq2aqdlvP9JtoZ6c8= +github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM= 
+github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78 h1:ilQV1hzziu+LLM3zUTJ0trRztfwgjqKnBWNtSRkbmwM= +github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78/go.mod h1:aL8wCCfTfSfmXjznFBSZNN13rSJjlIOI1fUNAtF7rmI= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +go.mongodb.org/mongo-driver/v2 v2.2.3 h1:72uiGYXeSnUEQk37xvV9r067xzFQod4SOeAoOuq3+GM= +go.mongodb.org/mongo-driver/v2 v2.2.3/go.mod h1:qQkDMhCGWl3FN509DfdPd4GRBLU/41zqF/k8eTRceps= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.33.0 h1:IOBPskki6Lysi0lo9qQvbxiQ+FvsCC/YWOecCHAixus= +golang.org/x/crypto v0.33.0/go.mod h1:bVdXmD7IV/4GdElGPozy6U7lWdRXA4qyRVGJV57uQ5M= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw= +golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
+golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
+golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY=
+golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk=
+gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/.evergreen/run-perf-comp.sh b/.evergreen/run-perf-comp.sh
new file mode 100755
index 00000000..5f20640d
--- /dev/null
+++ b/.evergreen/run-perf-comp.sh
@@ -0,0 +1,62 @@
+#!/usr/bin/env bash
+set -euo pipefail # no -x: tracing would print the secret PERF_URI_PRIVATE_ENDPOINT
+
+GOVERSION="${GOVERSION:-1.24}"
+GOPATH="${GOPATH:-$HOME/go}"
+
+# Detect OS
+OS="$(uname -s)"
+
+# If GOROOT is not set, determine it based on the OS and user-provided
+# GOVERSION.
+if [[ -z "${GOROOT:-}" ]]; then
+  case "$OS" in
+  Darwin)
+    if [[ -d "/usr/local/go" ]]; then
+      GOROOT="/usr/local/go" # likely place for local development
+    else
+      GOROOT="/opt/golang/go${GOVERSION}" # for spawn host
+    fi
+    ;;
+  Linux)
+    GOROOT="/opt/golang/go${GOVERSION}"
+    ;;
+  MINGW* | MSYS* | CYGWIN*)
+    GOROOT="C:\\golang\\go${GOVERSION}"
+    ;;
+  *)
+    echo "unsupported OS: $OS"
+    exit 1
+    ;;
+  esac
+fi
+
+PATH="${GOROOT}/bin:${GOPATH}/bin:${PATH}"
+export GOROOT PATH
+
+echo "Using Go SDK at: $GOROOT (version: $GOVERSION)"
+
+test -x "${GOROOT}/bin/go" || {
+  echo "Go SDK not found at: $GOROOT"
+  exit 1
+}
+
+# Resolve this script’s dir, then go up one level
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
+PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
+
+# Enter the perfcomp sub‐directory
+cd "$PROJECT_ROOT/.evergreen/perfcomp"
+
+# Build the perfcomp binary.
+bash build.sh
+
+if [[ ! -x "./bin/perfcomp" ]]; then
+  echo "Error: ./bin/perfcomp not found or not executable. Please run 'bash build.sh' first." >&2
+  exit 1
+else
+  echo "Found ./bin/perfcomp"
+fi
+
+: "${PERF_URI_PRIVATE_ENDPOINT:?Error: PERF_URI_PRIVATE_ENDPOINT must be set}"
+: "${VERSION_ID:?Error: VERSION_ID must be set}"
From 826490d3fb6a75ae31e152523ed47e50a3c9d8c0 Mon Sep 17 00:00:00 2001
From: Selena Zhou
Date: Thu, 7 Aug 2025 10:36:25 -0400
Subject: [PATCH 02/25] Ignore bin and temp files created by perfcomp

---
 .gitignore | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.gitignore b/.gitignore
index c7c4d930..e71f1a76 100644
--- a/.gitignore
+++ b/.gitignore
@@ -133,6 +133,9 @@ mo-expansion.sh
 .dockerignore
 .local
 container_id.txt
+bin
+perf-report.md
+perf-report.txt
 
 # Azure functions.
.python_packages/ From 508e17bda94b9fed23f6e7be27bf90b9d6a176e3 Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Thu, 7 Aug 2025 10:37:29 -0400 Subject: [PATCH 03/25] Perf analysis compare logic --- .evergreen/perfcomp/compare.go | 403 +++++++++++++++++++++++++++++++++ 1 file changed, 403 insertions(+) create mode 100644 .evergreen/perfcomp/compare.go diff --git a/.evergreen/perfcomp/compare.go b/.evergreen/perfcomp/compare.go new file mode 100644 index 00000000..c53a78d1 --- /dev/null +++ b/.evergreen/perfcomp/compare.go @@ -0,0 +1,403 @@ +package perfcomp + +import ( + "context" + "fmt" + "log" + "math" + "time" + + "go.mongodb.org/mongo-driver/v2/bson" + "go.mongodb.org/mongo-driver/v2/mongo" + "go.mongodb.org/mongo-driver/v2/mongo/options" + "gonum.org/v1/gonum/mat" +) + +type OverrideInfo struct { + OverrideMainline bool `bson:"override_mainline"` + BaseOrder any `bson:"base_order"` + Reason any `bson:"reason"` + User any `bson:"user"` +} + +type Info struct { + Project string `bson:"project"` + Version string `bson:"version"` + Variant string `bson:"variant"` + Order int64 `bson:"order"` + TaskName string `bson:"task_name"` + TaskID string `bson:"task_id"` + Execution int64 `bson:"execution"` + Mainline bool `bson:"mainline"` + OverrideInfo OverrideInfo + TestName string `bson:"test_name"` + Args map[string]any `bson:"args"` +} + +type Stat struct { + Name string `bson:"name"` + Val float64 `bson:"val"` + Metadata any `bson:"metadata"` +} + +type Rollups struct { + Stats []Stat +} + +type RawData struct { + Info Info + CreatedAt any `bson:"created_at"` + CompletedAt any `bson:"completed_at"` + Rollups Rollups + FailedRollupAttempts int64 `bson:"failed_rollup_attempts"` +} + +type TimeSeriesInfo struct { + Project string `bson:"project"` + Variant string `bson:"variant"` + Task string `bson:"task"` + Test string `bson:"test"` + Measurement string `bson:"measurement"` + Args map[string]any `bson:"args"` +} + +type StableRegion struct { + TimeSeriesInfo 
TimeSeriesInfo + Start any `bson:"start"` + End any `bson:"end"` + Values []float64 `bson:"values"` + StartOrder int64 `bson:"start_order"` + EndOrder int64 `bson:"end_order"` + Mean float64 `bson:"mean"` + Std float64 `bson:"std"` + Median float64 `bson:"median"` + Max float64 `bson:"max"` + Min float64 `bson:"min"` + CoefficientOfVariation float64 `bson:"coefficient_of_variation"` + LastSuccessfulUpdate any `bson:"last_successful_update"` + Last bool `bson:"last"` + Contexts []any `bson:"contexts"` +} + +type EnergyStats struct { + Project string + Benchmark string + Measurement string + PatchVersion string + StableRegion StableRegion + MeasurementVal float64 + PercentChange float64 + EnergyStatistic float64 + TestStatistic float64 + HScore float64 + ZScore float64 +} + +type CompareResult struct { + CommitSHA string + MainlineCommit string + Version string + SigEnergyStats []EnergyStats +} + +const expandedMetricsDB = "expanded_metrics" +const rawResultsColl = "raw_results" +const stableRegionsColl = "stable_regions" + +// Compare will return statistical results for a patch version using the +// stable region defined by the performance analyzer cluster. 
+func Compare(ctx context.Context, versionID string, perfAnalyzerConnString string, project string, perfContext string) (*CompareResult, error) {
+	// Every timeout below is derived from the caller's context so that
+	// cancelling ctx aborts the comparison. A nil ctx falls back to Background.
+	if ctx == nil {
+		ctx = context.Background()
+	}
+
+	// Connect to analytics node
+	client, err := mongo.Connect(options.Client().ApplyURI(perfAnalyzerConnString))
+	if err != nil {
+		return nil, fmt.Errorf("Error connecting client: %w", err)
+	}
+	defer func() {
+		// A failed disconnect is only logged: exiting the process here would
+		// discard the result (or error) that Compare is about to return.
+		if derr := client.Disconnect(context.Background()); derr != nil {
+			log.Printf("Failed to disconnect client: %v", derr)
+		}
+	}()
+
+	pingCtx, cancelPing := context.WithTimeout(ctx, 5*time.Second)
+	defer cancelPing()
+
+	if err := client.Ping(pingCtx, nil); err != nil {
+		return nil, fmt.Errorf("Error pinging MongoDB Analytics: %w", err)
+	}
+	log.Println("Successfully connected to MongoDB Analytics node.")
+
+	db := client.Database(expandedMetricsDB)
+
+	// Get raw data, most recent stable region, and calculate energy stats
+	findCtx, cancelFind := context.WithTimeout(ctx, 30*time.Second)
+	defer cancelFind()
+
+	patchRawData, err := findRawData(findCtx, project, versionID, db.Collection(rawResultsColl))
+	if err != nil {
+		return nil, fmt.Errorf("Error getting raw data: %w", err)
+	}
+
+	allEnergyStats, err := getEnergyStatsForAllBenchMarks(findCtx, patchRawData, db.Collection(stableRegionsColl), perfContext)
+	if err != nil {
+		return nil, fmt.Errorf("Error getting energy statistics: %w", err)
+	}
+
+	// Get statistically significant benchmarks
+	return &CompareResult{
+		Version:        versionID,
+		SigEnergyStats: getStatSigBenchmarks(allEnergyStats),
+	}, nil
+}
+
+// findRawData returns every raw_results document recorded for the given
+// project and version on the "perf" variant/task. Query and decode failures
+// are returned to the caller instead of terminating the process.
+func findRawData(ctx context.Context, project string, version string, coll *mongo.Collection) ([]RawData, error) {
+	filter := bson.D{
+		{"info.project", project},
+		{"info.version", version},
+		{"info.variant", "perf"},
+		{"info.task_name", "perf"},
+	}
+
+	cursor, err := coll.Find(ctx, filter)
+	if err != nil {
+		return nil, fmt.Errorf("Error retrieving raw data for version %q: %w", version, err)
+	}
+	defer func() {
+		if cerr := cursor.Close(ctx); cerr != nil {
+			log.Printf("Error closing cursor while retrieving raw data for version %q: %v", version, cerr)
+		}
+	}()
+
+	var rawData []RawData
+	if err := cursor.All(ctx, &rawData); err != nil {
+		return nil, fmt.Errorf("Error decoding raw data from version %q: %w", version, err)
+	}
+
+	// Count the decoded documents; the cursor's remaining batch length only
+	// reflects the first batch returned by the server.
+	log.Printf("Successfully retrieved %d docs from version %s.\n", len(rawData), version)
+
+	return rawData, nil
+}
+
+// Find the most recent stable region of the mainline version for a specific test/measurement
+func findLastStableRegion(ctx context.Context, project string, testname string, measurement string, coll *mongo.Collection, perfContext string) (*StableRegion, error) {
+	filter := bson.D{
+		{"time_series_info.project", project},
+		{"time_series_info.variant", "perf"},
+		{"time_series_info.task", "perf"},
+		{"time_series_info.test", testname},
+		{"time_series_info.measurement", measurement},
+		{"last", true},
+		{"contexts", bson.D{{"$in", bson.A{perfContext}}}}, // TODO (GODRIVER-3102): Refactor perf context for project switching.
+	}
+
+	findOptions := options.FindOne().SetSort(bson.D{{"end", -1}})
+
+	var sr StableRegion
+	if err := coll.FindOne(ctx, filter, findOptions).Decode(&sr); err != nil {
+		return nil, err
+	}
+	return &sr, nil
+}
+
+// For a specific test and measurement
+func getEnergyStatsForOneBenchmark(ctx context.Context, rd RawData, coll *mongo.Collection, perfContext string) ([]*EnergyStats, error) {
+	testname := rd.Info.TestName
+	project := rd.Info.Project
+	var energyStats []*EnergyStats
+
+	for i := range rd.Rollups.Stats {
+		measName := rd.Rollups.Stats[i].Name
+		measVal := rd.Rollups.Stats[i].Val
+
+		stableRegion, err := findLastStableRegion(ctx, project, testname, measName, coll, perfContext)
+		if err != nil {
+			return nil, fmt.Errorf("Error finding last stable region for test %q, measurement %q: %w", testname, measName, err)
+		}
+		// mat.NewDense panics on a zero-length dimension, so reject an empty
+		// stable region with an error before building the vector.
+		if len(stableRegion.Values) == 0 {
+			return nil, fmt.Errorf("stable region for test %q, measurement %q has no values", testname, measName)
+		}
+
+		// The performance analyzer compares the measurement value from the patch to a stable region that succeeds the latest change point.
+		// For example, if there were 5 measurements since the last change point, then the stable region is the 5 latest values for the measurement.
+		stableRegionVec := mat.NewDense(len(stableRegion.Values), 1, stableRegion.Values)
+		measValVec := mat.NewDense(1, 1, []float64{measVal}) // singleton
+
+		estat, tstat, hscore, err := getEnergyStatistics(stableRegionVec, measValVec)
+		if err != nil {
+			return nil, fmt.Errorf("Could not calculate energy stats for test %q, measurement %q: %w", testname, measName, err)
+		}
+
+		energyStats = append(energyStats, &EnergyStats{
+			Project:         project,
+			Benchmark:       testname,
+			Measurement:     measName,
+			PatchVersion:    rd.Info.Version,
+			StableRegion:    *stableRegion,
+			MeasurementVal:  measVal,
+			PercentChange:   getPercentageChange(measVal, stableRegion.Mean),
+			EnergyStatistic: estat,
+			TestStatistic:   tstat,
+			HScore:          hscore,
+			ZScore:          getZScore(measVal, stableRegion.Mean, stableRegion.Std),
+		})
+	}
+
+	return energyStats, nil
+}
+
+// getEnergyStatsForAllBenchMarks collects the energy statistics of every
+// benchmark in patchRawData and stops at the first benchmark that fails.
+func getEnergyStatsForAllBenchMarks(ctx context.Context, patchRawData []RawData, coll *mongo.Collection, perfContext string) ([]*EnergyStats, error) {
+	var allEnergyStats []*EnergyStats
+	for _, rd := range patchRawData {
+		energyStats, err := getEnergyStatsForOneBenchmark(ctx, rd, coll, perfContext)
+		if err != nil {
+			return nil, fmt.Errorf("Could not get energy stats for %q: %w", rd.Info.TestName, err)
+		}
+		allEnergyStats = append(allEnergyStats, energyStats...)
+	}
+	return allEnergyStats, nil
+}
+
+// getStatSigBenchmarks keeps the entries whose |z-score| exceeds 1.96.
+// Entries with a NaN z-score never pass the comparison and are dropped.
+func getStatSigBenchmarks(energyStats []*EnergyStats) []EnergyStats {
+	var significantEnergyStats []EnergyStats
+	for _, es := range energyStats {
+		// The "iterations" measurement is the number of iterations that the Go
+		// benchmark suite had to run to converge on a benchmark measurement. It
+		// is not comparable between benchmark runs, so is not a useful
+		// measurement to print here. Omit it.
+		if es.Measurement != "iterations" && math.Abs(es.ZScore) > 1.96 {
+			significantEnergyStats = append(significantEnergyStats, *es)
+		}
+	}
+
+	return significantEnergyStats
+}
+
+// Given two matrices, this function returns
+// (e, t, h) = (E-statistic, test statistic, e-coefficient of inhomogeneity)
+func getEnergyStatistics(x, y *mat.Dense) (float64, float64, float64, error) {
+	xrows, xcols := x.Dims()
+	yrows, ycols := y.Dims()
+
+	if xcols != ycols {
+		return 0, 0, 0, fmt.Errorf("both inputs must have the same number of columns")
+	}
+	if xrows == 0 || yrows == 0 {
+		return 0, 0, 0, fmt.Errorf("inputs cannot be empty")
+	}
+
+	// Both row counts are non-zero from here on, so the divisions are safe.
+	xrowsf := float64(xrows)
+	yrowsf := float64(yrows)
+
+	distXY, err := getDistance(x, y)
+	if err != nil {
+		return 0, 0, 0, err
+	}
+	distXX, err := getDistance(x, x)
+	if err != nil {
+		return 0, 0, 0, err
+	}
+	distYY, err := getDistance(y, y)
+	if err != nil {
+		return 0, 0, 0, err
+	}
+
+	A := distXY / (xrowsf * yrowsf) // E|X-Y|
+	B := distXX / (xrowsf * xrowsf) // E|X-X'|
+	C := distYY / (yrowsf * yrowsf) // E|Y-Y'|
+
+	E := 2*A - B - C // D^2(F_x, F_y)
+	T := ((xrowsf * yrowsf) / (xrowsf + yrowsf)) * E
+
+	// H is left at 0 when A is 0 to avoid dividing by zero.
+	var H float64
+	if A > 0 {
+		H = E / (2 * A)
+	}
+	return E, T, H, nil
+}
+
+// Given two vectors (expected 1 col),
+// this function returns the sum of distances between each pair.
+func getDistance(x, y *mat.Dense) (float64, error) {
+	xrows, xcols := x.Dims()
+	yrows, ycols := y.Dims()
+
+	if xcols != 1 || ycols != 1 {
+		return 0, fmt.Errorf("both inputs must be column vectors")
+	}
+
+	var sum float64
+	for i := 0; i < xrows; i++ {
+		for j := 0; j < yrows; j++ {
+			sum += math.Abs(x.At(i, 0) - y.At(j, 0))
+		}
+	}
+	return sum, nil
+}
+
+// Get Z score for result x, compared to mean mu and standard deviation sigma.
+func getZScore(x, mu, sigma float64) float64 { + if sigma == 0 { + return math.NaN() + } + return (x - mu) / sigma +} + +// Get percentage change for result x compared to mean u. +func getPercentageChange(x, mu float64) float64 { + if mu == 0 { + return math.NaN() + } + return ((x - mu) / mu) * 100 +} From 33e3c7f0cd3e7332755f3d8241fe3fbf5a7d7d61 Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Thu, 7 Aug 2025 10:38:59 -0400 Subject: [PATCH 04/25] Parse perf txt to md report --- .evergreen/perfcomp/cmd/perfcomp/mdreport.go | 144 +++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 .evergreen/perfcomp/cmd/perfcomp/mdreport.go diff --git a/.evergreen/perfcomp/cmd/perfcomp/mdreport.go b/.evergreen/perfcomp/cmd/perfcomp/mdreport.go new file mode 100644 index 00000000..4a74c3bd --- /dev/null +++ b/.evergreen/perfcomp/cmd/perfcomp/mdreport.go @@ -0,0 +1,144 @@ +package main + +import ( + "bufio" + "fmt" + "log" + "net/url" + "os" + "strings" + + "github.com/spf13/cobra" +) + +const perfReportFileTxt = "perf-report.txt" +const perfReportFileMd = "perf-report.md" +const perfVariant = "^perf$" +const hscoreDefLink = "https://en.wikipedia.org/wiki/Energy_distance#:~:text=E%2Dcoefficient%20of%20inhomogeneity" +const zscoreDefLink = "https://en.wikipedia.org/wiki/Standard_score#Calculation" + +func newMdCommand() *cobra.Command { + cmd := &cobra.Command{ + Use: "mdreport", + Short: "generates markdown output after run", + } + + cmd.Run = func(cmd *cobra.Command, args []string) { + if err := runMdCommand(cmd, args); err != nil { + log.Fatalf("failed to generate md: %v", err) + } + } + + return cmd +} + +func runMdCommand(cmd *cobra.Command, args []string) error { + var line string + + // open file to read + fRead, err := os.Open(perfReportFileTxt) + if err != nil { + log.Fatalf("Could not open %s: %v", perfReportFileTxt, err) + } + defer fRead.Close() + + // open file to write + fWrite, err := os.Create(perfReportFileMd) + if err != nil { + 
log.Fatalf("Could not create %s: %v", perfReportFileMd, err) + } + defer fWrite.Close() + + fmt.Fprintf(fWrite, "## 🧪 Performance Results\n") + + // read the file line by line using scanner + scanner := bufio.NewScanner(fRead) + + var version string + var evgLink string + + for scanner.Scan() { + line = scanner.Text() + if strings.Contains(line, "Version ID:") { + // parse version + version = strings.Split(line, " ")[2] + } else if strings.Contains(line, "Commit SHA:") { + // parse commit SHA and write header + fmt.Fprintf(fWrite, "\n
\n%s\n\t
\n\n", line) + } else if strings.Contains(line, "version "+version) { + // dynamic Evergreen perf task link + evgLink, err = generateEvgLink(version, perfVariant) + if err != nil { + log.Println(err) + fmt.Fprintf(fWrite, "%s\n", line) + } else { + printUrlToLine(fWrite, line, evgLink, "version", -1) + } + } else if strings.Contains(line, "For a comprehensive view of all microbenchmark results for this PR's commit, please check out the Evergreen perf task for this patch.") { + // last line of comment + evgLink, err = generateEvgLink(version, "") + if err != nil { + log.Println(err) + fmt.Fprintf(fWrite, "%s\n", line) + } else { + printUrlToLine(fWrite, line, evgLink, "Evergreen", 0) + } + } else if strings.Contains(line, ", ") { + line = strings.ReplaceAll(line, ", ", "
") + fmt.Fprintf(fWrite, "%s\n", line) + } else if strings.Contains(line, "H-Score") { + linkedWord := "[H-Score](" + hscoreDefLink + ")" + line = strings.ReplaceAll(line, "H-Score", linkedWord) + linkedWord = "[Z-Score](" + zscoreDefLink + ")" + line = strings.ReplaceAll(line, "Z-Score", linkedWord) + fmt.Fprintf(fWrite, "%s\n", line) + } else { + // all other regular lines + fmt.Fprintf(fWrite, "%s\n", line) + } + } + + fmt.Fprintf(fWrite, "
\n") + return nil +} + +func generateEvgLink(version string, variant string) (string, error) { + baseUrl := "https://spruce.mongodb.com" + page := "0" + sorts := "STATUS:ASC;BASE_STATUS:DESC" + + u, err := url.Parse(baseUrl) + if err != nil { + return "", fmt.Errorf("Error parsing URL: %v", err) + } + + u.Path = fmt.Sprintf("version/%s/tasks", version) + + // construct query parameters + queryParams := url.Values{} + queryParams.Add("page", page) + queryParams.Add("sorts", sorts) + if variant != "" { + queryParams.Add("variant", variant) + } + + u.RawQuery = queryParams.Encode() + return u.String(), nil +} + +func printUrlToLine(fWrite *os.File, line string, link string, targetWord string, step int) { + words := strings.Split(line, " ") + for i, w := range words { + if i > 0 && words[i+step] == targetWord { + fmt.Fprintf(fWrite, "[%s](%s)", w, link) + } else { + fmt.Fprint(fWrite, w) + } + + if i < len(words)-1 { + fmt.Fprint(fWrite, " ") + } else { + fmt.Fprint(fWrite, "\n") + } + } +} From 8805889d0fea1ab63593f9470d887c36a8ed223f Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Thu, 7 Aug 2025 10:39:46 -0400 Subject: [PATCH 05/25] Project-generalized perf comp CLI ported over from Go Driver --- .evergreen/perfcomp/cmd/perfcomp/compare.go | 122 ++++++++++++++++++++ .evergreen/perfcomp/cmd/perfcomp/main.go | 22 ++++ 2 files changed, 144 insertions(+) create mode 100644 .evergreen/perfcomp/cmd/perfcomp/compare.go create mode 100644 .evergreen/perfcomp/cmd/perfcomp/main.go diff --git a/.evergreen/perfcomp/cmd/perfcomp/compare.go b/.evergreen/perfcomp/cmd/perfcomp/compare.go new file mode 100644 index 00000000..a031b430 --- /dev/null +++ b/.evergreen/perfcomp/cmd/perfcomp/compare.go @@ -0,0 +1,122 @@ +package main + +import ( + "fmt" + "log" + "math" + "os" + "sort" + "strings" + "text/tabwriter" + + "github.com/mongodb-labs/drivers-evergreen-tools/perfcomp" + "github.com/spf13/cobra" +) + +// For support for other projects, a performance context needs to be created 
and added here.
+var projectToPerfContext = map[string]string{
+	"mongo-go-driver": "GoDriver perf task",
+}
+
+// newCompareCommand builds the "compare" subcommand. It requires an Evergreen
+// patch version ID as an argument and reads the analytics node URI from the
+// PERF_URI_PRIVATE_ENDPOINT environment variable.
+func newCompareCommand() *cobra.Command {
+	cmd := &cobra.Command{
+		Use:   "compare",
+		Short: "compare evergreen patch to mainline commit",
+		// Version id is a required argument
+		Args: func(cmd *cobra.Command, args []string) error {
+			if len(args) < 1 {
+				return fmt.Errorf("this command requires an evergreen patch version ID")
+			}
+			return nil
+		},
+	}
+
+	cmd.Flags().String("project", "mongo-go-driver", "specify the name of an existing Evergreen project")
+
+	cmd.Run = func(cmd *cobra.Command, args []string) {
+		// Check for variables
+		uri := os.Getenv("PERF_URI_PRIVATE_ENDPOINT")
+		if uri == "" {
+			log.Fatal("PERF_URI_PRIVATE_ENDPOINT env variable is not set")
+		}
+
+		// Retrieve the project flag value
+		project, err := cmd.Flags().GetString("project")
+		if err != nil {
+			log.Fatalf("failed to get project flag: %v", err)
+		}
+
+		// Validate the project flag and perf context
+		if project == "" {
+			log.Fatal("must provide project")
+		}
+		perfContext, ok := projectToPerfContext[project]
+		if !ok {
+			log.Fatalf("support for project %q is not configured yet", project)
+		}
+
+		if err := runCompare(cmd, args, project, perfContext); err != nil {
+			log.Fatalf("failed to compare: %v", err)
+		}
+	}
+
+	return cmd
+}
+
+// createComment renders the comparison result as the text of a PR comment:
+// either a table of the statistically significant benchmarks, sorted by
+// absolute percent change (largest first), or a single line stating that
+// nothing significant changed. Both variants end with the same footer.
+func createComment(result perfcomp.CompareResult) string {
+	const footer = "\n*For a comprehensive view of all microbenchmark results for this PR's commit, please check out the Evergreen perf task for this patch.*"
+
+	var comment strings.Builder
+
+	// Handle the empty case before creating the tabwriter: the tabwriter
+	// buffers its rows until Flush, so header rows written earlier would
+	// otherwise be emitted after the "no significant changes" message.
+	if len(result.SigEnergyStats) == 0 {
+		fmt.Fprintf(&comment, "There were no significant changes to the performance to report for version %s.\n", result.Version)
+		comment.WriteString(footer)
+		return comment.String()
+	}
+
+	fmt.Fprintf(&comment, "The following benchmark tests for version %s had statistically significant changes (i.e., |z-score| > 1.96):\n\n", result.Version)
+
+	// Sort a copy so the caller's slice order is left untouched.
+	stats := append([]perfcomp.EnergyStats(nil), result.SigEnergyStats...)
+	sort.Slice(stats, func(i, j int) bool {
+		return math.Abs(stats[i].PercentChange) > math.Abs(stats[j].PercentChange)
+	})
+
+	w := tabwriter.NewWriter(&comment, 0, 0, 1, ' ', 0)
+	fmt.Fprintln(w, "| Benchmark\t| Measurement\t| % Change\t| Patch Value\t| Stable Region\t| H-Score\t| Z-Score\t| ")
+	fmt.Fprintln(w, "| ---------\t| -----------\t| --------\t| -----------\t| -------------\t| -------\t| -------\t|")
+	for _, es := range stats {
+		fmt.Fprintf(w, "| %s\t| %s\t| %.4f\t| %.4f\t| Avg: %.4f, Med: %.4f, Stdev: %.4f\t| %.4f\t| %.4f\t|\n", es.Benchmark, es.Measurement, es.PercentChange, es.MeasurementVal, es.StableRegion.Mean, es.StableRegion.Median, es.StableRegion.Std, es.HScore, es.ZScore)
+	}
+	w.Flush()
+
+	comment.WriteString(footer)
+	return comment.String()
+}
+
+// runCompare compares the patch version given as the last argument against
+// the stable region, logs the resulting comment and, when HEAD_SHA is set,
+// writes it to perfReportFileTxt for the mdreport command. Failures are
+// returned to the caller, which decides how to terminate.
+func runCompare(cmd *cobra.Command, args []string, project string, perfContext string) error {
+	if len(args) == 0 {
+		return fmt.Errorf("this command requires an evergreen patch version ID")
+	}
+	perfAnalyzerConnString := os.Getenv("PERF_URI_PRIVATE_ENDPOINT")
+	version := args[len(args)-1]
+
+	res, err := perfcomp.Compare(cmd.Context(), version, perfAnalyzerConnString, project, perfContext)
+	if err != nil {
+		return err
+	}
+	res.CommitSHA = os.Getenv("HEAD_SHA")
+	res.MainlineCommit = os.Getenv("BASE_SHA")
+
+	prComment := createComment(*res)
+	log.Println("🧪 Performance Results")
+	log.Println(prComment)
+
+	if res.CommitSHA != "" {
+		// Write results to .txt file to parse into markdown comment
+		report := fmt.Sprintf("Version ID: %s\nCommit SHA: %s\n%s\n", version, res.CommitSHA, prComment)
+		if err := os.WriteFile(perfReportFileTxt, []byte(report), 0o666); err != nil {
+			return fmt.Errorf("Could not create %s: %w", perfReportFileTxt, err)
+		}
+		log.Printf("PR commit %s: saved to %s for markdown comment.\n", res.CommitSHA, perfReportFileTxt)
+	}
+
+	return nil
+}
diff --git a/.evergreen/perfcomp/cmd/perfcomp/main.go b/.evergreen/perfcomp/cmd/perfcomp/main.go
new file mode 100644
index 00000000..377abf8f
--- /dev/null
+++ 
b/.evergreen/perfcomp/cmd/perfcomp/main.go @@ -0,0 +1,22 @@ +package main + +import ( + "log" + + "github.com/spf13/cobra" +) + +func main() { + cmd := &cobra.Command{ + Use: "perfcomp", + Short: "perfcomp is a cli that reports stat-sig results between evergreen patches with the mainline commit", + Version: "0.0.0-alpha", + } + + cmd.AddCommand(newCompareCommand()) + cmd.AddCommand(newMdCommand()) + + if err := cmd.Execute(); err != nil { + log.Fatalf("error: %v", err) + } +} From 26597c1ad65b061f35aa1249c5d88bc6424493bb Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Thu, 7 Aug 2025 11:31:16 -0400 Subject: [PATCH 06/25] Add README for usage and instructions --- .evergreen/perfcomp/README.md | 55 +++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 .evergreen/perfcomp/README.md diff --git a/.evergreen/perfcomp/README.md b/.evergreen/perfcomp/README.md new file mode 100644 index 00000000..922d3276 --- /dev/null +++ b/.evergreen/perfcomp/README.md @@ -0,0 +1,55 @@ +# perfcomp + +**perfcomp** is a performance analyzer that runs on a per-PR-commit basis. + +## 📦 Installation + +To install the latest version: + +```bash +go install github.com/mongodb-labs/drivers-evergreen-tools/perfcomp +``` + +Or build it locally in `bin/perfcomp`: + +```bash +bash build.sh +``` + +## 🔧 Usage + +To use `perfcomp`, you need an analytics node URI in an environment variable called `PERF_URI_PRIVATE_ENDPOINT`. You can request it from the devprod performance team. + +To run in your project repository, you need to create a [performance context](https://performance-monitoring-and-analysis.server-tig.prod.corp.mongodb.com/contexts) that captures all benchmarks in your project. Feel free to refer to the [Go Driver context](https://performance-monitoring-and-analysis.server-tig.prod.corp.mongodb.com/context/name/GoDriver%20perf%20task) as a template. Then, add the context to the `projectToPerfContext` map in `./cmd/perfcomp/compare.go`. 
+ +```bash +perfcomp is a cli that reports stat-sig results between evergreen patches with the mainline commit + +Usage: + perfcomp [command] + +Available Commands: + compare compare evergreen patch to mainline commit + mdreport generates markdown output after run +``` + +### Commands + +#### compare +```bash +compare evergreen patch to mainline commit + +Usage: + perfcomp compare [version_id] [flags] + +Flags: + --project string specify the name of an existing Evergreen project (default "mongo-go-driver") +``` + +#### mdreport +```bash +generates markdown output after compare run (must be run after `compare`) + +Usage: + perfcomp mdreport +``` From 4c02876d9c4c937b3c7168432520514c39e658d9 Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Fri, 8 Aug 2025 12:43:22 -0400 Subject: [PATCH 07/25] Rename functions and add comments for clarity --- .evergreen/perfcomp/compare.go | 111 ++++++++++++++++++--------------- 1 file changed, 61 insertions(+), 50 deletions(-) diff --git a/.evergreen/perfcomp/compare.go b/.evergreen/perfcomp/compare.go index c53a78d1..a9e245d7 100644 --- a/.evergreen/perfcomp/compare.go +++ b/.evergreen/perfcomp/compare.go @@ -13,16 +13,20 @@ import ( "gonum.org/v1/gonum/mat" ) -type OverrideInfo struct { - OverrideMainline bool `bson:"override_mainline"` - BaseOrder any `bson:"base_order"` - Reason any `bson:"reason"` - User any `bson:"user"` +// RawData defines the shape of the data in the raw_results collection. +// raw_results stores results by benchmark, which holds the values of all its measurements. +// A single measurement from a single benchmark is called a microbenchmark. 
+type RawData struct { + Info Info + CreatedAt any `bson:"created_at"` + CompletedAt any `bson:"completed_at"` + Rollups Rollups // List of all measurement results + FailedRollupAttempts int64 `bson:"failed_rollup_attempts"` } type Info struct { Project string `bson:"project"` - Version string `bson:"version"` + Version string `bson:"version"` // Evergreen version that produced the results Variant string `bson:"variant"` Order int64 `bson:"order"` TaskName string `bson:"task_name"` @@ -30,42 +34,34 @@ type Info struct { Execution int64 `bson:"execution"` Mainline bool `bson:"mainline"` OverrideInfo OverrideInfo - TestName string `bson:"test_name"` + TestName string `bson:"test_name"` // Benchmark name Args map[string]any `bson:"args"` } -type Stat struct { - Name string `bson:"name"` - Val float64 `bson:"val"` - Metadata any `bson:"metadata"` +type OverrideInfo struct { + OverrideMainline bool `bson:"override_mainline"` + BaseOrder any `bson:"base_order"` + Reason any `bson:"reason"` + User any `bson:"user"` } type Rollups struct { Stats []Stat } -type RawData struct { - Info Info - CreatedAt any `bson:"created_at"` - CompletedAt any `bson:"completed_at"` - Rollups Rollups - FailedRollupAttempts int64 `bson:"failed_rollup_attempts"` -} - -type TimeSeriesInfo struct { - Project string `bson:"project"` - Variant string `bson:"variant"` - Task string `bson:"task"` - Test string `bson:"test"` - Measurement string `bson:"measurement"` - Args map[string]any `bson:"args"` +type Stat struct { + Name string `bson:"name"` // Measurement name + Val float64 `bson:"val"` // Microbenchmark result + Metadata any `bson:"metadata"` } +// StableRegion defines the shape of the data in the stable_regions collection. +// A stable region is a group of consecutive microbenchmark values between two change points. 
type StableRegion struct { TimeSeriesInfo TimeSeriesInfo Start any `bson:"start"` End any `bson:"end"` - Values []float64 `bson:"values"` + Values []float64 `bson:"values"` // All microbenchmark values that makes up the stable region StartOrder int64 `bson:"start_order"` EndOrder int64 `bson:"end_order"` Mean float64 `bson:"mean"` @@ -76,16 +72,27 @@ type StableRegion struct { CoefficientOfVariation float64 `bson:"coefficient_of_variation"` LastSuccessfulUpdate any `bson:"last_successful_update"` Last bool `bson:"last"` - Contexts []any `bson:"contexts"` + Contexts []any `bson:"contexts"` // Performance context (e.g. "Go Driver perf comp") +} + +type TimeSeriesInfo struct { + Project string `bson:"project"` + Variant string `bson:"variant"` + Task string `bson:"task"` + Test string `bson:"test"` // Benchmark name + Measurement string `bson:"measurement"` // Measurement name + Args map[string]any `bson:"args"` } +// EnergyStats stores the calculated energy statistics for a patch version's specific +// microbenchmark compared to the mainline's stable region for that same microbenchmark. type EnergyStats struct { Project string Benchmark string Measurement string - PatchVersion string - StableRegion StableRegion - MeasurementVal float64 + PatchVersion string // Evergreen version that produced the results + StableRegion StableRegion // Latest stable region from the mainline this patch is comparing against + MeasurementVal float64 // Microbenchmark result of the patch version PercentChange float64 EnergyStatistic float64 TestStatistic float64 @@ -93,19 +100,22 @@ type EnergyStats struct { ZScore float64 } +// CompareResult is the collection of the energy statistics of all microbenchmarks with +// statistically significant changes for this patch. 
type CompareResult struct { - CommitSHA string - MainlineCommit string - Version string + CommitSHA string // Head commit SHA + MainlineCommit string // Base commit SHA + Version string // Evergreen patch version SigEnergyStats []EnergyStats } +// Performance analytics node db and collection names const expandedMetricsDB = "expanded_metrics" const rawResultsColl = "raw_results" const stableRegionsColl = "stable_regions" // Compare will return statistical results for a patch version using the -// stable region defined by the performance analyzer cluster. +// stable region defined by the performance analytics cluster. func Compare(ctx context.Context, versionID string, perfAnalyzerConnString string, project string, perfContext string) (*CompareResult, error) { // Connect to analytics node @@ -156,6 +166,7 @@ func Compare(ctx context.Context, versionID string, perfAnalyzerConnString strin return &compareResult, nil } +// Gets all raw benchmark data for a specific Evergreen version. func findRawData(ctx context.Context, project string, version string, coll *mongo.Collection) ([]RawData, error) { filter := bson.D{ {"info.project", project}, @@ -194,7 +205,7 @@ func findRawData(ctx context.Context, project string, version string, coll *mong return rawData, err } -// Find the most recent stable region of the mainline version for a specific test/measurement +// Finds the most recent stable region of the mainline version for a specific microbenchmark. func findLastStableRegion(ctx context.Context, project string, testname string, measurement string, coll *mongo.Collection, perfContext string) (*StableRegion, error) { filter := bson.D{ {"time_series_info.project", project}, @@ -216,7 +227,7 @@ func findLastStableRegion(ctx context.Context, project string, testname string, return sr, nil } -// For a specific test and measurement +// Calculate the energy statistics for all measurements in a benchmark. 
func getEnergyStatsForOneBenchmark(ctx context.Context, rd RawData, coll *mongo.Collection, perfContext string) ([]*EnergyStats, error) { testname := rd.Info.TestName var energyStats []*EnergyStats @@ -241,7 +252,7 @@ func getEnergyStatsForOneBenchmark(ctx context.Context, rd RawData, coll *mongo. stableRegionVec := mat.NewDense(len(stableRegion.Values), 1, stableRegion.Values) measValVec := mat.NewDense(1, 1, []float64{measVal}) // singleton - estat, tstat, hscore, err := getEnergyStatistics(stableRegionVec, measValVec) + estat, tstat, hscore, err := calcEnergyStatistics(stableRegionVec, measValVec) if err != nil { log.Fatalf( "Could not calculate energy stats for test %q, measurement %q: %v", @@ -251,8 +262,8 @@ func getEnergyStatsForOneBenchmark(ctx context.Context, rd RawData, coll *mongo. ) } - zscore := getZScore(measVal, stableRegion.Mean, stableRegion.Std) - pChange := getPercentageChange(measVal, stableRegion.Mean) + zscore := calcZScore(measVal, stableRegion.Mean, stableRegion.Std) + pChange := calcPercentChange(measVal, stableRegion.Mean) es := EnergyStats{ Project: project, @@ -290,7 +301,7 @@ func getEnergyStatsForAllBenchMarks(ctx context.Context, patchRawData []RawData, return allEnergyStats, nil } -func getStatSigBenchmarks(energyStats []*EnergyStats) []EnergyStats { // TODO +func getStatSigBenchmarks(energyStats []*EnergyStats) []EnergyStats { var significantEnergyStats []EnergyStats for _, es := range energyStats { @@ -308,7 +319,7 @@ func getStatSigBenchmarks(energyStats []*EnergyStats) []EnergyStats { // TODO // Given two matrices, this function returns // (e, t, h) = (E-statistic, test statistic, e-coefficient of inhomogeneity) -func getEnergyStatistics(x, y *mat.Dense) (float64, float64, float64, error) { +func calcEnergyStatistics(x, y *mat.Dense) (float64, float64, float64, error) { xrows, xcols := x.Dims() yrows, ycols := y.Dims() @@ -324,7 +335,7 @@ func getEnergyStatistics(x, y *mat.Dense) (float64, float64, float64, error) { var A 
float64 // E|X-Y| if xrowsf > 0 && yrowsf > 0 { - dist, err := getDistance(x, y) + dist, err := calcDistance(x, y) if err != nil { return 0, 0, 0, err } @@ -335,7 +346,7 @@ func getEnergyStatistics(x, y *mat.Dense) (float64, float64, float64, error) { var B float64 // E|X-X'| if xrowsf > 0 { - dist, err := getDistance(x, x) + dist, err := calcDistance(x, x) if err != nil { return 0, 0, 0, err } @@ -346,7 +357,7 @@ func getEnergyStatistics(x, y *mat.Dense) (float64, float64, float64, error) { var C float64 // E|Y-Y'| if yrowsf > 0 { - dist, err := getDistance(y, y) + dist, err := calcDistance(y, y) if err != nil { return 0, 0, 0, err } @@ -368,7 +379,7 @@ func getEnergyStatistics(x, y *mat.Dense) (float64, float64, float64, error) { // Given two vectors (expected 1 col), // this function returns the sum of distances between each pair. -func getDistance(x, y *mat.Dense) (float64, error) { +func calcDistance(x, y *mat.Dense) (float64, error) { xrows, xcols := x.Dims() yrows, ycols := y.Dims() @@ -386,16 +397,16 @@ func getDistance(x, y *mat.Dense) (float64, error) { return sum, nil } -// Get Z score for result x, compared to mean u and st dev o. -func getZScore(x, mu, sigma float64) float64 { +// Calculate the Z score for result x, compared to mean mu and st dev sigma. +func calcZScore(x, mu, sigma float64) float64 { if sigma == 0 { return math.NaN() } return (x - mu) / sigma } -// Get percentage change for result x compared to mean u. -func getPercentageChange(x, mu float64) float64 { +// Calculate the percentage change for result x compared to mean mu. 
+func calcPercentChange(x, mu float64) float64 { if mu == 0 { return math.NaN() } From 0338c9e650a1c7c095193411c5b811d35974ac51 Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Fri, 8 Aug 2025 12:43:34 -0400 Subject: [PATCH 08/25] Add testing for compare.go --- .evergreen/perfcomp/compare_test.go | 110 ++++++++++++++++++++++++++++ .evergreen/perfcomp/go.mod | 4 + .evergreen/perfcomp/go.sum | 5 ++ 3 files changed, 119 insertions(+) create mode 100644 .evergreen/perfcomp/compare_test.go diff --git a/.evergreen/perfcomp/compare_test.go b/.evergreen/perfcomp/compare_test.go new file mode 100644 index 00000000..b1711b0b --- /dev/null +++ b/.evergreen/perfcomp/compare_test.go @@ -0,0 +1,110 @@ +package perfcomp + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "gonum.org/v1/gonum/mat" +) + +func createTestVectors(start1 int, stop1 int, step1 int, start2 int, stop2 int, step2 int) (*mat.Dense, *mat.Dense) { + xData := []float64{} + yData := []float64{} + + for i := start1; i < stop1; i += step1 { + xData = append(xData, float64(i)) + } + for j := start2; j < stop2; j += step2 { + yData = append(yData, float64(j)) + } + + x := mat.NewDense(len(xData), 1, xData) + y := mat.NewDense(len(yData), 1, yData) + + return x, y +} + +// TestCalcEnergyStatistics verifies that the energy calculation algorithms are correct. +func TestCalcEnergyStatistics(t *testing.T) { + t.Run("similar distributions should have small e,t,h values ", func(t *testing.T) { + x, y := createTestVectors(1, 100, 1, 1, 105, 1) + e, tstat, h, _ := calcEnergyStatistics(x, y) + + del := 1e-3 + // Limit precision of comparison to 3 digits after the decimal. 
+ assert.InDelta(t, 0.160, e, del) // |0.160 - e| < 0.001 + assert.InDelta(t, 8.136, tstat, del) + assert.InDelta(t, 0.002, h, del) + }) + + t.Run("different distributions should have large e,t,h values", func(t *testing.T) { + x, y := createTestVectors(1, 100, 1, 10000, 13000, 14) + e, tstat, h, _ := calcEnergyStatistics(x, y) + del := 1e-3 + + assert.InDelta(t, 21859.691, e, del) + assert.InDelta(t, 1481794.709, tstat, del) + assert.InDelta(t, 0.954, h, del) + }) + + t.Run("uni-variate distributions", func(t *testing.T) { + x, y := createTestVectors(1, 300, 1, 1000, 5000, 10) + e, tstat, h, _ := calcEnergyStatistics(x, y) + del := 1e-3 + + assert.InDelta(t, 4257.009, e, del) + assert.InDelta(t, 728381.015, tstat, del) + assert.InDelta(t, 0.748, h, del) + }) + + t.Run("equal distributions should have all 0 values", func(t *testing.T) { + x := mat.NewDense(10, 1, []float64{1, 1, 1, 1, 1, 1, 1, 1, 1, 1}) + y := mat.NewDense(1, 1, []float64{1}) + + e, tstat, h, _ := calcEnergyStatistics(x, y) + + assert.Equal(t, 0.0, e) + assert.Equal(t, 0.0, tstat) + assert.Equal(t, 0.0, h) + }) + + t.Run("energy stats returns errors on malformed input", func(t *testing.T) { + x := mat.NewDense(2, 2, make([]float64, 4)) + y := mat.NewDense(2, 3, make([]float64, 6)) + + _, _, _, err := calcEnergyStatistics(x, y) + assert.NotEqual(t, nil, err) + assert.ErrorContains(t, err, "both inputs must have the same number of columns") + + x.Reset() + y = &mat.Dense{} + + _, _, _, err = calcEnergyStatistics(x, y) + assert.NotEqual(t, nil, err) + assert.ErrorContains(t, err, "inputs cannot be empty") + + x = mat.NewDense(2, 2, make([]float64, 4)) + y = mat.NewDense(3, 2, make([]float64, 6)) + + _, _, _, err = calcEnergyStatistics(x, y) + assert.NotEqual(t, nil, err) + assert.ErrorContains(t, err, "both inputs must be column vectors") + }) +} + +// TestFindSigBenchmarks tests that statistically significant benchmarks are correctly flagged. 
+func TestFindSigBenchmarks(t *testing.T) { + var dummyEnergyStats []*EnergyStats + assert.Equal(t, 0, len(getStatSigBenchmarks(dummyEnergyStats))) + + for i := -2.5; i < 3; i += 0.5 { + es := EnergyStats{ + ZScore: i, + } + dummyEnergyStats = append(dummyEnergyStats, &es) + } + assert.Equal(t, 11, len(dummyEnergyStats)) + + sigBenchmarks := getStatSigBenchmarks(dummyEnergyStats) + assert.Equal(t, 4, len(sigBenchmarks)) +} diff --git a/.evergreen/perfcomp/go.mod b/.evergreen/perfcomp/go.mod index ceade597..c55573d6 100644 --- a/.evergreen/perfcomp/go.mod +++ b/.evergreen/perfcomp/go.mod @@ -5,8 +5,10 @@ go 1.24.4 require github.com/spf13/cobra v1.9.1 require ( + github.com/davecgh/go-spew v1.1.1 // indirect github.com/golang/snappy v1.0.0 // indirect github.com/klauspost/compress v1.16.7 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect github.com/xdg-go/pbkdf2 v1.0.0 // indirect github.com/xdg-go/scram v1.1.2 // indirect github.com/xdg-go/stringprep v1.0.4 // indirect @@ -14,11 +16,13 @@ require ( golang.org/x/crypto v0.33.0 // indirect golang.org/x/sync v0.12.0 // indirect golang.org/x/text v0.23.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect ) require ( github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/spf13/pflag v1.0.6 // indirect + github.com/stretchr/testify v1.10.0 go.mongodb.org/mongo-driver/v2 v2.2.3 gonum.org/v1/gonum v0.16.0 ) diff --git a/.evergreen/perfcomp/go.sum b/.evergreen/perfcomp/go.sum index f0b8d0c3..d215447a 100644 --- a/.evergreen/perfcomp/go.sum +++ b/.evergreen/perfcomp/go.sum @@ -9,11 +9,15 @@ github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2 github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I= github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= +github.com/pmezard/go-difflib v1.0.0 
h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo= github.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0= github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c= github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY= @@ -57,4 +61,5 @@ golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8T gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= From e8e535f0d1baf6e2baf81194aa861ffcc175de5f Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Fri, 8 Aug 2025 22:15:11 -0400 Subject: [PATCH 09/25] Error messages should be lowercase --- .evergreen/perfcomp/cmd/perfcomp/mdreport.go | 2 +- .evergreen/perfcomp/compare.go | 22 ++++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.evergreen/perfcomp/cmd/perfcomp/mdreport.go b/.evergreen/perfcomp/cmd/perfcomp/mdreport.go index 4a74c3bd..5388e16a 100644 --- 
a/.evergreen/perfcomp/cmd/perfcomp/mdreport.go +++ b/.evergreen/perfcomp/cmd/perfcomp/mdreport.go @@ -109,7 +109,7 @@ func generateEvgLink(version string, variant string) (string, error) { u, err := url.Parse(baseUrl) if err != nil { - return "", fmt.Errorf("Error parsing URL: %v", err) + return "", fmt.Errorf("error parsing URL: %v", err) } u.Path = fmt.Sprintf("version/%s/tasks", version) diff --git a/.evergreen/perfcomp/compare.go b/.evergreen/perfcomp/compare.go index a9e245d7..9c5ec9f0 100644 --- a/.evergreen/perfcomp/compare.go +++ b/.evergreen/perfcomp/compare.go @@ -121,13 +121,13 @@ func Compare(ctx context.Context, versionID string, perfAnalyzerConnString strin // Connect to analytics node client, err := mongo.Connect(options.Client().ApplyURI(perfAnalyzerConnString)) if err != nil { - return nil, fmt.Errorf("Error connecting client: %v", err) + return nil, fmt.Errorf("error connecting client: %v", err) } defer func() { // Defer disconnect client err = client.Disconnect(context.Background()) if err != nil { - log.Fatalf("Failed to disconnect client: %v", err) + log.Fatalf("failed to disconnect client: %v", err) } }() @@ -136,7 +136,7 @@ func Compare(ctx context.Context, versionID string, perfAnalyzerConnString strin err = client.Ping(ctx, nil) if err != nil { - return nil, fmt.Errorf("Error pinging MongoDB Analytics: %v", err) + return nil, fmt.Errorf("error pinging MongoDB Analytics: %v", err) } log.Println("Successfully connected to MongoDB Analytics node.") @@ -148,12 +148,12 @@ func Compare(ctx context.Context, versionID string, perfAnalyzerConnString strin patchRawData, err := findRawData(findCtx, project, versionID, db.Collection(rawResultsColl)) if err != nil { - return nil, fmt.Errorf("Error getting raw data: %v", err) + return nil, fmt.Errorf("error getting raw data: %v", err) } allEnergyStats, err := getEnergyStatsForAllBenchMarks(findCtx, patchRawData, db.Collection(stableRegionsColl), perfContext) if err != nil { - return nil, 
fmt.Errorf("Error getting energy statistics: %v", err) + return nil, fmt.Errorf("error getting energy statistics: %v", err) } // Get statistically significant benchmarks @@ -178,7 +178,7 @@ func findRawData(ctx context.Context, project string, version string, coll *mong cursor, err := coll.Find(ctx, filter) if err != nil { log.Fatalf( - "Error retrieving raw data for version %q: %v", + "error retrieving raw data for version %q: %v", version, err, ) @@ -186,7 +186,7 @@ func findRawData(ctx context.Context, project string, version string, coll *mong defer func() { err = cursor.Close(ctx) if err != nil { - log.Fatalf("Error closing cursor while retrieving raw data for version %q: %v", version, err) + log.Fatalf("error closing cursor while retrieving raw data for version %q: %v", version, err) } }() @@ -196,7 +196,7 @@ func findRawData(ctx context.Context, project string, version string, coll *mong err = cursor.All(ctx, &rawData) if err != nil { log.Fatalf( - "Error decoding raw data from version %q: %v", + "error decoding raw data from version %q: %v", version, err, ) @@ -240,7 +240,7 @@ func getEnergyStatsForOneBenchmark(ctx context.Context, rd RawData, coll *mongo. stableRegion, err := findLastStableRegion(ctx, project, testname, measName, coll, perfContext) if err != nil { log.Fatalf( - "Error finding last stable region for test %q, measurement %q: %v", + "error finding last stable region for test %q, measurement %q: %v", testname, measName, err, @@ -255,7 +255,7 @@ func getEnergyStatsForOneBenchmark(ctx context.Context, rd RawData, coll *mongo. 
estat, tstat, hscore, err := calcEnergyStatistics(stableRegionVec, measValVec) if err != nil { log.Fatalf( - "Could not calculate energy stats for test %q, measurement %q: %v", + "could not calculate energy stats for test %q, measurement %q: %v", testname, measName, err, @@ -290,7 +290,7 @@ func getEnergyStatsForAllBenchMarks(ctx context.Context, patchRawData []RawData, energyStats, err := getEnergyStatsForOneBenchmark(ctx, rd, coll, perfContext) if err != nil { log.Fatalf( - "Could not get energy stats for %q: %v", + "could not get energy stats for %q: %v", rd.Info.TestName, err, ) From a4e766d3490ef04f80fb4ce165e7040e690d149e Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Fri, 8 Aug 2025 22:37:36 -0400 Subject: [PATCH 10/25] Define context in caller runCompare --- .evergreen/perfcomp/cmd/perfcomp/compare.go | 7 ++++++- .evergreen/perfcomp/compare.go | 3 --- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.evergreen/perfcomp/cmd/perfcomp/compare.go b/.evergreen/perfcomp/cmd/perfcomp/compare.go index a031b430..28d95bca 100644 --- a/.evergreen/perfcomp/cmd/perfcomp/compare.go +++ b/.evergreen/perfcomp/cmd/perfcomp/compare.go @@ -1,6 +1,7 @@ package main import ( + "context" "fmt" "log" "math" @@ -8,6 +9,7 @@ import ( "sort" "strings" "text/tabwriter" + "time" "github.com/mongodb-labs/drivers-evergreen-tools/perfcomp" "github.com/spf13/cobra" @@ -93,7 +95,10 @@ func runCompare(cmd *cobra.Command, args []string, project string, perfContext s perfAnalyzerConnString := os.Getenv("PERF_URI_PRIVATE_ENDPOINT") version := args[len(args)-1] - res, err := perfcomp.Compare(cmd.Context(), version, perfAnalyzerConnString, project, perfContext) + ctx, cancel := context.WithTimeout(cmd.Context(), 5*time.Second) + defer cancel() + + res, err := perfcomp.Compare(ctx, version, perfAnalyzerConnString, project, perfContext) if err != nil { log.Fatalf("failed to compare: %v", err) } diff --git a/.evergreen/perfcomp/compare.go b/.evergreen/perfcomp/compare.go index 
9c5ec9f0..daaecc13 100644 --- a/.evergreen/perfcomp/compare.go +++ b/.evergreen/perfcomp/compare.go @@ -131,9 +131,6 @@ func Compare(ctx context.Context, versionID string, perfAnalyzerConnString strin } }() - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - err = client.Ping(ctx, nil) if err != nil { return nil, fmt.Errorf("error pinging MongoDB Analytics: %v", err) From 13305638096c85e8680b5b9f9477bcc9825b89a6 Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Fri, 8 Aug 2025 22:45:03 -0400 Subject: [PATCH 11/25] Rename connection string --- .evergreen/perfcomp/cmd/perfcomp/compare.go | 4 ++-- .evergreen/perfcomp/compare.go | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.evergreen/perfcomp/cmd/perfcomp/compare.go b/.evergreen/perfcomp/cmd/perfcomp/compare.go index 28d95bca..2b49e6bd 100644 --- a/.evergreen/perfcomp/cmd/perfcomp/compare.go +++ b/.evergreen/perfcomp/cmd/perfcomp/compare.go @@ -92,13 +92,13 @@ func createComment(result perfcomp.CompareResult) string { } func runCompare(cmd *cobra.Command, args []string, project string, perfContext string) error { - perfAnalyzerConnString := os.Getenv("PERF_URI_PRIVATE_ENDPOINT") + perfAnalyticsConnString := os.Getenv("PERF_URI_PRIVATE_ENDPOINT") version := args[len(args)-1] ctx, cancel := context.WithTimeout(cmd.Context(), 5*time.Second) defer cancel() - res, err := perfcomp.Compare(ctx, version, perfAnalyzerConnString, project, perfContext) + res, err := perfcomp.Compare(ctx, version, perfAnalyticsConnString, project, perfContext) if err != nil { log.Fatalf("failed to compare: %v", err) } diff --git a/.evergreen/perfcomp/compare.go b/.evergreen/perfcomp/compare.go index daaecc13..ad5bda14 100644 --- a/.evergreen/perfcomp/compare.go +++ b/.evergreen/perfcomp/compare.go @@ -116,10 +116,10 @@ const stableRegionsColl = "stable_regions" // Compare will return statistical results for a patch version using the // stable region defined by the performance 
analytics cluster. -func Compare(ctx context.Context, versionID string, perfAnalyzerConnString string, project string, perfContext string) (*CompareResult, error) { +func Compare(ctx context.Context, versionID string, perfAnalyticsConnString string, project string, perfContext string) (*CompareResult, error) { // Connect to analytics node - client, err := mongo.Connect(options.Client().ApplyURI(perfAnalyzerConnString)) + client, err := mongo.Connect(options.Client().ApplyURI(perfAnalyticsConnString)) if err != nil { return nil, fmt.Errorf("error connecting client: %v", err) } From abaa20c38eea32a5e7514f6d73d10226faad6e82 Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Mon, 11 Aug 2025 13:57:39 -0400 Subject: [PATCH 12/25] Fix create comment for no stat sig benchmarks --- .evergreen/perfcomp/cmd/perfcomp/compare.go | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/.evergreen/perfcomp/cmd/perfcomp/compare.go b/.evergreen/perfcomp/cmd/perfcomp/compare.go index 2b49e6bd..6340f741 100644 --- a/.evergreen/perfcomp/cmd/perfcomp/compare.go +++ b/.evergreen/perfcomp/cmd/perfcomp/compare.go @@ -67,24 +67,27 @@ func newCompareCommand() *cobra.Command { func createComment(result perfcomp.CompareResult) string { var comment strings.Builder - fmt.Fprintf(&comment, "The following benchmark tests for version %s had statistically significant changes (i.e., |z-score| > 1.96):\n\n", result.Version) - - w := tabwriter.NewWriter(&comment, 0, 0, 1, ' ', 0) - fmt.Fprintln(w, "| Benchmark\t| Measurement\t| % Change\t| Patch Value\t| Stable Region\t| H-Score\t| Z-Score\t| ") - fmt.Fprintln(w, "| ---------\t| -----------\t| --------\t| -----------\t| -------------\t| -------\t| -------\t|") if len(result.SigEnergyStats) == 0 { comment.Reset() fmt.Fprintf(&comment, "There were no significant changes to the performance to report for version %s.\n", result.Version) } else { + fmt.Fprintf(&comment, "The following benchmark tests for version %s had statistically 
significant changes (i.e., |z-score| > 1.96):\n\n", result.Version) + + w := tabwriter.NewWriter(&comment, 0, 0, 1, ' ', 0) + + fmt.Fprintln(w, "| Benchmark\t| Measurement\t| % Change\t| Patch Value\t| Stable Region\t| H-Score\t| Z-Score\t| ") + fmt.Fprintln(w, "| ---------\t| -----------\t| --------\t| -----------\t| -------------\t| -------\t| -------\t|") + sort.Slice(result.SigEnergyStats, func(i, j int) bool { return math.Abs(result.SigEnergyStats[i].PercentChange) > math.Abs(result.SigEnergyStats[j].PercentChange) }) for _, es := range result.SigEnergyStats { fmt.Fprintf(w, "| %s\t| %s\t| %.4f\t| %.4f\t| Avg: %.4f, Med: %.4f, Stdev: %.4f\t| %.4f\t| %.4f\t|\n", es.Benchmark, es.Measurement, es.PercentChange, es.MeasurementVal, es.StableRegion.Mean, es.StableRegion.Median, es.StableRegion.Std, es.HScore, es.ZScore) } + + w.Flush() } - w.Flush() comment.WriteString("\n*For a comprehensive view of all microbenchmark results for this PR's commit, please check out the Evergreen perf task for this patch.*") return comment.String() From b6bff324692bde7f8e5b1a0fcb9dad20d46ac81e Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Tue, 12 Aug 2025 10:47:02 -0400 Subject: [PATCH 13/25] Update README to remind rerun build.sh --- .evergreen/perfcomp/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.evergreen/perfcomp/README.md b/.evergreen/perfcomp/README.md index 922d3276..717bb7a5 100644 --- a/.evergreen/perfcomp/README.md +++ b/.evergreen/perfcomp/README.md @@ -20,7 +20,7 @@ bash build.sh To use `perfcomp`, you should have an analytics node URI env variable called `PERF_URI_PRIVATE_ENDPOINT`. You can request for it from the devprod performance team. -To run in your project repository, you need to create a [performance context](https://performance-monitoring-and-analysis.server-tig.prod.corp.mongodb.com/contexts) that captures all benchmarks in your project. 
Feel free to refer to the [Go Driver context](https://performance-monitoring-and-analysis.server-tig.prod.corp.mongodb.com/context/name/GoDriver%20perf%20task) as a template. Then, add the context to the `projectToPerfContext` map in `./cmd/perfcomp/compare.go`. +To run in your project repository, you need to create a [performance context](https://performance-monitoring-and-analysis.server-tig.prod.corp.mongodb.com/contexts) that captures all benchmarks in your project. Feel free to refer to the [Go Driver context](https://performance-monitoring-and-analysis.server-tig.prod.corp.mongodb.com/context/name/GoDriver%20perf%20task) as a template. Then, add the context to the `projectToPerfContext` map in `./cmd/perfcomp/compare.go`. You will need to re-run `build.sh` after updating the map. ```bash perfcomp is a cli that reports stat-sig results between evergreen patches with the mainline commit From 0569078675dad907ff89b673f678376fc7fb151c Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Tue, 12 Aug 2025 10:48:09 -0400 Subject: [PATCH 14/25] Shell script builds and runs perfcomp --- .evergreen/perfcomp/README.md | 10 ++++++++++ .evergreen/run-perf-comp.sh | 7 +++++++ 2 files changed, 17 insertions(+) diff --git a/.evergreen/perfcomp/README.md b/.evergreen/perfcomp/README.md index 717bb7a5..9494b851 100644 --- a/.evergreen/perfcomp/README.md +++ b/.evergreen/perfcomp/README.md @@ -53,3 +53,13 @@ generates markdown output after compare run (must be run after `compare`) Usage: perfcomp mdreport ``` + +### Run via shell script + +Alternatively, you can run the perfcomp shell script. This script will run build and then run `compare`. From the root directory, + +```bash +PERF_URI_PRIVATE_ENDPOINT="" VERSION_ID="" .evergreen/run-perf-comp.sh +``` + +If you would like to see a markdown preview of the report, you can also pass in `HEAD_SHA=""`. This will generate `.evergreen/perfcomp/perf-report.md`. 
diff --git a/.evergreen/run-perf-comp.sh b/.evergreen/run-perf-comp.sh index 5f20640d..69044c31 100755 --- a/.evergreen/run-perf-comp.sh +++ b/.evergreen/run-perf-comp.sh @@ -60,3 +60,10 @@ fi : "${PERF_URI_PRIVATE_ENDPOINT:?Error: PERF_URI_PRIVATE_ENDPOINT must be set}" : "${VERSION_ID:?Error: VERSION_ID must be set}" + +./bin/perfcomp compare ${VERSION_ID} + +if [[ -n "${HEAD_SHA+set}" ]]; then + ./bin/perfcomp mdreport + rm perf-report.txt +fi From 4f0d90354dd740850be86437bfd4592ae6a11605 Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Tue, 12 Aug 2025 15:59:18 -0400 Subject: [PATCH 15/25] Project flag must be mandatory --- .evergreen/perfcomp/README.md | 6 +++--- .evergreen/perfcomp/cmd/perfcomp/compare.go | 3 ++- .evergreen/run-perf-comp.sh | 3 ++- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.evergreen/perfcomp/README.md b/.evergreen/perfcomp/README.md index 9494b851..3bcec0b4 100644 --- a/.evergreen/perfcomp/README.md +++ b/.evergreen/perfcomp/README.md @@ -7,7 +7,7 @@ To install the latest version: ```bash -go install github.com/mongodb-labs/drivers-evergreen-tools/perfcomp +go install github.com/mongodb-labs/drivers-evergreen-tools/perfcomp/cmd/perfcomp@latest ``` Or build it locally in `bin/perfcomp`: @@ -43,7 +43,7 @@ Usage: perfcomp compare [version_id] [flags] Flags: - --project string specify the name of an existing Evergreen project (default "mongo-go-driver") + --project string specify the name of an existing Evergreen project (required) ``` #### mdreport @@ -59,7 +59,7 @@ Usage: Alternatively, you can run the perfcomp shell script. This script will run build and then run `compare`. From the root directory, ```bash -PERF_URI_PRIVATE_ENDPOINT="" VERSION_ID="" .evergreen/run-perf-comp.sh +PERF_URI_PRIVATE_ENDPOINT="" VERSION_ID="" PROJECT="" .evergreen/run-perf-comp.sh ``` If you would like to see a markdown preview of the report, you can also pass in `HEAD_SHA=""`. This will generate `.evergreen/perfcomp/perf-report.md`. 
diff --git a/.evergreen/perfcomp/cmd/perfcomp/compare.go b/.evergreen/perfcomp/cmd/perfcomp/compare.go index 6340f741..4a26dc2d 100644 --- a/.evergreen/perfcomp/cmd/perfcomp/compare.go +++ b/.evergreen/perfcomp/cmd/perfcomp/compare.go @@ -33,7 +33,8 @@ func newCompareCommand() *cobra.Command { }, } - cmd.Flags().String("project", "mongo-go-driver", "specify the name of an existing Evergreen project") + cmd.Flags().String("project", "", "specify the name of an existing Evergreen project") + cmd.MarkFlagRequired("project") cmd.Run = func(cmd *cobra.Command, args []string) { // Check for variables diff --git a/.evergreen/run-perf-comp.sh b/.evergreen/run-perf-comp.sh index 69044c31..3703d046 100755 --- a/.evergreen/run-perf-comp.sh +++ b/.evergreen/run-perf-comp.sh @@ -60,8 +60,9 @@ fi : "${PERF_URI_PRIVATE_ENDPOINT:?Error: PERF_URI_PRIVATE_ENDPOINT must be set}" : "${VERSION_ID:?Error: VERSION_ID must be set}" +: "${PROJECT:?Error: PROJECT must be set}" -./bin/perfcomp compare ${VERSION_ID} +./bin/perfcomp compare --project ${PROJECT} ${VERSION_ID} if [[ -n "${HEAD_SHA+set}" ]]; then ./bin/perfcomp mdreport From 82f1dc96710a8fbab8ac97d89f7a8d75ba6dd761 Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Tue, 12 Aug 2025 16:00:47 -0400 Subject: [PATCH 16/25] Propagate errors and other fixes --- .evergreen/perfcomp/cmd/perfcomp/mdreport.go | 8 +++---- .evergreen/perfcomp/compare.go | 22 +++++++++----------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/.evergreen/perfcomp/cmd/perfcomp/mdreport.go b/.evergreen/perfcomp/cmd/perfcomp/mdreport.go index 5388e16a..9179a04a 100644 --- a/.evergreen/perfcomp/cmd/perfcomp/mdreport.go +++ b/.evergreen/perfcomp/cmd/perfcomp/mdreport.go @@ -38,14 +38,14 @@ func runMdCommand(cmd *cobra.Command, args []string) error { // open file to read fRead, err := os.Open(perfReportFileTxt) if err != nil { - log.Fatalf("Could not open %s: %v", perfReportFileTxt, err) + return fmt.Errorf("failed to open perf report file %s: %v", 
perfReportFileTxt, err) } defer fRead.Close() // open file to write fWrite, err := os.Create(perfReportFileMd) if err != nil { - log.Fatalf("Could not create %s: %v", perfReportFileMd, err) + return fmt.Errorf("failed to create perf report file %s: %v", perfReportFileMd, err) } defer fWrite.Close() @@ -126,10 +126,10 @@ func generateEvgLink(version string, variant string) (string, error) { return u.String(), nil } -func printUrlToLine(fWrite *os.File, line string, link string, targetWord string, step int) { +func printUrlToLine(fWrite *os.File, line string, link string, targetWord string, shift int) { words := strings.Split(line, " ") for i, w := range words { - if i > 0 && words[i+step] == targetWord { + if i > 0 && i+shift < len(words) && words[i+shift] == targetWord { fmt.Fprintf(fWrite, "[%s](%s)", w, link) } else { fmt.Fprint(fWrite, w) diff --git a/.evergreen/perfcomp/compare.go b/.evergreen/perfcomp/compare.go index ad5bda14..bf8bd7f6 100644 --- a/.evergreen/perfcomp/compare.go +++ b/.evergreen/perfcomp/compare.go @@ -148,7 +148,7 @@ func Compare(ctx context.Context, versionID string, perfAnalyticsConnString stri return nil, fmt.Errorf("error getting raw data: %v", err) } - allEnergyStats, err := getEnergyStatsForAllBenchMarks(findCtx, patchRawData, db.Collection(stableRegionsColl), perfContext) + allEnergyStats, err := getEnergyStatsForAllBenchmarks(findCtx, patchRawData, db.Collection(stableRegionsColl), perfContext) if err != nil { return nil, fmt.Errorf("error getting energy statistics: %v", err) } @@ -174,7 +174,7 @@ func findRawData(ctx context.Context, project string, version string, coll *mong cursor, err := coll.Find(ctx, filter) if err != nil { - log.Fatalf( + return nil, fmt.Errorf( "error retrieving raw data for version %q: %v", version, err, @@ -187,18 +187,16 @@ func findRawData(ctx context.Context, project string, version string, coll *mong } }() - log.Printf("Successfully retrieved %d docs from version %s.\n", cursor.RemainingBatchLength(), 
version) - var rawData []RawData err = cursor.All(ctx, &rawData) if err != nil { - log.Fatalf( + return nil, fmt.Errorf( "error decoding raw data from version %q: %v", version, err, ) } - + log.Printf("Successfully retrieved %d docs from version %s.\n", len(rawData), version) return rawData, err } @@ -216,12 +214,12 @@ func findLastStableRegion(ctx context.Context, project string, testname string, findOptions := options.FindOne().SetSort(bson.D{{"end", -1}}) - var sr *StableRegion + var sr StableRegion err := coll.FindOne(ctx, filter, findOptions).Decode(&sr) if err != nil { return nil, err } - return sr, nil + return &sr, nil } // Calculate the energy statistics for all measurements in a benchmark. @@ -236,7 +234,7 @@ func getEnergyStatsForOneBenchmark(ctx context.Context, rd RawData, coll *mongo. stableRegion, err := findLastStableRegion(ctx, project, testname, measName, coll, perfContext) if err != nil { - log.Fatalf( + return nil, fmt.Errorf( "error finding last stable region for test %q, measurement %q: %v", testname, measName, @@ -251,7 +249,7 @@ func getEnergyStatsForOneBenchmark(ctx context.Context, rd RawData, coll *mongo. estat, tstat, hscore, err := calcEnergyStatistics(stableRegionVec, measValVec) if err != nil { - log.Fatalf( + return nil, fmt.Errorf( "could not calculate energy stats for test %q, measurement %q: %v", testname, measName, @@ -281,12 +279,12 @@ func getEnergyStatsForOneBenchmark(ctx context.Context, rd RawData, coll *mongo. 
return energyStats, nil } -func getEnergyStatsForAllBenchMarks(ctx context.Context, patchRawData []RawData, coll *mongo.Collection, perfContext string) ([]*EnergyStats, error) { +func getEnergyStatsForAllBenchmarks(ctx context.Context, patchRawData []RawData, coll *mongo.Collection, perfContext string) ([]*EnergyStats, error) { var allEnergyStats []*EnergyStats for _, rd := range patchRawData { energyStats, err := getEnergyStatsForOneBenchmark(ctx, rd, coll, perfContext) if err != nil { - log.Fatalf( + return nil, fmt.Errorf( "could not get energy stats for %q: %v", rd.Info.TestName, err, From 141a56848dbe5d682e286fec5a45539398f4464c Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Tue, 12 Aug 2025 16:01:06 -0400 Subject: [PATCH 17/25] Cleanup go version --- .evergreen/perfcomp/go.mod | 2 +- .evergreen/run-perf-comp.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.evergreen/perfcomp/go.mod b/.evergreen/perfcomp/go.mod index c55573d6..ae56447c 100644 --- a/.evergreen/perfcomp/go.mod +++ b/.evergreen/perfcomp/go.mod @@ -1,6 +1,6 @@ module github.com/mongodb-labs/drivers-evergreen-tools/perfcomp -go 1.24.4 +go 1.24 require github.com/spf13/cobra v1.9.1 diff --git a/.evergreen/run-perf-comp.sh b/.evergreen/run-perf-comp.sh index 3703d046..b5acf2e7 100755 --- a/.evergreen/run-perf-comp.sh +++ b/.evergreen/run-perf-comp.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -eux pipefail -GOVERSION="${GOVERESION:-1.24}" +GOVERSION="${GOVERSION:-1.24}" GOPATH="${GOPATH:-$HOME/go}" # Detect OS @@ -48,7 +48,7 @@ PROJECT_ROOT="$(dirname "$SCRIPT_DIR")" # Enter the perfcomp sub‐directory cd "$PROJECT_ROOT/.evergreen/perfcomp" -# Build the mongproxy binary. +# Build the perfcomp binary. bash build.sh if [[ ! 
-x "./bin/perfcomp" ]]; then From 73ca146944c1eb8b8259cbe6efd822dbc0815a32 Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Tue, 12 Aug 2025 16:14:23 -0400 Subject: [PATCH 18/25] Cleanup --- .evergreen/perfcomp/compare.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.evergreen/perfcomp/compare.go b/.evergreen/perfcomp/compare.go index bf8bd7f6..6bfa5caa 100644 --- a/.evergreen/perfcomp/compare.go +++ b/.evergreen/perfcomp/compare.go @@ -61,7 +61,7 @@ type StableRegion struct { TimeSeriesInfo TimeSeriesInfo Start any `bson:"start"` End any `bson:"end"` - Values []float64 `bson:"values"` // All microbenchmark values that makes up the stable region + Values []float64 `bson:"values"` // All microbenchmark values that make up the stable region StartOrder int64 `bson:"start_order"` EndOrder int64 `bson:"end_order"` Mean float64 `bson:"mean"` @@ -209,7 +209,7 @@ func findLastStableRegion(ctx context.Context, project string, testname string, {"time_series_info.test", testname}, {"time_series_info.measurement", measurement}, {"last", true}, - {"contexts", bson.D{{"$in", bson.A{perfContext}}}}, // TODO (GODRIVER-3102): Refactor perf context for project switching. + {"contexts", bson.D{{"$in", bson.A{perfContext}}}}, } findOptions := options.FindOne().SetSort(bson.D{{"end", -1}}) From 876925c2b682ec78dee1cf345523c5d732e3c135 Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Thu, 14 Aug 2025 12:06:40 -0400 Subject: [PATCH 19/25] Update triage context instructions --- .evergreen/perfcomp/README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.evergreen/perfcomp/README.md b/.evergreen/perfcomp/README.md index 3bcec0b4..2d321fdf 100644 --- a/.evergreen/perfcomp/README.md +++ b/.evergreen/perfcomp/README.md @@ -20,7 +20,9 @@ bash build.sh To use `perfcomp`, you should have an analytics node URI env variable called `PERF_URI_PRIVATE_ENDPOINT`. You can request for it from the devprod performance team. 
-To run in your project repository, you need to create a [performance context](https://performance-monitoring-and-analysis.server-tig.prod.corp.mongodb.com/contexts) that captures all benchmarks in your project. Feel free to refer to the [Go Driver context](https://performance-monitoring-and-analysis.server-tig.prod.corp.mongodb.com/context/name/GoDriver%20perf%20task) as a template. Then, add the context to the `projectToPerfContext` map in `./cmd/perfcomp/compare.go`. You will need to re-run `build.sh` after updating the map. +To run in your project repository, you need to create a [performance context](https://performance-monitoring-and-analysis.server-tig.prod.corp.mongodb.com/contexts) that captures all benchmarks in your project. This needs to be a triage context. Feel free to refer to the [Go Driver context](https://performance-monitoring-and-analysis.server-tig.prod.corp.mongodb.com/context/name/GoDriver%20perf%20task) as a template. Then, add the context to the `projectToPerfContext` map in `./cmd/perfcomp/compare.go`. You will need to re-run `build.sh` after updating the map. 
+ +>*If you are creating a triage context for the first time, it may take a few hours for your project's data to be tagged.* ```bash perfcomp is a cli that reports stat-sig results between evergreen patches with the mainline commit From 837ef5e189671093749c0d9cffa4d230e9acf218 Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Fri, 15 Aug 2025 02:56:11 -0400 Subject: [PATCH 20/25] Add compare options for context,project,task,variant,version --- .evergreen/perfcomp/cmd/perfcomp/compare.go | 55 ++++++--- .evergreen/perfcomp/compare.go | 117 ++++++++++++++++---- 2 files changed, 132 insertions(+), 40 deletions(-) diff --git a/.evergreen/perfcomp/cmd/perfcomp/compare.go b/.evergreen/perfcomp/cmd/perfcomp/compare.go index 4a26dc2d..6fa13f99 100644 --- a/.evergreen/perfcomp/cmd/perfcomp/compare.go +++ b/.evergreen/perfcomp/cmd/perfcomp/compare.go @@ -15,11 +15,6 @@ import ( "github.com/spf13/cobra" ) -// For support for other projects, a performance context needs to be created and added here. -var projectToPerfContext = map[string]string{ - "mongo-go-driver": "GoDriver perf task", -} - func newCompareCommand() *cobra.Command { cmd := &cobra.Command{ Use: "compare", @@ -33,8 +28,14 @@ func newCompareCommand() *cobra.Command { }, } - cmd.Flags().String("project", "", "specify the name of an existing Evergreen project") - cmd.MarkFlagRequired("project") + cmd.Flags().String("project", "", "specify the name of an existing Evergreen project, ex. \"mongo-go-driver\"") + cmd.Flags().String("task", "", "specify the evergreen perf task name, ex. \"perf\"") + cmd.Flags().String("variant", "", "specify the perf task variant, ex. \"perf\"") + cmd.Flags().String("context", "", "specify the performance triage context, ex. 
\"GoDriver perf task\"") + + for _, flag := range []string{"project", "task", "variant", "context"} { + cmd.MarkFlagRequired(flag) + } cmd.Run = func(cmd *cobra.Command, args []string) { // Check for variables @@ -43,22 +44,33 @@ func newCompareCommand() *cobra.Command { log.Fatal("PERF_URI_PRIVATE_ENDPOINT env variable is not set") } - // Retrieve the project flag value + // Retrieve and validate flag values project, err := cmd.Flags().GetString("project") if err != nil { log.Fatalf("failed to get project flag: %v", err) } - - // Validate the project flag and perf context - if project == "" { - log.Fatal("must provide project") + task, err := cmd.Flags().GetString("task") + if err != nil { + log.Fatalf("failed to get task flag: %v", err) } - perfContext, ok := projectToPerfContext[project] - if !ok { - log.Fatalf("support for project %q is not configured yet", project) + variant, err := cmd.Flags().GetString("variant") + if err != nil { + log.Fatalf("failed to get variant flag: %v", err) + } + context, err := cmd.Flags().GetString("context") + if err != nil { + log.Fatalf("failed to get context flag: %v", err) + } + + // Validate all flags + for _, flag := range []string{project, task, variant, context} { + if flag == "" { + log.Fatalf("must provide %s", flag) + } } - if err := runCompare(cmd, args, project, perfContext); err != nil { + // Run compare function + if err := runCompare(cmd, args, project, task, variant, context); err != nil { log.Fatalf("failed to compare: %v", err) } } @@ -95,17 +107,24 @@ func createComment(result perfcomp.CompareResult) string { } -func runCompare(cmd *cobra.Command, args []string, project string, perfContext string) error { +func runCompare(cmd *cobra.Command, args []string, project string, taskName string, variant string, perfContext string) error { perfAnalyticsConnString := os.Getenv("PERF_URI_PRIVATE_ENDPOINT") version := args[len(args)-1] ctx, cancel := context.WithTimeout(cmd.Context(), 5*time.Second) defer cancel() - res, 
err := perfcomp.Compare(ctx, version, perfAnalyticsConnString, project, perfContext) + res, err := perfcomp.Compare(ctx, perfAnalyticsConnString, + perfcomp.WithVersion(version), + perfcomp.WithProject(project), + perfcomp.WithTask(taskName), + perfcomp.WithVariant(variant), + perfcomp.WithContext(perfContext), + ) if err != nil { log.Fatalf("failed to compare: %v", err) } + res.CommitSHA = os.Getenv("HEAD_SHA") res.MainlineCommit = os.Getenv("BASE_SHA") diff --git a/.evergreen/perfcomp/compare.go b/.evergreen/perfcomp/compare.go index 6bfa5caa..13ad5afa 100644 --- a/.evergreen/perfcomp/compare.go +++ b/.evergreen/perfcomp/compare.go @@ -114,9 +114,82 @@ const expandedMetricsDB = "expanded_metrics" const rawResultsColl = "raw_results" const stableRegionsColl = "stable_regions" +// CompareOptions stores the information for each project to use as filters. +type CompareOptions struct { + Project string // Required + Context string // Required + Task string // Required + Variant string // Required + Version string // Required +} + +type CompareOption func(*CompareOptions) + +// WithProject sets the evergreen project on the CompareOptions, for example "mongo-go-driver". +func WithProject(project string) CompareOption { + return func(opts *CompareOptions) { + opts.Project = project + } +} + +// WithContext sets the performance triage context on the CompareOptions, for example "GoDriver perf task". +func WithContext(context string) CompareOption { + return func(opts *CompareOptions) { + opts.Context = context + } +} + +// WithTask sets the evergreen performance task on the CompareOptions, for example "perf". +func WithTask(task string) CompareOption { + return func(opts *CompareOptions) { + opts.Task = task + } +} + +// WithTask sets the performance task variant on the CompareOptions, for example "perf". 
+func WithVariant(variant string) CompareOption { + return func(opts *CompareOptions) { + opts.Variant = variant + } +} + +// WithVersion sets the evergreen version on the CompareOptions, for example "688a39d27d916e0007cf8723". +func WithVersion(version string) CompareOption { + return func(opts *CompareOptions) { + opts.Version = version + } +} + +func validateOptions(copts CompareOptions) error { + if copts.Project == "" { + return fmt.Errorf("project is required") + } + if copts.Context == "" { + return fmt.Errorf("context is required") + } + if copts.Task == "" { + return fmt.Errorf("task is required") + } + if copts.Variant == "" { + return fmt.Errorf("variant is required") + } + if copts.Version == "" { + return fmt.Errorf("version is required") + } + return nil +} + // Compare will return statistical results for a patch version using the // stable region defined by the performance analytics cluster. -func Compare(ctx context.Context, versionID string, perfAnalyticsConnString string, project string, perfContext string) (*CompareResult, error) { +func Compare(ctx context.Context, perfAnalyticsConnString string, opts ...CompareOption) (*CompareResult, error) { + copts := &CompareOptions{} + for _, fn := range opts { + fn(copts) + } + + if err := validateOptions(*copts); err != nil { + return nil, fmt.Errorf("invalid configuration: %w", err) + } // Connect to analytics node client, err := mongo.Connect(options.Client().ApplyURI(perfAnalyticsConnString)) @@ -143,12 +216,12 @@ func Compare(ctx context.Context, versionID string, perfAnalyticsConnString stri findCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() - patchRawData, err := findRawData(findCtx, project, versionID, db.Collection(rawResultsColl)) + patchRawData, err := findRawData(findCtx, db.Collection(rawResultsColl), copts) if err != nil { return nil, fmt.Errorf("error getting raw data: %v", err) } - allEnergyStats, err := getEnergyStatsForAllBenchmarks(findCtx, 
patchRawData, db.Collection(stableRegionsColl), perfContext) + allEnergyStats, err := getEnergyStatsForAllBenchmarks(findCtx, patchRawData, db.Collection(stableRegionsColl), copts) if err != nil { return nil, fmt.Errorf("error getting energy statistics: %v", err) } @@ -156,7 +229,7 @@ func Compare(ctx context.Context, versionID string, perfAnalyticsConnString stri // Get statistically significant benchmarks statSigBenchmarks := getStatSigBenchmarks(allEnergyStats) compareResult := CompareResult{ - Version: versionID, + Version: copts.Version, SigEnergyStats: statSigBenchmarks, } @@ -164,26 +237,26 @@ func Compare(ctx context.Context, versionID string, perfAnalyticsConnString stri } // Gets all raw benchmark data for a specific Evergreen version. -func findRawData(ctx context.Context, project string, version string, coll *mongo.Collection) ([]RawData, error) { +func findRawData(ctx context.Context, coll *mongo.Collection, copts *CompareOptions) ([]RawData, error) { filter := bson.D{ - {"info.project", project}, - {"info.version", version}, - {"info.variant", "perf"}, - {"info.task_name", "perf"}, + {"info.project", copts.Project}, + {"info.version", copts.Version}, + {"info.variant", copts.Variant}, + {"info.task_name", copts.Task}, } cursor, err := coll.Find(ctx, filter) if err != nil { return nil, fmt.Errorf( "error retrieving raw data for version %q: %v", - version, + copts.Version, err, ) } defer func() { err = cursor.Close(ctx) if err != nil { - log.Fatalf("error closing cursor while retrieving raw data for version %q: %v", version, err) + log.Fatalf("error closing cursor while retrieving raw data for version %q: %v", copts.Version, err) } }() @@ -192,24 +265,24 @@ func findRawData(ctx context.Context, project string, version string, coll *mong if err != nil { return nil, fmt.Errorf( "error decoding raw data from version %q: %v", - version, + copts.Version, err, ) } - log.Printf("Successfully retrieved %d docs from version %s.\n", len(rawData), version) + 
log.Printf("Successfully retrieved %d docs from version %s.\n", len(rawData), copts.Version) return rawData, err } // Finds the most recent stable region of the mainline version for a specific microbenchmark. -func findLastStableRegion(ctx context.Context, project string, testname string, measurement string, coll *mongo.Collection, perfContext string) (*StableRegion, error) { +func findLastStableRegion(ctx context.Context, testname string, measurement string, coll *mongo.Collection, copts *CompareOptions) (*StableRegion, error) { filter := bson.D{ - {"time_series_info.project", project}, - {"time_series_info.variant", "perf"}, - {"time_series_info.task", "perf"}, + {"time_series_info.project", copts.Project}, + {"time_series_info.variant", copts.Variant}, + {"time_series_info.task", copts.Task}, {"time_series_info.test", testname}, {"time_series_info.measurement", measurement}, {"last", true}, - {"contexts", bson.D{{"$in", bson.A{perfContext}}}}, + {"contexts", bson.D{{"$in", bson.A{copts.Context}}}}, } findOptions := options.FindOne().SetSort(bson.D{{"end", -1}}) @@ -223,7 +296,7 @@ func findLastStableRegion(ctx context.Context, project string, testname string, } // Calculate the energy statistics for all measurements in a benchmark. -func getEnergyStatsForOneBenchmark(ctx context.Context, rd RawData, coll *mongo.Collection, perfContext string) ([]*EnergyStats, error) { +func getEnergyStatsForOneBenchmark(ctx context.Context, rd RawData, coll *mongo.Collection, copts *CompareOptions) ([]*EnergyStats, error) { testname := rd.Info.TestName var energyStats []*EnergyStats @@ -232,7 +305,7 @@ func getEnergyStatsForOneBenchmark(ctx context.Context, rd RawData, coll *mongo. 
measName := rd.Rollups.Stats[i].Name measVal := rd.Rollups.Stats[i].Val - stableRegion, err := findLastStableRegion(ctx, project, testname, measName, coll, perfContext) + stableRegion, err := findLastStableRegion(ctx, testname, measName, coll, copts) if err != nil { return nil, fmt.Errorf( "error finding last stable region for test %q, measurement %q: %v", @@ -279,10 +352,10 @@ func getEnergyStatsForOneBenchmark(ctx context.Context, rd RawData, coll *mongo. return energyStats, nil } -func getEnergyStatsForAllBenchmarks(ctx context.Context, patchRawData []RawData, coll *mongo.Collection, perfContext string) ([]*EnergyStats, error) { +func getEnergyStatsForAllBenchmarks(ctx context.Context, patchRawData []RawData, coll *mongo.Collection, copts *CompareOptions) ([]*EnergyStats, error) { var allEnergyStats []*EnergyStats for _, rd := range patchRawData { - energyStats, err := getEnergyStatsForOneBenchmark(ctx, rd, coll, perfContext) + energyStats, err := getEnergyStatsForOneBenchmark(ctx, rd, coll, copts) if err != nil { return nil, fmt.Errorf( "could not get energy stats for %q: %v", From bea95829fa2bfd3046b8c7353565e383b591c26a Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Fri, 15 Aug 2025 02:56:40 -0400 Subject: [PATCH 21/25] Update shell script with required flags --- .evergreen/run-perf-comp.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.evergreen/run-perf-comp.sh b/.evergreen/run-perf-comp.sh index b5acf2e7..04f4fce3 100755 --- a/.evergreen/run-perf-comp.sh +++ b/.evergreen/run-perf-comp.sh @@ -61,8 +61,11 @@ fi : "${PERF_URI_PRIVATE_ENDPOINT:?Error: PERF_URI_PRIVATE_ENDPOINT must be set}" : "${VERSION_ID:?Error: VERSION_ID must be set}" : "${PROJECT:?Error: PROJECT must be set}" +: "${CONTEXT:?Error: CONTEXT must be set}" +: "${TASK:?Error: TASKNAME must be set}" +: "${VARIANT:?Error: VARIANT must be set}" -./bin/perfcomp compare --project ${PROJECT} ${VERSION_ID} +./bin/perfcomp compare --project ${PROJECT} --context "${CONTEXT}" 
--task ${TASK} --variant ${VARIANT} ${VERSION_ID} if [[ -n "${HEAD_SHA+set}" ]]; then ./bin/perfcomp mdreport From 1dac81766e10f537f1b2ad5e01081a9aaed533c3 Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Fri, 15 Aug 2025 02:56:57 -0400 Subject: [PATCH 22/25] Update README with flag behaviour --- .evergreen/perfcomp/README.md | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/.evergreen/perfcomp/README.md b/.evergreen/perfcomp/README.md index 2d321fdf..148ed8f6 100644 --- a/.evergreen/perfcomp/README.md +++ b/.evergreen/perfcomp/README.md @@ -7,7 +7,7 @@ To install the latest version: ```bash -go install github.com/mongodb-labs/drivers-evergreen-tools/perfcomp/cmd/perfcomp@latest +go install github.com/mongodb-labs/drivers-evergreen-tools/perfcomp/cmd/perfcomp@latest ``` Or build it locally in `bin/perfcomp`: @@ -18,11 +18,26 @@ bash build.sh ## 🔧 Usage +### Parameters + To use `perfcomp`, you should have an analytics node URI env variable called `PERF_URI_PRIVATE_ENDPOINT`. You can request for it from the devprod performance team. -To run in your project repository, you need to create a [performance context](https://performance-monitoring-and-analysis.server-tig.prod.corp.mongodb.com/contexts) that captures all benchmarks in your project. This needs to be a triage context. Feel free to refer to the [Go Driver context](https://performance-monitoring-and-analysis.server-tig.prod.corp.mongodb.com/context/name/GoDriver%20perf%20task) as a template. Then, add the context to the `projectToPerfContext` map in `./cmd/perfcomp/compare.go`. You will need to re-run `build.sh` after updating the map. +To run in your project repository, you need to create a [performance context](https://performance-monitoring-and-analysis.server-tig.prod.corp.mongodb.com/contexts) that captures all benchmarks in your project. This needs to be a triage context. 
Feel free to refer to the [Go Driver context](https://performance-monitoring-and-analysis.server-tig.prod.corp.mongodb.com/context/name/GoDriver%20perf%20task) as a template. + +> _If you are creating a triage context for the first time, it may take a few hours for your project's data to be tagged._ + +You also need the name of the performance task and variant specific to your project. You can do a query in the analytics node `raw_results` collection: ->*If you are creating a triage context for the first time, it may take a few hours for your project's data to be tagged.* +``` +db.raw_results.find({ + “info.project”: “”, + “info.version”: “" +}) +``` + +and look for the `variant` and `task_name` properties. + +### perfcomp CLI ```bash perfcomp is a cli that reports stat-sig results between evergreen patches with the mainline commit @@ -38,6 +53,7 @@ Available Commands: ### Commands #### compare + ```bash compare evergreen patch to mainline commit @@ -45,10 +61,14 @@ Usage: perfcomp compare [version_id] [flags] Flags: - --project string specify the name of an existing Evergreen project (required) + --context string specify the performance triage context, ex. "GoDriver perf task" (required) + --project string specify the name of an existing Evergreen project, ex. "mongo-go-driver" (required) + --task string specify the evergreen perf task name, ex. "perf" (required) + --variant string specify the perf task variant, ex. "perf" (required) ``` #### mdreport + ```bash generates markdown output after compare run (must be run after `compare`) @@ -61,7 +81,7 @@ Usage: Alternatively, you can run the perfcomp shell script. This script will run build and then run `compare`. 
From the root directory, ```bash -PERF_URI_PRIVATE_ENDPOINT="" VERSION_ID="" PROJECT="" .evergreen/run-perf-comp.sh +PERF_URI_PRIVATE_ENDPOINT="" VERSION_ID="" PROJECT="" CONTEXT="" TASK="" VARIANT="" .evergreen/run-perf-comp.sh ``` If you would like to see a markdown preview of the report, you can also pass in `HEAD_SHA=""`. This will generate `.evergreen/perfcomp/perf-report.md`. From 48ed412c9d38724432040253b6962fc394f83a7a Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Fri, 15 Aug 2025 21:51:29 -0400 Subject: [PATCH 23/25] Update flags --- .evergreen/perfcomp/README.md | 8 +-- .evergreen/perfcomp/cmd/perfcomp/compare.go | 61 ++++++++++----------- .evergreen/perfcomp/compare.go | 26 ++++----- .evergreen/run-perf-comp.sh | 2 +- 4 files changed, 46 insertions(+), 51 deletions(-) diff --git a/.evergreen/perfcomp/README.md b/.evergreen/perfcomp/README.md index 148ed8f6..81e3b3af 100644 --- a/.evergreen/perfcomp/README.md +++ b/.evergreen/perfcomp/README.md @@ -61,10 +61,10 @@ Usage: perfcomp compare [version_id] [flags] Flags: - --context string specify the performance triage context, ex. "GoDriver perf task" (required) - --project string specify the name of an existing Evergreen project, ex. "mongo-go-driver" (required) - --task string specify the evergreen perf task name, ex. "perf" (required) - --variant string specify the perf task variant, ex. "perf" (required) + --perf-context string specify the performance triage context, ex. "GoDriver perf task" (required) + --project string specify the name of an existing Evergreen project, ex. "mongo-go-driver" (required) + --task string specify the evergreen perf task name, ex. "perf" (required) + --variant string specify the perf task variant, ex. 
"perf" (required) ``` #### mdreport diff --git a/.evergreen/perfcomp/cmd/perfcomp/compare.go b/.evergreen/perfcomp/cmd/perfcomp/compare.go index 6fa13f99..2b58001c 100644 --- a/.evergreen/perfcomp/cmd/perfcomp/compare.go +++ b/.evergreen/perfcomp/cmd/perfcomp/compare.go @@ -28,10 +28,12 @@ func newCompareCommand() *cobra.Command { }, } - cmd.Flags().String("project", "", "specify the name of an existing Evergreen project, ex. \"mongo-go-driver\"") - cmd.Flags().String("task", "", "specify the evergreen perf task name, ex. \"perf\"") - cmd.Flags().String("variant", "", "specify the perf task variant, ex. \"perf\"") - cmd.Flags().String("context", "", "specify the performance triage context, ex. \"GoDriver perf task\"") + var project, task, variant, perfcontext string + cmd.Flags().StringVar(&project, "project", "", `specify the name of an existing Evergreen project, ex. "mongo-go-driver"`) + cmd.Flags().StringVar(&perfcontext, "perf-context", "", `specify the performance triage context, ex. "GoDriver perf task"`) + // TODO(DRIVERS-3264): Use first task / variant of the project by default for perf filtering + cmd.Flags().StringVar(&task, "task", "", `specify the evergreen performance task name, ex. "perf"`) + cmd.Flags().StringVar(&variant, "variant", "", `specify the performance variant, ex. 
"perf"`) for _, flag := range []string{"project", "task", "variant", "context"} { cmd.MarkFlagRequired(flag) @@ -44,33 +46,21 @@ func newCompareCommand() *cobra.Command { log.Fatal("PERF_URI_PRIVATE_ENDPOINT env variable is not set") } - // Retrieve and validate flag values - project, err := cmd.Flags().GetString("project") - if err != nil { - log.Fatalf("failed to get project flag: %v", err) - } - task, err := cmd.Flags().GetString("task") - if err != nil { - log.Fatalf("failed to get task flag: %v", err) - } - variant, err := cmd.Flags().GetString("variant") - if err != nil { - log.Fatalf("failed to get variant flag: %v", err) - } - context, err := cmd.Flags().GetString("context") - if err != nil { - log.Fatalf("failed to get context flag: %v", err) - } - // Validate all flags - for _, flag := range []string{project, task, variant, context} { + for _, flag := range []string{project, task, variant, perfcontext} { if flag == "" { log.Fatalf("must provide %s", flag) } } // Run compare function - if err := runCompare(cmd, args, project, task, variant, context); err != nil { + err := runCompare(cmd, args, + perfcomp.WithProject(project), + perfcomp.WithTask(task), + perfcomp.WithVariant(variant), + perfcomp.WithContext(perfcontext), + ) + if err != nil { log.Fatalf("failed to compare: %v", err) } } @@ -96,7 +86,17 @@ func createComment(result perfcomp.CompareResult) string { return math.Abs(result.SigEnergyStats[i].PercentChange) > math.Abs(result.SigEnergyStats[j].PercentChange) }) for _, es := range result.SigEnergyStats { - fmt.Fprintf(w, "| %s\t| %s\t| %.4f\t| %.4f\t| Avg: %.4f, Med: %.4f, Stdev: %.4f\t| %.4f\t| %.4f\t|\n", es.Benchmark, es.Measurement, es.PercentChange, es.MeasurementVal, es.StableRegion.Mean, es.StableRegion.Median, es.StableRegion.Std, es.HScore, es.ZScore) + fmt.Fprintf(w, "| %s\t| %s\t| %.4f\t| %.4f\t| Avg: %.4f, Med: %.4f, Stdev: %.4f\t| %.4f\t| %.4f\t|\n", + es.Benchmark, + es.Measurement, + es.PercentChange, + es.MeasurementVal, + 
es.StableRegion.Mean, + es.StableRegion.Median, + es.StableRegion.Std, + es.HScore, + es.ZScore, + ) } w.Flush() @@ -107,20 +107,15 @@ func createComment(result perfcomp.CompareResult) string { } -func runCompare(cmd *cobra.Command, args []string, project string, taskName string, variant string, perfContext string) error { +func runCompare(cmd *cobra.Command, args []string, opts ...perfcomp.CompareOption) error { perfAnalyticsConnString := os.Getenv("PERF_URI_PRIVATE_ENDPOINT") version := args[len(args)-1] + opts = append(opts, perfcomp.WithVersion(version)) ctx, cancel := context.WithTimeout(cmd.Context(), 5*time.Second) defer cancel() - res, err := perfcomp.Compare(ctx, perfAnalyticsConnString, - perfcomp.WithVersion(version), - perfcomp.WithProject(project), - perfcomp.WithTask(taskName), - perfcomp.WithVariant(variant), - perfcomp.WithContext(perfContext), - ) + res, err := perfcomp.Compare(ctx, perfAnalyticsConnString, opts...) if err != nil { log.Fatalf("failed to compare: %v", err) } diff --git a/.evergreen/perfcomp/compare.go b/.evergreen/perfcomp/compare.go index 13ad5afa..4b78702f 100644 --- a/.evergreen/perfcomp/compare.go +++ b/.evergreen/perfcomp/compare.go @@ -116,46 +116,46 @@ const stableRegionsColl = "stable_regions" // CompareOptions stores the information for each project to use as filters. type CompareOptions struct { - Project string // Required - Context string // Required - Task string // Required - Variant string // Required - Version string // Required + Project string // Required + PerfContext string // Required + Task string // Required + Variant string // Required + Version string // Required } type CompareOption func(*CompareOptions) // WithProject sets the evergreen project on the CompareOptions, for example "mongo-go-driver". 
func WithProject(project string) CompareOption { - return func(opts *CompareOptions) { + return func(opts *CompareOptions) { opts.Project = project } } // WithContext sets the performance triage context on the CompareOptions, for example "GoDriver perf task". func WithContext(context string) CompareOption { - return func(opts *CompareOptions) { - opts.Context = context + return func(opts *CompareOptions) { + opts.PerfContext = context } } // WithTask sets the evergreen performance task on the CompareOptions, for example "perf". func WithTask(task string) CompareOption { - return func(opts *CompareOptions) { + return func(opts *CompareOptions) { opts.Task = task } } // WithTask sets the performance task variant on the CompareOptions, for example "perf". func WithVariant(variant string) CompareOption { - return func(opts *CompareOptions) { + return func(opts *CompareOptions) { opts.Variant = variant } } // WithVersion sets the evergreen version on the CompareOptions, for example "688a39d27d916e0007cf8723". 
func WithVersion(version string) CompareOption { - return func(opts *CompareOptions) { + return func(opts *CompareOptions) { opts.Version = version } } @@ -164,7 +164,7 @@ func validateOptions(copts CompareOptions) error { if copts.Project == "" { return fmt.Errorf("project is required") } - if copts.Context == "" { + if copts.PerfContext == "" { return fmt.Errorf("context is required") } if copts.Task == "" { @@ -282,7 +282,7 @@ func findLastStableRegion(ctx context.Context, testname string, measurement stri {"time_series_info.test", testname}, {"time_series_info.measurement", measurement}, {"last", true}, - {"contexts", bson.D{{"$in", bson.A{copts.Context}}}}, + {"contexts", bson.D{{"$in", bson.A{copts.PerfContext}}}}, } findOptions := options.FindOne().SetSort(bson.D{{"end", -1}}) diff --git a/.evergreen/run-perf-comp.sh b/.evergreen/run-perf-comp.sh index 04f4fce3..4d54bd66 100755 --- a/.evergreen/run-perf-comp.sh +++ b/.evergreen/run-perf-comp.sh @@ -65,7 +65,7 @@ fi : "${TASK:?Error: TASKNAME must be set}" : "${VARIANT:?Error: VARIANT must be set}" -./bin/perfcomp compare --project ${PROJECT} --context "${CONTEXT}" --task ${TASK} --variant ${VARIANT} ${VERSION_ID} +./bin/perfcomp compare --project ${PROJECT} --perf-context "${CONTEXT}" --task ${TASK} --variant ${VARIANT} ${VERSION_ID} if [[ -n "${HEAD_SHA+set}" ]]; then ./bin/perfcomp mdreport From 463eb92771e05e7a5d85e63470c521f15b7364c9 Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Mon, 18 Aug 2025 10:06:59 -0400 Subject: [PATCH 24/25] Fix validation --- .evergreen/perfcomp/cmd/perfcomp/compare.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.evergreen/perfcomp/cmd/perfcomp/compare.go b/.evergreen/perfcomp/cmd/perfcomp/compare.go index 2b58001c..59c28878 100644 --- a/.evergreen/perfcomp/cmd/perfcomp/compare.go +++ b/.evergreen/perfcomp/cmd/perfcomp/compare.go @@ -47,7 +47,7 @@ func newCompareCommand() *cobra.Command { } // Validate all flags - for _, flag := range []string{project, 
task, variant, perfcontext} { + for _, flag := range []string{"project", "task", "variant", "perf-context"} { if flag == "" { log.Fatalf("must provide %s", flag) } From 862a16717228bbfafb0c3c94a7534394683f7125 Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Mon, 18 Aug 2025 11:48:55 -0400 Subject: [PATCH 25/25] Update go.mod --- .evergreen/perfcomp/go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.evergreen/perfcomp/go.mod b/.evergreen/perfcomp/go.mod index ae56447c..4d2f9e18 100644 --- a/.evergreen/perfcomp/go.mod +++ b/.evergreen/perfcomp/go.mod @@ -1,6 +1,6 @@ module github.com/mongodb-labs/drivers-evergreen-tools/perfcomp -go 1.24 +go 1.24.0 require github.com/spf13/cobra v1.9.1