From 93a5971eb77b743be3d99728b8349a833daa914f Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Tue, 1 Jul 2025 14:12:11 -0400 Subject: [PATCH 01/23] add send-perf-pr-comment and testing script --- .evergreen/config.yml | 8 ++++++++ etc/perf-pr-comment.sh | 6 ++++++ 2 files changed, 14 insertions(+) create mode 100755 etc/perf-pr-comment.sh diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 05832906d1..903acb5f90 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -285,6 +285,13 @@ functions: echo "Response Body: $response_body" echo "HTTP Status: $http_status" + send-perf-pr-comment: + - command: shell.exec + params: + working_dir: src/go.mongodb.org/mongo-driver + binary: bash + script: etc/perf-pr-comment.sh + run-enterprise-auth-tests: - command: ec2.assume_role params: @@ -676,6 +683,7 @@ tasks: binary: bash args: [*task-runner, driver-benchmark] - func: send-perf-data + - func: send-perf-pr-comment - name: test-standalone-noauth-nossl tags: ["test", "standalone"] diff --git a/etc/perf-pr-comment.sh b/etc/perf-pr-comment.sh new file mode 100755 index 0000000000..cf5d3bad4d --- /dev/null +++ b/etc/perf-pr-comment.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash + +set -e +set -x + +echo "hello world - perf comment" From 0103afdea67885a8bb925dad271fd441bb80f7e5 Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Tue, 1 Jul 2025 14:20:34 -0400 Subject: [PATCH 02/23] refactor to Taskfile --- .evergreen/config.yml | 6 +++--- Taskfile.yml | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 903acb5f90..420f5c996d 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -286,11 +286,11 @@ functions: echo "HTTP Status: $http_status" send-perf-pr-comment: - - command: shell.exec + - command: subprocess.exec + type: test params: - working_dir: src/go.mongodb.org/mongo-driver binary: bash - script: etc/perf-pr-comment.sh + args: [*task-runner, perf-pr-comment] run-enterprise-auth-tests: - command: ec2.assume_role diff --git a/Taskfile.yml b/Taskfile.yml index 3473cb4981..8b2c7df0e3 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -70,6 +70,8 @@ tasks: pr-task: bash etc/pr-task.sh + perf-pr-comment: bash etc/perf-pr-comment.sh + # Lint with various GOOS and GOARCH tasks to catch static analysis failures that may only affect # specific operating systems or architectures. For example, staticcheck will only check for 64-bit # alignment of atomically accessed variables on 32-bit architectures (see From 364f2796cf6b2ca3324652478b83f966f0213be7 Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Wed, 2 Jul 2025 11:02:04 -0400 Subject: [PATCH 03/23] added test ping, but might have network issues --- .evergreen/config.yml | 1 + Taskfile.yml | 2 +- internal/cmd/perfnotif/main.go | 45 ++++++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 1 deletion(-) create mode 100644 internal/cmd/perfnotif/main.go diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 420f5c996d..94a2024093 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -290,6 +290,7 @@ functions: type: test params: binary: bash + include_expansions_in_env: [perf_uri] args: [*task-runner, perf-pr-comment] run-enterprise-auth-tests: diff --git a/Taskfile.yml b/Taskfile.yml index 8b2c7df0e3..617340ac2d 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -70,7 +70,7 @@ tasks: pr-task: bash etc/pr-task.sh - perf-pr-comment: bash etc/perf-pr-comment.sh + perf-pr-comment: go run internal/cmd/perfnotif/main.go # Lint with various GOOS and GOARCH tasks to catch static analysis failures that may only affect # specific operating systems or architectures. For example, staticcheck will only check for 64-bit diff --git a/internal/cmd/perfnotif/main.go b/internal/cmd/perfnotif/main.go new file mode 100644 index 0000000000..722acb110c --- /dev/null +++ b/internal/cmd/perfnotif/main.go @@ -0,0 +1,45 @@ +// Copyright (C) MongoDB, Inc. 2025-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +package main + +import ( + "context" + "fmt" + "log" + "os" + "time" + + "go.mongodb.org/mongo-driver/v2/mongo" + "go.mongodb.org/mongo-driver/v2/mongo/options" +) + +func main() { + uri := os.Getenv("perf_uri") + if uri == "" { + log.Panic("perf_uri env variable is not set") + } + + client, err := mongo.Connect(options.Client().ApplyURI(uri)) + if err != nil { + log.Panicf("Error connecting client: %v", err) + } + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + err = client.Ping(ctx, nil) + if err != nil { + log.Panicf("Error pinging MongoDB Analytics: %v", err) + } + + fmt.Println("Successfully connected to MongoDB Analytics node.") + + err = client.Disconnect(context.Background()) + if err != nil { + log.Panicf("Failed to disconnect client: %v", err) + } +} From 2db16858daeaaf240dbb0698192c57101ca03f0b Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Wed, 2 Jul 2025 15:13:29 -0400 Subject: [PATCH 04/23] try with private endpoint --- .evergreen/config.yml | 2 +- internal/cmd/perfnotif/main.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 94a2024093..51b34f0777 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -290,7 +290,7 @@ functions: type: test params: binary: bash - include_expansions_in_env: [perf_uri] + include_expansions_in_env: [perf_uri_private_endpoint] args: [*task-runner, perf-pr-comment] run-enterprise-auth-tests: diff --git a/internal/cmd/perfnotif/main.go b/internal/cmd/perfnotif/main.go index 722acb110c..22e7e15d5d 100644 --- a/internal/cmd/perfnotif/main.go +++ b/internal/cmd/perfnotif/main.go @@ -18,9 +18,9 @@ import ( ) func main() { - uri := os.Getenv("perf_uri") + uri := os.Getenv("perf_uri_private_endpoint") if uri == "" { - log.Panic("perf_uri env variable is not set") + log.Panic("perf_uri_private_endpoint env variable is not set") } client, err := mongo.Connect(options.Client().ApplyURI(uri)) From 24d243d303970c1d0ffae873560151ec59d11986 Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Thu, 3 Jul 2025 10:50:38 -0400 Subject: [PATCH 05/23] test filtering documents --- internal/cmd/perfnotif/main.go | 43 ++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/internal/cmd/perfnotif/main.go b/internal/cmd/perfnotif/main.go index 22e7e15d5d..9e8c1ac2af 100644 --- a/internal/cmd/perfnotif/main.go +++ b/internal/cmd/perfnotif/main.go @@ -13,6 +13,7 @@ import ( "os" "time" + "go.mongodb.org/mongo-driver/v2/bson" "go.mongodb.org/mongo-driver/v2/mongo" "go.mongodb.org/mongo-driver/v2/mongo/options" ) @@ -30,16 +31,54 @@ func main() { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() - err = client.Ping(ctx, nil) if err != nil { log.Panicf("Error pinging MongoDB Analytics: %v", err) } - fmt.Println("Successfully connected to MongoDB Analytics node.") + coll := client.Database("expanded_metrics").Collection("change_points") + var cursor *mongo.Cursor + cursor, err = getDocsWithContext(coll) + if err != nil { + log.Panicf("Error retrieving documents from collection.") + } + fmt.Printf("Successfully retrieved %d documents.", cursor.RemainingBatchLength()) + err = client.Disconnect(context.Background()) if err != nil { log.Panicf("Failed to disconnect client: %v", err) } + +} + +func getDocsWithContext(coll *mongo.Collection) (*mongo.Cursor, error) { + filter := bson.D{ + {"time_series_info.project", "mongo-go-driver"}, + {"time_series_info.variant", "perf"}, + {"time_series_info.task", "perf"}, + // {"commit", commitName}, + {"triage_contexts", bson.M{"$in": []string{"GoDriver perf (h-score)"}}}, + } + + projection := bson.D{ + {"time_series_info.project", 1}, + {"time_series_info.task", 1}, + {"time_series_info.test", 1}, + {"triage_contexts", 1}, + {"h_score", 1}, + } + + findOptions := options.Find().SetProjection(projection) + + findCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + cursor, err := coll.Find(findCtx, filter, findOptions) + if err != nil { + return nil, err + } + defer cursor.Close(findCtx) + + return cursor, err } From 4a9511e7b70848964072ca73a15e162c487c6432 Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Tue, 8 Jul 2025 09:48:29 -0400 Subject: [PATCH 06/23] decode docs and display in evg logs, dummy commit for now --- .evergreen/config.yml | 2 ++ internal/cmd/perfnotif/main.go | 61 ++++++++++++++++++++++++++++++---- 2 files changed, 56 insertions(+), 7 deletions(-) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 51b34f0777..2855d4b2ad 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -290,6 +290,8 @@ functions: type: test params: binary: bash + env: + COMMIT: "${github_commit}" include_expansions_in_env: [perf_uri_private_endpoint] args: [*task-runner, perf-pr-comment] diff --git a/internal/cmd/perfnotif/main.go b/internal/cmd/perfnotif/main.go index 9e8c1ac2af..6516eeda0d 100644 --- a/internal/cmd/perfnotif/main.go +++ b/internal/cmd/perfnotif/main.go @@ -18,6 +18,17 @@ import ( "go.mongodb.org/mongo-driver/v2/mongo/options" ) +type ChangePoint struct { + TimeSeriesInfo struct { + Project string `bson:"project"` + Task string `bson:"task"` + Test string `bson:"test"` + Measurement string `bson:"measurement"` + } `bson:"time_series_info"` + TriageContexts []string `bson:"triage_contexts"` + HScore float64 `bson:"h_score"` +} + func main() { uri := os.Getenv("perf_uri_private_endpoint") if uri == "" { @@ -37,13 +48,33 @@ func main() { } fmt.Println("Successfully connected to MongoDB Analytics node.") + commit := os.Getenv("COMMIT") // TODO: get PR from evergreen instead of just the commit, and use PR to get latest commit + if commit == "" { + log.Panic("could not retrieve commit number") + } + coll := client.Database("expanded_metrics").Collection("change_points") - var cursor *mongo.Cursor - cursor, err = getDocsWithContext(coll) + var changePoints []ChangePoint + changePoints, err = getDocsWithContext(coll, "50cf0c20d228975074c0010bfb688917e25934a4") // TODO: restore test commit if err != nil { - log.Panicf("Error retrieving documents from collection.") + log.Panicf("Error retrieving and decoding documents from collection: %v.", err) + } + + if len(changePoints) == 0 { + log.Panicf("Nothing was decoded") + } + + fmt.Print("Documents:") + for _, cp := range changePoints { + fmt.Printf(" Project: %s, Task: %s, Test: %s, Measurement: %s, Triage Contexts: %v, H-Score: %f\n", + cp.TimeSeriesInfo.Project, + cp.TimeSeriesInfo.Task, + cp.TimeSeriesInfo.Test, + cp.TimeSeriesInfo.Measurement, + cp.TriageContexts, + cp.HScore, + ) } - fmt.Printf("Successfully retrieved %d documents.", cursor.RemainingBatchLength()) err = client.Disconnect(context.Background()) if err != nil { @@ -52,12 +83,12 @@ func main() { } -func getDocsWithContext(coll *mongo.Collection) (*mongo.Cursor, error) { +func getDocsWithContext(coll *mongo.Collection, commit string) ([]ChangePoint, error) { filter := bson.D{ {"time_series_info.project", "mongo-go-driver"}, {"time_series_info.variant", "perf"}, {"time_series_info.task", "perf"}, - // {"commit", commitName}, + {"commit", commit}, {"triage_contexts", bson.M{"$in": []string{"GoDriver perf (h-score)"}}}, } @@ -65,6 +96,7 @@ func getDocsWithContext(coll *mongo.Collection) (*mongo.Cursor, error) { {"time_series_info.project", 1}, {"time_series_info.task", 1}, {"time_series_info.test", 1}, + {"time_series_info.measurement", 1}, {"triage_contexts", 1}, {"h_score", 1}, } @@ -80,5 +112,20 @@ func getDocsWithContext(coll *mongo.Collection) (*mongo.Cursor, error) { } defer cursor.Close(findCtx) - return cursor, err + fmt.Printf("Successfully retrieved %d documents from commit %s.", cursor.RemainingBatchLength(), commit) + + var changePoints []ChangePoint + for cursor.Next(findCtx) { + var cp ChangePoint + if err := cursor.Decode(&cp); err != nil { + return nil, err + } + changePoints = append(changePoints, cp) + } + + if err := cursor.Err(); err != nil { + return nil, err + } + + return changePoints, nil } From 04b822bc0649c757f05c773c8165e02f9dece35c Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Tue, 8 Jul 2025 10:11:37 -0400 Subject: [PATCH 07/23] refactor to use shell script in taskfile --- Taskfile.yml | 2 +- etc/perf-pr-comment.sh | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Taskfile.yml b/Taskfile.yml index 617340ac2d..8b2c7df0e3 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -70,7 +70,7 @@ tasks: pr-task: bash etc/pr-task.sh - perf-pr-comment: go run internal/cmd/perfnotif/main.go + perf-pr-comment: bash etc/perf-pr-comment.sh # Lint with various GOOS and GOARCH tasks to catch static analysis failures that may only affect # specific operating systems or architectures. For example, staticcheck will only check for 64-bit diff --git a/etc/perf-pr-comment.sh b/etc/perf-pr-comment.sh index cf5d3bad4d..aba08743aa 100755 --- a/etc/perf-pr-comment.sh +++ b/etc/perf-pr-comment.sh @@ -1,6 +1,7 @@ #!/usr/bin/env bash +# perf-pr-comment +# Generates a report of Go Driver perf changes for the current branch. -set -e -set -x +set -eux -echo "hello world - perf comment" +go run ./internal/cmd/perfnotif/main.go From 769f15058b7a468bfabc9b7beea2810c4dd3c7b1 Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Tue, 8 Jul 2025 11:35:13 -0400 Subject: [PATCH 08/23] add md comment TODO perf baron link, remove hardcoded commit number --- internal/cmd/perfnotif/main.go | 46 +++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/internal/cmd/perfnotif/main.go b/internal/cmd/perfnotif/main.go index 6516eeda0d..53e53d054e 100644 --- a/internal/cmd/perfnotif/main.go +++ b/internal/cmd/perfnotif/main.go @@ -7,6 +7,7 @@ package main import ( + "bytes" "context" "fmt" "log" @@ -48,33 +49,20 @@ func main() { } fmt.Println("Successfully connected to MongoDB Analytics node.") - commit := os.Getenv("COMMIT") // TODO: get PR from evergreen instead of just the commit, and use PR to get latest commit + commit := os.Getenv("COMMIT") if commit == "" { log.Panic("could not retrieve commit number") } coll := client.Database("expanded_metrics").Collection("change_points") var changePoints []ChangePoint - changePoints, err = getDocsWithContext(coll, "50cf0c20d228975074c0010bfb688917e25934a4") // TODO: restore test commit + changePoints, err = getDocsWithContext(coll, commit) if err != nil { log.Panicf("Error retrieving and decoding documents from collection: %v.", err) } - if len(changePoints) == 0 { - log.Panicf("Nothing was decoded") - } - - fmt.Print("Documents:") - for _, cp := range changePoints { - fmt.Printf(" Project: %s, Task: %s, Test: %s, Measurement: %s, Triage Contexts: %v, H-Score: %f\n", - cp.TimeSeriesInfo.Project, - cp.TimeSeriesInfo.Task, - cp.TimeSeriesInfo.Test, - cp.TimeSeriesInfo.Measurement, - cp.TriageContexts, - cp.HScore, - ) - } + var markdownComment = getMarkdownComment(changePoints) + fmt.Print(markdownComment.String()) err = client.Disconnect(context.Background()) if err != nil { @@ -112,7 +100,7 @@ func getDocsWithContext(coll *mongo.Collection, commit string) ([]ChangePoint, e } defer cursor.Close(findCtx) - fmt.Printf("Successfully retrieved %d documents from commit %s.", cursor.RemainingBatchLength(), commit) + fmt.Printf("Successfully retrieved %d documents from commit %s.\n", cursor.RemainingBatchLength(), commit) var changePoints []ChangePoint for cursor.Next(findCtx) { @@ -129,3 +117,25 @@ func getDocsWithContext(coll *mongo.Collection, commit string) ([]ChangePoint, e return changePoints, nil } + +func getMarkdownComment(changePoints []ChangePoint) bytes.Buffer { + var buffer bytes.Buffer + + buffer.WriteString("# 👋 GoDriver Performance Notification\n") + + if len(changePoints) > 0 { + buffer.WriteString("The following benchmark tests had statistically significant changes (i.e., h-score > 0.6):\n") + buffer.WriteString("| Benchmark Test | Measurement | H-Score | Performance Baron |\n") + buffer.WriteString("|---|---|---|---|\n") + + for _, cp := range changePoints { + var perfBaronLink = "" + fmt.Fprintf(&buffer, "| %s | %s | %f | %s |\n", cp.TimeSeriesInfo.Test, cp.TimeSeriesInfo.Measurement, cp.HScore, perfBaronLink) + } + } else { + buffer.WriteString("There were no significant changes to the performance to report.\n") + } + buffer.WriteString("For a comprehensive view of all microbenchmark results for this PR's commit, please visit this link.") + + return buffer +} From fb6cac663129bc9eea06fd68fe2075b254e711c8 Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Tue, 8 Jul 2025 14:10:50 -0400 Subject: [PATCH 09/23] add todo for link generation --- internal/cmd/perfnotif/main.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/internal/cmd/perfnotif/main.go b/internal/cmd/perfnotif/main.go index 53e53d054e..4d6606527f 100644 --- a/internal/cmd/perfnotif/main.go +++ b/internal/cmd/perfnotif/main.go @@ -129,13 +129,15 @@ func getMarkdownComment(changePoints []ChangePoint) bytes.Buffer { buffer.WriteString("|---|---|---|---|\n") for _, cp := range changePoints { - var perfBaronLink = "" - fmt.Fprintf(&buffer, "| %s | %s | %f | %s |\n", cp.TimeSeriesInfo.Test, cp.TimeSeriesInfo.Measurement, cp.HScore, perfBaronLink) + // TODO: update this to dynamically generate link + var perfBaronLink = "https://performance-monitoring-and-analysis.server-tig.prod.corp.mongodb.com/baron" + fmt.Fprintf(&buffer, "| %s | %s | %f | [linked here](%s) |\n", cp.TimeSeriesInfo.Test, cp.TimeSeriesInfo.Measurement, cp.HScore, perfBaronLink) } } else { buffer.WriteString("There were no significant changes to the performance to report.\n") } - buffer.WriteString("For a comprehensive view of all microbenchmark results for this PR's commit, please visit this link.") + // TODO: update this to dynamically generate link + buffer.WriteString("*For a comprehensive view of all microbenchmark results for this PR's commit, please visit [this link](https://performance-monitoring-and-analysis.server-tig.prod.corp.mongodb.com/baron?change_point_filters=%5B%7B%22active%22%3Atrue%2C%22name%22%3A%22commit%22%2C%22operator%22%3A%22matches%22%2C%22type%22%3A%22regex%22%2C%22value%22%3A%22%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22commit_date%22%2C%22operator%22%3A%22after%22%2C%22type%22%3A%22date%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22calculated_on%22%2C%22operator%22%3A%22after%22%2C%22type%22%3A%22date%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22project%22%2C%22operator%22%3A%22matches%22%2C%22type%22%3A%22regex%22%2C%22value%22%3A%22mongo-go-driver%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22variant%22%2C%22operator%22%3A%22matches%22%2C%22type%22%3A%22regex%22%2C%22value%22%3A%22perf%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22task%22%2C%22operator%22%3A%22matches%22%2C%22type%22%3A%22regex%22%2C%22value%22%3A%22perf%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22test%22%2C%22operator%22%3A%22matches%22%2C%22type%22%3A%22regex%22%2C%22value%22%3A%22%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22measurement%22%2C%22operator%22%3A%22matches%22%2C%22type%22%3A%22regex%22%2C%22value%22%3A%22%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22args%22%2C%22operator%22%3A%22eq%22%2C%22type%22%3A%22json%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22percent_change%22%2C%22operator%22%3A%22gt%22%2C%22type%22%3A%22number%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22z_score_change%22%2C%22operator%22%3A%22gt%22%2C%22type%22%3A%22number%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22h_score%22%2C%22operator%22%3A%22gt%22%2C%22type%22%3A%22number%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22absolute_change%22%2C%22operator%22%3A%22gt%22%2C%22type%22%3A%22number%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22build_failures%22%2C%22operator%22%3A%22matches%22%2C%22type%22%3A%22regex%22%2C%22value%22%3A%22%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22bf_suggestions%22%2C%22operator%22%3A%22inlist%22%2C%22type%22%3A%22listSelect%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22triage_status%22%2C%22operator%22%3A%22inlist%22%2C%22type%22%3A%22listSelect%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22changeType%22%2C%22operator%22%3A%22inlist%22%2C%22type%22%3A%22listSelect%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22triage_contexts%22%2C%22operator%22%3A%22inlist%22%2C%22type%22%3A%22listSelect%22%2C%22value%22%3A%5B%22GoDriver+perf+%28h-score%29%22%5D%7D%5D).*") return buffer } From 877d2fb64c17440aadc6119a808b96bc5680c066 Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Thu, 10 Jul 2025 17:29:51 -0400 Subject: [PATCH 10/23] add rawdata struct --- internal/cmd/perfnotif/main.go | 144 +++++++++++++-------------------- 1 file changed, 54 insertions(+), 90 deletions(-) diff --git a/internal/cmd/perfnotif/main.go b/internal/cmd/perfnotif/main.go index 4d6606527f..7c19eff421 100644 --- a/internal/cmd/perfnotif/main.go +++ b/internal/cmd/perfnotif/main.go @@ -7,27 +7,46 @@ package main import ( - "bytes" "context" "fmt" "log" "os" "time" - "go.mongodb.org/mongo-driver/v2/bson" "go.mongodb.org/mongo-driver/v2/mongo" "go.mongodb.org/mongo-driver/v2/mongo/options" ) -type ChangePoint struct { - TimeSeriesInfo struct { - Project string `bson:"project"` - Task string `bson:"task"` - Test string `bson:"test"` - Measurement string `bson:"measurement"` - } `bson:"time_series_info"` - TriageContexts []string `bson:"triage_contexts"` - HScore float64 `bson:"h_score"` +type RawData struct { + ID string `json:"_id"` + Info struct { + Project string `json:"project"` + Version string `json:"version"` + Variant string `json:"variant"` + Order int64 `json:"order"` + TaskName string `json:"task_name"` + TaskID string `json:"task_id"` + Execution int64 `json:"execution"` + Mainline bool `json:"mainline"` + OverrideInfo struct { + OverrideMainline bool `json:"override_mainline"` + BaseOrder interface{} `json:"base_order"` + Reason interface{} `json:"reason"` + User interface{} `json:"user"` + } + TestName string `json:"test_name"` + Args []interface{} `json:"args"` + } + CreatedAt interface{} `json:"created_at"` + CompletedAt interface{} `json:"completed_at"` + Rollups struct { + Stats struct { + Name string `json:"name"` + Val float64 `json:"val"` + Metadata interface{} `json:"metadata"` + } + } + FailedRollupAttempts int64 `json:"failed_rollup_attempts"` } func main() { @@ -54,15 +73,7 @@ func main() { log.Panic("could not retrieve commit number") } - coll := client.Database("expanded_metrics").Collection("change_points") - var changePoints []ChangePoint - changePoints, err = getDocsWithContext(coll, commit) - if err != nil { - log.Panicf("Error retrieving and decoding documents from collection: %v.", err) - } - - var markdownComment = getMarkdownComment(changePoints) - fmt.Print(markdownComment.String()) + // coll := client.Database("expanded_metrics").Collection("raw_results") err = client.Disconnect(context.Background()) if err != nil { @@ -71,73 +82,26 @@ func main() { } -func getDocsWithContext(coll *mongo.Collection, commit string) ([]ChangePoint, error) { - filter := bson.D{ - {"time_series_info.project", "mongo-go-driver"}, - {"time_series_info.variant", "perf"}, - {"time_series_info.task", "perf"}, - {"commit", commit}, - {"triage_contexts", bson.M{"$in": []string{"GoDriver perf (h-score)"}}}, - } - - projection := bson.D{ - {"time_series_info.project", 1}, - {"time_series_info.task", 1}, - {"time_series_info.test", 1}, - {"time_series_info.measurement", 1}, - {"triage_contexts", 1}, - {"h_score", 1}, - } - - findOptions := options.Find().SetProjection(projection) - - findCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second) - defer cancel() - - cursor, err := coll.Find(findCtx, filter, findOptions) - if err != nil { - return nil, err - } - defer cursor.Close(findCtx) - - fmt.Printf("Successfully retrieved %d documents from commit %s.\n", cursor.RemainingBatchLength(), commit) - - var changePoints []ChangePoint - for cursor.Next(findCtx) { - var cp ChangePoint - if err := cursor.Decode(&cp); err != nil { - return nil, err - } - changePoints = append(changePoints, cp) - } - - if err := cursor.Err(); err != nil { - return nil, err - } - - return changePoints, nil -} - -func getMarkdownComment(changePoints []ChangePoint) bytes.Buffer { - var buffer bytes.Buffer - - buffer.WriteString("# 👋 GoDriver Performance Notification\n") - - if len(changePoints) > 0 { - buffer.WriteString("The following benchmark tests had statistically significant changes (i.e., h-score > 0.6):\n") - buffer.WriteString("| Benchmark Test | Measurement | H-Score | Performance Baron |\n") - buffer.WriteString("|---|---|---|---|\n") - - for _, cp := range changePoints { - // TODO: update this to dynamically generate link - var perfBaronLink = "https://performance-monitoring-and-analysis.server-tig.prod.corp.mongodb.com/baron" - fmt.Fprintf(&buffer, "| %s | %s | %f | [linked here](%s) |\n", cp.TimeSeriesInfo.Test, cp.TimeSeriesInfo.Measurement, cp.HScore, perfBaronLink) - } - } else { - buffer.WriteString("There were no significant changes to the performance to report.\n") - } - // TODO: update this to dynamically generate link - buffer.WriteString("*For a comprehensive view of all microbenchmark results for this PR's commit, please visit [this link](https://performance-monitoring-and-analysis.server-tig.prod.corp.mongodb.com/baron?change_point_filters=%5B%7B%22active%22%3Atrue%2C%22name%22%3A%22commit%22%2C%22operator%22%3A%22matches%22%2C%22type%22%3A%22regex%22%2C%22value%22%3A%22%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22commit_date%22%2C%22operator%22%3A%22after%22%2C%22type%22%3A%22date%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22calculated_on%22%2C%22operator%22%3A%22after%22%2C%22type%22%3A%22date%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22project%22%2C%22operator%22%3A%22matches%22%2C%22type%22%3A%22regex%22%2C%22value%22%3A%22mongo-go-driver%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22variant%22%2C%22operator%22%3A%22matches%22%2C%22type%22%3A%22regex%22%2C%22value%22%3A%22perf%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22task%22%2C%22operator%22%3A%22matches%22%2C%22type%22%3A%22regex%22%2C%22value%22%3A%22perf%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22test%22%2C%22operator%22%3A%22matches%22%2C%22type%22%3A%22regex%22%2C%22value%22%3A%22%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22measurement%22%2C%22operator%22%3A%22matches%22%2C%22type%22%3A%22regex%22%2C%22value%22%3A%22%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22args%22%2C%22operator%22%3A%22eq%22%2C%22type%22%3A%22json%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22percent_change%22%2C%22operator%22%3A%22gt%22%2C%22type%22%3A%22number%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22z_score_change%22%2C%22operator%22%3A%22gt%22%2C%22type%22%3A%22number%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22h_score%22%2C%22operator%22%3A%22gt%22%2C%22type%22%3A%22number%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22absolute_change%22%2C%22operator%22%3A%22gt%22%2C%22type%22%3A%22number%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22build_failures%22%2C%22operator%22%3A%22matches%22%2C%22type%22%3A%22regex%22%2C%22value%22%3A%22%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22bf_suggestions%22%2C%22operator%22%3A%22inlist%22%2C%22type%22%3A%22listSelect%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22triage_status%22%2C%22operator%22%3A%22inlist%22%2C%22type%22%3A%22listSelect%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22changeType%22%2C%22operator%22%3A%22inlist%22%2C%22type%22%3A%22listSelect%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22triage_contexts%22%2C%22operator%22%3A%22inlist%22%2C%22type%22%3A%22listSelect%22%2C%22value%22%3A%5B%22GoDriver+perf+%28h-score%29%22%5D%7D%5D).*") - - return buffer -} +// func getMarkdownComment(changePoints []ChangePoint) bytes.Buffer { +// var buffer bytes.Buffer + +// buffer.WriteString("# 👋 GoDriver Performance Notification\n") + +// if len(changePoints) > 0 { +// buffer.WriteString("The following benchmark tests had statistically significant changes (i.e., h-score > 0.6):\n") +// buffer.WriteString("| Benchmark Test | Measurement | H-Score | Performance Baron |\n") +// buffer.WriteString("|---|---|---|---|\n") + +// for _, cp := range changePoints { +// // TODO: update this to dynamically generate link +// var perfBaronLink = "https://performance-monitoring-and-analysis.server-tig.prod.corp.mongodb.com/baron" +// fmt.Fprintf(&buffer, "| %s | %s | %f | [linked here](%s) |\n", cp.TimeSeriesInfo.Test, cp.TimeSeriesInfo.Measurement, cp.HScore, perfBaronLink) +// } +// } else { +// buffer.WriteString("There were no significant changes to the performance to report.\n") +// } +// // TODO: update this to dynamically generate link +// buffer.WriteString("*For a comprehensive view of all microbenchmark results for this PR's commit, please visit [this link](https://performance-monitoring-and-analysis.server-tig.prod.corp.mongodb.com/baron?change_point_filters=%5B%7B%22active%22%3Atrue%2C%22name%22%3A%22commit%22%2C%22operator%22%3A%22matches%22%2C%22type%22%3A%22regex%22%2C%22value%22%3A%22%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22commit_date%22%2C%22operator%22%3A%22after%22%2C%22type%22%3A%22date%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22calculated_on%22%2C%22operator%22%3A%22after%22%2C%22type%22%3A%22date%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22project%22%2C%22operator%22%3A%22matches%22%2C%22type%22%3A%22regex%22%2C%22value%22%3A%22mongo-go-driver%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22variant%22%2C%22operator%22%3A%22matches%22%2C%22type%22%3A%22regex%22%2C%22value%22%3A%22perf%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22task%22%2C%22operator%22%3A%22matches%22%2C%22type%22%3A%22regex%22%2C%22value%22%3A%22perf%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22test%22%2C%22operator%22%3A%22matches%22%2C%22type%22%3A%22regex%22%2C%22value%22%3A%22%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22measurement%22%2C%22operator%22%3A%22matches%22%2C%22type%22%3A%22regex%22%2C%22value%22%3A%22%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22args%22%2C%22operator%22%3A%22eq%22%2C%22type%22%3A%22json%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22percent_change%22%2C%22operator%22%3A%22gt%22%2C%22type%22%3A%22number%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22z_score_change%22%2C%22operator%22%3A%22gt%22%2C%22type%22%3A%22number%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22h_score%22%2C%22operator%22%3A%22gt%22%2C%22type%22%3A%22number%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22absolute_change%22%2C%22operator%22%3A%22gt%22%2C%22type%22%3A%22number%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22build_failures%22%2C%22operator%22%3A%22matches%22%2C%22type%22%3A%22regex%22%2C%22value%22%3A%22%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22bf_suggestions%22%2C%22operator%22%3A%22inlist%22%2C%22type%22%3A%22listSelect%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22triage_status%22%2C%22operator%22%3A%22inlist%22%2C%22type%22%3A%22listSelect%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22changeType%22%2C%22operator%22%3A%22inlist%22%2C%22type%22%3A%22listSelect%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22triage_contexts%22%2C%22operator%22%3A%22inlist%22%2C%22type%22%3A%22listSelect%22%2C%22value%22%3A%5B%22GoDriver+perf+%28h-score%29%22%5D%7D%5D).*") + +// return buffer +// } From 66b9adcfb7c300b6e2f58517d2ff9a04cfae6096 Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Fri, 11 Jul 2025 11:14:17 -0400 Subject: [PATCH 11/23] get raw data for version --- internal/cmd/perfnotif/main.go | 135 +++++++++++++++++++-------------- 1 file changed, 79 insertions(+), 56 deletions(-) diff --git a/internal/cmd/perfnotif/main.go b/internal/cmd/perfnotif/main.go index 7c19eff421..63f3d5b230 100644 --- a/internal/cmd/perfnotif/main.go +++ b/internal/cmd/perfnotif/main.go @@ -13,40 +13,78 @@ import ( "os" "time" + "go.mongodb.org/mongo-driver/v2/bson" "go.mongodb.org/mongo-driver/v2/mongo" "go.mongodb.org/mongo-driver/v2/mongo/options" ) type RawData struct { - ID string `json:"_id"` Info struct { - Project string `json:"project"` - Version string `json:"version"` - Variant string `json:"variant"` - Order int64 `json:"order"` - TaskName string `json:"task_name"` - TaskID string `json:"task_id"` - Execution int64 `json:"execution"` - Mainline bool `json:"mainline"` + Project string `bson:"project"` + Version string `bson:"version"` + Variant string `bson:"variant"` + Order int64 `bson:"order"` + TaskName string `bson:"task_name"` + TaskID string `bson:"task_id"` + Execution int64 `bson:"execution"` + Mainline bool `bson:"mainline"` OverrideInfo struct { - OverrideMainline bool `json:"override_mainline"` - BaseOrder interface{} `json:"base_order"` - Reason interface{} `json:"reason"` - User interface{} `json:"user"` + OverrideMainline bool `bson:"override_mainline"` + BaseOrder interface{} `bson:"base_order"` + Reason interface{} `bson:"reason"` + User interface{} `bson:"user"` } - TestName string `json:"test_name"` - Args []interface{} `json:"args"` + TestName string `bson:"test_name"` + Args []interface{} `bson:"args"` } - CreatedAt interface{} `json:"created_at"` - CompletedAt interface{} `json:"completed_at"` + CreatedAt interface{} `bson:"created_at"` + CompletedAt interface{} `bson:"completed_at"` Rollups struct { - Stats struct { - Name string `json:"name"` - Val float64 `json:"val"` - Metadata interface{} `json:"metadata"` + Stats []struct { + Name string `bson:"name"` + Val float64 `bson:"val"` + Metadata interface{} `bson:"metadata"` } } - FailedRollupAttempts int64 `json:"failed_rollup_attempts"` + FailedRollupAttempts int64 `bson:"failed_rollup_attempts"` +} + +// findRawData will get all of the rawData for the given version +func findRawData(version string, coll *mongo.Collection) ([]RawData, error) { + filter := bson.D{ + {"info.project", "mongo-go-driver"}, + {"info.version", version}, + {"info.variant", "perf"}, + {"info.task_name", "perf"}, + } + + findOptions := options.Find() + + findCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + cursor, err := coll.Find(findCtx, filter, findOptions) + if err != nil { + return nil, err + } + defer cursor.Close(findCtx) + + fmt.Printf("Successfully retrieved %d docs from version %s.\n", cursor.RemainingBatchLength(), version) + + var rawData []RawData + for cursor.Next(findCtx) { + var rd RawData + if err := cursor.Decode(&rd); err != nil { + return nil, err + } + rawData = append(rawData, rd) + } + + if err := cursor.Err(); err != nil { + return nil, err + } + + return rawData, nil } func main() { @@ -55,16 +93,16 @@ func main() { log.Panic("perf_uri_private_endpoint env variable is not set") } - client, err := mongo.Connect(options.Client().ApplyURI(uri)) - if err != nil { - log.Panicf("Error connecting client: %v", err) + client, err1 := mongo.Connect(options.Client().ApplyURI(uri)) + if err1 != nil { + log.Panicf("Error connecting client: %v", err1) } ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() - err = client.Ping(ctx, nil) - if err != nil { - log.Panicf("Error pinging MongoDB Analytics: %v", err) + err2 := client.Ping(ctx, nil) + if err2 != nil { + log.Panicf("Error pinging MongoDB Analytics: %v", err2) } fmt.Println("Successfully connected to MongoDB Analytics node.") @@ -73,35 +111,20 @@ func main() { log.Panic("could not retrieve commit number") } - // coll := client.Database("expanded_metrics").Collection("raw_results") + coll := client.Database("expanded_metrics").Collection("raw_results") + version := os.Getenv("VERSION") + if version == "" { + log.Panic("could not retrieve version") + } + rawData, err3 := findRawData(version, coll) + if err3 != nil { + log.Panicf("Error getting raw data: %v", err3) + } + fmt.Println(rawData) - err = client.Disconnect(context.Background()) - if err != nil { - log.Panicf("Failed to disconnect client: %v", err) + err0 := client.Disconnect(context.Background()) + if err0 != nil { + log.Panicf("Failed to disconnect client: %v", err0) } } - -// func getMarkdownComment(changePoints []ChangePoint) bytes.Buffer { -// var buffer bytes.Buffer - -// buffer.WriteString("# 👋 GoDriver Performance Notification\n") - -// if len(changePoints) > 0 { -// buffer.WriteString("The following benchmark tests had statistically significant changes (i.e., h-score > 0.6):\n") -// buffer.WriteString("| Benchmark Test | Measurement | H-Score | Performance Baron |\n") -// buffer.WriteString("|---|---|---|---|\n") - -// for _, cp := range changePoints { -// // TODO: update this to dynamically generate link -// var perfBaronLink = "https://performance-monitoring-and-analysis.server-tig.prod.corp.mongodb.com/baron" -// fmt.Fprintf(&buffer, "| %s | %s | %f | [linked here](%s) |\n", cp.TimeSeriesInfo.Test, cp.TimeSeriesInfo.Measurement, cp.HScore, perfBaronLink) -// } -// } else { -// buffer.WriteString("There were no significant changes to the performance to report.\n") -// } -// // TODO: update this to dynamically generate link -// buffer.WriteString("*For a comprehensive view of all microbenchmark results for this PR's commit, please visit [this link](https://performance-monitoring-and-analysis.server-tig.prod.corp.mongodb.com/baron?change_point_filters=%5B%7B%22active%22%3Atrue%2C%22name%22%3A%22commit%22%2C%22operator%22%3A%22matches%22%2C%22type%22%3A%22regex%22%2C%22value%22%3A%22%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22commit_date%22%2C%22operator%22%3A%22after%22%2C%22type%22%3A%22date%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22calculated_on%22%2C%22operator%22%3A%22after%22%2C%22type%22%3A%22date%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22project%22%2C%22operator%22%3A%22matches%22%2C%22type%22%3A%22regex%22%2C%22value%22%3A%22mongo-go-driver%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22variant%22%2C%22operator%22%3A%22matches%22%2C%22type%22%3A%22regex%22%2C%22value%22%3A%22perf%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22task%22%2C%22operator%22%3A%22matches%22%2C%22type%22%3A%22regex%22%2C%22value%22%3A%22perf%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22test%22%2C%22operator%22%3A%22matches%22%2C%22type%22%3A%22regex%22%2C%22value%22%3A%22%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22measurement%22%2C%22operator%22%3A%22matches%22%2C%22type%22%3A%22regex%22%2C%22value%22%3A%22%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22args%22%2C%22operator%22%3A%22eq%22%2C%22type%22%3A%22json%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22percent_change%22%2C%22operator%22%3A%22gt%22%2C%22type%22%3A%22number%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22z_score_change%22%2C%22operator%22%3A%22gt%22%2C%22type%22%3A%22number%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22h_score%22%2C%22operator%22%3A%22gt%22%2C%22type%22%3A%22number%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22absolute_change%22%2C%22operator%22%3A%22gt%22%2C%22type%22%3A%22number%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22build_failures%22%2C%22operator%22%3A%22matches%22%2C%22type%22%3A%22regex%22%2C%22value%22%3A%22%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22bf_suggestions%22%2C%22operator%22%3A%22inlist%22%2C%22type%22%3A%22listSelect%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22triage_status%22%2C%22operator%22%3A%22inlist%22%2C%22type%22%3A%22listSelect%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22changeType%22%2C%22operator%22%3A%22inlist%22%2C%22type%22%3A%22listSelect%22%7D%2C%7B%22active%22%3Atrue%2C%22name%22%3A%22triage_contexts%22%2C%22operator%22%3A%22inlist%22%2C%22type%22%3A%22listSelect%22%2C%22value%22%3A%5B%22GoDriver+perf+%28h-score%29%22%5D%7D%5D).*") - -// return buffer -// } From 077f102f4c42ab3a0c1463e2076a245b297c928a Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Fri, 11 Jul 2025 12:03:06 -0400 Subject: [PATCH 12/23] parse commit SHA from task ID --- internal/cmd/perfnotif/main.go | 93 ++++++++++++++++++++-------------- 1 file changed, 54 insertions(+), 39 deletions(-) diff --git a/internal/cmd/perfnotif/main.go b/internal/cmd/perfnotif/main.go index 63f3d5b230..99ecd52ea2 100644 --- a/internal/cmd/perfnotif/main.go +++ b/internal/cmd/perfnotif/main.go @@ -8,9 +8,11 @@ package main import ( "context" + "errors" "fmt" "log" "os" + "strings" "time" "go.mongodb.org/mongo-driver/v2/bson" @@ -49,6 +51,48 @@ type RawData struct { FailedRollupAttempts int64 `bson:"failed_rollup_attempts"` } +func main() { + uri := os.Getenv("perf_uri_private_endpoint") + if uri == "" { + log.Panic("perf_uri_private_endpoint env variable is not set") + } + + client, err1 := mongo.Connect(options.Client().ApplyURI(uri)) + if err1 != nil { + log.Panicf("Error connecting client: %v", err1) + } + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + err2 := client.Ping(ctx, nil) + if err2 != nil { + log.Panicf("Error pinging MongoDB Analytics: %v", err2) + } + fmt.Println("Successfully connected to MongoDB Analytics node.") + + coll := client.Database("expanded_metrics").Collection("raw_results") + version := os.Getenv("VERSION") + if version == "" { + log.Panic("could not retrieve version") + } + rawData, err3 := findRawData(version, coll) + if err3 != nil { + log.Panicf("Error getting raw data: %v", err3) + } + + commits, err := parseMainelineCommits(rawData) + if err != nil { + log.Panicf("Error parsing commits: %v", err) + } + fmt.Println(commits) + + err0 := client.Disconnect(context.Background()) + if err0 != nil { + log.Panicf("Failed to disconnect client: %v", err0) + } + +} + // findRawData will get all of the rawData for the given version func findRawData(version string, coll *mongo.Collection) ([]RawData, error) { filter := bson.D{ @@ -87,44 +131,15 @@ func findRawData(version string, coll *mongo.Collection) ([]RawData, error) { return rawData, nil } -func main() { - uri := os.Getenv("perf_uri_private_endpoint") - if uri == "" { - log.Panic("perf_uri_private_endpoint env variable is not set") - } - - client, err1 := mongo.Connect(options.Client().ApplyURI(uri)) - if err1 != nil { - log.Panicf("Error connecting client: %v", err1) - } - - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - err2 := client.Ping(ctx, nil) - if err2 != nil { - log.Panicf("Error pinging MongoDB Analytics: %v", err2) - } - fmt.Println("Successfully connected to MongoDB Analytics node.") - - commit := os.Getenv("COMMIT") - if commit == "" { - log.Panic("could not retrieve commit number") - } - - coll := client.Database("expanded_metrics").Collection("raw_results") - version := os.Getenv("VERSION") - if version == "" { - log.Panic("could not retrieve version") - } - rawData, err3 := findRawData(version, coll) - if err3 != nil { - log.Panicf("Error getting raw data: %v", err3) - } - fmt.Println(rawData) - - err0 := client.Disconnect(context.Background()) - if err0 != nil { - log.Panicf("Failed to disconnect client: %v", err0) +func parseMainelineCommits(rawData []RawData) ([]string, error) { + commits := make([]string, 0, len(rawData)) + for _, rd := range rawData { + taskID := rd.Info.TaskID + pieces := strings.Split(taskID, "_") // Format: mongo_go_driver_perf_perf__ + if len(pieces) < 6 { + return nil, errors.New("task ID doesn't hold commit SHA") + } + commits = append(commits, pieces[5]) } - + return commits, nil } From 2c7bf59338dce5594e231fb3a45cb607c54fbdd9 Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Fri, 11 Jul 2025 12:28:25 -0400 Subject: [PATCH 13/23] update to match evg --- internal/cmd/perfnotif/main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/cmd/perfnotif/main.go b/internal/cmd/perfnotif/main.go index 99ecd52ea2..6d496a7a4e 100644 --- a/internal/cmd/perfnotif/main.go +++ b/internal/cmd/perfnotif/main.go @@ -71,7 +71,7 @@ func main() { fmt.Println("Successfully connected to MongoDB Analytics node.") coll := client.Database("expanded_metrics").Collection("raw_results") - version := os.Getenv("VERSION") + version := os.Getenv("VERSION_ID") if version == "" { log.Panic("could not retrieve version") } From dfcb16ba7a4b8d3176dcb360e2b296f6d6819ee8 Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Fri, 11 Jul 2025 15:47:01 -0400 Subject: [PATCH 14/23] convert energy stats to go --- go.work | 1 + internal/cmd/perfcomp/energystatistics.go | 679 +++++++++++++++++++ internal/cmd/perfcomp/go.mod | 22 + internal/cmd/perfcomp/go.sum | 48 ++ internal/cmd/{perfnotif => perfcomp}/main.go | 2 +- 5 files changed, 751 insertions(+), 1 deletion(-) create mode 100644 internal/cmd/perfcomp/energystatistics.go create mode 100644 internal/cmd/perfcomp/go.mod create mode 100644 internal/cmd/perfcomp/go.sum rename internal/cmd/{perfnotif => perfcomp}/main.go (99%) diff --git a/go.work b/go.work index 23ad2ff8a7..9f345c684c 100644 --- a/go.work +++ b/go.work @@ -7,6 +7,7 @@ use ( ./examples/_logger/zerolog ./internal/cmd/benchmark ./internal/cmd/compilecheck + ./internal/cmd/perfcomp ./internal/cmd/faas/awslambda/mongodb ./internal/test/goleak ) diff --git a/internal/cmd/perfcomp/energystatistics.go b/internal/cmd/perfcomp/energystatistics.go new file mode 100644 index 0000000000..aa7fbc631a --- /dev/null +++ b/internal/cmd/perfcomp/energystatistics.go @@ -0,0 +1,679 @@ +// Copyright (C) MongoDB, Inc. 2025-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +package main + +import ( + "errors" + "fmt" + "log" + "math" + "math/rand" + "sort" + "time" + + "gonum.org/v1/gonum/floats" + "gonum.org/v1/gonum/mat" +) + +// EnergyStatistics represents the E-statistic, Test statistic, and E-coefficient of inhomogeneity. +type EnergyStatistics struct { + E float64 + T float64 + H float64 +} + +// EnergyStatisticsWithProbabilities represents Energy Statistics and permutation test results. +type EnergyStatisticsWithProbabilities struct { + EnergyStatistics // Embeds EnergyStatistics + EPValue float64 + TPValue float64 + HPValue float64 +} + +// _convert converts a series into a 2-dimensional Gonum matrix of float64. +// It accepts []float64 or [][]float64. If a []float64 is provided, it is +// converted into a column vector (N x 1 matrix). +func _convert(series interface{}) (*mat.Dense, error) { + var data []float64 + var rows, cols int + + switch s := series.(type) { + case []float64: + // If 1D slice, treat as a column vector (N x 1) + data = s + rows = len(s) + cols = 1 + case [][]float64: + if len(s) == 0 { + return mat.NewDense(0, 0, nil), nil // Empty matrix + } + rows = len(s) + cols = len(s[0]) + for _, row := range s { + if len(row) != cols { + return nil, errors.New("input [][]float64 has inconsistent row lengths") + } + data = append(data, row...) // Flatten the 2D slice into a 1D slice + } + case *mat.Dense: + // If it's already a mat.Dense, handle potential 1D row vector to column vector conversion + r, c := s.Dims() + if r == 1 && c > 1 { // If it's a row vector (1 x N), transpose to column vector (N x 1) + transposed := mat.NewDense(c, 1, nil) + transposed.Copy(s.T()) + return transposed, nil + } + return s, nil // Already in a suitable format + default: + return nil, errors.New("series is not the expected type ([]float64, [][]float64, or *mat.Dense)") + } + + if len(data) == 0 { + return mat.NewDense(0, 0, nil), nil // Return empty matrix if no data + } + + // Create a new Dense matrix with the collected data + return mat.NewDense(rows, cols, data), nil +} + +// _getValidInput returns a valid form of input as a Gonum matrix. +// It performs initial validation, ensuring the input is not empty. +func _getValidInput(series interface{}) (*mat.Dense, error) { + m, err := _convert(series) + if err != nil { + return nil, err + } + r, _ := m.Dims() + if r == 0 { + return nil, errors.New("distribution cannot be empty") + } + return m, nil +} + +// _getDistanceMatrix returns the matrix of pairwise Euclidean distances within the series. +// For an m x n series, it returns an m x m matrix where (i,j)th value is the Euclidean +// distance between the i-th and j-th observations (rows) of the series. +func _getDistanceMatrix(series *mat.Dense) (*mat.Dense, error) { + r, c := series.Dims() + if r == 0 { + return mat.NewDense(0, 0, nil), nil // Return empty matrix for empty series + } + + distMatrix := mat.NewDense(r, r, nil) + + // Calculate Euclidean distance between each pair of rows + for i := 0; i < r; i++ { + // Extract row i as a vector + vecI := mat.NewVecDense(c, nil) + for k := 0; k < c; k++ { + vecI.SetVec(k, series.At(i, k)) + } + + for j := i; j < r; j++ { // Iterate from i to r-1 to fill upper triangle and diagonal + // Extract row j as a vector + vecJ := mat.NewVecDense(c, nil) + for k := 0; k < c; k++ { + vecJ.SetVec(k, series.At(j, k)) + } + + // Calculate Euclidean distance: ||vecI - vecJ||_2 + var diff mat.VecDense + diff.SubVec(vecI, vecJ) + dist := floats.Norm(diff.RawVector().Data, 2) // Euclidean norm (L2 norm) + + distMatrix.Set(i, j, dist) + distMatrix.Set(j, i, dist) // Distance matrix is symmetric + } + } + return distMatrix, nil +} + +// _calculateStats calculates the E-statistic, Test statistic, and E-coefficient of inhomogeneity. +// It takes the sums of distances within distributions X (x), within Y (y), and between X and Y (xy), +// along with their respective lengths (n, m). +func _calculateStats(x, y, xy float64, n, m int) (e, t, h float64) { + // Calculate average distances + xyAvg := 0.0 + if n > 0 && m > 0 { + xyAvg = xy / float64(n*m) + } + + xAvg := 0.0 + if n > 0 { + xAvg = x / float64(n*n) + } + + yAvg := 0.0 + if m > 0 { + yAvg = y / float64(m*m) + } + + // E-statistic + e = 2*xyAvg - xAvg - yAvg + + // Test statistic + t = 0.0 + if n+m > 0 { + t = (float64(n*m) / float64(n+m)) * e + } + + // E-coefficient of inhomogeneity + h = 0.0 + if xyAvg > 0 { + h = e / (2 * xyAvg) + } + return e, t, h +} + +// _calculateTStats finds t-statistic values given a distance matrix. +// It iteratively calculates the test statistic for all possible partition points (tau). +func _calculateTStats(distanceMatrix *mat.Dense) ([]float64, error) { + N, _ := distanceMatrix.Dims() + if N == 0 { + return []float64{}, nil // No statistics for empty matrix + } + + statistics := make([]float64, N) + + // Initialize 'y' sum: In Python, this is `np.sum(distance_matrix[row, row:])` for all rows, + // which sums the upper triangle (including diagonal) of the entire distance matrix. + initialYSum := 0.0 + for r := 0; r < N; r++ { + for c := r; c < N; c++ { + initialYSum += distanceMatrix.At(r, c) + } + } + + // Initialize sums for the first partition (tau = 0) + xy := 0.0 + x := 0.0 + y := initialYSum // Initial 'y' contains the sum of all distances (as if all are in Y) + + // Iterate through all possible partition points (tau) + for tau := 0; tau < N; tau++ { + // Calculate the test statistic for the current partition + // Note: The `_calculateStats` function expects `x` and `y` to represent sums over unique pairs (e.g., upper triangle). + // The way `x` and `y` are accumulated in this loop (via `columnDelta` and `rowDelta`) effectively sums over + // the full symmetric parts, making `2*x` and `2*y` in `_calculateStats` necessary to match the E-statistic definition. + _, t, _ := _calculateStats(x, y, xy, tau, N-tau) + statistics[tau] = t + + // Update sums for the next iteration (moving the partition point `tau` one step to the right) + + // columnDelta: sum |Xi - X_tau| for i < tau (distances from elements in X to the new element at tau) + columnDelta := 0.0 + for rIdx := 0; rIdx < tau; rIdx++ { + columnDelta += distanceMatrix.At(rIdx, tau) + } + + // rowDelta: sum |X_tau - Yj| for tau <= j (distances from the new element at tau to elements in Y) + rowDelta := 0.0 + for cIdx := tau; cIdx < N; cIdx++ { + rowDelta += distanceMatrix.At(tau, cIdx) + } + + // Update the sums based on the movement of tau + xy = xy - columnDelta + rowDelta // Distances between X and Y + x = x + columnDelta // Distances within X + y = y - rowDelta // Distances within Y + } + + return statistics, nil +} + +// _getNextSignificantChangePoint calculates the next significant change point using a permutation test. +// It searches for change points within windows defined by existing change points. +func _getNextSignificantChangePoint( + distances *mat.Dense, + changePoints []int, + memo map[[2]int]struct { // Memoization cache for window calculations + idx int + val float64 + }, + pvalue float64, + permutations int, +) (int, error) { + N, _ := distances.Dims() + if N == 0 { + return -1, nil // No change point for empty distances + } + + // Define windows based on existing change points + windows := []int{0} + windows = append(windows, changePoints...) + windows = append(windows, N) + sort.Ints(windows) // Ensure windows are sorted using sort.Ints + + type candidate struct { + idx int + val float64 + } + var candidates []candidate + + // Iterate through each window to find the best candidate change point + for i := 0; i < len(windows)-1; i++ { + a, b := windows[i], windows[i+1] + boundsKey := [2]int{a, b} // Key for memoization + + if val, ok := memo[boundsKey]; ok { + candidates = append(candidates, candidate{idx: val.idx, val: val.val}) + } else { + // Extract sub-matrix for the current window + windowDistances := distances.Slice(a, b, a, b).(*mat.Dense) + stats, err := _calculateTStats(windowDistances) + if err != nil { + return -1, fmt.Errorf("error calculating t-stats for window [%d:%d]: %w", a, b, err) + } + + if len(stats) == 0 { + continue // Skip empty stats (e.g., for very small windows) + } + + // Find the index of the maximum T-statistic within the window + idx := 0 + maxStat := stats[0] + for k, s := range stats { + if s > maxStat { + maxStat = s + idx = k + } + } + newCandidate := candidate{idx: idx + a, val: maxStat} // Adjust index to global scale + candidates = append(candidates, newCandidate) + memo[boundsKey] = struct { // Store in memo for future use + idx int + val float64 + }{idx: newCandidate.idx, val: newCandidate.val} + } + } + + if len(candidates) == 0 { + return -1, nil // No valid candidates found + } + + // Find the overall best candidate among all windows + bestCandidate := candidates[0] + for _, c := range candidates { + if c.val > bestCandidate.val { + bestCandidate = c + } + } + + // Perform permutation test + betterNum := 0 + src := rand.NewSource(time.Now().UnixNano()) + r := rand.New(src) // New random source for each call to ensure different permutations + + for p := 0; p < permutations; p++ { + permuteT := make([]float64, 0, len(windows)-1) + for i := 0; i < len(windows)-1; i++ { + a, b := windows[i], windows[i+1] + windowSize := b - a + if windowSize == 0 { + continue + } + + // Create shuffled indices for the current window + rowIndices := make([]int, windowSize) + for k := 0; k < windowSize; k++ { + rowIndices[k] = k + a // Global indices + } + r.Shuffle(len(rowIndices), func(i, j int) { + rowIndices[i], rowIndices[j] = rowIndices[j], rowIndices[i] + }) + + // Create shuffled sub-matrix using the shuffled global indices + shuffledDistances := mat.NewDense(windowSize, windowSize, nil) + for row := 0; row < windowSize; row++ { + for col := 0; col < windowSize; col++ { + // Use shuffled global indices to pick elements from the original distances matrix + shuffledDistances.Set(row, col, distances.At(rowIndices[row], rowIndices[col])) + } + } + + stats, err := _calculateTStats(shuffledDistances) + if err != nil { + return -1, fmt.Errorf("error calculating t-stats for shuffled window [%d:%d]: %w", a, b, err) + } + + if len(stats) == 0 { + continue + } + + // Find the maximum T-statistic for the current permutation + maxPermuteStat := stats[0] + for _, s := range stats { + if s > maxPermuteStat { + maxPermuteStat = s + } + } + permuteT = append(permuteT, maxPermuteStat) + } + + if len(permuteT) == 0 { + continue // If all windows were empty or invalid for this permutation + } + + // Find the overall best T-statistic for this permutation + bestPermute := permuteT[0] + for _, val := range permuteT { + if val > bestPermute { + bestPermute = val + } + } + + if bestPermute >= bestCandidate.val { + betterNum++ + } + } + + // Calculate probability (p-value) + probability := float64(betterNum) / float64(permutations+1) + if probability <= pvalue { + return bestCandidate.idx, nil // Return the significant change point + } + return -1, nil // No significant change point found +} + +// _getEnergyStatisticsFromDistanceMatrix returns energy statistics from a combined distance matrix. +// It partitions the combined distance matrix into within-X, within-Y, and between-XY distances +// based on the provided lengths n (for X) and m (for Y). +func _getEnergyStatisticsFromDistanceMatrix(distanceMatrix *mat.Dense, n, m int) (*EnergyStatistics, error) { + lenDistanceMatrix, _ := distanceMatrix.Dims() + + if lenDistanceMatrix == 0 { + return &EnergyStatistics{E: 0, T: 0, H: 0}, nil + } + + // Sum distances within X (top-left sub-matrix) + xSum := 0.0 + if n > 0 { + for r := 0; r < n; r++ { + for c := 0; c < n; c++ { + xSum += distanceMatrix.At(r, c) + } + } + } + + // Sum distances within Y (bottom-right sub-matrix) + ySum := 0.0 + if m > 0 { + for r := n; r < lenDistanceMatrix; r++ { + for c := n; c < lenDistanceMatrix; c++ { + ySum += distanceMatrix.At(r, c) + } + } + } + + // Sum distances between X and Y (bottom-left sub-matrix, which is equivalent to top-right due to symmetry) + xySum := 0.0 + if n > 0 && m > 0 { + for r := n; r < lenDistanceMatrix; r++ { // Rows from Y partition + for c := 0; c < n; c++ { // Columns from X partition + xySum += distanceMatrix.At(r, c) + } + } + } + + e, t, h := _calculateStats(xSum, ySum, xySum, n, m) + return &EnergyStatistics{E: e, T: t, H: h}, nil +} + +// EDivisive calculates the change points in the series using the e-divisive algorithm. +// It iteratively finds significant change points until no more are found based on the p-value. +func EDivisive(series interface{}, pvalue float64, permutations int) ([]int, error) { + seriesMat, err := _getValidInput(series) + if err != nil { + return nil, err + } + + distances, err := _getDistanceMatrix(seriesMat) + if err != nil { + return nil, err + } + + changePoints := []int{} + memo := make(map[[2]int]struct { + idx int + val float64 + }) // Cache for _getNextSignificantChangePoint + + for { + significantChangePoint, err := _getNextSignificantChangePoint( + distances, changePoints, memo, pvalue, permutations, + ) + if err != nil { + return nil, err + } + if significantChangePoint == -1 { + break // No more significant change points found + } + changePoints = append(changePoints, significantChangePoint) + } + + sort.Ints(changePoints) // Ensure change points are sorted + return changePoints, nil +} + +// GetEnergyStatistics calculates energy statistics of distributions x and y. +// It combines x and y, calculates the full distance matrix, and then derives +// the E-statistic, Test statistic, and E-coefficient of inhomogeneity. +func GetEnergyStatistics(x, y interface{}) (*EnergyStatistics, error) { + xMat, err := _getValidInput(x) + if err != nil { + return nil, err + } + yMat, err := _getValidInput(y) + if err != nil { + return nil, err + } + + n, _ := xMat.Dims() + m, _ := yMat.Dims() + + // Ensure x and y have the same number of variables (columns) + _, xCols := xMat.Dims() + _, yCols := yMat.Dims() + if xCols != yCols { + return nil, errors.New("distributions x and y must have the same number of variables (columns)") + } + + // Concatenate x and y into a single combined matrix + combinedRows := n + m + combinedData := make([]float64, combinedRows*xCols) + + // Copy data from xMat + for r := 0; r < n; r++ { + for c := 0; c < xCols; c++ { + combinedData[r*xCols+c] = xMat.At(r, c) + } + } + // Copy data from yMat (offset by n rows) + for r := 0; r < m; r++ { + for c := 0; c < yCols; c++ { + combinedData[(n+r)*yCols+c] = yMat.At(r, c) + } + } + combinedMat := mat.NewDense(combinedRows, xCols, combinedData) + + // Calculate the distance matrix for the combined data + distances, err := _getDistanceMatrix(combinedMat) + if err != nil { + return nil, err + } + + // Derive energy statistics from the combined distance matrix + return _getEnergyStatisticsFromDistanceMatrix(distances, n, m) +} + +// GetEnergyStatisticsAndProbabilities returns energy statistics and the corresponding +// permutation test results (p-values) for distributions x and y. +func GetEnergyStatisticsAndProbabilities(x, y interface{}, permutations int) (*EnergyStatisticsWithProbabilities, error) { + xMat, err := _getValidInput(x) + if err != nil { + return nil, err + } + yMat, err := _getValidInput(y) + if err != nil { + return nil, err + } + + n, _ := xMat.Dims() + m, _ := yMat.Dims() + + // Ensure x and y have the same number of variables (columns) + _, xCols := xMat.Dims() + _, yCols := yMat.Dims() + if xCols != yCols { + return nil, errors.New("distributions x and y must have the same number of variables (columns)") + } + + // Concatenate x and y into a single combined matrix + combinedRows := n + m + combinedData := make([]float64, combinedRows*xCols) + + // Copy data from xMat + for r := 0; r < n; r++ { + for c := 0; c < xCols; c++ { + combinedData[r*xCols+c] = xMat.At(r, c) + } + } + // Copy data from yMat (offset by n rows) + for r := 0; r < m; r++ { + for c := 0; c < yCols; c++ { + combinedData[(n+r)*yCols+c] = yMat.At(r, c) + } + } + combinedMat := mat.NewDense(combinedRows, xCols, combinedData) + + // Calculate the distance matrix for the combined data (this matrix will be shuffled) + distancesBetweenAll, err := _getDistanceMatrix(combinedMat) + if err != nil { + return nil, err + } + + lenCombined, _ := distancesBetweenAll.Dims() + + // Counters for permutation test + countE := 0 + countT := 0 + countH := 0 + + // Initialize random number generator for shuffling + src := rand.NewSource(time.Now().UnixNano()) + r := rand.New(src) + + // Create initial row indices (0 to lenCombined-1) + rowIndices := make([]int, lenCombined) + for i := 0; i < lenCombined; i++ { + rowIndices[i] = i + } + + // Calculate initial energy statistics for the original (unshuffled) data + energyStatistics, err := _getEnergyStatisticsFromDistanceMatrix(distancesBetweenAll, n, m) + if err != nil { + return nil, err + } + + // Perform permutation test + for p := 0; p < permutations; p++ { + // Shuffle the row indices + r.Shuffle(len(rowIndices), func(i, j int) { + rowIndices[i], rowIndices[j] = rowIndices[j], rowIndices[i] + }) + + // Create a new shuffled distance matrix by reordering rows/columns of the original + // distance matrix according to the shuffled rowIndices. This simulates shuffling + // the original combined data and then calculating distances. + shuffledDistances := mat.NewDense(lenCombined, lenCombined, nil) + for row := 0; row < lenCombined; row++ { + for col := 0; col < lenCombined; col++ { + shuffledDistances.Set(row, col, distancesBetweenAll.At(rowIndices[row], rowIndices[col])) + } + } + + // Calculate energy statistics for the shuffled data + shuffledEnergyStatistics, err := _getEnergyStatisticsFromDistanceMatrix(shuffledDistances, n, m) + if err != nil { + return nil, err + } + + // Compare shuffled statistics with original statistics + if shuffledEnergyStatistics.E >= energyStatistics.E { + countE++ + } + if shuffledEnergyStatistics.T >= energyStatistics.T { + countT++ + } + if shuffledEnergyStatistics.H >= energyStatistics.H { + countH++ + } + } + + // Calculate p-values + total := float64(permutations + 1) // Include the original observation in the total count + return &EnergyStatisticsWithProbabilities{ + EnergyStatistics: *energyStatistics, // Original statistics + EPValue: float64(countE) / total, + TPValue: float64(countT) / total, + HPValue: float64(countH) / total, + }, nil +} + +func main() { + // Initialize random number generator for reproducibility (optional, but good for tests) + seed := time.Now().UnixNano() + r := rand.New(rand.NewSource(seed)) + + // --- Test Case 1: x and y are different distributions --- + + // Generate x: 100x5 matrix with random values between 0 and 1 (mimics np.random.rand) + xData := make([]float64, 100*5) + for i := range xData { + xData[i] = r.Float64() + } + x := mat.NewDense(100, 5, xData) + + // Generate y: 100x5 matrix with normal distribution (mean 1000, std dev 1) (mimics np.random.normal) + yData := make([]float64, 100*5) + for i := range yData { + // Box-Muller transform to get normally distributed numbers + u1, u2 := r.Float64(), r.Float64() + z0 := math.Sqrt(-2.0*math.Log(u1)) * math.Cos(2*math.Pi*u2) + yData[i] = 1000 + 1*z0 // mean + std_dev * z0 + } + y := mat.NewDense(100, 5, yData) + + permutations := 1000 + + fmt.Println("--- Expected h around 1 (x and y are different) ---") + energyStats1, err := GetEnergyStatisticsAndProbabilities(x, y, permutations) + if err != nil { + log.Fatalf("Error calculating energy statistics for different distributions: %v", err) + } + + fmt.Printf("E-statistic: %.4f (p-value: %.4f)\n", energyStats1.E, energyStats1.EPValue) + fmt.Printf("Test statistic: %.4f (p-value: %.4f)\n", energyStats1.T, energyStats1.TPValue) + fmt.Printf("E-coefficient of inhomogeneity (h): %.4f (p-value: %.4f)\n\n", energyStats1.H, energyStats1.HPValue) + + // --- Test Case 2: y is the same as x (expected h around 0) --- + + // Set y to be the same as x + // In Go, assigning a pointer means both variables point to the same underlying data. + // This correctly mimics `y = x` in Python where `y` becomes a reference to the same array. + y = x + + fmt.Println("--- Expected h around 0 (y is the same as x) ---") + energyStats2, err := GetEnergyStatisticsAndProbabilities(x, y, permutations) + if err != nil { + log.Fatalf("Error calculating energy statistics for identical distributions: %v", err) + } + + fmt.Printf("E-statistic: %.4f (p-value: %.4f)\n", energyStats2.E, energyStats2.EPValue) + fmt.Printf("Test statistic: %.4f (p-value: %.4f)\n", energyStats2.T, energyStats2.TPValue) + fmt.Printf("E-coefficient of inhomogeneity (h): %.4f (p-value: %.4f)\n", energyStats2.H, energyStats2.HPValue) +} diff --git a/internal/cmd/perfcomp/go.mod b/internal/cmd/perfcomp/go.mod new file mode 100644 index 0000000000..6446bed897 --- /dev/null +++ b/internal/cmd/perfcomp/go.mod @@ -0,0 +1,22 @@ +module go.mongodb.go/mongo-driver/v2/internal/cmd/perfcomp + +go 1.23 + +replace go.mongodb.org/mongo-driver/v2 => ../../../ + +require ( + go.mongodb.org/mongo-driver/v2 v2.2.2 + gonum.org/v1/gonum v0.16.0 +) + +require ( + github.com/golang/snappy v1.0.0 // indirect + github.com/klauspost/compress v1.16.7 // indirect + github.com/xdg-go/pbkdf2 v1.0.0 // indirect + github.com/xdg-go/scram v1.1.2 // indirect + github.com/xdg-go/stringprep v1.0.4 // indirect + github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78 // indirect + golang.org/x/crypto v0.33.0 // indirect + golang.org/x/sync v0.12.0 // indirect + golang.org/x/text v0.23.0 // indirect +) diff --git a/internal/cmd/perfcomp/go.sum b/internal/cmd/perfcomp/go.sum new file mode 100644 index 0000000000..49f669457a --- /dev/null +++ b/internal/cmd/perfcomp/go.sum @@ -0,0 +1,48 @@ +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/golang/snappy v1.0.0 h1:Oy607GVXHs7RtbggtPBnr2RmDArIsAefDwvrdWvRhGs= +github.com/golang/snappy v1.0.0/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I= +github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= +github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c= +github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= +github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY= +github.com/xdg-go/scram v1.1.2/go.mod h1:RT/sEzTbU5y00aCK8UOx6R7YryM0iF1N2MOmC3kKLN4= +github.com/xdg-go/stringprep v1.0.4 h1:XLI/Ng3O1Atzq0oBs3TWm+5ZVgkq2aqdlvP9JtoZ6c8= +github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM= +github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78 h1:ilQV1hzziu+LLM3zUTJ0trRztfwgjqKnBWNtSRkbmwM= +github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78/go.mod h1:aL8wCCfTfSfmXjznFBSZNN13rSJjlIOI1fUNAtF7rmI= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.33.0 h1:IOBPskki6Lysi0lo9qQvbxiQ+FvsCC/YWOecCHAixus= +golang.org/x/crypto v0.33.0/go.mod h1:bVdXmD7IV/4GdElGPozy6U7lWdRXA4qyRVGJV57uQ5M= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw= +golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= +golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY= +golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= +gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= diff --git a/internal/cmd/perfnotif/main.go b/internal/cmd/perfcomp/main.go similarity index 99% rename from internal/cmd/perfnotif/main.go rename to internal/cmd/perfcomp/main.go index 6d496a7a4e..8c10aed914 100644 --- a/internal/cmd/perfnotif/main.go +++ b/internal/cmd/perfcomp/main.go @@ -51,7 +51,7 @@ type RawData struct { FailedRollupAttempts int64 `bson:"failed_rollup_attempts"` } -func main() { +func main1() { uri := os.Getenv("perf_uri_private_endpoint") if uri == "" { log.Panic("perf_uri_private_endpoint env variable is not set") From 4d285676baa1520c11694b5d646d45efc21a9f3c Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Fri, 11 Jul 2025 16:11:10 -0400 Subject: [PATCH 15/23] restore main --- internal/cmd/perfcomp/energystatistics.go | 2 +- internal/cmd/perfcomp/main.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/cmd/perfcomp/energystatistics.go b/internal/cmd/perfcomp/energystatistics.go index aa7fbc631a..30773e8160 100644 --- a/internal/cmd/perfcomp/energystatistics.go +++ b/internal/cmd/perfcomp/energystatistics.go @@ -624,7 +624,7 @@ func GetEnergyStatisticsAndProbabilities(x, y interface{}, permutations int) (*E }, nil } -func main() { +func TestEnergyStatistics() { // Initialize random number generator for reproducibility (optional, but good for tests) seed := time.Now().UnixNano() r := rand.New(rand.NewSource(seed)) diff --git a/internal/cmd/perfcomp/main.go b/internal/cmd/perfcomp/main.go index 8c10aed914..6d496a7a4e 100644 --- a/internal/cmd/perfcomp/main.go +++ b/internal/cmd/perfcomp/main.go @@ -51,7 +51,7 @@ type RawData struct { FailedRollupAttempts int64 `bson:"failed_rollup_attempts"` } -func main1() { +func main() { uri := os.Getenv("perf_uri_private_endpoint") if uri == "" { log.Panic("perf_uri_private_endpoint env variable is not set") From 554b901cbf3fa4c23785caf6276d4fb6ef1ebc6c Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Fri, 11 Jul 2025 16:18:27 -0400 Subject: [PATCH 16/23] cleanup --- internal/cmd/perfcomp/energystatistics.go | 109 +++++++--------------- 1 file changed, 34 insertions(+), 75 deletions(-) diff --git a/internal/cmd/perfcomp/energystatistics.go b/internal/cmd/perfcomp/energystatistics.go index 30773e8160..a1bfd61e64 100644 --- a/internal/cmd/perfcomp/energystatistics.go +++ b/internal/cmd/perfcomp/energystatistics.go @@ -19,16 +19,19 @@ import ( "gonum.org/v1/gonum/mat" ) -// EnergyStatistics represents the E-statistic, Test statistic, and E-coefficient of inhomogeneity. +// Class for representing Energy Statistics. +// E - E-statistic +// T - Test statistic +// H - E-coefficient of inhomogeneity type EnergyStatistics struct { E float64 T float64 H float64 } -// EnergyStatisticsWithProbabilities represents Energy Statistics and permutation test results. +// Class for representing Energy Statistics and permutation test result. type EnergyStatisticsWithProbabilities struct { - EnergyStatistics // Embeds EnergyStatistics + EnergyStatistics EPValue float64 TPValue float64 HPValue float64 @@ -43,13 +46,12 @@ func _convert(series interface{}) (*mat.Dense, error) { switch s := series.(type) { case []float64: - // If 1D slice, treat as a column vector (N x 1) data = s rows = len(s) cols = 1 case [][]float64: if len(s) == 0 { - return mat.NewDense(0, 0, nil), nil // Empty matrix + return mat.NewDense(0, 0, nil), nil } rows = len(s) cols = len(s[0]) @@ -57,7 +59,7 @@ func _convert(series interface{}) (*mat.Dense, error) { if len(row) != cols { return nil, errors.New("input [][]float64 has inconsistent row lengths") } - data = append(data, row...) // Flatten the 2D slice into a 1D slice + data = append(data, row...) } case *mat.Dense: // If it's already a mat.Dense, handle potential 1D row vector to column vector conversion @@ -67,21 +69,19 @@ func _convert(series interface{}) (*mat.Dense, error) { transposed.Copy(s.T()) return transposed, nil } - return s, nil // Already in a suitable format + return s, nil default: return nil, errors.New("series is not the expected type ([]float64, [][]float64, or *mat.Dense)") } if len(data) == 0 { - return mat.NewDense(0, 0, nil), nil // Return empty matrix if no data + return mat.NewDense(0, 0, nil), nil } - // Create a new Dense matrix with the collected data return mat.NewDense(rows, cols, data), nil } // _getValidInput returns a valid form of input as a Gonum matrix. -// It performs initial validation, ensuring the input is not empty. func _getValidInput(series interface{}) (*mat.Dense, error) { m, err := _convert(series) if err != nil { @@ -100,21 +100,19 @@ func _getValidInput(series interface{}) (*mat.Dense, error) { func _getDistanceMatrix(series *mat.Dense) (*mat.Dense, error) { r, c := series.Dims() if r == 0 { - return mat.NewDense(0, 0, nil), nil // Return empty matrix for empty series + return mat.NewDense(0, 0, nil), nil } distMatrix := mat.NewDense(r, r, nil) // Calculate Euclidean distance between each pair of rows for i := 0; i < r; i++ { - // Extract row i as a vector vecI := mat.NewVecDense(c, nil) for k := 0; k < c; k++ { vecI.SetVec(k, series.At(i, k)) } - for j := i; j < r; j++ { // Iterate from i to r-1 to fill upper triangle and diagonal - // Extract row j as a vector + for j := i; j < r; j++ { vecJ := mat.NewVecDense(c, nil) for k := 0; k < c; k++ { vecJ.SetVec(k, series.At(j, k)) @@ -126,7 +124,7 @@ func _getDistanceMatrix(series *mat.Dense) (*mat.Dense, error) { dist := floats.Norm(diff.RawVector().Data, 2) // Euclidean norm (L2 norm) distMatrix.Set(i, j, dist) - distMatrix.Set(j, i, dist) // Distance matrix is symmetric + distMatrix.Set(j, i, dist) } } return distMatrix, nil @@ -136,7 +134,6 @@ func _getDistanceMatrix(series *mat.Dense) (*mat.Dense, error) { // It takes the sums of distances within distributions X (x), within Y (y), and between X and Y (xy), // along with their respective lengths (n, m). func _calculateStats(x, y, xy float64, n, m int) (e, t, h float64) { - // Calculate average distances xyAvg := 0.0 if n > 0 && m > 0 { xyAvg = xy / float64(n*m) @@ -174,13 +171,11 @@ func _calculateStats(x, y, xy float64, n, m int) (e, t, h float64) { func _calculateTStats(distanceMatrix *mat.Dense) ([]float64, error) { N, _ := distanceMatrix.Dims() if N == 0 { - return []float64{}, nil // No statistics for empty matrix + return []float64{}, nil } statistics := make([]float64, N) - // Initialize 'y' sum: In Python, this is `np.sum(distance_matrix[row, row:])` for all rows, - // which sums the upper triangle (including diagonal) of the entire distance matrix. initialYSum := 0.0 for r := 0; r < N; r++ { for c := r; c < N; c++ { @@ -188,22 +183,14 @@ func _calculateTStats(distanceMatrix *mat.Dense) ([]float64, error) { } } - // Initialize sums for the first partition (tau = 0) xy := 0.0 x := 0.0 - y := initialYSum // Initial 'y' contains the sum of all distances (as if all are in Y) + y := initialYSum - // Iterate through all possible partition points (tau) for tau := 0; tau < N; tau++ { - // Calculate the test statistic for the current partition - // Note: The `_calculateStats` function expects `x` and `y` to represent sums over unique pairs (e.g., upper triangle). - // The way `x` and `y` are accumulated in this loop (via `columnDelta` and `rowDelta`) effectively sums over - // the full symmetric parts, making `2*x` and `2*y` in `_calculateStats` necessary to match the E-statistic definition. _, t, _ := _calculateStats(x, y, xy, tau, N-tau) statistics[tau] = t - // Update sums for the next iteration (moving the partition point `tau` one step to the right) - // columnDelta: sum |Xi - X_tau| for i < tau (distances from elements in X to the new element at tau) columnDelta := 0.0 for rIdx := 0; rIdx < tau; rIdx++ { @@ -216,7 +203,6 @@ func _calculateTStats(distanceMatrix *mat.Dense) ([]float64, error) { rowDelta += distanceMatrix.At(tau, cIdx) } - // Update the sums based on the movement of tau xy = xy - columnDelta + rowDelta // Distances between X and Y x = x + columnDelta // Distances within X y = y - rowDelta // Distances within Y @@ -230,7 +216,7 @@ func _calculateTStats(distanceMatrix *mat.Dense) ([]float64, error) { func _getNextSignificantChangePoint( distances *mat.Dense, changePoints []int, - memo map[[2]int]struct { // Memoization cache for window calculations + memo map[[2]int]struct { idx int val float64 }, @@ -239,14 +225,13 @@ func _getNextSignificantChangePoint( ) (int, error) { N, _ := distances.Dims() if N == 0 { - return -1, nil // No change point for empty distances + return -1, nil } - // Define windows based on existing change points windows := []int{0} windows = append(windows, changePoints...) windows = append(windows, N) - sort.Ints(windows) // Ensure windows are sorted using sort.Ints + sort.Ints(windows) type candidate struct { idx int @@ -254,15 +239,13 @@ func _getNextSignificantChangePoint( } var candidates []candidate - // Iterate through each window to find the best candidate change point for i := 0; i < len(windows)-1; i++ { a, b := windows[i], windows[i+1] - boundsKey := [2]int{a, b} // Key for memoization + boundsKey := [2]int{a, b} if val, ok := memo[boundsKey]; ok { candidates = append(candidates, candidate{idx: val.idx, val: val.val}) } else { - // Extract sub-matrix for the current window windowDistances := distances.Slice(a, b, a, b).(*mat.Dense) stats, err := _calculateTStats(windowDistances) if err != nil { @@ -270,7 +253,7 @@ func _getNextSignificantChangePoint( } if len(stats) == 0 { - continue // Skip empty stats (e.g., for very small windows) + continue } // Find the index of the maximum T-statistic within the window @@ -282,9 +265,9 @@ func _getNextSignificantChangePoint( idx = k } } - newCandidate := candidate{idx: idx + a, val: maxStat} // Adjust index to global scale + newCandidate := candidate{idx: idx + a, val: maxStat} candidates = append(candidates, newCandidate) - memo[boundsKey] = struct { // Store in memo for future use + memo[boundsKey] = struct { idx int val float64 }{idx: newCandidate.idx, val: newCandidate.val} @@ -292,7 +275,7 @@ func _getNextSignificantChangePoint( } if len(candidates) == 0 { - return -1, nil // No valid candidates found + return -1, nil } // Find the overall best candidate among all windows @@ -303,10 +286,9 @@ func _getNextSignificantChangePoint( } } - // Perform permutation test betterNum := 0 src := rand.NewSource(time.Now().UnixNano()) - r := rand.New(src) // New random source for each call to ensure different permutations + r := rand.New(src) for p := 0; p < permutations; p++ { permuteT := make([]float64, 0, len(windows)-1) @@ -317,20 +299,17 @@ func _getNextSignificantChangePoint( continue } - // Create shuffled indices for the current window rowIndices := make([]int, windowSize) for k := 0; k < windowSize; k++ { - rowIndices[k] = k + a // Global indices + rowIndices[k] = k + a } r.Shuffle(len(rowIndices), func(i, j int) { rowIndices[i], rowIndices[j] = rowIndices[j], rowIndices[i] }) - // Create shuffled sub-matrix using the shuffled global indices shuffledDistances := mat.NewDense(windowSize, windowSize, nil) for row := 0; row < windowSize; row++ { for col := 0; col < windowSize; col++ { - // Use shuffled global indices to pick elements from the original distances matrix shuffledDistances.Set(row, col, distances.At(rowIndices[row], rowIndices[col])) } } @@ -344,7 +323,6 @@ func _getNextSignificantChangePoint( continue } - // Find the maximum T-statistic for the current permutation maxPermuteStat := stats[0] for _, s := range stats { if s > maxPermuteStat { @@ -355,10 +333,9 @@ func _getNextSignificantChangePoint( } if len(permuteT) == 0 { - continue // If all windows were empty or invalid for this permutation + continue } - // Find the overall best T-statistic for this permutation bestPermute := permuteT[0] for _, val := range permuteT { if val > bestPermute { @@ -371,12 +348,11 @@ func _getNextSignificantChangePoint( } } - // Calculate probability (p-value) probability := float64(betterNum) / float64(permutations+1) if probability <= pvalue { - return bestCandidate.idx, nil // Return the significant change point + return bestCandidate.idx, nil } - return -1, nil // No significant change point found + return -1, nil } // _getEnergyStatisticsFromDistanceMatrix returns energy statistics from a combined distance matrix. @@ -412,8 +388,8 @@ func _getEnergyStatisticsFromDistanceMatrix(distanceMatrix *mat.Dense, n, m int) // Sum distances between X and Y (bottom-left sub-matrix, which is equivalent to top-right due to symmetry) xySum := 0.0 if n > 0 && m > 0 { - for r := n; r < lenDistanceMatrix; r++ { // Rows from Y partition - for c := 0; c < n; c++ { // Columns from X partition + for r := n; r < lenDistanceMatrix; r++ { + for c := 0; c < n; c++ { xySum += distanceMatrix.At(r, c) } } @@ -450,12 +426,12 @@ func EDivisive(series interface{}, pvalue float64, permutations int) ([]int, err return nil, err } if significantChangePoint == -1 { - break // No more significant change points found + break } changePoints = append(changePoints, significantChangePoint) } - sort.Ints(changePoints) // Ensure change points are sorted + sort.Ints(changePoints) return changePoints, nil } @@ -475,24 +451,20 @@ func GetEnergyStatistics(x, y interface{}) (*EnergyStatistics, error) { n, _ := xMat.Dims() m, _ := yMat.Dims() - // Ensure x and y have the same number of variables (columns) _, xCols := xMat.Dims() _, yCols := yMat.Dims() if xCols != yCols { return nil, errors.New("distributions x and y must have the same number of variables (columns)") } - // Concatenate x and y into a single combined matrix combinedRows := n + m combinedData := make([]float64, combinedRows*xCols) - // Copy data from xMat for r := 0; r < n; r++ { for c := 0; c < xCols; c++ { combinedData[r*xCols+c] = xMat.At(r, c) } } - // Copy data from yMat (offset by n rows) for r := 0; r < m; r++ { for c := 0; c < yCols; c++ { combinedData[(n+r)*yCols+c] = yMat.At(r, c) @@ -500,13 +472,11 @@ func GetEnergyStatistics(x, y interface{}) (*EnergyStatistics, error) { } combinedMat := mat.NewDense(combinedRows, xCols, combinedData) - // Calculate the distance matrix for the combined data distances, err := _getDistanceMatrix(combinedMat) if err != nil { return nil, err } - // Derive energy statistics from the combined distance matrix return _getEnergyStatisticsFromDistanceMatrix(distances, n, m) } @@ -525,24 +495,20 @@ func GetEnergyStatisticsAndProbabilities(x, y interface{}, permutations int) (*E n, _ := xMat.Dims() m, _ := yMat.Dims() - // Ensure x and y have the same number of variables (columns) _, xCols := xMat.Dims() _, yCols := yMat.Dims() if xCols != yCols { return nil, errors.New("distributions x and y must have the same number of variables (columns)") } - // Concatenate x and y into a single combined matrix combinedRows := n + m combinedData := make([]float64, combinedRows*xCols) - // Copy data from xMat for r := 0; r < n; r++ { for c := 0; c < xCols; c++ { combinedData[r*xCols+c] = xMat.At(r, c) } } - // Copy data from yMat (offset by n rows) for r := 0; r < m; r++ { for c := 0; c < yCols; c++ { combinedData[(n+r)*yCols+c] = yMat.At(r, c) @@ -550,7 +516,6 @@ func GetEnergyStatisticsAndProbabilities(x, y interface{}, permutations int) (*E } combinedMat := mat.NewDense(combinedRows, xCols, combinedData) - // Calculate the distance matrix for the combined data (this matrix will be shuffled) distancesBetweenAll, err := _getDistanceMatrix(combinedMat) if err != nil { return nil, err @@ -563,11 +528,9 @@ func GetEnergyStatisticsAndProbabilities(x, y interface{}, permutations int) (*E countT := 0 countH := 0 - // Initialize random number generator for shuffling src := rand.NewSource(time.Now().UnixNano()) r := rand.New(src) - // Create initial row indices (0 to lenCombined-1) rowIndices := make([]int, lenCombined) for i := 0; i < lenCombined; i++ { rowIndices[i] = i @@ -581,7 +544,6 @@ func GetEnergyStatisticsAndProbabilities(x, y interface{}, permutations int) (*E // Perform permutation test for p := 0; p < permutations; p++ { - // Shuffle the row indices r.Shuffle(len(rowIndices), func(i, j int) { rowIndices[i], rowIndices[j] = rowIndices[j], rowIndices[i] }) @@ -596,7 +558,6 @@ func GetEnergyStatisticsAndProbabilities(x, y interface{}, permutations int) (*E } } - // Calculate energy statistics for the shuffled data shuffledEnergyStatistics, err := _getEnergyStatisticsFromDistanceMatrix(shuffledDistances, n, m) if err != nil { return nil, err @@ -615,9 +576,9 @@ func GetEnergyStatisticsAndProbabilities(x, y interface{}, permutations int) (*E } // Calculate p-values - total := float64(permutations + 1) // Include the original observation in the total count + total := float64(permutations + 1) return &EnergyStatisticsWithProbabilities{ - EnergyStatistics: *energyStatistics, // Original statistics + EnergyStatistics: *energyStatistics, EPValue: float64(countE) / total, TPValue: float64(countT) / total, HPValue: float64(countH) / total, @@ -663,8 +624,6 @@ func TestEnergyStatistics() { // --- Test Case 2: y is the same as x (expected h around 0) --- // Set y to be the same as x - // In Go, assigning a pointer means both variables point to the same underlying data. - // This correctly mimics `y = x` in Python where `y` becomes a reference to the same array. y = x fmt.Println("--- Expected h around 0 (y is the same as x) ---") From f1aa06a208e1c834a7408a7a340fff5cc3fff200 Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Mon, 14 Jul 2025 09:59:21 -0400 Subject: [PATCH 17/23] add version ID --- .evergreen/config.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 2855d4b2ad..e1ed36cfff 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -292,6 +292,7 @@ functions: binary: bash env: COMMIT: "${github_commit}" + VERSION_ID: ${version_id} include_expansions_in_env: [perf_uri_private_endpoint] args: [*task-runner, perf-pr-comment] From a97fc644659e100390d64a27f4d9630f20d7d10c Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Mon, 14 Jul 2025 10:09:48 -0400 Subject: [PATCH 18/23] update pathname --- etc/perf-pr-comment.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etc/perf-pr-comment.sh b/etc/perf-pr-comment.sh index aba08743aa..7d6cb6a775 100755 --- a/etc/perf-pr-comment.sh +++ b/etc/perf-pr-comment.sh @@ -4,4 +4,4 @@ set -eux -go run ./internal/cmd/perfnotif/main.go +go run ./internal/cmd/perfcomp/main.go From 6dcfd7bd5b0de9d493393e422c0dfbc4808ea5e8 Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Mon, 14 Jul 2025 10:16:38 -0400 Subject: [PATCH 19/23] update compiled files --- etc/perf-pr-comment.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etc/perf-pr-comment.sh b/etc/perf-pr-comment.sh index 7d6cb6a775..eb41326322 100755 --- a/etc/perf-pr-comment.sh +++ b/etc/perf-pr-comment.sh @@ -4,4 +4,4 @@ set -eux -go run ./internal/cmd/perfcomp/main.go +go run ./internal/cmd/perfcomp/main.go ./internal/cmd/perfcomp/energystatistics.go From 65e3d919f278086972631f72bd5429791193ffbb Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Mon, 14 Jul 2025 13:11:05 -0400 Subject: [PATCH 20/23] needs testing, compare main to patch --- internal/cmd/perfcomp/main.go | 107 ++++++++++++++++++++++++++++++---- 1 file changed, 96 insertions(+), 11 deletions(-) diff --git a/internal/cmd/perfcomp/main.go b/internal/cmd/perfcomp/main.go index 6d496a7a4e..4d0d528929 100644 --- a/internal/cmd/perfcomp/main.go +++ b/internal/cmd/perfcomp/main.go @@ -12,6 +12,7 @@ import ( "fmt" "log" "os" + "sort" "strings" "time" @@ -36,8 +37,8 @@ type RawData struct { Reason interface{} `bson:"reason"` User interface{} `bson:"user"` } - TestName string `bson:"test_name"` - Args []interface{} `bson:"args"` + TestName string `bson:"test_name"` + Args map[string]interface{} `bson:"args"` } CreatedAt interface{} `bson:"created_at"` CompletedAt interface{} `bson:"completed_at"` @@ -75,16 +76,27 @@ func main() { if version == "" { log.Panic("could not retrieve version") } - rawData, err3 := findRawData(version, coll) + patchRawData, err3 := findRawData(version, coll) if err3 != nil { log.Panicf("Error getting raw data: %v", err3) } - commits, err := parseMainelineCommits(rawData) - if err != nil { - log.Panicf("Error parsing commits: %v", err) + mainlineCommits, err4 := parseMainelineCommits(patchRawData) + if err4 != nil { + log.Panicf("Error parsing commits: %v", err4) + } + fmt.Println(mainlineCommits) + + mainlineVersion := "mongo_go_driver_" + mainlineCommits[0] + mainlineRawData, err5 := findRawData(mainlineVersion, coll) + if err5 != nil { + log.Panicf("Could not retrieve mainline raw data") + } + energyStats, err6 := getEnergyStatsFromRaw(patchRawData, mainlineRawData) + if err6 != nil { + log.Panicf("Could not process energy stats: %v", err6) } - fmt.Println(commits) + fmt.Printf("H-score: %.4f (p-value: %.4f)\n", energyStats.H, energyStats.HPValue) err0 := client.Disconnect(context.Background()) if err0 != nil { @@ -133,13 +145,86 @@ func findRawData(version string, coll *mongo.Collection) ([]RawData, error) { func parseMainelineCommits(rawData []RawData) ([]string, error) { commits := make([]string, 0, len(rawData)) - for _, rd := range rawData { + for i, rd := range rawData { taskID := rd.Info.TaskID - pieces := strings.Split(taskID, "_") // Format: mongo_go_driver_perf_perf__ - if len(pieces) < 6 { + pieces := strings.Split(taskID, "_") // Format: mongo_go_driver_perf_perf_patch___ + for j, p := range pieces { + if p == "patch" { + if len(pieces) < j+2 { + return nil, errors.New("task ID doesn't hold commit SHA") + } + commits = append(commits, pieces[j+1]) + break + } + } + if len(commits) < i+1 { // didn't find SHA in task_ID return nil, errors.New("task ID doesn't hold commit SHA") } - commits = append(commits, pieces[5]) } return commits, nil } + +func getEnergyStatsFromRaw(xRaw []RawData, yRaw []RawData) (*EnergyStatisticsWithProbabilities, error) { + permutations := 1000 + var x [][]float64 + var y [][]float64 + + // process xRaw and yRaw + for i := range xRaw { + sort.Slice(xRaw[i].Rollups.Stats, func(i, j int) bool { + return xRaw[i].Rollups.Stats[i].Name < xRaw[i].Rollups.Stats[j].Name + }) + sort.Slice(yRaw[i].Rollups.Stats, func(i, j int) bool { + return yRaw[i].Rollups.Stats[i].Name < yRaw[i].Rollups.Stats[j].Name + }) + + var valsX []float64 + for _, stat := range xRaw[i].Rollups.Stats { + valsX = append(valsX, stat.Val) + } + x = append(x, valsX) + + var valsY []float64 + for _, stat := range yRaw[i].Rollups.Stats { + valsY = append(valsY, stat.Val) + } + y = append(y, valsY) + } + + // pad rows with zeros to enforce consistent lengths + maxLength := 0 + for _, row := range x { + if len(row) > maxLength { + maxLength = len(row) + } + } + for _, row := range y { + if len(row) > maxLength { + maxLength = len(row) + } + } + + for i := range x { + if len(x[i]) < maxLength { + padding := make([]float64, maxLength-len(x[i])) + x[i] = append(x[i], padding...) + } + } + for i := range y { + if len(y[i]) < maxLength { + padding := make([]float64, maxLength-len(y[i])) + y[i] = append(y[i], padding...) + } + } + + if len(x) != len(y) { + return nil, errors.New("x and y must be the same length") + } + + energyStats, err := GetEnergyStatisticsAndProbabilities(x, y, permutations) + if err != nil { + return nil, err + } + + return energyStats, nil +} From acc88add91ac9ef79cc028b33ee6a21a4211a67d Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Mon, 14 Jul 2025 13:22:27 -0400 Subject: [PATCH 21/23] testing --- internal/cmd/benchmark/benchmark_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/cmd/benchmark/benchmark_test.go b/internal/cmd/benchmark/benchmark_test.go index 69e8d12d2d..710c9aac72 100644 --- a/internal/cmd/benchmark/benchmark_test.go +++ b/internal/cmd/benchmark/benchmark_test.go @@ -240,6 +240,7 @@ func benchmarkBSONDecoding(b *testing.B, canonicalOnly bool, source string) { for i := 0; i < b.N; i++ { recordMetrics(b, metrics, func(b *testing.B) { + time.Sleep(100 * time.Millisecond) var out bson.D err := bson.Unmarshal(raw, &out) From cc3cfa81e98c5b73914a372473437df927e4df27 Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Mon, 14 Jul 2025 17:07:12 -0400 Subject: [PATCH 22/23] comparison --- internal/cmd/benchmark/benchmark_test.go | 2 +- internal/cmd/perfcomp/main.go | 98 ++++++++++++------------ 2 files changed, 49 insertions(+), 51 deletions(-) diff --git a/internal/cmd/benchmark/benchmark_test.go b/internal/cmd/benchmark/benchmark_test.go index 710c9aac72..4392a48a8d 100644 --- a/internal/cmd/benchmark/benchmark_test.go +++ b/internal/cmd/benchmark/benchmark_test.go @@ -240,7 +240,7 @@ func benchmarkBSONDecoding(b *testing.B, canonicalOnly bool, source string) { for i := 0; i < b.N; i++ { recordMetrics(b, metrics, func(b *testing.B) { - time.Sleep(100 * time.Millisecond) + time.Sleep(500 * time.Millisecond) var out bson.D err := bson.Unmarshal(raw, &out) diff --git a/internal/cmd/perfcomp/main.go b/internal/cmd/perfcomp/main.go index 4d0d528929..fcee9152c9 100644 --- a/internal/cmd/perfcomp/main.go +++ b/internal/cmd/perfcomp/main.go @@ -85,18 +85,22 @@ func main() { if err4 != nil { log.Panicf("Error parsing commits: %v", err4) } - fmt.Println(mainlineCommits) mainlineVersion := "mongo_go_driver_" + mainlineCommits[0] mainlineRawData, err5 := findRawData(mainlineVersion, coll) if err5 != nil { log.Panicf("Could not retrieve mainline raw data") } - energyStats, err6 := getEnergyStatsFromRaw(patchRawData, mainlineRawData) + + if len(mainlineRawData) != len(patchRawData) { + log.Panicf("Path and mainline data length do not match.") + } + + changePoints, err6 := getEnergyStatsForAllTests(patchRawData, mainlineRawData) if err6 != nil { - log.Panicf("Could not process energy stats: %v", err6) + log.Panicf("Could not get energy stats: %v", err6) } - fmt.Printf("H-score: %.4f (p-value: %.4f)\n", energyStats.H, energyStats.HPValue) + fmt.Printf("Significant change points length %d", len(changePoints)) err0 := client.Disconnect(context.Background()) if err0 != nil { @@ -164,57 +168,24 @@ func parseMainelineCommits(rawData []RawData) ([]string, error) { return commits, nil } -func getEnergyStatsFromRaw(xRaw []RawData, yRaw []RawData) (*EnergyStatisticsWithProbabilities, error) { +func getEnergyStatsForSingleTest(xRaw RawData, yRaw RawData) (*EnergyStatisticsWithProbabilities, error) { permutations := 1000 - var x [][]float64 - var y [][]float64 - - // process xRaw and yRaw - for i := range xRaw { - sort.Slice(xRaw[i].Rollups.Stats, func(i, j int) bool { - return xRaw[i].Rollups.Stats[i].Name < xRaw[i].Rollups.Stats[j].Name - }) - sort.Slice(yRaw[i].Rollups.Stats, func(i, j int) bool { - return yRaw[i].Rollups.Stats[i].Name < yRaw[i].Rollups.Stats[j].Name - }) - - var valsX []float64 - for _, stat := range xRaw[i].Rollups.Stats { - valsX = append(valsX, stat.Val) - } - x = append(x, valsX) + var x []float64 + var y []float64 - var valsY []float64 - for _, stat := range yRaw[i].Rollups.Stats { - valsY = append(valsY, stat.Val) - } - y = append(y, valsY) - } + sort.Slice(xRaw.Rollups.Stats, func(i, j int) bool { + return xRaw.Rollups.Stats[i].Name < xRaw.Rollups.Stats[j].Name + }) + sort.Slice(yRaw.Rollups.Stats, func(i, j int) bool { + return yRaw.Rollups.Stats[i].Name < yRaw.Rollups.Stats[j].Name + }) - // pad rows with zeros to enforce consistent lengths - maxLength := 0 - for _, row := range x { - if len(row) > maxLength { - maxLength = len(row) - } - } - for _, row := range y { - if len(row) > maxLength { - maxLength = len(row) - } + for _, stat := range xRaw.Rollups.Stats { + x = append(x, stat.Val) } - for i := range x { - if len(x[i]) < maxLength { - padding := make([]float64, maxLength-len(x[i])) - x[i] = append(x[i], padding...) - } - } - for i := range y { - if len(y[i]) < maxLength { - padding := make([]float64, maxLength-len(y[i])) - y[i] = append(y[i], padding...) - } + for _, stat := range yRaw.Rollups.Stats { + y = append(y, stat.Val) } if len(x) != len(y) { @@ -228,3 +199,30 @@ func getEnergyStatsFromRaw(xRaw []RawData, yRaw []RawData) (*EnergyStatisticsWit return energyStats, nil } + +func getEnergyStatsForAllTests(patchRawData []RawData, mainlineRawData []RawData) (map[string]float64, error) { + + sort.Slice(patchRawData, func(i, j int) bool { + return patchRawData[i].Info.TestName < patchRawData[j].Info.TestName + }) + sort.Slice(mainlineRawData, func(i, j int) bool { + return mainlineRawData[i].Info.TestName < mainlineRawData[j].Info.TestName + }) + + var changePoints = make(map[string]float64) + for i := range patchRawData { + var testname string + if testname := patchRawData[i].Info.TestName; testname != mainlineRawData[i].Info.TestName { + return nil, errors.New("tests do not match") + } + energyStats, err := getEnergyStatsForSingleTest(patchRawData[i], mainlineRawData[i]) + if err != nil { + return nil, err + } + if energyStats.H >= 0.6 { + changePoints[testname] = energyStats.H + } + fmt.Printf("%s | H-score: %.4f (p-value: %.4f)\n", patchRawData[i].Info.TestName, energyStats.H, energyStats.HPValue) + } + return changePoints, nil +} From da1a70f75601f655913930b66a566e19b60f8780 Mon Sep 17 00:00:00 2001 From: Selena Zhou Date: Mon, 14 Jul 2025 17:59:54 -0400 Subject: [PATCH 23/23] add testing for energy stats --- internal/cmd/benchmark/benchmark_test.go | 2 +- internal/cmd/perfcomp/energystatistics.go | 60 +------------------ .../cmd/perfcomp/energystatistics_test.go | 36 +++++++++++ 3 files changed, 40 insertions(+), 58 deletions(-) create mode 100644 internal/cmd/perfcomp/energystatistics_test.go diff --git a/internal/cmd/benchmark/benchmark_test.go b/internal/cmd/benchmark/benchmark_test.go index 4392a48a8d..710c9aac72 100644 --- a/internal/cmd/benchmark/benchmark_test.go +++ b/internal/cmd/benchmark/benchmark_test.go @@ -240,7 +240,7 @@ func benchmarkBSONDecoding(b *testing.B, canonicalOnly bool, source string) { for i := 0; i < b.N; i++ { recordMetrics(b, metrics, func(b *testing.B) { - time.Sleep(500 * time.Millisecond) + time.Sleep(100 * time.Millisecond) var out bson.D err := bson.Unmarshal(raw, &out) diff --git a/internal/cmd/perfcomp/energystatistics.go b/internal/cmd/perfcomp/energystatistics.go index a1bfd61e64..64237f7bf1 100644 --- a/internal/cmd/perfcomp/energystatistics.go +++ b/internal/cmd/perfcomp/energystatistics.go @@ -9,8 +9,6 @@ package main import ( "errors" "fmt" - "log" - "math" "math/rand" "sort" "time" @@ -32,9 +30,9 @@ type EnergyStatistics struct { // Class for representing Energy Statistics and permutation test result. type EnergyStatisticsWithProbabilities struct { EnergyStatistics - EPValue float64 - TPValue float64 - HPValue float64 + EPValue float64 + TPValue float64 + HPValue float64 } // _convert converts a series into a 2-dimensional Gonum matrix of float64. @@ -584,55 +582,3 @@ func GetEnergyStatisticsAndProbabilities(x, y interface{}, permutations int) (*E HPValue: float64(countH) / total, }, nil } - -func TestEnergyStatistics() { - // Initialize random number generator for reproducibility (optional, but good for tests) - seed := time.Now().UnixNano() - r := rand.New(rand.NewSource(seed)) - - // --- Test Case 1: x and y are different distributions --- - - // Generate x: 100x5 matrix with random values between 0 and 1 (mimics np.random.rand) - xData := make([]float64, 100*5) - for i := range xData { - xData[i] = r.Float64() - } - x := mat.NewDense(100, 5, xData) - - // Generate y: 100x5 matrix with normal distribution (mean 1000, std dev 1) (mimics np.random.normal) - yData := make([]float64, 100*5) - for i := range yData { - // Box-Muller transform to get normally distributed numbers - u1, u2 := r.Float64(), r.Float64() - z0 := math.Sqrt(-2.0*math.Log(u1)) * math.Cos(2*math.Pi*u2) - yData[i] = 1000 + 1*z0 // mean + std_dev * z0 - } - y := mat.NewDense(100, 5, yData) - - permutations := 1000 - - fmt.Println("--- Expected h around 1 (x and y are different) ---") - energyStats1, err := GetEnergyStatisticsAndProbabilities(x, y, permutations) - if err != nil { - log.Fatalf("Error calculating energy statistics for different distributions: %v", err) - } - - fmt.Printf("E-statistic: %.4f (p-value: %.4f)\n", energyStats1.E, energyStats1.EPValue) - fmt.Printf("Test statistic: %.4f (p-value: %.4f)\n", energyStats1.T, energyStats1.TPValue) - fmt.Printf("E-coefficient of inhomogeneity (h): %.4f (p-value: %.4f)\n\n", energyStats1.H, energyStats1.HPValue) - - // --- Test Case 2: y is the same as x (expected h around 0) --- - - // Set y to be the same as x - y = x - - fmt.Println("--- Expected h around 0 (y is the same as x) ---") - energyStats2, err := GetEnergyStatisticsAndProbabilities(x, y, permutations) - if err != nil { - log.Fatalf("Error calculating energy statistics for identical distributions: %v", err) - } - - fmt.Printf("E-statistic: %.4f (p-value: %.4f)\n", energyStats2.E, energyStats2.EPValue) - fmt.Printf("Test statistic: %.4f (p-value: %.4f)\n", energyStats2.T, energyStats2.TPValue) - fmt.Printf("E-coefficient of inhomogeneity (h): %.4f (p-value: %.4f)\n", energyStats2.H, energyStats2.HPValue) -} diff --git a/internal/cmd/perfcomp/energystatistics_test.go b/internal/cmd/perfcomp/energystatistics_test.go new file mode 100644 index 0000000000..f0ed05d269 --- /dev/null +++ b/internal/cmd/perfcomp/energystatistics_test.go @@ -0,0 +1,36 @@ +// Copyright (C) MongoDB, Inc. 2025-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +package main + +import "testing" + +func TestEnergyStatistics(t *testing.T) { + v1 := []float64{1.000812854, + 0, + 29128, + 635, + 1271, + 58256, + 500406427, + 1.9981990072491742, + 1.998583360145495, + 1.9983911836973345} + + v2 := []float64{1.194869853, + 17334, + 24551, + 629, + 10904148, + 425573368, + 68932, + 2136.1724489294575, + 16173.901792068316, + 15622.55897516013} + + energyStats, _ := GetEnergyStatisticsAndProbabilities(v1, v2, 1000) + t.Errorf("Expected h-score: %v, but got: %v", 1, energyStats.H) +}