diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 05832906d1..e1ed36cfff 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -285,6 +285,17 @@ functions: echo "Response Body: $response_body" echo "HTTP Status: $http_status" + send-perf-pr-comment: + - command: subprocess.exec + type: test + params: + binary: bash + env: + COMMIT: "${github_commit}" + VERSION_ID: ${version_id} + include_expansions_in_env: [perf_uri_private_endpoint] + args: [*task-runner, perf-pr-comment] + run-enterprise-auth-tests: - command: ec2.assume_role params: @@ -676,6 +687,7 @@ tasks: binary: bash args: [*task-runner, driver-benchmark] - func: send-perf-data + - func: send-perf-pr-comment - name: test-standalone-noauth-nossl tags: ["test", "standalone"] diff --git a/Taskfile.yml b/Taskfile.yml index 3473cb4981..8b2c7df0e3 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -70,6 +70,8 @@ tasks: pr-task: bash etc/pr-task.sh + perf-pr-comment: bash etc/perf-pr-comment.sh + # Lint with various GOOS and GOARCH tasks to catch static analysis failures that may only affect # specific operating systems or architectures. For example, staticcheck will only check for 64-bit # alignment of atomically accessed variables on 32-bit architectures (see diff --git a/etc/perf-pr-comment.sh b/etc/perf-pr-comment.sh new file mode 100755 index 0000000000..eb41326322 --- /dev/null +++ b/etc/perf-pr-comment.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +# perf-pr-comment +# Generates a report of Go Driver perf changes for the current branch. + +set -eux + +go run ./internal/cmd/perfcomp/main.go ./internal/cmd/perfcomp/energystatistics.go diff --git a/go.work b/go.work index 23ad2ff8a7..9f345c684c 100644 --- a/go.work +++ b/go.work @@ -7,6 +7,7 @@ use ( ./examples/_logger/zerolog ./internal/cmd/benchmark ./internal/cmd/compilecheck + ./internal/cmd/perfcomp ./internal/cmd/faas/awslambda/mongodb ./internal/test/goleak ) diff --git a/internal/cmd/benchmark/benchmark_test.go b/internal/cmd/benchmark/benchmark_test.go index 69e8d12d2d..710c9aac72 100644 --- a/internal/cmd/benchmark/benchmark_test.go +++ b/internal/cmd/benchmark/benchmark_test.go @@ -240,6 +240,7 @@ func benchmarkBSONDecoding(b *testing.B, canonicalOnly bool, source string) { for i := 0; i < b.N; i++ { recordMetrics(b, metrics, func(b *testing.B) { + time.Sleep(100 * time.Millisecond) var out bson.D err := bson.Unmarshal(raw, &out) diff --git a/internal/cmd/perfcomp/energystatistics.go b/internal/cmd/perfcomp/energystatistics.go new file mode 100644 index 0000000000..64237f7bf1 --- /dev/null +++ b/internal/cmd/perfcomp/energystatistics.go @@ -0,0 +1,584 @@ +// Copyright (C) MongoDB, Inc. 2025-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +package main + +import ( + "errors" + "fmt" + "math/rand" + "sort" + "time" + + "gonum.org/v1/gonum/floats" + "gonum.org/v1/gonum/mat" +) + +// Class for representing Energy Statistics. +// E - E-statistic +// T - Test statistic +// H - E-coefficient of inhomogeneity +type EnergyStatistics struct { + E float64 + T float64 + H float64 +} + +// Class for representing Energy Statistics and permutation test result. +type EnergyStatisticsWithProbabilities struct { + EnergyStatistics + EPValue float64 + TPValue float64 + HPValue float64 +} + +// _convert converts a series into a 2-dimensional Gonum matrix of float64. +// It accepts []float64 or [][]float64. If a []float64 is provided, it is +// converted into a column vector (N x 1 matrix). +func _convert(series interface{}) (*mat.Dense, error) { + var data []float64 + var rows, cols int + + switch s := series.(type) { + case []float64: + data = s + rows = len(s) + cols = 1 + case [][]float64: + if len(s) == 0 { + return mat.NewDense(0, 0, nil), nil + } + rows = len(s) + cols = len(s[0]) + for _, row := range s { + if len(row) != cols { + return nil, errors.New("input [][]float64 has inconsistent row lengths") + } + data = append(data, row...) + } + case *mat.Dense: + // If it's already a mat.Dense, handle potential 1D row vector to column vector conversion + r, c := s.Dims() + if r == 1 && c > 1 { // If it's a row vector (1 x N), transpose to column vector (N x 1) + transposed := mat.NewDense(c, 1, nil) + transposed.Copy(s.T()) + return transposed, nil + } + return s, nil + default: + return nil, errors.New("series is not the expected type ([]float64, [][]float64, or *mat.Dense)") + } + + if len(data) == 0 { + return mat.NewDense(0, 0, nil), nil + } + + return mat.NewDense(rows, cols, data), nil +} + +// _getValidInput returns a valid form of input as a Gonum matrix. +func _getValidInput(series interface{}) (*mat.Dense, error) { + m, err := _convert(series) + if err != nil { + return nil, err + } + r, _ := m.Dims() + if r == 0 { + return nil, errors.New("distribution cannot be empty") + } + return m, nil +} + +// _getDistanceMatrix returns the matrix of pairwise Euclidean distances within the series. +// For an m x n series, it returns an m x m matrix where (i,j)th value is the Euclidean +// distance between the i-th and j-th observations (rows) of the series. +func _getDistanceMatrix(series *mat.Dense) (*mat.Dense, error) { + r, c := series.Dims() + if r == 0 { + return mat.NewDense(0, 0, nil), nil + } + + distMatrix := mat.NewDense(r, r, nil) + + // Calculate Euclidean distance between each pair of rows + for i := 0; i < r; i++ { + vecI := mat.NewVecDense(c, nil) + for k := 0; k < c; k++ { + vecI.SetVec(k, series.At(i, k)) + } + + for j := i; j < r; j++ { + vecJ := mat.NewVecDense(c, nil) + for k := 0; k < c; k++ { + vecJ.SetVec(k, series.At(j, k)) + } + + // Calculate Euclidean distance: ||vecI - vecJ||_2 + var diff mat.VecDense + diff.SubVec(vecI, vecJ) + dist := floats.Norm(diff.RawVector().Data, 2) // Euclidean norm (L2 norm) + + distMatrix.Set(i, j, dist) + distMatrix.Set(j, i, dist) + } + } + return distMatrix, nil +} + +// _calculateStats calculates the E-statistic, Test statistic, and E-coefficient of inhomogeneity. +// It takes the sums of distances within distributions X (x), within Y (y), and between X and Y (xy), +// along with their respective lengths (n, m). +func _calculateStats(x, y, xy float64, n, m int) (e, t, h float64) { + xyAvg := 0.0 + if n > 0 && m > 0 { + xyAvg = xy / float64(n*m) + } + + xAvg := 0.0 + if n > 0 { + xAvg = x / float64(n*n) + } + + yAvg := 0.0 + if m > 0 { + yAvg = y / float64(m*m) + } + + // E-statistic + e = 2*xyAvg - xAvg - yAvg + + // Test statistic + t = 0.0 + if n+m > 0 { + t = (float64(n*m) / float64(n+m)) * e + } + + // E-coefficient of inhomogeneity + h = 0.0 + if xyAvg > 0 { + h = e / (2 * xyAvg) + } + return e, t, h +} + +// _calculateTStats finds t-statistic values given a distance matrix. +// It iteratively calculates the test statistic for all possible partition points (tau). +func _calculateTStats(distanceMatrix *mat.Dense) ([]float64, error) { + N, _ := distanceMatrix.Dims() + if N == 0 { + return []float64{}, nil + } + + statistics := make([]float64, N) + + initialYSum := 0.0 + for r := 0; r < N; r++ { + for c := r; c < N; c++ { + initialYSum += distanceMatrix.At(r, c) + } + } + + xy := 0.0 + x := 0.0 + y := initialYSum + + for tau := 0; tau < N; tau++ { + _, t, _ := _calculateStats(x, y, xy, tau, N-tau) + statistics[tau] = t + + // columnDelta: sum |Xi - X_tau| for i < tau (distances from elements in X to the new element at tau) + columnDelta := 0.0 + for rIdx := 0; rIdx < tau; rIdx++ { + columnDelta += distanceMatrix.At(rIdx, tau) + } + + // rowDelta: sum |X_tau - Yj| for tau <= j (distances from the new element at tau to elements in Y) + rowDelta := 0.0 + for cIdx := tau; cIdx < N; cIdx++ { + rowDelta += distanceMatrix.At(tau, cIdx) + } + + xy = xy - columnDelta + rowDelta // Distances between X and Y + x = x + columnDelta // Distances within X + y = y - rowDelta // Distances within Y + } + + return statistics, nil +} + +// _getNextSignificantChangePoint calculates the next significant change point using a permutation test. +// It searches for change points within windows defined by existing change points. +func _getNextSignificantChangePoint( + distances *mat.Dense, + changePoints []int, + memo map[[2]int]struct { + idx int + val float64 + }, + pvalue float64, + permutations int, +) (int, error) { + N, _ := distances.Dims() + if N == 0 { + return -1, nil + } + + windows := []int{0} + windows = append(windows, changePoints...) + windows = append(windows, N) + sort.Ints(windows) + + type candidate struct { + idx int + val float64 + } + var candidates []candidate + + for i := 0; i < len(windows)-1; i++ { + a, b := windows[i], windows[i+1] + boundsKey := [2]int{a, b} + + if val, ok := memo[boundsKey]; ok { + candidates = append(candidates, candidate{idx: val.idx, val: val.val}) + } else { + windowDistances := distances.Slice(a, b, a, b).(*mat.Dense) + stats, err := _calculateTStats(windowDistances) + if err != nil { + return -1, fmt.Errorf("error calculating t-stats for window [%d:%d]: %w", a, b, err) + } + + if len(stats) == 0 { + continue + } + + // Find the index of the maximum T-statistic within the window + idx := 0 + maxStat := stats[0] + for k, s := range stats { + if s > maxStat { + maxStat = s + idx = k + } + } + newCandidate := candidate{idx: idx + a, val: maxStat} + candidates = append(candidates, newCandidate) + memo[boundsKey] = struct { + idx int + val float64 + }{idx: newCandidate.idx, val: newCandidate.val} + } + } + + if len(candidates) == 0 { + return -1, nil + } + + // Find the overall best candidate among all windows + bestCandidate := candidates[0] + for _, c := range candidates { + if c.val > bestCandidate.val { + bestCandidate = c + } + } + + betterNum := 0 + src := rand.NewSource(time.Now().UnixNano()) + r := rand.New(src) + + for p := 0; p < permutations; p++ { + permuteT := make([]float64, 0, len(windows)-1) + for i := 0; i < len(windows)-1; i++ { + a, b := windows[i], windows[i+1] + windowSize := b - a + if windowSize == 0 { + continue + } + + rowIndices := make([]int, windowSize) + for k := 0; k < windowSize; k++ { + rowIndices[k] = k + a + } + r.Shuffle(len(rowIndices), func(i, j int) { + rowIndices[i], rowIndices[j] = rowIndices[j], rowIndices[i] + }) + + shuffledDistances := mat.NewDense(windowSize, windowSize, nil) + for row := 0; row < windowSize; row++ { + for col := 0; col < windowSize; col++ { + shuffledDistances.Set(row, col, distances.At(rowIndices[row], rowIndices[col])) + } + } + + stats, err := _calculateTStats(shuffledDistances) + if err != nil { + return -1, fmt.Errorf("error calculating t-stats for shuffled window [%d:%d]: %w", a, b, err) + } + + if len(stats) == 0 { + continue + } + + maxPermuteStat := stats[0] + for _, s := range stats { + if s > maxPermuteStat { + maxPermuteStat = s + } + } + permuteT = append(permuteT, maxPermuteStat) + } + + if len(permuteT) == 0 { + continue + } + + bestPermute := permuteT[0] + for _, val := range permuteT { + if val > bestPermute { + bestPermute = val + } + } + + if bestPermute >= bestCandidate.val { + betterNum++ + } + } + + probability := float64(betterNum) / float64(permutations+1) + if probability <= pvalue { + return bestCandidate.idx, nil + } + return -1, nil +} + +// _getEnergyStatisticsFromDistanceMatrix returns energy statistics from a combined distance matrix. +// It partitions the combined distance matrix into within-X, within-Y, and between-XY distances +// based on the provided lengths n (for X) and m (for Y). +func _getEnergyStatisticsFromDistanceMatrix(distanceMatrix *mat.Dense, n, m int) (*EnergyStatistics, error) { + lenDistanceMatrix, _ := distanceMatrix.Dims() + + if lenDistanceMatrix == 0 { + return &EnergyStatistics{E: 0, T: 0, H: 0}, nil + } + + // Sum distances within X (top-left sub-matrix) + xSum := 0.0 + if n > 0 { + for r := 0; r < n; r++ { + for c := 0; c < n; c++ { + xSum += distanceMatrix.At(r, c) + } + } + } + + // Sum distances within Y (bottom-right sub-matrix) + ySum := 0.0 + if m > 0 { + for r := n; r < lenDistanceMatrix; r++ { + for c := n; c < lenDistanceMatrix; c++ { + ySum += distanceMatrix.At(r, c) + } + } + } + + // Sum distances between X and Y (bottom-left sub-matrix, which is equivalent to top-right due to symmetry) + xySum := 0.0 + if n > 0 && m > 0 { + for r := n; r < lenDistanceMatrix; r++ { + for c := 0; c < n; c++ { + xySum += distanceMatrix.At(r, c) + } + } + } + + e, t, h := _calculateStats(xSum, ySum, xySum, n, m) + return &EnergyStatistics{E: e, T: t, H: h}, nil +} + +// EDivisive calculates the change points in the series using the e-divisive algorithm. +// It iteratively finds significant change points until no more are found based on the p-value. +func EDivisive(series interface{}, pvalue float64, permutations int) ([]int, error) { + seriesMat, err := _getValidInput(series) + if err != nil { + return nil, err + } + + distances, err := _getDistanceMatrix(seriesMat) + if err != nil { + return nil, err + } + + changePoints := []int{} + memo := make(map[[2]int]struct { + idx int + val float64 + }) // Cache for _getNextSignificantChangePoint + + for { + significantChangePoint, err := _getNextSignificantChangePoint( + distances, changePoints, memo, pvalue, permutations, + ) + if err != nil { + return nil, err + } + if significantChangePoint == -1 { + break + } + changePoints = append(changePoints, significantChangePoint) + } + + sort.Ints(changePoints) + return changePoints, nil +} + +// GetEnergyStatistics calculates energy statistics of distributions x and y. +// It combines x and y, calculates the full distance matrix, and then derives +// the E-statistic, Test statistic, and E-coefficient of inhomogeneity. +func GetEnergyStatistics(x, y interface{}) (*EnergyStatistics, error) { + xMat, err := _getValidInput(x) + if err != nil { + return nil, err + } + yMat, err := _getValidInput(y) + if err != nil { + return nil, err + } + + n, _ := xMat.Dims() + m, _ := yMat.Dims() + + _, xCols := xMat.Dims() + _, yCols := yMat.Dims() + if xCols != yCols { + return nil, errors.New("distributions x and y must have the same number of variables (columns)") + } + + combinedRows := n + m + combinedData := make([]float64, combinedRows*xCols) + + for r := 0; r < n; r++ { + for c := 0; c < xCols; c++ { + combinedData[r*xCols+c] = xMat.At(r, c) + } + } + for r := 0; r < m; r++ { + for c := 0; c < yCols; c++ { + combinedData[(n+r)*yCols+c] = yMat.At(r, c) + } + } + combinedMat := mat.NewDense(combinedRows, xCols, combinedData) + + distances, err := _getDistanceMatrix(combinedMat) + if err != nil { + return nil, err + } + + return _getEnergyStatisticsFromDistanceMatrix(distances, n, m) +} + +// GetEnergyStatisticsAndProbabilities returns energy statistics and the corresponding +// permutation test results (p-values) for distributions x and y. +func GetEnergyStatisticsAndProbabilities(x, y interface{}, permutations int) (*EnergyStatisticsWithProbabilities, error) { + xMat, err := _getValidInput(x) + if err != nil { + return nil, err + } + yMat, err := _getValidInput(y) + if err != nil { + return nil, err + } + + n, _ := xMat.Dims() + m, _ := yMat.Dims() + + _, xCols := xMat.Dims() + _, yCols := yMat.Dims() + if xCols != yCols { + return nil, errors.New("distributions x and y must have the same number of variables (columns)") + } + + combinedRows := n + m + combinedData := make([]float64, combinedRows*xCols) + + for r := 0; r < n; r++ { + for c := 0; c < xCols; c++ { + combinedData[r*xCols+c] = xMat.At(r, c) + } + } + for r := 0; r < m; r++ { + for c := 0; c < yCols; c++ { + combinedData[(n+r)*yCols+c] = yMat.At(r, c) + } + } + combinedMat := mat.NewDense(combinedRows, xCols, combinedData) + + distancesBetweenAll, err := _getDistanceMatrix(combinedMat) + if err != nil { + return nil, err + } + + lenCombined, _ := distancesBetweenAll.Dims() + + // Counters for permutation test + countE := 0 + countT := 0 + countH := 0 + + src := rand.NewSource(time.Now().UnixNano()) + r := rand.New(src) + + rowIndices := make([]int, lenCombined) + for i := 0; i < lenCombined; i++ { + rowIndices[i] = i + } + + // Calculate initial energy statistics for the original (unshuffled) data + energyStatistics, err := _getEnergyStatisticsFromDistanceMatrix(distancesBetweenAll, n, m) + if err != nil { + return nil, err + } + + // Perform permutation test + for p := 0; p < permutations; p++ { + r.Shuffle(len(rowIndices), func(i, j int) { + rowIndices[i], rowIndices[j] = rowIndices[j], rowIndices[i] + }) + + // Create a new shuffled distance matrix by reordering rows/columns of the original + // distance matrix according to the shuffled rowIndices. This simulates shuffling + // the original combined data and then calculating distances. + shuffledDistances := mat.NewDense(lenCombined, lenCombined, nil) + for row := 0; row < lenCombined; row++ { + for col := 0; col < lenCombined; col++ { + shuffledDistances.Set(row, col, distancesBetweenAll.At(rowIndices[row], rowIndices[col])) + } + } + + shuffledEnergyStatistics, err := _getEnergyStatisticsFromDistanceMatrix(shuffledDistances, n, m) + if err != nil { + return nil, err + } + + // Compare shuffled statistics with original statistics + if shuffledEnergyStatistics.E >= energyStatistics.E { + countE++ + } + if shuffledEnergyStatistics.T >= energyStatistics.T { + countT++ + } + if shuffledEnergyStatistics.H >= energyStatistics.H { + countH++ + } + } + + // Calculate p-values + total := float64(permutations + 1) + return &EnergyStatisticsWithProbabilities{ + EnergyStatistics: *energyStatistics, + EPValue: float64(countE) / total, + TPValue: float64(countT) / total, + HPValue: float64(countH) / total, + }, nil +} diff --git a/internal/cmd/perfcomp/energystatistics_test.go b/internal/cmd/perfcomp/energystatistics_test.go new file mode 100644 index 0000000000..f0ed05d269 --- /dev/null +++ b/internal/cmd/perfcomp/energystatistics_test.go @@ -0,0 +1,36 @@ +// Copyright (C) MongoDB, Inc. 2025-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +package main + +import "testing" + +func TestEnergyStatistics(t *testing.T) { + v1 := []float64{1.000812854, + 0, + 29128, + 635, + 1271, + 58256, + 500406427, + 1.9981990072491742, + 1.998583360145495, + 1.9983911836973345} + + v2 := []float64{1.194869853, + 17334, + 24551, + 629, + 10904148, + 425573368, + 68932, + 2136.1724489294575, + 16173.901792068316, + 15622.55897516013} + + energyStats, _ := GetEnergyStatisticsAndProbabilities(v1, v2, 1000) + t.Errorf("Expected h-score: %v, but got: %v", 1, energyStats.H) +} diff --git a/internal/cmd/perfcomp/go.mod b/internal/cmd/perfcomp/go.mod new file mode 100644 index 0000000000..6446bed897 --- /dev/null +++ b/internal/cmd/perfcomp/go.mod @@ -0,0 +1,22 @@ +module go.mongodb.go/mongo-driver/v2/internal/cmd/perfcomp + +go 1.23 + +replace go.mongodb.org/mongo-driver/v2 => ../../../ + +require ( + go.mongodb.org/mongo-driver/v2 v2.2.2 + gonum.org/v1/gonum v0.16.0 +) + +require ( + github.com/golang/snappy v1.0.0 // indirect + github.com/klauspost/compress v1.16.7 // indirect + github.com/xdg-go/pbkdf2 v1.0.0 // indirect + github.com/xdg-go/scram v1.1.2 // indirect + github.com/xdg-go/stringprep v1.0.4 // indirect + github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78 // indirect + golang.org/x/crypto v0.33.0 // indirect + golang.org/x/sync v0.12.0 // indirect + golang.org/x/text v0.23.0 // indirect +) diff --git a/internal/cmd/perfcomp/go.sum b/internal/cmd/perfcomp/go.sum new file mode 100644 index 0000000000..49f669457a --- /dev/null +++ b/internal/cmd/perfcomp/go.sum @@ -0,0 +1,48 @@ +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/golang/snappy v1.0.0 h1:Oy607GVXHs7RtbggtPBnr2RmDArIsAefDwvrdWvRhGs= +github.com/golang/snappy v1.0.0/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I= +github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= +github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c= +github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= +github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY= +github.com/xdg-go/scram v1.1.2/go.mod h1:RT/sEzTbU5y00aCK8UOx6R7YryM0iF1N2MOmC3kKLN4= +github.com/xdg-go/stringprep v1.0.4 h1:XLI/Ng3O1Atzq0oBs3TWm+5ZVgkq2aqdlvP9JtoZ6c8= +github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM= +github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78 h1:ilQV1hzziu+LLM3zUTJ0trRztfwgjqKnBWNtSRkbmwM= +github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78/go.mod h1:aL8wCCfTfSfmXjznFBSZNN13rSJjlIOI1fUNAtF7rmI= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.33.0 h1:IOBPskki6Lysi0lo9qQvbxiQ+FvsCC/YWOecCHAixus= +golang.org/x/crypto v0.33.0/go.mod h1:bVdXmD7IV/4GdElGPozy6U7lWdRXA4qyRVGJV57uQ5M= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw= +golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= +golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY= +golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= +gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= diff --git a/internal/cmd/perfcomp/main.go b/internal/cmd/perfcomp/main.go new file mode 100644 index 0000000000..fcee9152c9 --- /dev/null +++ b/internal/cmd/perfcomp/main.go @@ -0,0 +1,228 @@ +// Copyright (C) MongoDB, Inc. 2025-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +package main + +import ( + "context" + "errors" + "fmt" + "log" + "os" + "sort" + "strings" + "time" + + "go.mongodb.org/mongo-driver/v2/bson" + "go.mongodb.org/mongo-driver/v2/mongo" + "go.mongodb.org/mongo-driver/v2/mongo/options" +) + +type RawData struct { + Info struct { + Project string `bson:"project"` + Version string `bson:"version"` + Variant string `bson:"variant"` + Order int64 `bson:"order"` + TaskName string `bson:"task_name"` + TaskID string `bson:"task_id"` + Execution int64 `bson:"execution"` + Mainline bool `bson:"mainline"` + OverrideInfo struct { + OverrideMainline bool `bson:"override_mainline"` + BaseOrder interface{} `bson:"base_order"` + Reason interface{} `bson:"reason"` + User interface{} `bson:"user"` + } + TestName string `bson:"test_name"` + Args map[string]interface{} `bson:"args"` + } + CreatedAt interface{} `bson:"created_at"` + CompletedAt interface{} `bson:"completed_at"` + Rollups struct { + Stats []struct { + Name string `bson:"name"` + Val float64 `bson:"val"` + Metadata interface{} `bson:"metadata"` + } + } + FailedRollupAttempts int64 `bson:"failed_rollup_attempts"` +} + +func main() { + uri := os.Getenv("perf_uri_private_endpoint") + if uri == "" { + log.Panic("perf_uri_private_endpoint env variable is not set") + } + + client, err1 := mongo.Connect(options.Client().ApplyURI(uri)) + if err1 != nil { + log.Panicf("Error connecting client: %v", err1) + } + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + err2 := client.Ping(ctx, nil) + if err2 != nil { + log.Panicf("Error pinging MongoDB Analytics: %v", err2) + } + fmt.Println("Successfully connected to MongoDB Analytics node.") + + coll := client.Database("expanded_metrics").Collection("raw_results") + version := os.Getenv("VERSION_ID") + if version == "" { + log.Panic("could not retrieve version") + } + patchRawData, err3 := findRawData(version, coll) + if err3 != nil { + log.Panicf("Error getting raw data: %v", err3) + } + + mainlineCommits, err4 := parseMainelineCommits(patchRawData) + if err4 != nil { + log.Panicf("Error parsing commits: %v", err4) + } + + mainlineVersion := "mongo_go_driver_" + mainlineCommits[0] + mainlineRawData, err5 := findRawData(mainlineVersion, coll) + if err5 != nil { + log.Panicf("Could not retrieve mainline raw data") + } + + if len(mainlineRawData) != len(patchRawData) { + log.Panicf("Path and mainline data length do not match.") + } + + changePoints, err6 := getEnergyStatsForAllTests(patchRawData, mainlineRawData) + if err6 != nil { + log.Panicf("Could not get energy stats: %v", err6) + } + fmt.Printf("Significant change points length %d", len(changePoints)) + + err0 := client.Disconnect(context.Background()) + if err0 != nil { + log.Panicf("Failed to disconnect client: %v", err0) + } + +} + +// findRawData will get all of the rawData for the given version +func findRawData(version string, coll *mongo.Collection) ([]RawData, error) { + filter := bson.D{ + {"info.project", "mongo-go-driver"}, + {"info.version", version}, + {"info.variant", "perf"}, + {"info.task_name", "perf"}, + } + + findOptions := options.Find() + + findCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + cursor, err := coll.Find(findCtx, filter, findOptions) + if err != nil { + return nil, err + } + defer cursor.Close(findCtx) + + fmt.Printf("Successfully retrieved %d docs from version %s.\n", cursor.RemainingBatchLength(), version) + + var rawData []RawData + for cursor.Next(findCtx) { + var rd RawData + if err := cursor.Decode(&rd); err != nil { + return nil, err + } + rawData = append(rawData, rd) + } + + if err := cursor.Err(); err != nil { + return nil, err + } + + return rawData, nil +} + +func parseMainelineCommits(rawData []RawData) ([]string, error) { + commits := make([]string, 0, len(rawData)) + for i, rd := range rawData { + taskID := rd.Info.TaskID + pieces := strings.Split(taskID, "_") // Format: mongo_go_driver_perf_perf_patch___ + for j, p := range pieces { + if p == "patch" { + if len(pieces) < j+2 { + return nil, errors.New("task ID doesn't hold commit SHA") + } + commits = append(commits, pieces[j+1]) + break + } + } + if len(commits) < i+1 { // didn't find SHA in task_ID + return nil, errors.New("task ID doesn't hold commit SHA") + } + } + return commits, nil +} + +func getEnergyStatsForSingleTest(xRaw RawData, yRaw RawData) (*EnergyStatisticsWithProbabilities, error) { + permutations := 1000 + var x []float64 + var y []float64 + + sort.Slice(xRaw.Rollups.Stats, func(i, j int) bool { + return xRaw.Rollups.Stats[i].Name < xRaw.Rollups.Stats[j].Name + }) + sort.Slice(yRaw.Rollups.Stats, func(i, j int) bool { + return yRaw.Rollups.Stats[i].Name < yRaw.Rollups.Stats[j].Name + }) + + for _, stat := range xRaw.Rollups.Stats { + x = append(x, stat.Val) + } + + for _, stat := range yRaw.Rollups.Stats { + y = append(y, stat.Val) + } + + if len(x) != len(y) { + return nil, errors.New("x and y must be the same length") + } + + energyStats, err := GetEnergyStatisticsAndProbabilities(x, y, permutations) + if err != nil { + return nil, err + } + + return energyStats, nil +} + +func getEnergyStatsForAllTests(patchRawData []RawData, mainlineRawData []RawData) (map[string]float64, error) { + + sort.Slice(patchRawData, func(i, j int) bool { + return patchRawData[i].Info.TestName < patchRawData[j].Info.TestName + }) + sort.Slice(mainlineRawData, func(i, j int) bool { + return mainlineRawData[i].Info.TestName < mainlineRawData[j].Info.TestName + }) + + var changePoints = make(map[string]float64) + for i := range patchRawData { + var testname string + if testname := patchRawData[i].Info.TestName; testname != mainlineRawData[i].Info.TestName { + return nil, errors.New("tests do not match") + } + energyStats, err := getEnergyStatsForSingleTest(patchRawData[i], mainlineRawData[i]) + if err != nil { + return nil, err + } + if energyStats.H >= 0.6 { + changePoints[testname] = energyStats.H + } + fmt.Printf("%s | H-score: %.4f (p-value: %.4f)\n", patchRawData[i].Info.TestName, energyStats.H, energyStats.HPValue) + } + return changePoints, nil +}