
Commit da0bb13

benchspy enhancements (#1545)
* add threshold validation, nil report validation, better readability for infinite metric change
* use single error instead of a map[string][]error when comparing Direct queries
1 parent 456673e · commit da0bb13

3 files changed: +392 −38 lines changed

book/src/libs/wasp/benchspy/real_world.md

Lines changed: 4 additions & 2 deletions
@@ -87,15 +87,17 @@ This function fetches the current report (for version passed as environment vari
 Let’s assume you want to ensure that none of the performance metrics degrade by more than **1%** between releases (and that error rate has not changed at all). Here's how you can write assertions using a convenient function for the `Direct` query executor:
 
 ```go
-hasErrors, errors := benchspy.CompareDirectWithThresholds(
+hasFailed, err := benchspy.CompareDirectWithThresholds(
     1.0, // Max 1% worse median latency
     1.0, // Max 1% worse p95 latency
     1.0, // Max 1% worse maximum latency
     0.0, // No increase in error rate
     currentReport, previousReport)
-require.False(t, hasErrors, fmt.Sprintf("errors found: %v", errors))
+require.False(t, hasFailed, fmt.Sprintf("issues found: %v", err))
 ```
 
+The error returned by this function is a concatenation of all threshold violations found for each standard metric and generator.
+
 ---
 
 ## Conclusion
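For illustration only, a hedged sketch of how a test might surface each individual violation from that single concatenated error. The generator name and the violation message in the comment are invented, and the snippet assumes a `*testing.T` named `t`, the `strings` import, and the same `currentReport`/`previousReport` values as in the docs above:

```go
hasFailed, err := benchspy.CompareDirectWithThresholds(
	1.0, 1.0, 1.0, 0.0,
	currentReport, previousReport)
if hasFailed {
	// The returned error is built with errors.Join, so each "[generator] ..."
	// violation sits on its own line of err.Error().
	for _, violation := range strings.Split(err.Error(), "\n") {
		t.Log(violation) // e.g. "[my_generator] median latency diff 2.31% exceeds 1.00% threshold" (illustrative)
	}
	t.Fatal("performance thresholds exceeded")
}
```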

wasp/benchspy/report.go

Lines changed: 60 additions & 10 deletions
@@ -3,6 +3,7 @@ package benchspy
 import (
 	"context"
 	"encoding/json"
+	goerrors "errors"
 	"fmt"
 	"os"
 	"strings"
@@ -150,21 +151,27 @@ func MustAllPrometheusResults(sr *StandardReport) map[string]model.Value {
 }
 
 func calculateDiffPercentage(current, previous float64) float64 {
-	var diffPrecentage float64
-	if previous != 0.0 && current != 0.0 {
-		diffPrecentage = (current - previous) / previous * 100
-	} else if previous == 0.0 && current == 0.0 {
-		diffPrecentage = 0.0
-	} else {
-		diffPrecentage = 100.0
+	if previous == 0.0 {
+		if current == 0.0 {
+			return 0.0
+		}
+		return 999.0 // Convention for infinite change when previous is 0
+	}
+
+	if current == 0.0 {
+		return -100.0 // Complete improvement when current is 0
 	}
 
-	return diffPrecentage
+	return (current - previous) / previous * 100
 }
 
 // CompareDirectWithThresholds evaluates the current and previous reports against specified thresholds.
 // It checks for significant differences in metrics and returns any discrepancies found, aiding in performance analysis.
-func CompareDirectWithThresholds(medianThreshold, p95Threshold, maxThreshold, errorRateThreshold float64, currentReport, previousReport *StandardReport) (bool, map[string][]error) {
+func CompareDirectWithThresholds(medianThreshold, p95Threshold, maxThreshold, errorRateThreshold float64, currentReport, previousReport *StandardReport) (bool, error) {
+	if currentReport == nil || previousReport == nil {
+		return true, errors.New("one or both reports are nil")
+	}
+
 	L.Info().
 		Str("Current report", currentReport.CommitOrTag).
 		Str("Previous report", previousReport.CommitOrTag).
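As a side note, the conventions above (0 for no change, 999 for an "infinite" regression from zero, −100 for a complete improvement to zero) could be pinned down with a small table-driven test. This is only a sketch against the unexported function, in a hypothetical in-package test file:

```go
package benchspy

import "testing"

// Hypothetical test sketch; the cases mirror the branches of calculateDiffPercentage.
func TestCalculateDiffPercentage(t *testing.T) {
	cases := []struct {
		name              string
		current, previous float64
		want              float64
	}{
		{"both zero means no change", 0.0, 0.0, 0.0},
		{"999 marks an infinite regression from zero", 5.0, 0.0, 999.0},
		{"-100 marks a complete improvement to zero", 0.0, 5.0, -100.0},
		{"otherwise a plain percentage change", 150.0, 100.0, 50.0},
	}

	for _, tc := range cases {
		if got := calculateDiffPercentage(tc.current, tc.previous); got != tc.want {
			t.Errorf("%s: got %v, want %v", tc.name, got, tc.want)
		}
	}
}
```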
@@ -174,6 +181,10 @@ func CompareDirectWithThresholds(medianThreshold, p95Threshold, maxThreshold, er
 		Float64("Error rate threshold", errorRateThreshold).
 		Msg("Comparing Direct metrics with thresholds")
 
+	if thresholdsErr := validateThresholds(medianThreshold, p95Threshold, maxThreshold, errorRateThreshold); thresholdsErr != nil {
+		return true, thresholdsErr
+	}
+
 	allCurrentResults := MustAllDirectResults(currentReport)
 	allPreviousResults := MustAllDirectResults(previousReport)
 
@@ -239,7 +250,46 @@ func CompareDirectWithThresholds(medianThreshold, p95Threshold, maxThreshold, er
 		Int("Number of meaningful differences", len(errors)).
 		Msg("Finished comparing Direct metrics with thresholds")
 
-	return len(errors) > 0, errors
+	return len(errors) > 0, concatenateGeneratorErrors(errors)
+}
+
+func concatenateGeneratorErrors(errors map[string][]error) error {
+	var errs []error
+	for generatorName, errors := range errors {
+		for _, err := range errors {
+			errs = append(errs, fmt.Errorf("[%s] %w", generatorName, err))
+		}
+	}
+	return goerrors.Join(errs...)
+}
+
+func validateThresholds(medianThreshold, p95Threshold, maxThreshold, errorRateThreshold float64) error {
+	var errs []error
+
+	var validateThreshold = func(name string, threshold float64) error {
+		if threshold < 0 || threshold > 100 {
+			return fmt.Errorf("%s threshold %.4f is not in the range [0, 100]", name, threshold)
+		}
+		return nil
+	}
+
+	if err := validateThreshold("median", medianThreshold); err != nil {
+		errs = append(errs, err)
+	}
+
+	if err := validateThreshold("p95", p95Threshold); err != nil {
+		errs = append(errs, err)
+	}
+
+	if err := validateThreshold("max", maxThreshold); err != nil {
+		errs = append(errs, err)
+	}
+
+	if err := validateThreshold("error rate", errorRateThreshold); err != nil {
+		errs = append(errs, err)
+	}
+
+	return goerrors.Join(errs...)
 }
 
 // PrintStandardDirectMetrics outputs a comparison of direct metrics between two reports.
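Finally, a short hypothetical sketch of the new guard rails from the caller's side: an out-of-range threshold or a nil report now fails fast with a single descriptive error instead of a `map[string][]error`. It assumes testify's `require`, a `*testing.T` named `t`, and report variables as in the docs above:

```go
// Thresholds must sit in [0, 100]; 150 trips the validation added above.
hasFailed, err := benchspy.CompareDirectWithThresholds(
	150.0, 1.0, 1.0, 0.0,
	currentReport, previousReport)
require.True(t, hasFailed)
require.ErrorContains(t, err, "median threshold 150.0000 is not in the range [0, 100]")

// A nil report is rejected before any metrics are compared.
hasFailed, err = benchspy.CompareDirectWithThresholds(
	1.0, 1.0, 1.0, 0.0,
	nil, previousReport)
require.True(t, hasFailed)
require.ErrorContains(t, err, "one or both reports are nil")
```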
