add threshold validation, nil report validation, better readability for infinite metric change

Tofel · Tofel · commit a4ffabfc5207 · 2025-01-09T19:18:40.000+01:00
diff --git a/wasp/benchspy/report.go b/wasp/benchspy/report.go
@@ -150,21 +150,29 @@ func MustAllPrometheusResults(sr *StandardReport) map[string]model.Value {
 }
 
 func calculateDiffPercentage(current, previous float64) float64 {
-	var diffPrecentage float64
-	if previous != 0.0 && current != 0.0 {
-		diffPrecentage = (current - previous) / previous * 100
-	} else if previous == 0.0 && current == 0.0 {
-		diffPrecentage = 0.0
-	} else {
-		diffPrecentage = 100.0
+	if previous == 0.0 {
+		if current == 0.0 {
+			return 0.0 // both values are zero: nothing changed
+		}
+		return 999.0 // Convention for infinite change: the ratio below would divide by zero when previous is 0
+	}
+
+	if current == 0.0 {
+		return -100.0 // previous is non-zero here, so the metric dropped all the way to zero
 	}
 
-	return diffPrecentage
+	return (current - previous) / previous * 100 // change of current relative to previous, in percent
 }
 
 // CompareDirectWithThresholds evaluates the current and previous reports against specified thresholds.
 // It checks for significant differences in metrics and returns any discrepancies found, aiding in performance analysis.
 func CompareDirectWithThresholds(medianThreshold, p95Threshold, maxThreshold, errorRateThreshold float64, currentReport, previousReport *StandardReport) (bool, map[string][]error) {
+	if currentReport == nil || previousReport == nil {
+		return true, map[string][]error{
+			"initialization": {errors.New("one or both reports are nil")},
+		}
+	}
+
 	L.Info().
 		Str("Current report", currentReport.CommitOrTag).
 		Str("Previous report", previousReport.CommitOrTag).
@@ -174,6 +182,12 @@ func CompareDirectWithThresholds(medianThreshold, p95Threshold, maxThreshold, er
 		Float64("Error rate threshold", errorRateThreshold).
 		Msg("Comparing Direct metrics with thresholds")
 
+	if thresholdsErrs := validateThresholds(medianThreshold, p95Threshold, maxThreshold, errorRateThreshold); len(thresholdsErrs) > 0 {
+		return true, map[string][]error{
+			"initialization": thresholdsErrs,
+		}
+	}
+
 	allCurrentResults := MustAllDirectResults(currentReport)
 	allPreviousResults := MustAllDirectResults(previousReport)
 
@@ -242,6 +256,35 @@ func CompareDirectWithThresholds(medianThreshold, p95Threshold, maxThreshold, er
 	return len(errors) > 0, errors
 }
 
+// validateThresholds checks that every threshold is a percentage in the range [0, 100].
+// It returns one error per invalid threshold, in the order median, p95, max, error rate,
+// or a nil slice when all four thresholds are valid. NaN is treated as out of range.
+func validateThresholds(medianThreshold, p95Threshold, maxThreshold, errorRateThreshold float64) []error {
+	// The names below appear verbatim at the start of the corresponding error message.
+	thresholds := []struct {
+		name  string
+		value float64
+	}{
+		{"median", medianThreshold},
+		{"p95", p95Threshold},
+		{"max", maxThreshold},
+		{"error rate", errorRateThreshold},
+	}
+
+	var errs []error
+	for _, th := range thresholds {
+		// The in-range test is negated on purpose: NaN fails every ordered comparison,
+		// so "value < 0 || value > 100" would silently accept it, while this form
+		// rejects anything that is not provably inside [0, 100].
+		if !(th.value >= 0 && th.value <= 100) {
+			errs = append(errs, fmt.Errorf("%s threshold %.4f is not in the range [0, 100]", th.name, th.value))
+		}
+	}
+
+	return errs
+}
+
+
 // PrintStandardDirectMetrics outputs a comparison of direct metrics between two reports.
 // It displays the current and previous values along with the percentage difference for each metric,
 // helping users to quickly assess performance changes across different generator configurations.
diff --git a/wasp/benchspy/report_test.go b/wasp/benchspy/report_test.go
@@ -1613,9 +1613,210 @@ func TestBenchSpy_CompareDirectWithThresholds(t *testing.T) {
 		assert.Len(t, errs, 1)
 		assert.Len(t, errs["test-gen"], 4)
 		for _, err := range errs["test-gen"] {
-			assert.Contains(t, err.Error(), "100.0000% different")
+			assert.Contains(t, err.Error(), "999.0000% different")
 		}
 	})
+
+	t.Run("handle non-zero to zero transition", func(t *testing.T) {
+		previousReport := &StandardReport{
+			BasicData: BasicData{
+				GeneratorConfigs: map[string]*wasp.Config{
+					"test-gen": {
+						GenName: "test-gen",
+					},
+				},
+			},
+			QueryExecutors: []QueryExecutor{
+				&MockQueryExecutor{
+					KindFn: func() string { return string(StandardQueryExecutor_Direct) },
+					ResultsFn: func() map[string]interface{} {
+						return map[string]interface{}{
+							string(MedianLatency):       10.0,
+							string(Percentile95Latency): 20.0,
+							string(MaxLatency):          311.0,
+							string(ErrorRate):           1.0,
+						}
+					},
+					GeneratorNameFn: func() string { return "test-gen" },
+				},
+			},
+		}
+
+		currentReport := &StandardReport{
+			BasicData: BasicData{
+				GeneratorConfigs: map[string]*wasp.Config{
+					"test-gen": {
+						GenName: "test-gen",
+					},
+				},
+			},
+			QueryExecutors: []QueryExecutor{
+				&MockQueryExecutor{
+					KindFn: func() string { return string(StandardQueryExecutor_Direct) },
+					ResultsFn: func() map[string]interface{} {
+						return map[string]interface{}{
+							string(MedianLatency):       0.0,
+							string(Percentile95Latency): 0.0,
+							string(MaxLatency):          0.0,
+							string(ErrorRate):           0.0,
+						}
+					},
+					GeneratorNameFn: func() string { return "test-gen" },
+				},
+			},
+		}
+
+		failed, errs := CompareDirectWithThresholds(10.0, 10.0, 10.0, 10.0, currentReport, previousReport)
+		assert.False(t, failed)
+		assert.Len(t, errs, 0)
+	})
+
+	t.Run("handle edge-cases", func(t *testing.T) {
+		previousReport := &StandardReport{
+			BasicData: BasicData{
+				GeneratorConfigs: map[string]*wasp.Config{
+					"test-gen": {
+						GenName: "test-gen",
+					},
+				},
+			},
+			QueryExecutors: []QueryExecutor{
+				&MockQueryExecutor{
+					KindFn: func() string { return string(StandardQueryExecutor_Direct) },
+					ResultsFn: func() map[string]interface{} {
+						return map[string]interface{}{
+							string(MedianLatency):       10.1,
+							string(Percentile95Latency): 10.1,
+							string(MaxLatency):          10.0,
+							string(ErrorRate):           10.0,
+						}
+					},
+					GeneratorNameFn: func() string { return "test-gen" },
+				},
+			},
+		}
+
+		currentReport := &StandardReport{
+			BasicData: BasicData{
+				GeneratorConfigs: map[string]*wasp.Config{
+					"test-gen": {
+						GenName: "test-gen",
+					},
+				},
+			},
+			QueryExecutors: []QueryExecutor{
+				&MockQueryExecutor{
+					KindFn: func() string { return string(StandardQueryExecutor_Direct) },
+					ResultsFn: func() map[string]interface{} {
+						return map[string]interface{}{
+							string(MedianLatency):       10.2,
+							string(Percentile95Latency): 10.1999,
+							string(MaxLatency):          0.0,
+							string(ErrorRate):           0.0,
+						}
+					},
+					GeneratorNameFn: func() string { return "test-gen" },
+				},
+			},
+		}
+
+		failed, errs := CompareDirectWithThresholds(0.99, 0.9892, 10.0, 10.0, currentReport, previousReport)
+		assert.True(t, failed)
+		assert.Equal(t, 1, len(errs))
+		assert.Equal(t, 1, len(errs["test-gen"]))
+		assert.Contains(t, errs["test-gen"][0].Error(), "0.9901% different")
+	})
+
+	t.Run("handle nil reports", func(t *testing.T) {
+		report := &StandardReport{
+			BasicData: BasicData{
+				GeneratorConfigs: map[string]*wasp.Config{
+					"test-gen": {
+						GenName: "test-gen",
+					},
+				},
+			},
+			QueryExecutors: []QueryExecutor{
+				&MockQueryExecutor{
+					KindFn: func() string { return string(StandardQueryExecutor_Direct) },
+					ResultsFn: func() map[string]interface{} {
+						return map[string]interface{}{
+							string(MedianLatency):       10.2,
+							string(Percentile95Latency): 10.1999,
+							string(MaxLatency):          0.0,
+							string(ErrorRate):           0.0,
+						}
+					},
+					GeneratorNameFn: func() string { return "test-gen" },
+				},
+			},
+		}
+
+		failed, errs := CompareDirectWithThresholds(10.0, 10.0, 10.0, 10.0, report, nil)
+		assert.True(t, failed)
+		assert.Equal(t, 1, len(errs))
+		assert.Equal(t, 1, len(errs["initialization"]))
+		assert.Contains(t, errs["initialization"][0].Error(), "one or both reports are nil")
+
+		failed, errs = CompareDirectWithThresholds(10.0, 10.0, 10.0, 10.0, nil, report)
+		assert.True(t, failed)
+		assert.Equal(t, 1, len(errs))
+		assert.Equal(t, 1, len(errs["initialization"]))
+		assert.Contains(t, errs["initialization"][0].Error(), "one or both reports are nil")
+
+		failed, errs = CompareDirectWithThresholds(10.0, 10.0, 10.0, 10.0, nil, nil)
+		assert.True(t, failed)
+		assert.Equal(t, 1, len(errs))
+		assert.Equal(t, 1, len(errs["initialization"]))
+		assert.Contains(t, errs["initialization"][0].Error(), "one or both reports are nil")
+	})
+
+	t.Run("handle incorrect thresholds", func(t *testing.T) {
+		report := &StandardReport{
+			BasicData: BasicData{
+				GeneratorConfigs: map[string]*wasp.Config{
+					"test-gen": {
+						GenName: "test-gen",
+					},
+				},
+			},
+			QueryExecutors: []QueryExecutor{
+				&MockQueryExecutor{
+					KindFn: func() string { return string(StandardQueryExecutor_Direct) },
+					ResultsFn: func() map[string]interface{} {
+						return map[string]interface{}{
+							string(MedianLatency):       10.0,
+							string(Percentile95Latency): 10.0,
+							string(MaxLatency):          0.0,
+							string(ErrorRate):           0.0,
+						}
+					},
+					GeneratorNameFn: func() string { return "test-gen" },
+				},
+			},
+		}
+
+		failed, errs := CompareDirectWithThresholds(-0.1, 100.0, 0.0, 100.0, report, report)
+		assert.True(t, failed)
+		assert.Equal(t, 1, len(errs))
+		assert.Equal(t, 1, len(errs["initialization"]))
+		assert.Contains(t, errs["initialization"][0].Error(), "median threshold -0.1000 is not in the range [0, 100]")
+
+		failed, errs = CompareDirectWithThresholds(1.0, 101.0, 0.0, 100.0, report, report)
+		assert.True(t, failed)
+		assert.Equal(t, 1, len(errs))
+		assert.Equal(t, 1, len(errs["initialization"]))
+		assert.Contains(t, errs["initialization"][0].Error(), "p95 threshold 101.0000 is not in the range [0, 100]")
+
+		failed, errs = CompareDirectWithThresholds(-1, -1, -1, -1, report, report)
+		assert.True(t, failed)
+		assert.Equal(t, 1, len(errs))
+		assert.Equal(t, 4, len(errs["initialization"]))
+		assert.Contains(t, errs["initialization"][0].Error(), "median threshold -1.0000 is not in the range [0, 100]")
+		assert.Contains(t, errs["initialization"][1].Error(), "p95 threshold -1.0000 is not in the range [0, 100]")
+		assert.Contains(t, errs["initialization"][2].Error(), "max threshold -1.0000 is not in the range [0, 100]")
+		assert.Contains(t, errs["initialization"][3].Error(), "error rate threshold -1.0000 is not in the range [0, 100]")
+	})
 }
 
 func TestBenchSpy_Standard_Direct_Metrics_Two_Generators_E2E(t *testing.T) {