smartcontractkit · Tofel · Jun 12, 2025 · Jun 10, 2025 · Jun 11, 2025 · Jun 11, 2025
@@ -112,14 +112,9 @@ var compareAverages = func(t *testing.T, metricName string, currentAsStringSlice
 	assert.LessOrEqual(t, math.Abs(diffPrecentage), maxPrecentageDiff, "%s medians are more than 1% different", metricName, fmt.Sprintf("%.4f", diffPrecentage))
 }
 
-compareAverages(
-    t,
-    string(benchspy.MedianLatency),
-    currentAsStringSlice,
-    previousAsStringSlice,
-    1.0,
-)
+compareAverages(t, string(benchspy.MedianLatency), currentAsStringSlice, previousAsStringSlice, 1.0)
 compareAverages(t, string(benchspy.Percentile95Latency), currentAsStringSlice, previousAsStringSlice, 1.0)
+compareAverages(t, string(benchspy.Percentile99Latency), currentAsStringSlice, previousAsStringSlice, 1.0)
 compareAverages(t, string(benchspy.MaxLatency), currentAsStringSlice, previousAsStringSlice, 1.0)
 compareAverages(t, string(benchspy.ErrorRate), currentAsStringSlice, previousAsStringSlice, 1.0)
 ```

@@ -90,6 +90,7 @@ Let’s assume you want to ensure that none of the performance metrics degrade b
 hasFailed, error := benchspy.CompareDirectWithThresholds(
     1.0, // Max 1% worse median latency
     1.0, // Max 1% worse p95 latency
+    1.0, // Max 1% worse p99 latency
     1.0, // Max 1% worse maximum latency
     0.0, // No increase in error rate
     currentReport, previousReport)

@@ -18,6 +18,7 @@ Both query executors focus on the characteristics of the load generated by `WASP
 Predefined metrics for both include:
 - Median latency
 - 95th percentile latency
+- 99th percentile latency
 - Max latency
 - Error rate
 

@@ -15,6 +15,7 @@ hasErrors, errors := benchspy.CompareDirectWithThresholds(
     // maximum differences in percentages for:
     1.0, // median latency
     1.0, // p95 latency
+    1.0, // p99 latency
     1.0, // max latency
     1.0, // error rate
     currentReport,
@@ -29,6 +30,7 @@ If there are errors they will be returned as `map[string][]errors`, where key is
 > Both `Direct` and `Loki` query executors support the following standard performance metrics out of the box:
 > - `median_latency`
 > - `p95_latency`
+> - `p99_latency`
 > - `max_latency`
 > - `error_rate`
 
@@ -43,6 +45,8 @@ Generator: vu1
 +-------------------------+---------+---------+---------+
 | 95th_percentile_latency | 50.7387 | 50.7622 | 0.0463  |
 +-------------------------+---------+---------+---------+
+| 99th_percentile_latency | 54.8192 | 51.0124 | -7.4624 |
++-------------------------+---------+---------+---------+
 | max_latency             | 55.7195 | 51.7248 | -7.1692 |
 +-------------------------+---------+---------+---------+
 | error_rate              | 0.0000  | 0.0000  | 0.0000  |

@@ -0,0 +1 @@
+- Add p99 metric to BenchSpy's Direct and Loki standard Query Executors
@@ -269,6 +269,11 @@ func (dqe *DirectQueryExecutor) standardQuery(standardMetric StandardLoadMetric)
 			return stats.Percentile(responsesToDurationFn(responses), 95)
 		}
 		return p95Fn, nil
+	case Percentile99Latency:
+		p99Fn := func(responses *wasp.SliceBuffer[*wasp.Response]) (float64, error) {
+			return stats.Percentile(responsesToDurationFn(responses), 99)
+		}
+		return p99Fn, nil
 	case MaxLatency:
 		maxFn := func(responses *wasp.SliceBuffer[*wasp.Response]) (float64, error) {
 			return stats.Max(responsesToDurationFn(responses))

@@ -235,13 +235,15 @@ func TestBenchSpy_DirectQueryExecutor_Execute(t *testing.T) {
 		// 4 responses with ~150ms latency (150ms sleep + some execution overhead)
 		// and 2-3 responses with ~200ms latency (200ms sleep + some execution overhead)
 		// expected median latency: (150ms, 151ms>
-		resultsAsFloats, err := ResultsAs(0.0, executor, string(MedianLatency), string(Percentile95Latency), string(ErrorRate))
+		resultsAsFloats, err := ResultsAs(0.0, executor, string(MedianLatency), string(Percentile95Latency), string(Percentile99Latency), string(ErrorRate))
 		assert.NoError(t, err)
-		require.Equal(t, 3, len(resultsAsFloats))
+		require.Equal(t, 4, len(resultsAsFloats))
 		require.InDelta(t, 151.0, resultsAsFloats[string(MedianLatency)], 1.0)
 
 		// since we have 2-3 responses with 200-201ms latency, the 95th percentile should be (200ms, 201ms>
 		require.InDelta(t, 201.0, resultsAsFloats[string(Percentile95Latency)], 1.0)
+		// since we have 2-3 responses with 200-201ms latency, the 99th percentile should be (199ms, 203ms>
+		require.InDelta(t, 201.0, resultsAsFloats[string(Percentile99Latency)], 2.0)
 
 		errorRate, exists := resultsAsFloats[string(ErrorRate)]
 		assert.True(t, exists)

@@ -22,6 +22,7 @@ import (
 var (
 	Loki_MedianQuery = `quantile_over_time(0.5, {branch=~"%s", commit=~"%s", go_test_name=~"%s", test_data_type=~"responses", gen_name=~"%s"} | json| unwrap duration [10s]) by (go_test_name, gen_name) / 1e6`
 	Loki_95thQuery   = `quantile_over_time(0.95, {branch=~"%s", commit=~"%s", go_test_name=~"%s", test_data_type=~"responses", gen_name=~"%s"} | json| unwrap duration [10s]) by (go_test_name, gen_name) / 1e6`
+	Loki_99thQuery   = `quantile_over_time(0.99, {branch=~"%s", commit=~"%s", go_test_name=~"%s", test_data_type=~"responses", gen_name=~"%s"} | json| unwrap duration [10s]) by (go_test_name, gen_name) / 1e6`
 	Loki_MaxQuery    = `max(max_over_time({branch=~"%s", commit=~"%s", go_test_name=~"%s", test_data_type=~"responses", gen_name=~"%s"} | json| unwrap duration [10s]) by (go_test_name, gen_name) / 1e6)`
 	Loki_ErrorRate   = `sum(max_over_time({branch=~"%s", commit=~"%s", go_test_name=~"%s", test_data_type=~"stats", gen_name=~"%s"} | json| unwrap failed [%s]) by (node_id, go_test_name, gen_name)) by (__stream_shard__)`
 )
@@ -311,6 +312,8 @@ func (l *LokiQueryExecutor) standardQuery(standardMetric StandardLoadMetric, tes
 		return fmt.Sprintf(Loki_MedianQuery, branch, commit, testName, generatorName), nil
 	case Percentile95Latency:
 		return fmt.Sprintf(Loki_95thQuery, branch, commit, testName, generatorName), nil
+	case Percentile99Latency:
+		return fmt.Sprintf(Loki_99thQuery, branch, commit, testName, generatorName), nil
 	case MaxLatency:
 		return fmt.Sprintf(Loki_MaxQuery, branch, commit, testName, generatorName), nil
 	case ErrorRate:

@@ -168,7 +168,7 @@ func calculateDiffPercentage(current, previous float64) float64 {
 
 // CompareDirectWithThresholds evaluates the current and previous reports against specified thresholds.
 // It checks for significant differences in metrics and returns any discrepancies found, aiding in performance analysis.
-func CompareDirectWithThresholds(medianThreshold, p95Threshold, maxThreshold, errorRateThreshold float64, currentReport, previousReport *StandardReport) (bool, error) {
+func CompareDirectWithThresholds(medianThreshold, p95Threshold, p99Threshold, maxThreshold, errorRateThreshold float64, currentReport, previousReport *StandardReport) (bool, error) {
 	if currentReport == nil || previousReport == nil {
 		return true, errors.New("one or both reports are nil")
 	}
@@ -178,11 +178,12 @@ func CompareDirectWithThresholds(medianThreshold, p95Threshold, maxThreshold, er
 		Str("Previous report", previousReport.CommitOrTag).
 		Float64("Median threshold", medianThreshold).
 		Float64("P95 threshold", p95Threshold).
+		Float64("P99 threshold", p99Threshold).
 		Float64("Max threshold", maxThreshold).
 		Float64("Error rate threshold", errorRateThreshold).
 		Msg("Comparing Direct metrics with thresholds")
 
-	if thresholdsErr := validateThresholds(medianThreshold, p95Threshold, maxThreshold, errorRateThreshold); thresholdsErr != nil {
+	if thresholdsErr := validateThresholds(medianThreshold, p95Threshold, p99Threshold, maxThreshold, errorRateThreshold); thresholdsErr != nil {
 		return true, thresholdsErr
 	}
 
@@ -234,6 +235,10 @@ func CompareDirectWithThresholds(medianThreshold, p95Threshold, maxThreshold, er
 			errors[genCfg.GenName] = append(errors[genCfg.GenName], err)
 		}
 
+		if err := compareValues(string(Percentile99Latency), genCfg.GenName, p99Threshold); err != nil {
+			errors[genCfg.GenName] = append(errors[genCfg.GenName], err)
+		}
+
 		if err := compareValues(string(MaxLatency), genCfg.GenName, maxThreshold); err != nil {
 			errors[genCfg.GenName] = append(errors[genCfg.GenName], err)
 		}
@@ -264,7 +269,7 @@ func concatenateGeneratorErrors(errors map[string][]error) error {
 	return goerrors.Join(errs...)
 }
 
-func validateThresholds(medianThreshold, p95Threshold, maxThreshold, errorRateThreshold float64) error {
+func validateThresholds(medianThreshold, p95Threshold, p99Threshold, maxThreshold, errorRateThreshold float64) error {
 	var errs []error
 
 	var validateThreshold = func(name string, threshold float64) error {
@@ -282,6 +287,10 @@ func validateThresholds(medianThreshold, p95Threshold, maxThreshold, errorRateTh
 		errs = append(errs, err)
 	}
 
+	if err := validateThreshold("p99", p99Threshold); err != nil {
+		errs = append(errs, err)
+	}
+
 	if err := validateThreshold("max", maxThreshold); err != nil {
 		errs = append(errs, err)
 	}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		- Add p99 metric to BenchSpy's Direct and Loki standard Query Executors