Skip to content

Commit 39db163

Browse files
authored
[DX-1040] Add p99 metric to BenchSpy (#1889)
1 parent d6febbd commit 39db163

File tree

14 files changed

+84
-33
lines changed

14 files changed

+84
-33
lines changed

book/src/libs/wasp/benchspy/loki_std.md

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -112,14 +112,9 @@ var compareAverages = func(t *testing.T, metricName string, currentAsStringSlice
112112
assert.LessOrEqual(t, math.Abs(diffPrecentage), maxPrecentageDiff, "%s medians are more than 1% different", metricName, fmt.Sprintf("%.4f", diffPrecentage))
113113
}
114114

115-
compareAverages(
116-
t,
117-
string(benchspy.MedianLatency),
118-
currentAsStringSlice,
119-
previousAsStringSlice,
120-
1.0,
121-
)
115+
compareAverages(t, string(benchspy.MedianLatency), currentAsStringSlice, previousAsStringSlice, 1.0)
122116
compareAverages(t, string(benchspy.Percentile95Latency), currentAsStringSlice, previousAsStringSlice, 1.0)
117+
compareAverages(t, string(benchspy.Percentile99Latency), currentAsStringSlice, previousAsStringSlice, 1.0)
123118
compareAverages(t, string(benchspy.MaxLatency), currentAsStringSlice, previousAsStringSlice, 1.0)
124119
compareAverages(t, string(benchspy.ErrorRate), currentAsStringSlice, previousAsStringSlice, 1.0)
125120
```

book/src/libs/wasp/benchspy/real_world.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ Let’s assume you want to ensure that none of the performance metrics degrade b
9090
hasFailed, error := benchspy.CompareDirectWithThresholds(
9191
1.0, // Max 1% worse median latency
9292
1.0, // Max 1% worse p95 latency
93+
1.0, // Max 1% worse p99 latency
9394
1.0, // Max 1% worse maximum latency
9495
0.0, // No increase in error rate
9596
currentReport, previousReport)

book/src/libs/wasp/benchspy/reports/standard_report.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ Both query executors focus on the characteristics of the load generated by `WASP
1818
Predefined metrics for both include:
1919
- Median latency
2020
- 95th percentile latency
21+
- 99th percentile latency
2122
- Max latency
2223
- Error rate
2324

book/src/libs/wasp/benchspy/simplest_metrics.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ hasErrors, errors := benchspy.CompareDirectWithThresholds(
1515
// maximum differences in percentages for:
1616
1.0, // median latency
1717
1.0, // p95 latency
18+
1.0, // p99 latency
1819
1.0, // max latency
1920
1.0, // error rate
2021
currentReport,
@@ -29,6 +30,7 @@ If there are errors they will be returned as `map[string][]errors`, where key is
2930
> Both `Direct` and `Loki` query executors support the following standard performance metrics out of the box:
3031
> - `median_latency`
3132
> - `p95_latency`
33+
> - `p99_latency`
3234
> - `max_latency`
3335
> - `error_rate`
3436
@@ -43,6 +45,8 @@ Generator: vu1
4345
+-------------------------+---------+---------+---------+
4446
| 95th_percentile_latency | 50.7387 | 50.7622 | 0.0463 |
4547
+-------------------------+---------+---------+---------+
48+
| 99th_percentile_latency | 54.8192 | 51.0124 | -6.9443 |
49+
+-------------------------+---------+---------+---------+
4650
| max_latency | 55.7195 | 51.7248 | -7.1692 |
4751
+-------------------------+---------+---------+---------+
4852
| error_rate | 0.0000 | 0.0000 | 0.0000 |

wasp/.changeset/v1.51.1.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
- Add p99 metric to BenchSpy's Direct and Loki standard Query Executors

wasp/benchspy/direct.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,11 @@ func (dqe *DirectQueryExecutor) standardQuery(standardMetric StandardLoadMetric)
269269
return stats.Percentile(responsesToDurationFn(responses), 95)
270270
}
271271
return p95Fn, nil
272+
case Percentile99Latency:
273+
p99Fn := func(responses *wasp.SliceBuffer[*wasp.Response]) (float64, error) {
274+
return stats.Percentile(responsesToDurationFn(responses), 99)
275+
}
276+
return p99Fn, nil
272277
case MaxLatency:
273278
maxFn := func(responses *wasp.SliceBuffer[*wasp.Response]) (float64, error) {
274279
return stats.Max(responsesToDurationFn(responses))

wasp/benchspy/direct_test.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -235,13 +235,15 @@ func TestBenchSpy_DirectQueryExecutor_Execute(t *testing.T) {
235235
// 4 responses with ~150ms latency (150ms sleep + some execution overhead)
236236
// and 2-3 responses with ~200ms latency (200ms sleep + some execution overhead)
237237
// expected median latency: (150ms, 151ms>
238-
resultsAsFloats, err := ResultsAs(0.0, executor, string(MedianLatency), string(Percentile95Latency), string(ErrorRate))
238+
resultsAsFloats, err := ResultsAs(0.0, executor, string(MedianLatency), string(Percentile95Latency), string(Percentile99Latency), string(ErrorRate))
239239
assert.NoError(t, err)
240-
require.Equal(t, 3, len(resultsAsFloats))
240+
require.Equal(t, 4, len(resultsAsFloats))
241241
require.InDelta(t, 151.0, resultsAsFloats[string(MedianLatency)], 1.0)
242242

243243
// since we have 2-3 responses with 200-201ms latency, the 95th percentile should be (200ms, 201ms>
244244
require.InDelta(t, 201.0, resultsAsFloats[string(Percentile95Latency)], 1.0)
245+
// since we have 2-3 responses with 200-201ms latency, the 99th percentile should be (199ms, 203ms>
246+
require.InDelta(t, 201.0, resultsAsFloats[string(Percentile99Latency)], 2.0)
245247

246248
errorRate, exists := resultsAsFloats[string(ErrorRate)]
247249
assert.True(t, exists)

wasp/benchspy/loki.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
var (
2323
Loki_MedianQuery = `quantile_over_time(0.5, {branch=~"%s", commit=~"%s", go_test_name=~"%s", test_data_type=~"responses", gen_name=~"%s"} | json| unwrap duration [10s]) by (go_test_name, gen_name) / 1e6`
2424
Loki_95thQuery = `quantile_over_time(0.95, {branch=~"%s", commit=~"%s", go_test_name=~"%s", test_data_type=~"responses", gen_name=~"%s"} | json| unwrap duration [10s]) by (go_test_name, gen_name) / 1e6`
25+
Loki_99thQuery = `quantile_over_time(0.99, {branch=~"%s", commit=~"%s", go_test_name=~"%s", test_data_type=~"responses", gen_name=~"%s"} | json| unwrap duration [10s]) by (go_test_name, gen_name) / 1e6`
2526
Loki_MaxQuery = `max(max_over_time({branch=~"%s", commit=~"%s", go_test_name=~"%s", test_data_type=~"responses", gen_name=~"%s"} | json| unwrap duration [10s]) by (go_test_name, gen_name) / 1e6)`
2627
Loki_ErrorRate = `sum(max_over_time({branch=~"%s", commit=~"%s", go_test_name=~"%s", test_data_type=~"stats", gen_name=~"%s"} | json| unwrap failed [%s]) by (node_id, go_test_name, gen_name)) by (__stream_shard__)`
2728
)
@@ -311,6 +312,8 @@ func (l *LokiQueryExecutor) standardQuery(standardMetric StandardLoadMetric, tes
311312
return fmt.Sprintf(Loki_MedianQuery, branch, commit, testName, generatorName), nil
312313
case Percentile95Latency:
313314
return fmt.Sprintf(Loki_95thQuery, branch, commit, testName, generatorName), nil
315+
case Percentile99Latency:
316+
return fmt.Sprintf(Loki_99thQuery, branch, commit, testName, generatorName), nil
314317
case MaxLatency:
315318
return fmt.Sprintf(Loki_MaxQuery, branch, commit, testName, generatorName), nil
316319
case ErrorRate:

wasp/benchspy/report.go

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ func calculateDiffPercentage(current, previous float64) float64 {
168168

169169
// CompareDirectWithThresholds evaluates the current and previous reports against specified thresholds.
170170
// It checks for significant differences in metrics and returns any discrepancies found, aiding in performance analysis.
171-
func CompareDirectWithThresholds(medianThreshold, p95Threshold, maxThreshold, errorRateThreshold float64, currentReport, previousReport *StandardReport) (bool, error) {
171+
func CompareDirectWithThresholds(medianThreshold, p95Threshold, p99Threshold, maxThreshold, errorRateThreshold float64, currentReport, previousReport *StandardReport) (bool, error) {
172172
if currentReport == nil || previousReport == nil {
173173
return true, errors.New("one or both reports are nil")
174174
}
@@ -178,11 +178,12 @@ func CompareDirectWithThresholds(medianThreshold, p95Threshold, maxThreshold, er
178178
Str("Previous report", previousReport.CommitOrTag).
179179
Float64("Median threshold", medianThreshold).
180180
Float64("P95 threshold", p95Threshold).
181+
Float64("P99 threshold", p99Threshold).
181182
Float64("Max threshold", maxThreshold).
182183
Float64("Error rate threshold", errorRateThreshold).
183184
Msg("Comparing Direct metrics with thresholds")
184185

185-
if thresholdsErr := validateThresholds(medianThreshold, p95Threshold, maxThreshold, errorRateThreshold); thresholdsErr != nil {
186+
if thresholdsErr := validateThresholds(medianThreshold, p95Threshold, p99Threshold, maxThreshold, errorRateThreshold); thresholdsErr != nil {
186187
return true, thresholdsErr
187188
}
188189

@@ -234,6 +235,10 @@ func CompareDirectWithThresholds(medianThreshold, p95Threshold, maxThreshold, er
234235
errors[genCfg.GenName] = append(errors[genCfg.GenName], err)
235236
}
236237

238+
if err := compareValues(string(Percentile99Latency), genCfg.GenName, p99Threshold); err != nil {
239+
errors[genCfg.GenName] = append(errors[genCfg.GenName], err)
240+
}
241+
237242
if err := compareValues(string(MaxLatency), genCfg.GenName, maxThreshold); err != nil {
238243
errors[genCfg.GenName] = append(errors[genCfg.GenName], err)
239244
}
@@ -264,7 +269,7 @@ func concatenateGeneratorErrors(errors map[string][]error) error {
264269
return goerrors.Join(errs...)
265270
}
266271

267-
func validateThresholds(medianThreshold, p95Threshold, maxThreshold, errorRateThreshold float64) error {
272+
func validateThresholds(medianThreshold, p95Threshold, p99Threshold, maxThreshold, errorRateThreshold float64) error {
268273
var errs []error
269274

270275
var validateThreshold = func(name string, threshold float64) error {
@@ -282,6 +287,10 @@ func validateThresholds(medianThreshold, p95Threshold, maxThreshold, errorRateTh
282287
errs = append(errs, err)
283288
}
284289

290+
if err := validateThreshold("p99", p99Threshold); err != nil {
291+
errs = append(errs, err)
292+
}
293+
285294
if err := validateThreshold("max", maxThreshold); err != nil {
286295
errs = append(errs, err)
287296
}

0 commit comments

Comments
 (0)