Skip to content

Commit a4ffabf

Browse files
committed
add threshold validation, nil report validation, better readability for infinite metric change
1 parent cdc37af commit a4ffabf

File tree

2 files changed

+253
-9
lines changed

2 files changed

+253
-9
lines changed

wasp/benchspy/report.go

Lines changed: 51 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -150,21 +150,29 @@ func MustAllPrometheusResults(sr *StandardReport) map[string]model.Value {
150150
}
151151

152152
func calculateDiffPercentage(current, previous float64) float64 {
153-
var diffPrecentage float64
154-
if previous != 0.0 && current != 0.0 {
155-
diffPrecentage = (current - previous) / previous * 100
156-
} else if previous == 0.0 && current == 0.0 {
157-
diffPrecentage = 0.0
158-
} else {
159-
diffPrecentage = 100.0
153+
if previous == 0.0 {
154+
if current == 0.0 {
155+
return 0.0
156+
}
157+
return 999.0 // Convention for infinite change when previous is 0
158+
}
159+
160+
if current == 0.0 {
161+
return -100.0 // Complete improvement when current is 0
160162
}
161163

162-
return diffPrecentage
164+
return (current - previous) / previous * 100
163165
}
164166

165167
// CompareDirectWithThresholds evaluates the current and previous reports against specified thresholds.
166168
// It checks for significant differences in metrics and returns any discrepancies found, aiding in performance analysis.
167169
func CompareDirectWithThresholds(medianThreshold, p95Threshold, maxThreshold, errorRateThreshold float64, currentReport, previousReport *StandardReport) (bool, map[string][]error) {
170+
if currentReport == nil || previousReport == nil {
171+
return true, map[string][]error{
172+
"initialization": {errors.New("one or both reports are nil")},
173+
}
174+
}
175+
168176
L.Info().
169177
Str("Current report", currentReport.CommitOrTag).
170178
Str("Previous report", previousReport.CommitOrTag).
@@ -174,6 +182,12 @@ func CompareDirectWithThresholds(medianThreshold, p95Threshold, maxThreshold, er
174182
Float64("Error rate threshold", errorRateThreshold).
175183
Msg("Comparing Direct metrics with thresholds")
176184

185+
if thresholdsErrs := validateThresholds(medianThreshold, p95Threshold, maxThreshold, errorRateThreshold); len(thresholdsErrs) > 0 {
186+
return true, map[string][]error{
187+
"initialization": thresholdsErrs,
188+
}
189+
}
190+
177191
allCurrentResults := MustAllDirectResults(currentReport)
178192
allPreviousResults := MustAllDirectResults(previousReport)
179193

@@ -242,6 +256,35 @@ func CompareDirectWithThresholds(medianThreshold, p95Threshold, maxThreshold, er
242256
return len(errors) > 0, errors
243257
}
244258

259+
func validateThresholds(medianThreshold, p95Threshold, maxThreshold, errorRateThreshold float64) []error {
260+
var errs []error
261+
262+
var validateThreshold = func(name string, threshold float64) error {
263+
if threshold < 0 || threshold > 100 {
264+
return fmt.Errorf("%s threshold %.4f is not in the range [0, 100]", name, threshold)
265+
}
266+
return nil
267+
}
268+
269+
if err := validateThreshold("median", medianThreshold); err != nil {
270+
errs = append(errs, err)
271+
}
272+
273+
if err := validateThreshold("p95", p95Threshold); err != nil {
274+
errs = append(errs, err)
275+
}
276+
277+
if err := validateThreshold("max", maxThreshold); err != nil {
278+
errs = append(errs, err)
279+
}
280+
281+
if err := validateThreshold("error rate", errorRateThreshold); err != nil {
282+
errs = append(errs, err)
283+
}
284+
285+
return errs
286+
}
287+
245288
// PrintStandardDirectMetrics outputs a comparison of direct metrics between two reports.
246289
// It displays the current and previous values along with the percentage difference for each metric,
247290
// helping users to quickly assess performance changes across different generator configurations.

wasp/benchspy/report_test.go

Lines changed: 202 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1613,9 +1613,210 @@ func TestBenchSpy_CompareDirectWithThresholds(t *testing.T) {
16131613
assert.Len(t, errs, 1)
16141614
assert.Len(t, errs["test-gen"], 4)
16151615
for _, err := range errs["test-gen"] {
1616-
assert.Contains(t, err.Error(), "100.0000% different")
1616+
assert.Contains(t, err.Error(), "999.0000% different")
16171617
}
16181618
})
1619+
1620+
t.Run("handle non-zero to zero transition", func(t *testing.T) {
1621+
previousReport := &StandardReport{
1622+
BasicData: BasicData{
1623+
GeneratorConfigs: map[string]*wasp.Config{
1624+
"test-gen": {
1625+
GenName: "test-gen",
1626+
},
1627+
},
1628+
},
1629+
QueryExecutors: []QueryExecutor{
1630+
&MockQueryExecutor{
1631+
KindFn: func() string { return string(StandardQueryExecutor_Direct) },
1632+
ResultsFn: func() map[string]interface{} {
1633+
return map[string]interface{}{
1634+
string(MedianLatency): 10.0,
1635+
string(Percentile95Latency): 20.0,
1636+
string(MaxLatency): 311.0,
1637+
string(ErrorRate): 1.0,
1638+
}
1639+
},
1640+
GeneratorNameFn: func() string { return "test-gen" },
1641+
},
1642+
},
1643+
}
1644+
1645+
currentReport := &StandardReport{
1646+
BasicData: BasicData{
1647+
GeneratorConfigs: map[string]*wasp.Config{
1648+
"test-gen": {
1649+
GenName: "test-gen",
1650+
},
1651+
},
1652+
},
1653+
QueryExecutors: []QueryExecutor{
1654+
&MockQueryExecutor{
1655+
KindFn: func() string { return string(StandardQueryExecutor_Direct) },
1656+
ResultsFn: func() map[string]interface{} {
1657+
return map[string]interface{}{
1658+
string(MedianLatency): 0.0,
1659+
string(Percentile95Latency): 0.0,
1660+
string(MaxLatency): 0.0,
1661+
string(ErrorRate): 0.0,
1662+
}
1663+
},
1664+
GeneratorNameFn: func() string { return "test-gen" },
1665+
},
1666+
},
1667+
}
1668+
1669+
failed, errs := CompareDirectWithThresholds(10.0, 10.0, 10.0, 10.0, currentReport, previousReport)
1670+
assert.False(t, failed)
1671+
assert.Len(t, errs, 0)
1672+
})
1673+
1674+
t.Run("handle edge-cases", func(t *testing.T) {
1675+
previousReport := &StandardReport{
1676+
BasicData: BasicData{
1677+
GeneratorConfigs: map[string]*wasp.Config{
1678+
"test-gen": {
1679+
GenName: "test-gen",
1680+
},
1681+
},
1682+
},
1683+
QueryExecutors: []QueryExecutor{
1684+
&MockQueryExecutor{
1685+
KindFn: func() string { return string(StandardQueryExecutor_Direct) },
1686+
ResultsFn: func() map[string]interface{} {
1687+
return map[string]interface{}{
1688+
string(MedianLatency): 10.1,
1689+
string(Percentile95Latency): 10.1,
1690+
string(MaxLatency): 10.0,
1691+
string(ErrorRate): 10.0,
1692+
}
1693+
},
1694+
GeneratorNameFn: func() string { return "test-gen" },
1695+
},
1696+
},
1697+
}
1698+
1699+
currentReport := &StandardReport{
1700+
BasicData: BasicData{
1701+
GeneratorConfigs: map[string]*wasp.Config{
1702+
"test-gen": {
1703+
GenName: "test-gen",
1704+
},
1705+
},
1706+
},
1707+
QueryExecutors: []QueryExecutor{
1708+
&MockQueryExecutor{
1709+
KindFn: func() string { return string(StandardQueryExecutor_Direct) },
1710+
ResultsFn: func() map[string]interface{} {
1711+
return map[string]interface{}{
1712+
string(MedianLatency): 10.2,
1713+
string(Percentile95Latency): 10.1999,
1714+
string(MaxLatency): 0.0,
1715+
string(ErrorRate): 0.0,
1716+
}
1717+
},
1718+
GeneratorNameFn: func() string { return "test-gen" },
1719+
},
1720+
},
1721+
}
1722+
1723+
failed, errs := CompareDirectWithThresholds(0.99, 0.9892, 10.0, 10.0, currentReport, previousReport)
1724+
assert.True(t, failed)
1725+
assert.Equal(t, 1, len(errs))
1726+
assert.Equal(t, 1, len(errs["test-gen"]))
1727+
assert.Contains(t, errs["test-gen"][0].Error(), "0.9901% different")
1728+
})
1729+
1730+
t.Run("handle nil reports", func(t *testing.T) {
1731+
report := &StandardReport{
1732+
BasicData: BasicData{
1733+
GeneratorConfigs: map[string]*wasp.Config{
1734+
"test-gen": {
1735+
GenName: "test-gen",
1736+
},
1737+
},
1738+
},
1739+
QueryExecutors: []QueryExecutor{
1740+
&MockQueryExecutor{
1741+
KindFn: func() string { return string(StandardQueryExecutor_Direct) },
1742+
ResultsFn: func() map[string]interface{} {
1743+
return map[string]interface{}{
1744+
string(MedianLatency): 10.2,
1745+
string(Percentile95Latency): 10.1999,
1746+
string(MaxLatency): 0.0,
1747+
string(ErrorRate): 0.0,
1748+
}
1749+
},
1750+
GeneratorNameFn: func() string { return "test-gen" },
1751+
},
1752+
},
1753+
}
1754+
1755+
failed, errs := CompareDirectWithThresholds(10.0, 10.0, 10.0, 10.0, report, nil)
1756+
assert.True(t, failed)
1757+
assert.Equal(t, 1, len(errs))
1758+
assert.Equal(t, 1, len(errs["initialization"]))
1759+
assert.Contains(t, errs["initialization"][0].Error(), "one or both reports are nil")
1760+
1761+
failed, errs = CompareDirectWithThresholds(10.0, 10.0, 10.0, 10.0, nil, report)
1762+
assert.True(t, failed)
1763+
assert.Equal(t, 1, len(errs))
1764+
assert.Equal(t, 1, len(errs["initialization"]))
1765+
assert.Contains(t, errs["initialization"][0].Error(), "one or both reports are nil")
1766+
1767+
failed, errs = CompareDirectWithThresholds(10.0, 10.0, 10.0, 10.0, nil, nil)
1768+
assert.True(t, failed)
1769+
assert.Equal(t, 1, len(errs))
1770+
assert.Equal(t, 1, len(errs["initialization"]))
1771+
assert.Contains(t, errs["initialization"][0].Error(), "one or both reports are nil")
1772+
})
1773+
1774+
t.Run("handle incorrect thresholds", func(t *testing.T) {
1775+
report := &StandardReport{
1776+
BasicData: BasicData{
1777+
GeneratorConfigs: map[string]*wasp.Config{
1778+
"test-gen": {
1779+
GenName: "test-gen",
1780+
},
1781+
},
1782+
},
1783+
QueryExecutors: []QueryExecutor{
1784+
&MockQueryExecutor{
1785+
KindFn: func() string { return string(StandardQueryExecutor_Direct) },
1786+
ResultsFn: func() map[string]interface{} {
1787+
return map[string]interface{}{
1788+
string(MedianLatency): 10.0,
1789+
string(Percentile95Latency): 10.0,
1790+
string(MaxLatency): 0.0,
1791+
string(ErrorRate): 0.0,
1792+
}
1793+
},
1794+
GeneratorNameFn: func() string { return "test-gen" },
1795+
},
1796+
},
1797+
}
1798+
1799+
failed, errs := CompareDirectWithThresholds(-0.1, 100.0, 0.0, 100.0, report, report)
1800+
assert.True(t, failed)
1801+
assert.Equal(t, 1, len(errs))
1802+
assert.Equal(t, 1, len(errs["initialization"]))
1803+
assert.Contains(t, errs["initialization"][0].Error(), "median threshold -0.1000 is not in the range [0, 100]")
1804+
1805+
failed, errs = CompareDirectWithThresholds(1.0, 101.0, 0.0, 100.0, report, report)
1806+
assert.True(t, failed)
1807+
assert.Equal(t, 1, len(errs))
1808+
assert.Equal(t, 1, len(errs["initialization"]))
1809+
assert.Contains(t, errs["initialization"][0].Error(), "p95 threshold 101.0000 is not in the range [0, 100]")
1810+
1811+
failed, errs = CompareDirectWithThresholds(-1, -1, -1, -1, report, report)
1812+
assert.True(t, failed)
1813+
assert.Equal(t, 1, len(errs))
1814+
assert.Equal(t, 4, len(errs["initialization"]))
1815+
assert.Contains(t, errs["initialization"][0].Error(), "median threshold -1.0000 is not in the range [0, 100]")
1816+
assert.Contains(t, errs["initialization"][1].Error(), "p95 threshold -1.0000 is not in the range [0, 100]")
1817+
assert.Contains(t, errs["initialization"][2].Error(), "max threshold -1.0000 is not in the range [0, 100]")
1818+
assert.Contains(t, errs["initialization"][3].Error(), "error rate threshold -1.0000 is not in the range [0, 100]")
1819+
})
16191820
}
16201821

16211822
func TestBenchSpy_Standard_Direct_Metrics_Two_Generators_E2E(t *testing.T) {

0 commit comments

Comments
 (0)