Skip to content

Commit 473161c

Browse files
committed
test(perf): Aggregate snapshot latencies to average
The statistical test used by the Firecracker performance tests is a simplified t-test. That test is based on the central limit theorem, and is thus only valid for testing _averages_. Nevertheless, the snapshot tests tried to use it to detect deviations in P90/P50 values. This is unsound, and the results are most definitely not what was intended. This commit changes the aggregate collected from the snapshot tests to an average instead, which makes the test sound. Note that since we also emit the individual data points, we can always reconstruct percentiles. Signed-off-by: Patrick Roy <[email protected]>
1 parent 9fed7df commit 473161c

File tree

4 files changed

+7
-33
lines changed

4 files changed

+7
-33
lines changed

tests/integration_tests/performance/configs/test_snapshot_restore_performance_config_4.14.json

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4475,14 +4475,7 @@
44754475
"latency": {
44764476
"statistics": [
44774477
{
4478-
"criteria": "EqualWith",
4479-
"function": "Percentile50",
4480-
"name": "P50"
4481-
},
4482-
{
4483-
"criteria": "EqualWith",
4484-
"function": "Percentile90",
4485-
"name": "P90"
4478+
"function": "Avg"
44864479
}
44874480
],
44884481
"unit": "ms"

tests/integration_tests/performance/configs/test_snapshot_restore_performance_config_5.10.json

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4149,14 +4149,7 @@
41494149
"latency": {
41504150
"statistics": [
41514151
{
4152-
"criteria": "EqualWith",
4153-
"function": "Percentile50",
4154-
"name": "P50"
4155-
},
4156-
{
4157-
"criteria": "EqualWith",
4158-
"function": "Percentile90",
4159-
"name": "P90"
4152+
"function": "Avg"
41604153
}
41614154
],
41624155
"unit": "ms"

tests/integration_tests/performance/configs/test_snapshot_restore_performance_config_6.1.json

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4149,12 +4149,7 @@
41494149
"latency": {
41504150
"statistics": [
41514151
{
4152-
"function": "Percentile50",
4153-
"name": "P50"
4154-
},
4155-
{
4156-
"function": "Percentile90",
4157-
"name": "P90"
4152+
"function": "Avg"
41584153
}
41594154
],
41604155
"unit": "ms"

tools/parse_baselines/providers/snapshot_restore.py

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -26,22 +26,15 @@ def __init__(self, data_provider: Iterator):
2626
super().__init__(
2727
data_provider,
2828
[
29-
"latency/P50",
30-
"latency/P90",
29+
"latency/Avg",
3130
],
3231
)
3332

3433
def calculate_baseline(self, data: List[float]) -> dict:
3534
"""Return the target and delta values, given a list of data points."""
3635
avg = statistics.mean(data)
37-
min_ = min(data)
38-
max_ = max(data)
39-
40-
min_delta = 100 * abs(avg - min_) / avg
41-
max_delta = 100 * abs(avg - max_) / avg
42-
delta = max(max_delta, min_delta)
43-
36+
stddev = statistics.stdev(data)
4437
return {
45-
"target": round(avg, 3),
46-
"delta_percentage": math.ceil(delta) + DELTA_EXTRA_MARGIN,
38+
"target": math.ceil(round(avg, 2)),
39+
"delta_percentage": math.ceil(3 * stddev / avg * 100) + DELTA_EXTRA_MARGIN,
4740
}

0 commit comments

Comments
 (0)