 import host_tools.logging as log_tools
 from framework.artifacts import NetIfaceConfig
-from framework.builder import MicrovmBuilder, SnapshotBuilder, SnapshotType
-from framework.stats import consumer, criteria, function, producer, types
-from framework.utils import CpuMap, eager_map, get_kernel_version
-from framework.utils_cpuid import get_instance_type
+from framework.builder import MicrovmBuilder, SnapshotBuilder
+from framework.stats import consumer, producer, types
+from framework.utils import CpuMap

 # How many latencies do we sample per test.
 SAMPLE_COUNT = 3
 USEC_IN_MSEC = 1000
 PLATFORM = platform.machine()

-
-# Latencies in milliseconds.
-# The latency for snapshot creation has high variance due to scheduler noise.
-# The issue is tracked here:
-# https://github.com/firecracker-microvm/firecracker/issues/2346
-# TODO: Update baseline values after fix.
-CREATE_LATENCY_BASELINES = {
-    ("x86_64", "2vcpu_256mb.json", "FULL"): 180,
-    ("x86_64", "2vcpu_256mb.json", "DIFF"): 70,
-    ("x86_64", "2vcpu_512mb.json", "FULL"): 280,
-    ("x86_64", "2vcpu_512mb.json", "DIFF"): 90,
-    ("aarch64", "2vcpu_256mb.json", "FULL"): 160,
-    ("aarch64", "2vcpu_256mb.json", "DIFF"): 70,
-    ("aarch64", "2vcpu_512mb.json", "FULL"): 300,
-    ("aarch64", "2vcpu_512mb.json", "DIFF"): 75,
-}
-
-# The latencies for x86 are pretty high due to a design
-# in the cgroups V1 implementation in the kernel. We recommend
-# switching to cgroups v2 for much lower snap resume latencies.
-# More details on this:
-# https://github.com/firecracker-microvm/firecracker/issues/2027
-# Latencies for snap resume on cgroups V2 can be found in our
-# long-running performance configs (i.e. integration_tests/performance/configs).
-LOAD_LATENCY_BASELINES = {
-    ("m5d.metal", "4.14", "sync", "2vcpu_256mb.json"): 9,
-    ("m5d.metal", "4.14", "sync", "2vcpu_512mb.json"): 9,
-    ("m5d.metal", "5.10", "sync", "2vcpu_256mb.json"): 70,
-    ("m5d.metal", "5.10", "sync", "2vcpu_512mb.json"): 90,
-    ("m5d.metal", "5.10", "async", "2vcpu_256mb.json"): 210,
-    ("m5d.metal", "5.10", "async", "2vcpu_512mb.json"): 210,
-    ("m5d.metal", "6.1", "sync", "2vcpu_256mb.json"): 255,
-    ("m5d.metal", "6.1", "sync", "2vcpu_512mb.json"): 245,
-    ("m5d.metal", "6.1", "async", "2vcpu_256mb.json"): 245,
-    ("m5d.metal", "6.1", "async", "2vcpu_512mb.json"): 225,
-    ("m6a.metal", "4.14", "sync", "2vcpu_256mb.json"): 15,
-    ("m6a.metal", "4.14", "sync", "2vcpu_512mb.json"): 19,
-    ("m6a.metal", "5.10", "sync", "2vcpu_256mb.json"): 75,
-    ("m6a.metal", "5.10", "sync", "2vcpu_512mb.json"): 75,
-    ("m6a.metal", "5.10", "async", "2vcpu_256mb.json"): 220,
-    ("m6a.metal", "5.10", "async", "2vcpu_512mb.json"): 220,
-    ("m6a.metal", "6.1", "sync", "2vcpu_256mb.json"): 250,
-    ("m6a.metal", "6.1", "sync", "2vcpu_512mb.json"): 250,
-    ("m6a.metal", "6.1", "async", "2vcpu_256mb.json"): 250,
-    ("m6a.metal", "6.1", "async", "2vcpu_512mb.json"): 300,
-    ("m6i.metal", "4.14", "sync", "2vcpu_256mb.json"): 9,
-    ("m6i.metal", "4.14", "sync", "2vcpu_512mb.json"): 9,
-    ("m6i.metal", "5.10", "sync", "2vcpu_256mb.json"): 70,
-    ("m6i.metal", "5.10", "sync", "2vcpu_512mb.json"): 70,
-    ("m6i.metal", "5.10", "async", "2vcpu_256mb.json"): 245,
-    ("m6i.metal", "5.10", "async", "2vcpu_512mb.json"): 245,
-    ("m6i.metal", "6.1", "sync", "2vcpu_256mb.json"): 220,
-    ("m6i.metal", "6.1", "sync", "2vcpu_512mb.json"): 250,
-    ("m6i.metal", "6.1", "async", "2vcpu_256mb.json"): 220,
-    ("m6i.metal", "6.1", "async", "2vcpu_512mb.json"): 220,
-    ("m6g.metal", "4.14", "sync", "2vcpu_256mb.json"): 3,
-    ("m6g.metal", "4.14", "sync", "2vcpu_512mb.json"): 3,
-    ("m6g.metal", "5.10", "sync", "2vcpu_256mb.json"): 3,
-    ("m6g.metal", "5.10", "sync", "2vcpu_512mb.json"): 3,
-    ("m6g.metal", "5.10", "async", "2vcpu_256mb.json"): 320,
-    ("m6g.metal", "5.10", "async", "2vcpu_512mb.json"): 380,
-    ("m6g.metal", "6.1", "sync", "2vcpu_256mb.json"): 2,
-    ("m6g.metal", "6.1", "sync", "2vcpu_512mb.json"): 3,
-    ("m6g.metal", "6.1", "async", "2vcpu_256mb.json"): 2,
-    ("m6g.metal", "6.1", "async", "2vcpu_512mb.json"): 3,
-    ("c7g.metal", "4.14", "sync", "2vcpu_256mb.json"): 2,
-    ("c7g.metal", "4.14", "sync", "2vcpu_512mb.json"): 2,
-    ("c7g.metal", "5.10", "sync", "2vcpu_256mb.json"): 2,
-    ("c7g.metal", "5.10", "sync", "2vcpu_512mb.json"): 3,
-    ("c7g.metal", "5.10", "async", "2vcpu_256mb.json"): 320,
-    ("c7g.metal", "5.10", "async", "2vcpu_512mb.json"): 360,
-    ("c7g.metal", "6.1", "sync", "2vcpu_256mb.json"): 2,
-    ("c7g.metal", "6.1", "sync", "2vcpu_512mb.json"): 3,
-    ("c7g.metal", "6.1", "async", "2vcpu_256mb.json"): 2,
-    ("c7g.metal", "6.1", "async", "2vcpu_512mb.json"): 3,
-}
-
-
-def snapshot_create_measurements(vm_type, snapshot_type):
-    """Define measurements for snapshot create tests."""
-    lower_than = {
-        "target": CREATE_LATENCY_BASELINES[
-            platform.machine(),
-            vm_type,
-            "FULL" if snapshot_type == SnapshotType.FULL else "DIFF",
-        ]
-    }
-
-    latency = types.MeasurementDef.create_measurement(
-        "latency",
-        "ms",
-        [function.Max("max")],
-        {"max": criteria.LowerThan(lower_than)},
-    )
-
-    return [latency]
+# measurement without pass criteria = test is infallible but still submits metrics. Nice!
+LATENCY_MEASUREMENT = types.MeasurementDef.create_measurement(
+    "latency",
+    "ms",
+    [],
+    {},
+)


-def snapshot_resume_measurements(vm_type, io_engine):
-    """Define measurements for snapshot resume tests."""
-    load_latency = {
-        "target": LOAD_LATENCY_BASELINES[
-            get_instance_type(), get_kernel_version(level=1), io_engine, vm_type
-        ]
-    }
-
-    latency = types.MeasurementDef.create_measurement(
-        "latency",
-        "ms",
-        [function.Max("max")],
-        {"max": criteria.LowerThan(load_latency)},
-    )
-
-    return [latency]
-
-
-def snapshot_create_producer(
-    logger, vm, disks, ssh_key, target_version, metrics_fifo, snapshot_type
-):
+def snapshot_create_producer(logger, vm, disks, ssh_key, target_version, metrics_fifo):
     """Produce results for snapshot create tests."""
     snapshot_builder = SnapshotBuilder(vm)
     snapshot_builder.create(
         disks=disks,
         ssh_key=ssh_key,
-        snapshot_type=snapshot_type,
         target_version=target_version,
         use_ramdisk=True,
     )
     metrics = vm.flush_metrics(metrics_fifo)

-    if snapshot_type == SnapshotType.FULL:
-        value = metrics["latencies_us"]["full_create_snapshot"] / USEC_IN_MSEC
-    else:
-        value = metrics["latencies_us"]["diff_create_snapshot"] / USEC_IN_MSEC
+    value = metrics["latencies_us"]["full_create_snapshot"] / USEC_IN_MSEC

     logger.info("Latency {} ms".format(value))

@@ -203,6 +89,12 @@ def test_older_snapshot_resume_latency(
     With each previous firecracker version, create a snapshot and try to
     restore in current version.
     """
+
+    # The guest kernel does not "participate" in snapshot restore, so just pick some
+    # arbitrary one
+    if "4.14" not in guest_kernel.name():
+        pytest.skip()
+
     logger = logging.getLogger("old_snapshot_load")
     jailer = firecracker_release.jailer()
     fc_version = firecracker_release.base_name()[1:]
@@ -252,39 +144,37 @@ def test_older_snapshot_resume_latency(
         ),
         func_kwargs={},
     )
-    eager_map(
-        cons.set_measurement_def,
-        snapshot_resume_measurements(microvm_cfg, io_engine.lower()),
-    )
+    cons.set_measurement_def(LATENCY_MEASUREMENT)

     st_core.add_pipe(producer=prod, consumer=cons, tag=microvm_cfg)
     # Gather results and verify pass criteria.
     st_core.run_exercise()


-@pytest.mark.parametrize("guest_mem_mib", [256, 512])
-@pytest.mark.parametrize("snapshot_type", [SnapshotType.FULL, SnapshotType.DIFF])
 def test_snapshot_create_latency(
     microvm_factory,
     guest_kernel,
     rootfs,
-    guest_mem_mib,
-    snapshot_type,
     firecracker_release,
     st_core,
 ):
     """
-    Test scenario: Full/Diff snapshot create performance measurement.
+    Test scenario: Full snapshot create performance measurement.

     Testing matrix:
     - Guest kernel: all supported ones
     - Rootfs: Ubuntu 18.04
-    - Microvm: 2vCPU with 256/512 MB RAM
-    TODO: Multiple microvm sizes must be tested in the async pipeline.
+    - Microvm: 2vCPU with 512 MB RAM
     """
+
+    # The guest kernel does not "participate" in snapshot restore, so just pick some
+    # arbitrary one
+    if "4.14" not in guest_kernel.name():
+        pytest.skip()
+
     logger = logging.getLogger("snapshot_sequence")

-    diff_snapshots = snapshot_type == SnapshotType.DIFF
+    guest_mem_mib = 512
     vcpus = 2
     microvm_cfg = f"{vcpus}vcpu_{guest_mem_mib}mb.json"
     vm = microvm_factory.build(guest_kernel, rootfs, monitor_memory=False)
@@ -293,7 +183,6 @@ def test_snapshot_create_latency(
         vcpu_count=vcpus,
         mem_size_mib=guest_mem_mib,
         use_initrd=True,
-        track_dirty_pages=diff_snapshots,
     )

     # Configure metrics system.
@@ -319,12 +208,10 @@ def test_snapshot_create_latency(
             idx_vcpu, current_cpu_id + idx_vcpu
         ), f"Failed to pin fc_vcpu {idx_vcpu} thread."

-    st_core.name = f"snapshot_create_{snapshot_type}_latency"
+    st_core.name = f"snapshot_create_SnapshotType.FULL_latency"
     st_core.iterations = SAMPLE_COUNT
     st_core.custom["guest_config"] = microvm_cfg.strip(".json")
-    st_core.custom["snapshot_type"] = (
-        "FULL" if snapshot_type == SnapshotType.FULL else "DIFF"
-    )
+    st_core.custom["snapshot_type"] = "FULL"

     prod = producer.LambdaProducer(
         func=snapshot_create_producer,
@@ -335,7 +222,6 @@ def test_snapshot_create_latency(
             "ssh_key": rootfs.ssh_key(),
             "target_version": firecracker_release.snapshot_version,
             "metrics_fifo": metrics_fifo,
-            "snapshot_type": snapshot_type,
         },
     )

@@ -345,10 +231,7 @@ def test_snapshot_create_latency(
         ),
         func_kwargs={},
     )
-    eager_map(
-        cons.set_measurement_def,
-        snapshot_create_measurements(microvm_cfg, snapshot_type),
-    )
+    cons.set_measurement_def(LATENCY_MEASUREMENT)

     st_core.add_pipe(producer=prod, consumer=cons, tag=microvm_cfg)
     # Gather results and verify pass criteria.
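
For context, a minimal sketch (not part of this commit) of the two measurement styles the diff trades between. It uses the framework.stats calls exactly as they appear in the removed and added lines above; the baseline number is illustrative only.

# Sketch, assuming the framework.stats API shown in this diff.
from framework.stats import criteria, function, types

BASELINE_MS = 180  # illustrative, e.g. the removed ("x86_64", "2vcpu_256mb.json", "FULL") entry

# Old style: aggregate the sampled latencies with Max and fail the test
# when the maximum exceeds the baseline target.
latency_with_criteria = types.MeasurementDef.create_measurement(
    "latency",
    "ms",
    [function.Max("max")],
    {"max": criteria.LowerThan({"target": BASELINE_MS})},
)

# New style (what this commit adopts): no statistic functions and no pass
# criteria, so the test only emits the raw latency metrics and can no longer
# fail against a noisy hard-coded baseline.
latency_metrics_only = types.MeasurementDef.create_measurement(
    "latency",
    "ms",
    [],
    {},
)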