@@ -10,7 +10,10 @@
import pytest
from conftest import _test_images_s3_bucket
- from framework.artifacts import ArtifactCollection, ArtifactSet, \
-     create_net_devices_configuration
+ from framework.artifacts import (
+     ArtifactCollection,
+     ArtifactSet,
+     create_net_devices_configuration,
+ )
from framework.builder import MicrovmBuilder, SnapshotBuilder, SnapshotType
from framework.matrix import TestContext, TestMatrix
from framework.stats import core
@@ -25,10 +28,10 @@
from integration_tests.performance.utils import handle_failure

TEST_ID = "snap_restore_performance"
- CONFIG_NAME_REL = "test_{}_config_{}.json".format(
-     TEST_ID, get_kernel_version(level=1))
+ CONFIG_NAME_REL = "test_{}_config_{}.json" \
+     .format(TEST_ID, get_kernel_version(level=1))
CONFIG_NAME_ABS = os.path.join(defs.CFG_LOCATION, CONFIG_NAME_REL)
- CONFIG_DICT = json.load(open(CONFIG_NAME_ABS, encoding='utf-8'))
+ CONFIG_DICT = json.load(open(CONFIG_NAME_ABS, encoding="utf-8"))

DEBUG = False
BASE_VCPU_COUNT = 1
@@ -38,7 +41,7 @@
USEC_IN_MSEC = 1000

# Measurements tags.
- RESTORE_LATENCY = "restore_latency"
+ RESTORE_LATENCY = "latency"

# Define 4 net device configurations.
net_ifaces = create_net_devices_configuration(4)
@@ -53,18 +56,21 @@
class SnapRestoreBaselinesProvider(BaselineProvider):
    """Baselines provider for snapshot restore latency."""

-     def __init__(self, env_id):
+     def __init__(self, env_id, workload):
        """Snapshot baseline provider initialization."""
        cpu_model_name = get_cpu_model_name()
-         baselines = list(filter(
-             lambda cpu_baseline: cpu_baseline["model"] == cpu_model_name,
-             CONFIG_DICT["hosts"]["instances"][get_instance_type()]["cpus"]))
+         baselines = list(
+             filter(
+                 lambda cpu_baseline: cpu_baseline["model"] == cpu_model_name,
+                 CONFIG_DICT["hosts"]["instances"][get_instance_type()]["cpus"],
+             )
+         )

        super().__init__(DictQuery({}))
        if len(baselines) > 0:
            super().__init__(DictQuery(baselines[0]))

-         self._tag = "baselines/{}/" + env_id + "/{}"
+         self._tag = "baselines/{}/" + env_id + "/{}/" + workload

    def get(self, ms_name: str, st_name: str) -> dict:
        """Return the baseline value corresponding to the key."""
@@ -84,33 +90,35 @@ def construct_scratch_drives():
    """Create an array of scratch disks."""
    scratchdisks = ["vdb", "vdc", "vdd", "vde"]
    disk_files = [
-         drive_tools.FilesystemFile(tempfile.mktemp(), size=64)
-         for _ in scratchdisks
+         drive_tools.FilesystemFile(
+             tempfile.mktemp(), size=64
+         ) for _ in scratchdisks
    ]
    return list(zip(scratchdisks, disk_files))


- def default_lambda_consumer(env_id):
+ def default_lambda_consumer(env_id, workload):
    """Create a default lambda consumer for the snapshot restore test."""
    return st.consumer.LambdaConsumer(
        metadata_provider=DictMetadataProvider(
            CONFIG_DICT["measurements"],
-             SnapRestoreBaselinesProvider(env_id)
+             SnapRestoreBaselinesProvider(env_id, workload)
        ),
        func=consume_output,
-         func_kwargs={})
+         func_kwargs={},
+     )


def get_snap_restore_latency(
-         context,
-         vcpus,
-         mem_size,
-         nets=1,
-         blocks=1,
-         all_devices=False,
-         iterations=10):
+     context, vcpus, mem_size, nets=1, blocks=1, all_devices=False,
+     iterations=10
+ ):
    """Restore snapshots with various configs to measure latency."""
-     vm_builder = context.custom['builder']
+     vm_builder = context.custom["builder"]
+     logger = context.custom["logger"]
+     balloon = vsock = 1 if all_devices else 0
+     microvm_spec = f"{vcpus}vcpu_{mem_size}mb_{nets}net_{blocks}\
+ block_{vsock}vsock_{balloon}balloon"

    # Create a rw copy artifact.
    rw_disk = context.disk.copy()
@@ -128,7 +136,9 @@ def get_snap_restore_latency(
        ssh_key=ssh_key,
        config=context.microvm,
        net_ifaces=ifaces,
-         use_ramdisk=True)
+         use_ramdisk=True,
+         io_engine="Sync",
+     )
    basevm = vm_instance.vm
    response = basevm.machine_cfg.put(
        vcpu_count=vcpus,
@@ -138,44 +148,45 @@ def get_snap_restore_latency(
    extra_disk_paths = []
    if blocks > 1:
-         for (name, diskfile) in scratch_drives[:(blocks - 1)]:
-             basevm.add_drive(name, diskfile.path, use_ramdisk=True)
+         for (name, diskfile) in scratch_drives[: (blocks - 1)]:
+             basevm.add_drive(
+                 name, diskfile.path, use_ramdisk=True, io_engine="Sync"
+             )
            extra_disk_paths.append(diskfile.path)
        assert len(extra_disk_paths) > 0

    if all_devices:
        response = basevm.balloon.put(
-             amount_mib=0,
-             deflate_on_oom=True,
-             stats_polling_interval_s=1
+             amount_mib=0, deflate_on_oom=True, stats_polling_interval_s=1
        )
        assert basevm.api_session.is_status_no_content(response.status_code)

        response = basevm.vsock.put(
-             vsock_id="vsock0",
-             guest_cid=3,
-             uds_path="/v.sock"
+             vsock_id="vsock0", guest_cid=3, uds_path="/v.sock"
        )
        assert basevm.api_session.is_status_no_content(response.status_code)

    basevm.start()

+     logger.info(
+         'Testing with microvm: "{}", kernel {}, disk {}'.format(
+             microvm_spec, context.kernel.name(), context.disk.name()
+         )
+     )
    # Create a snapshot builder from a microvm.
    snapshot_builder = SnapshotBuilder(basevm)
    full_snapshot = snapshot_builder.create(
        [rw_disk.local_path()] + extra_disk_paths,
        ssh_key,
        SnapshotType.FULL,
        net_ifaces=ifaces,
-         use_ramdisk=True
+         use_ramdisk=True,
    )
    basevm.kill()
    values = []
    for _ in range(iterations):
        microvm, metrics_fifo = vm_builder.build_from_snapshot(
-             full_snapshot,
-             resume=True,
-             use_ramdisk=True
+             full_snapshot, resume=True, use_ramdisk=True
        )
        # Attempt to connect to resumed microvm.
        ssh_connection = net_tools.SSHConnection(microvm.ssh_config)
@@ -188,7 +199,7 @@ def get_snap_restore_latency(
        metrics = microvm.get_all_metrics(metrics_fifo)
        for data_point in metrics:
            metrics = json.loads(data_point)
-             cur_value = metrics['latencies_us']['load_snapshot']
+             cur_value = metrics["latencies_us"]["load_snapshot"]
            if cur_value > 0:
                value = cur_value / USEC_IN_MSEC
                break
@@ -215,9 +226,7 @@ def consume_output(cons, result):
@pytest.mark.nonci
@pytest.mark.timeout(300 * 1000)  # 1.40 hours
@pytest.mark.parametrize(
-     'results_file_dumper',
-     [CONFIG_NAME_ABS],
-     indirect=True
+     "results_file_dumper", [CONFIG_NAME_ABS], indirect=True
)
def test_snap_restore_performance(bin_cloner_path, results_file_dumper):
    """
@@ -236,124 +245,133 @@ def test_snap_restore_performance(bin_cloner_path, results_file_dumper):
    # Create a test context and add builder, logger, network.
    test_context = TestContext()
    test_context.custom = {
-         'builder': MicrovmBuilder(bin_cloner_path),
-         'logger': logger,
-         'name': TEST_ID,
-         'results_file_dumper': results_file_dumper
+         "builder": MicrovmBuilder(bin_cloner_path),
+         "logger": logger,
+         "name": TEST_ID,
+         "results_file_dumper": results_file_dumper,
+         "workload": "restore",
    }

-     test_matrix = TestMatrix(context=test_context,
-                              artifact_sets=[
-                                  microvm_artifacts,
-                                  kernel_artifacts,
-                                  disk_artifacts
-                              ])
+     test_matrix = TestMatrix(
+         context=test_context,
+         artifact_sets=[microvm_artifacts, kernel_artifacts, disk_artifacts],
+     )
    test_matrix.run_test(snapshot_workload)


def snapshot_scaling_vcpus(context, st_core, vcpu_count=10):
    """Restore snapshots with variable vcpu count."""
+     workload = context.custom["workload"]
    for i in range(vcpu_count):
-         env_id = f"{context.kernel.name()}/{context.disk.name()}/" \
+         env_id = (
+             f"{context.kernel.name()}/{context.disk.name()}/"
            f"{BASE_VCPU_COUNT + i}vcpu_{BASE_MEM_SIZE_MIB}mb"
+         )

        st_prod = st.producer.LambdaProducer(
            func=get_snap_restore_latency,
            func_kwargs={
                "context": context,
                "vcpus": BASE_VCPU_COUNT + i,
-                 "mem_size": BASE_MEM_SIZE_MIB
-             }
+                 "mem_size": BASE_MEM_SIZE_MIB,
+             },
        )
-         st_cons = default_lambda_consumer(env_id)
-         st_core.add_pipe(st_prod, st_cons, f"{env_id}/restore_latency")
+         st_cons = default_lambda_consumer(env_id, workload)
+         st_core.add_pipe(st_prod, st_cons, f"{env_id}/{workload}")


def snapshot_scaling_mem(context, st_core, mem_exponent=9):
    """Restore snapshots with variable memory size."""
+     workload = context.custom["workload"]
    for i in range(1, mem_exponent):
-         env_id = f"{context.kernel.name()}/{context.disk.name()}/" \
+         env_id = (
+             f"{context.kernel.name()}/{context.disk.name()}/"
            f"{BASE_VCPU_COUNT}vcpu_{BASE_MEM_SIZE_MIB*(2**i)}mb"
+         )

        st_prod = st.producer.LambdaProducer(
            func=get_snap_restore_latency,
            func_kwargs={
                "context": context,
                "vcpus": BASE_VCPU_COUNT,
-                 "mem_size": BASE_MEM_SIZE_MIB * (2**i)
-             }
+                 "mem_size": BASE_MEM_SIZE_MIB * (2**i),
+             },
        )
-         st_cons = default_lambda_consumer(env_id)
-         st_core.add_pipe(st_prod, st_cons, f"{env_id}/restore_latency")
+         st_cons = default_lambda_consumer(env_id, workload)
+         st_core.add_pipe(st_prod, st_cons, f"{env_id}/{workload}")


def snapshot_scaling_net(context, st_core, net_count=4):
    """Restore snapshots with variable net device count."""
+     workload = context.custom["workload"]
    for i in range(1, net_count):
-         env_id = f"{context.kernel.name()}/{context.disk.name()}/" \
+         env_id = (
+             f"{context.kernel.name()}/{context.disk.name()}/"
            f"{BASE_NET_COUNT + i}net_dev"
+         )

        st_prod = st.producer.LambdaProducer(
            func=get_snap_restore_latency,
            func_kwargs={
                "context": context,
                "vcpus": BASE_VCPU_COUNT,
                "mem_size": BASE_MEM_SIZE_MIB,
-                 "nets": BASE_NET_COUNT + i
-             }
+                 "nets": BASE_NET_COUNT + i,
+             },
        )
-         st_cons = default_lambda_consumer(env_id)
-         st_core.add_pipe(st_prod, st_cons, f"{env_id}/restore_latency")
+         st_cons = default_lambda_consumer(env_id, workload)
+         st_core.add_pipe(st_prod, st_cons, f"{env_id}/{workload}")


def snapshot_scaling_block(context, st_core, block_count=4):
    """Restore snapshots with variable block device count."""
    # pylint: disable=W0603
+     workload = context.custom["workload"]
    global scratch_drives
    scratch_drives = construct_scratch_drives()

    for i in range(1, block_count):
-         env_id = f"{context.kernel.name()}/{context.disk.name()}/" \
+         env_id = (
+             f"{context.kernel.name()}/{context.disk.name()}/"
            f"{BASE_BLOCK_COUNT + i}block_dev"
+         )

        st_prod = st.producer.LambdaProducer(
            func=get_snap_restore_latency,
            func_kwargs={
                "context": context,
                "vcpus": BASE_VCPU_COUNT,
                "mem_size": BASE_MEM_SIZE_MIB,
-                 "blocks": BASE_BLOCK_COUNT + i
-             }
+                 "blocks": BASE_BLOCK_COUNT + i,
+             },
        )
-         st_cons = default_lambda_consumer(env_id)
-         st_core.add_pipe(st_prod, st_cons, f"{env_id}/restore_latency")
+         st_cons = default_lambda_consumer(env_id, workload)
+         st_core.add_pipe(st_prod, st_cons, f"{env_id}/{workload}")


def snapshot_all_devices(context, st_core):
    """Restore snapshots with one of each devices."""
-     env_id = f"{context.kernel.name()}/{context.disk.name()}/" \
-         f"all_dev"
-
+     workload = context.custom["workload"]
+     env_id = f"{context.kernel.name()}/{context.disk.name()}/" f"all_dev"
    st_prod = st.producer.LambdaProducer(
        func=get_snap_restore_latency,
        func_kwargs={
            "context": context,
            "vcpus": BASE_VCPU_COUNT,
            "mem_size": BASE_MEM_SIZE_MIB,
-             "all_devices": True
-         }
+             "all_devices": True,
+         },
    )
-     st_cons = default_lambda_consumer(env_id)
-     st_core.add_pipe(st_prod, st_cons, f"{env_id}/restore_latency")
+     st_cons = default_lambda_consumer(env_id, workload)
+     st_core.add_pipe(st_prod, st_cons, f"{env_id}/{workload}")


def snapshot_workload(context):
    """Test all VM configurations for snapshot restore."""
    file_dumper = context.custom["results_file_dumper"]

    st_core = core.Core(
-         name=TEST_ID,
-         iterations=1,
+         name=TEST_ID, iterations=1,
        custom={"cpu_model_name": get_cpu_model_name()}
    )
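
Note on the baseline tag change: passing `workload` into `SnapRestoreBaselinesProvider` appends one more path component to the baseline lookup key, so baselines are now keyed per workload ("restore", taken from `test_context.custom`) in addition to the environment id. Below is a minimal sketch of the resulting key shape, assuming `get()` fills `self._tag` with the measurement and statistic names (its body is outside the hunks shown); the env_id and statistic name are placeholders, not values from this commit.

# Illustration only, not part of the commit.
env_id = "<kernel>/<disk>/1vcpu_128mb"   # placeholder environment id
workload = "restore"                     # value set in test_context.custom

tag = "baselines/{}/" + env_id + "/{}/" + workload
key = tag.format("latency", "<st_name>")  # measurement name from RESTORE_LATENCY
print(key)
# baselines/latency/<kernel>/<disk>/1vcpu_128mb/<st_name>/restore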