Skip to content

Commit 2a8a966

Browse files
committed
feat(test/balloon): include HugePages in RSS measurements
This moves the logic to measure RSS to framework.utils and adds logic to also include huge pages in the measurement. Furthermore, it adds caching for firecracker_pid, as well as a new property to get the corresponding psutil.Process. Signed-off-by: Riccardo Mancini <[email protected]>
1 parent bea12f4 commit 2a8a966

File tree

4 files changed

+57
-59
lines changed

4 files changed

+57
-59
lines changed

tests/framework/microvm.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,11 @@
2323
from collections import namedtuple
2424
from dataclasses import dataclass
2525
from enum import Enum, auto
26-
from functools import lru_cache
26+
from functools import cached_property, lru_cache
2727
from pathlib import Path
2828
from typing import Optional
2929

30+
import psutil
3031
from tenacity import Retrying, retry, stop_after_attempt, wait_fixed
3132

3233
import host_tools.cargo_build as build_tools
@@ -472,7 +473,7 @@ def state(self):
472473
"""Get the InstanceInfo property and return the state field."""
473474
return self.api.describe.get().json()["state"]
474475

475-
@property
476+
@cached_property
476477
def firecracker_pid(self):
477478
"""Return Firecracker's PID
478479
@@ -491,6 +492,11 @@ def firecracker_pid(self):
491492
with attempt:
492493
return int(self.jailer.pid_file.read_text(encoding="ascii"))
493494

495+
@cached_property
def ps(self):
    """Returns a handle to the psutil.Process for this VM

    The handle is built from `firecracker_pid` on first access and then
    cached on the instance by `cached_property`.
    """
    pid = self.firecracker_pid
    return psutil.Process(pid)
499+
494500
@property
495501
def dimensions(self):
496502
"""Gets a default set of cloudwatch dimensions describing the configuration of this microvm"""

tests/framework/utils.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import typing
1515
from collections import defaultdict, namedtuple
1616
from contextlib import contextmanager
17+
from pathlib import Path
1718
from typing import Dict
1819

1920
import psutil
@@ -129,6 +130,19 @@ def track_cpu_utilization(
129130
return cpu_utilization
130131

131132

133+
def get_resident_memory(process: psutil.Process):
    """Returns current memory utilization in KiB, including used HugeTLBFS

    The result is the sum of the `HugetlbPages` entry of
    `/proc/<pid>/status` (taken as-is, in KiB) and the RSS reported by
    `process.memory_info()` divided by 1024.

    :param process: psutil handle of the process to measure
    :return: resident memory in KiB (int)
    :raises AssertionError: if the status file has no `HugetlbPages` entry
    """
    proc_status = Path("/proc", str(process.pid), "status").read_text("utf-8")

    hugetlbfs_usage = None
    for line in proc_status.splitlines():
        if line.startswith("HugetlbPages:"):  # entry is in KiB
            hugetlbfs_usage = int(line.split()[1])
            break

    if hugetlbfs_usage is None:
        # Raised explicitly instead of `assert False` so the check is not
        # stripped under `python -O` (which would otherwise surface as an
        # unrelated UnboundLocalError on the return below).
        raise AssertionError(f"HugetlbPages not found in {proc_status}")

    return hugetlbfs_usage + process.memory_info().rss // 1024
144+
145+
132146
@contextmanager
133147
def chroot(path):
134148
"""

tests/integration_tests/functional/test_balloon.py

Lines changed: 31 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -9,35 +9,29 @@
99
import pytest
1010
import requests
1111

12-
from framework.utils import check_output, get_free_mem_ssh
12+
from framework.utils import get_resident_memory
1313

1414
STATS_POLLING_INTERVAL_S = 1
1515

1616

17-
def get_stable_rss_mem_by_pid(pid, percentage_delta=1):
17+
def get_stable_rss_mem(uvm, percentage_delta=1):
1818
"""
1919
Get the RSS memory that a guest uses, given the pid of the guest.
2020
2121
Wait till the fluctuations in RSS drop below percentage_delta.
2222
Or print a warning if this does not happen.
2323
"""
2424

25-
# All values are reported as KiB
26-
27-
def get_rss_from_pmap():
28-
_, output, _ = check_output("pmap -X {}".format(pid))
29-
return int(output.split("\n")[-2].split()[1], 10)
30-
3125
first_rss = 0
3226
second_rss = 0
3327
for _ in range(5):
34-
first_rss = get_rss_from_pmap()
28+
first_rss = get_resident_memory(uvm.ps)
3529
time.sleep(1)
36-
second_rss = get_rss_from_pmap()
30+
second_rss = get_resident_memory(uvm.ps)
3731
abs_diff = abs(first_rss - second_rss)
3832
abs_delta = abs_diff / first_rss * 100
3933
print(
40-
f"RSS readings: old: {first_rss} new: {second_rss} abs_diff: {abs_diff} abs_delta: {abs_delta}"
34+
f"RSS readings (bytes): old: {first_rss} new: {second_rss} abs_diff: {abs_diff} abs_delta: {abs_delta}"
4135
)
4236
if abs_delta < percentage_delta:
4337
return second_rss
@@ -87,25 +81,24 @@ def make_guest_dirty_memory(ssh_connection, amount_mib=32):
8781
def _test_rss_memory_lower(test_microvm):
8882
"""Check inflating the balloon makes guest use less rss memory."""
8983
# Get the firecracker pid, and open an ssh connection.
90-
firecracker_pid = test_microvm.firecracker_pid
9184
ssh_connection = test_microvm.ssh
9285

9386
# Using deflate_on_oom, get the RSS as low as possible
9487
test_microvm.api.balloon.patch(amount_mib=200)
9588

9689
# Get initial rss consumption.
97-
init_rss = get_stable_rss_mem_by_pid(firecracker_pid)
90+
init_rss = get_stable_rss_mem(test_microvm)
9891

9992
# Get the balloon back to 0.
10093
test_microvm.api.balloon.patch(amount_mib=0)
10194
# This call will internally wait for rss to become stable.
102-
_ = get_stable_rss_mem_by_pid(firecracker_pid)
95+
_ = get_stable_rss_mem(test_microvm)
10396

10497
# Dirty memory, then inflate balloon and get ballooned rss consumption.
10598
make_guest_dirty_memory(ssh_connection, amount_mib=32)
10699

107100
test_microvm.api.balloon.patch(amount_mib=200)
108-
balloon_rss = get_stable_rss_mem_by_pid(firecracker_pid)
101+
balloon_rss = get_stable_rss_mem(test_microvm)
109102

110103
# Check that the ballooning reclaimed the memory.
111104
assert balloon_rss - init_rss <= 15000
@@ -157,7 +150,7 @@ def test_inflate_reduces_free(uvm_plain_any):
157150
# Inflate 64 MB == 16384 page balloon.
158151
test_microvm.api.balloon.patch(amount_mib=64)
159152
# This call will internally wait for rss to become stable.
160-
_ = get_stable_rss_mem_by_pid(firecracker_pid)
153+
_ = get_stable_rss_mem(test_microvm.ps)
161154

162155
# Get the free memory after ballooning.
163156
available_mem_inflated = get_free_mem_ssh(test_microvm.ssh)
@@ -195,19 +188,18 @@ def test_deflate_on_oom(uvm_plain_any, deflate_on_oom):
195188

196189
# Start the microvm.
197190
test_microvm.start()
198-
firecracker_pid = test_microvm.firecracker_pid
199191

200192
# We get an initial reading of the RSS, then calculate the amount
201193
# we need to inflate the balloon with by subtracting it from the
202194
# VM size and adding an offset of 50 MiB in order to make sure we
203195
# get a lower reading than the initial one.
204-
initial_rss = get_stable_rss_mem_by_pid(firecracker_pid)
196+
initial_rss = get_stable_rss_mem(test_microvm)
205197
inflate_size = 256 - (int(initial_rss / 1024) + 50)
206198

207199
# Inflate the balloon
208200
test_microvm.api.balloon.patch(amount_mib=inflate_size)
209201
# This call will internally wait for rss to become stable.
210-
_ = get_stable_rss_mem_by_pid(firecracker_pid)
202+
_ = get_stable_rss_mem(test_microvm)
211203

212204
# Check that using memory leads to the balloon device automatically
213205
# deflate (or not).
@@ -250,39 +242,38 @@ def test_reinflate_balloon(uvm_plain_any):
250242

251243
# Start the microvm.
252244
test_microvm.start()
253-
firecracker_pid = test_microvm.firecracker_pid
254245

255246
# First inflate the balloon to free up the uncertain amount of memory
256247
# used by the kernel at boot and establish a baseline, then give back
257248
# the memory.
258249
test_microvm.api.balloon.patch(amount_mib=200)
259250
# This call will internally wait for rss to become stable.
260-
_ = get_stable_rss_mem_by_pid(firecracker_pid)
251+
_ = get_stable_rss_mem(test_microvm)
261252

262253
test_microvm.api.balloon.patch(amount_mib=0)
263254
# This call will internally wait for rss to become stable.
264-
_ = get_stable_rss_mem_by_pid(firecracker_pid)
255+
_ = get_stable_rss_mem(test_microvm)
265256

266257
# Get the guest to dirty memory.
267258
make_guest_dirty_memory(test_microvm.ssh, amount_mib=32)
268-
first_reading = get_stable_rss_mem_by_pid(firecracker_pid)
259+
first_reading = get_stable_rss_mem(test_microvm)
269260

270261
# Now inflate the balloon.
271262
test_microvm.api.balloon.patch(amount_mib=200)
272-
second_reading = get_stable_rss_mem_by_pid(firecracker_pid)
263+
second_reading = get_stable_rss_mem(test_microvm)
273264

274265
# Now deflate the balloon.
275266
test_microvm.api.balloon.patch(amount_mib=0)
276267
# This call will internally wait for rss to become stable.
277-
_ = get_stable_rss_mem_by_pid(firecracker_pid)
268+
_ = get_stable_rss_mem(test_microvm)
278269

279270
# Now have the guest dirty memory again.
280271
make_guest_dirty_memory(test_microvm.ssh, amount_mib=32)
281-
third_reading = get_stable_rss_mem_by_pid(firecracker_pid)
272+
third_reading = get_stable_rss_mem(test_microvm)
282273

283274
# Now inflate the balloon again.
284275
test_microvm.api.balloon.patch(amount_mib=200)
285-
fourth_reading = get_stable_rss_mem_by_pid(firecracker_pid)
276+
fourth_reading = get_stable_rss_mem(test_microvm)
286277

287278
# Check that the memory used is the same after regardless of the previous
288279
# inflate history of the balloon (with the third reading being allowed
@@ -309,10 +300,9 @@ def test_size_reduction(uvm_plain_any):
309300

310301
# Start the microvm.
311302
test_microvm.start()
312-
firecracker_pid = test_microvm.firecracker_pid
313303

314304
# Check memory usage.
315-
first_reading = get_stable_rss_mem_by_pid(firecracker_pid)
305+
first_reading = get_stable_rss_mem(test_microvm)
316306

317307
# Have the guest drop its caches.
318308
test_microvm.ssh.run("sync; echo 3 > /proc/sys/vm/drop_caches")
@@ -328,7 +318,7 @@ def test_size_reduction(uvm_plain_any):
328318
test_microvm.api.balloon.patch(amount_mib=inflate_size)
329319

330320
# Check memory usage again.
331-
second_reading = get_stable_rss_mem_by_pid(firecracker_pid)
321+
second_reading = get_stable_rss_mem(test_microvm)
332322

333323
# There should be a reduction of at least 10MB.
334324
assert first_reading - second_reading >= 10000
@@ -353,7 +343,6 @@ def test_stats(uvm_plain_any):
353343

354344
# Start the microvm.
355345
test_microvm.start()
356-
firecracker_pid = test_microvm.firecracker_pid
357346

358347
# Give Firecracker enough time to poll the stats at least once post-boot
359348
time.sleep(STATS_POLLING_INTERVAL_S * 2)
@@ -371,7 +360,7 @@ def test_stats(uvm_plain_any):
371360
make_guest_dirty_memory(test_microvm.ssh, amount_mib=10)
372361
time.sleep(1)
373362
# This call will internally wait for rss to become stable.
374-
_ = get_stable_rss_mem_by_pid(firecracker_pid)
363+
_ = get_stable_rss_mem(test_microvm)
375364

376365
# Make sure that the stats catch the page faults.
377366
after_workload_stats = test_microvm.api.balloon_stats.get().json()
@@ -380,7 +369,7 @@ def test_stats(uvm_plain_any):
380369
# Now inflate the balloon with 10MB of pages.
381370
test_microvm.api.balloon.patch(amount_mib=10)
382371
# This call will internally wait for rss to become stable.
383-
_ = get_stable_rss_mem_by_pid(firecracker_pid)
372+
_ = get_stable_rss_mem(test_microvm)
384373

385374
# Get another reading of the stats after the polling interval has passed.
386375
inflated_stats = test_microvm.api.balloon_stats.get().json()
@@ -393,7 +382,7 @@ def test_stats(uvm_plain_any):
393382
# available memory.
394383
test_microvm.api.balloon.patch(amount_mib=0)
395384
# This call will internally wait for rss to become stable.
396-
_ = get_stable_rss_mem_by_pid(firecracker_pid)
385+
_ = get_stable_rss_mem(test_microvm)
397386

398387
# Get another reading of the stats after the polling interval has passed.
399388
deflated_stats = test_microvm.api.balloon_stats.get().json()
@@ -421,13 +410,12 @@ def test_stats_update(uvm_plain_any):
421410

422411
# Start the microvm.
423412
test_microvm.start()
424-
firecracker_pid = test_microvm.firecracker_pid
425413

426414
# Dirty 30MB of pages.
427415
make_guest_dirty_memory(test_microvm.ssh, amount_mib=30)
428416

429417
# This call will internally wait for rss to become stable.
430-
_ = get_stable_rss_mem_by_pid(firecracker_pid)
418+
_ = get_stable_rss_mem(test_microvm)
431419

432420
# Get an initial reading of the stats.
433421
initial_stats = test_microvm.api.balloon_stats.get().json()
@@ -477,17 +465,14 @@ def test_balloon_snapshot(uvm_plain_any, microvm_factory):
477465
make_guest_dirty_memory(vm.ssh, amount_mib=60)
478466
time.sleep(1)
479467

480-
# Get the firecracker pid, and open an ssh connection.
481-
firecracker_pid = vm.firecracker_pid
482-
483468
# Check memory usage.
484-
first_reading = get_stable_rss_mem_by_pid(firecracker_pid)
469+
first_reading = get_stable_rss_mem(vm)
485470

486471
# Now inflate the balloon with 20MB of pages.
487472
vm.api.balloon.patch(amount_mib=20)
488473

489474
# Check memory usage again.
490-
second_reading = get_stable_rss_mem_by_pid(firecracker_pid)
475+
second_reading = get_stable_rss_mem(vm)
491476

492477
# There should be a reduction in RSS, but it's inconsistent.
493478
# We only test that the reduction happens.
@@ -496,28 +481,25 @@ def test_balloon_snapshot(uvm_plain_any, microvm_factory):
496481
snapshot = vm.snapshot_full()
497482
microvm = microvm_factory.build_from_snapshot(snapshot)
498483

499-
# Get the firecracker from snapshot pid, and open an ssh connection.
500-
firecracker_pid = microvm.firecracker_pid
501-
502484
# Wait out the polling interval, then get the updated stats.
503485
time.sleep(STATS_POLLING_INTERVAL_S * 2)
504486
stats_after_snap = microvm.api.balloon_stats.get().json()
505487

506488
# Check memory usage.
507-
third_reading = get_stable_rss_mem_by_pid(firecracker_pid)
489+
third_reading = get_stable_rss_mem(microvm)
508490

509491
# Dirty 60MB of pages.
510492
make_guest_dirty_memory(microvm.ssh, amount_mib=60)
511493

512494
# Check memory usage.
513-
fourth_reading = get_stable_rss_mem_by_pid(firecracker_pid)
495+
fourth_reading = get_stable_rss_mem(microvm)
514496

515497
assert fourth_reading > third_reading
516498

517499
# Inflate the balloon with another 20MB of pages.
518500
microvm.api.balloon.patch(amount_mib=40)
519501

520-
fifth_reading = get_stable_rss_mem_by_pid(firecracker_pid)
502+
fifth_reading = get_stable_rss_mem(microvm)
521503

522504
# There should be a reduction in RSS, but it's inconsistent.
523505
# We only test that the reduction happens.
@@ -557,15 +539,14 @@ def test_memory_scrub(uvm_plain_any):
557539
microvm.api.balloon.patch(amount_mib=60)
558540

559541
# Get the firecracker pid, and open an ssh connection.
560-
firecracker_pid = microvm.firecracker_pid
561542

562543
# Wait for the inflate to complete.
563-
_ = get_stable_rss_mem_by_pid(firecracker_pid)
544+
_ = get_stable_rss_mem(microvm)
564545

565546
# Deflate the balloon completely.
566547
microvm.api.balloon.patch(amount_mib=0)
567548

568549
# Wait for the deflate to complete.
569-
_ = get_stable_rss_mem_by_pid(firecracker_pid)
550+
_ = get_stable_rss_mem(microvm)
570551

571552
microvm.ssh.check_output("/usr/local/bin/readmem {} {}".format(60, 1))

tests/integration_tests/functional/test_snapshot_restore_cross_kernel.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,29 +20,26 @@
2020
from framework.utils_cpu_templates import get_supported_cpu_templates
2121
from framework.utils_vsock import check_vsock_device
2222
from integration_tests.functional.test_balloon import (
23-
get_stable_rss_mem_by_pid,
23+
get_stable_rss_mem,
2424
make_guest_dirty_memory,
2525
)
2626

2727
pytestmark = pytest.mark.nonci
2828

2929

3030
def _test_balloon(microvm):
    """Check that RSS grows when the guest dirties memory and shrinks
    again once the balloon is inflated.

    `get_stable_rss_mem` takes the microvm object itself and reads its
    `ps` property internally, so the VM (not `microvm.ps`) is passed.
    """
    # Check memory usage.
    first_reading = get_stable_rss_mem(microvm)
    # Dirty 300MB of pages.
    make_guest_dirty_memory(microvm.ssh, amount_mib=300)
    # Check memory usage again.
    second_reading = get_stable_rss_mem(microvm)
    assert second_reading > first_reading

    # Inflate the balloon. Get back 200MB.
    microvm.api.balloon.patch(amount_mib=200)

    third_reading = get_stable_rss_mem(microvm)
    # Ensure that there is a reduction in RSS.
    assert second_reading > third_reading
4845

0 commit comments

Comments
 (0)