|
16 | 16 | STATS_POLLING_INTERVAL_S = 1 |
17 | 17 |
|
18 | 18 |
|
def check_guest_dmesg_for_stalls(ssh_connection):
    """Check guest dmesg for RCU stalls and soft lockups.

    Runs ``dmesg`` through ``ssh_connection.run`` and fails if the captured
    output contains any of the known kernel stall signatures listed below.

    :param ssh_connection: object whose ``run(cmd)`` returns a 3-tuple; only
        the second element is inspected (presumably the command's stdout as
        text -- confirm against the SSH helper).
    :raises AssertionError: if a stall signature is present. The message
        names the signature and quotes only the dmesg lines that contain it.
    """
    # Checked in this order, so the first signature found is the one reported.
    stall_signatures = (
        "rcu_sched self-detected stall on CPU",
        "rcu_preempt detected stalls on CPUs/tasks",
        "BUG: soft lockup -",
    )

    _, stdout, _ = ssh_connection.run("dmesg")

    for signature in stall_signatures:
        # The message expression is only evaluated when the assertion fails,
        # so the line scan costs nothing on the passing path.
        assert signature not in stdout, (
            f"Guest dmesg contains {signature!r}:\n"
            + "\n".join(
                line for line in stdout.splitlines() if signature in line
            )
        )
| 25 | + |
| 26 | + |
19 | 27 | def lower_ssh_oom_chance(ssh_connection): |
20 | 28 | """Lure OOM away from ssh process""" |
21 | 29 | logger = logging.getLogger("lower_ssh_oom_chance") |
@@ -76,6 +84,7 @@ def _test_rss_memory_lower(test_microvm): |
76 | 84 |
|
77 | 85 | # Check that the ballooning reclaimed the memory. |
78 | 86 | assert balloon_rss - init_rss <= 15000 |
| 87 | + check_guest_dmesg_for_stalls(ssh_connection) |
79 | 88 |
|
80 | 89 |
|
81 | 90 | # pylint: disable=C0103 |
@@ -131,6 +140,7 @@ def test_inflate_reduces_free(uvm_plain_any): |
131 | 140 |
|
132 | 141 | # Assert that ballooning reclaimed about 64 MB of memory. |
133 | 142 | assert available_mem_inflated <= available_mem_deflated - 85 * 64000 / 100 |
| 143 | + check_guest_dmesg_for_stalls(test_microvm.ssh) |
134 | 144 |
|
135 | 145 |
|
136 | 146 | # pylint: disable=C0103 |
@@ -192,6 +202,7 @@ def test_deflate_on_oom(uvm_plain_any, deflate_on_oom): |
192 | 202 | print(f"size before: {balloon_size_before} size after: {balloon_size_after}") |
193 | 203 | if deflate_on_oom: |
194 | 204 | assert balloon_size_after < balloon_size_before, "Balloon did not deflate" |
| 205 | + check_guest_dmesg_for_stalls(test_microvm.ssh) |
195 | 206 | else: |
196 | 207 | assert balloon_size_after >= balloon_size_before, "Balloon deflated" |
197 | 208 | # Kill it here, letting the infrastructure know that the process might |
@@ -255,6 +266,7 @@ def test_reinflate_balloon(uvm_plain_any): |
255 | 266 | # is probably freed after the first inflation. |
256 | 267 | assert (third_reading - first_reading) <= 20000 |
257 | 268 | assert abs(second_reading - fourth_reading) <= 20000 |
| 269 | + check_guest_dmesg_for_stalls(test_microvm.ssh) |
258 | 270 |
|
259 | 271 |
|
260 | 272 | # pylint: disable=C0103 |
@@ -326,6 +338,7 @@ def test_stats(uvm_plain_any): |
326 | 338 | # Ensure the stats reflect deflating the balloon. |
327 | 339 | assert inflated_stats["free_memory"] < deflated_stats["free_memory"] |
328 | 340 | assert inflated_stats["available_memory"] < deflated_stats["available_memory"] |
| 341 | + check_guest_dmesg_for_stalls(test_microvm.ssh) |
329 | 342 |
|
330 | 343 |
|
331 | 344 | def test_stats_update(uvm_plain_any): |
@@ -377,6 +390,7 @@ def test_stats_update(uvm_plain_any): |
377 | 390 |
|
378 | 391 | # Ensure that stats don't have unknown balloon stats fields |
379 | 392 | assert "balloon: unknown stats update tag:" not in test_microvm.log_data |
| 393 | + check_guest_dmesg_for_stalls(test_microvm.ssh) |
380 | 394 |
|
381 | 395 |
|
382 | 396 | def test_balloon_snapshot(uvm_plain_any, microvm_factory): |
@@ -453,6 +467,7 @@ def test_balloon_snapshot(uvm_plain_any, microvm_factory): |
453 | 467 | # Ensure the stats are still working after restore and show |
454 | 468 | # that the balloon inflated. |
455 | 469 | assert stats_after_snap["available_memory"] > latest_stats["available_memory"] |
| 470 | + check_guest_dmesg_for_stalls(microvm.ssh) |
456 | 471 |
|
457 | 472 |
|
458 | 473 | @pytest.mark.parametrize("method", ["reporting", "hinting"]) |
@@ -532,6 +547,7 @@ def test_hinting_reporting_snapshot(uvm_plain_any, microvm_factory, method): |
532 | 547 | # There should be a reduction in RSS, but it's inconsistent. |
533 | 548 | # We only test that the reduction happens. |
534 | 549 | assert third_reading > fourth_reading |
| 550 | + check_guest_dmesg_for_stalls(microvm.ssh) |
535 | 551 |
|
536 | 552 |
|
537 | 553 | @pytest.mark.parametrize("method", ["traditional", "hinting", "reporting"]) |
@@ -581,3 +597,4 @@ def test_memory_scrub(uvm_plain_any, method): |
581 | 597 | _ = get_stable_rss_mem(microvm) |
582 | 598 |
|
583 | 599 | microvm.ssh.check_output("/usr/local/bin/readmem {} {}".format(60, 1)) |
| 600 | + check_guest_dmesg_for_stalls(microvm.ssh) |
0 commit comments