|
| 1 | +#!/usr/bin/env python3 |
| 2 | +"""Watchdog reset on system lockup |
| 3 | +
|
| 4 | +Verify that a system's watchdog trips and successfully reboots the |
| 5 | +system back to a working state if a lockup occurs. |
| 6 | +
|
| 7 | +This is tested by using the Linux kernel's `test_lockup` module to |
| 8 | +inject a hard lockup (i.e., blocking servicing of all interrupts) on |
| 9 | +all CPU cores that lasts for twice as long as the watchdog's reported |
| 10 | +timeout. |
| 11 | +
|
| 12 | +""" |
| 13 | +import base64 |
| 14 | +import infamy |
| 15 | +import json |
| 16 | +import subprocess |
| 17 | +import time |
| 18 | + |
with infamy.Test() as test:
    with test.step("Set up topology and attach to target DUT"):
        env = infamy.Env()
        # Two attachments to the same DUT: `target` is handed to wait_boot()
        # in the last step, `tgtssh` runs the shell commands below.
        target = env.attach("target", "mgmt")
        tgtssh = env.attach("target", "mgmt", "ssh")

    with test.step("Verify the presence of a watchdog device"):
        wctl = tgtssh.run(["watchdogctl"], stdout=subprocess.PIPE)
        conf = json.loads(wctl.stdout)

        # Pick the first reported device named /dev/watchdog, if any.
        dog = next(
            (dev for dev in conf.get("device", [])
             if dev.get("name", "") == "/dev/watchdog"),
            None,
        )
        if dog is None:
            # NOTE(review): test.fail() is presumably raising/aborting the
            # test here; the code below relies on `dog` being set.
            test.fail("No watchdog device available")

        print(f"Found {dog['name']} ({dog['identity']}), timeout:{dog['timeout']}s")

        # Normalise once so the arithmetic and time.sleep() below operate
        # on a number even if the JSON carries the timeout as a string
        # (a string would otherwise be *repeated* by `* 2`, not doubled).
        timeout = int(dog["timeout"])

    with test.step("Verify the presence of the test_lockup module"):
        # `modprobe -n` is a dry run: only checks that the module resolves.
        if tgtssh.run(["modprobe", "-q", "-n", "test_lockup"]).returncode != 0:
            test.fail("test_lockup module is not available")

    with test.step("Trigger a hard lockup on all CPU cores"):
        # Lock up for twice the watchdog timeout so the watchdog is
        # guaranteed to expire before the lockup would end on its own.
        lockup_secs = timeout * 2

        # The lockup is started from a backgrounded shell function, detached
        # from the session's stdio, so that runsh() can return before the
        # DUT stops responding.
        #
        # The trailing backslashes are consumed by Python (backslash-newline
        # in a non-raw string literal), so the remote shell receives the
        # modprobe invocation as one single line.
        tgtssh.runsh(f"""
        lockup()
        {{
            # Give the SSH session some time to properly shut down
            sleep 3

            sudo modprobe test_lockup \
                disable_irq=1 \
                all_cpus=1 \
                time_secs={lockup_secs}
        }}

        lockup </dev/null &>/dev/null &
        """)

    with test.step("Wait for the watchdog to trip"):
        time.sleep(timeout)

    with test.step("Verify that the system reboots"):
        infamy.util.wait_boot(target, env)

    test.succeed()
0 commit comments