Skip to content

Commit 553d659

Browse files
committed
test/case/hardware/watchdog: Add test
Verify that if the system encounters a hard lockup, i.e., interrupts are no longer being serviced, then the system's watchdog will correctly reboot it.
1 parent 3460169 commit 553d659

File tree

9 files changed

+160
-6
lines changed

test/case/hardware/Readme.adoc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,10 @@ Tests verifying hardware monitoring and management:
55

66
- USB device detection and enumeration
77
- Multiple USB port management and device handling
8+
- Watchdog reset capability
89

910
include::usb/Readme.adoc[]
1011

1112
include::usb_two_ports/Readme.adoc[]
13+
14+
include::watchdog/Readme.adoc[]

test/case/hardware/all.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,6 @@
44

55
- name: USB configuration with two USB ports
66
case: usb_two_ports/test.py
7+
8+
- name: Watchdog reset on system lockup
9+
case: watchdog/test.py
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
test.adoc
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
=== Watchdog reset on system lockup
2+
3+
ifdef::topdoc[:imagesdir: {topdoc}../../test/case/hardware/watchdog]
4+
5+
==== Description
6+
7+
Verify that a system's watchdog trips and successfully reboots the
8+
system back to a working state if a lockup occurs.
9+
10+
This is tested by using the Linux kernel's `test_lockup` module to
11+
inject a hard lockup (i.e., blocking servicing of all interrupts) on
12+
all CPU cores that lasts for twice as long as the watchdog's reported
13+
timeout.
14+
15+
==== Topology
16+
17+
image::topology.svg[Watchdog reset on system lockup topology, align=center, scaledwidth=75%]
18+
19+
==== Sequence
20+
21+
. Set up topology and attach to target DUT
22+
. Verify the presence of a watchdog device
23+
. Verify the presence of the test_lockup module
24+
. Trigger a hard lockup on all CPU cores
25+
. Wait for the watchdog to trip
26+
. Verify that the system reboots
27+
28+
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
#!/usr/bin/env python3
2+
"""Watchdog reset on system lockup
3+
4+
Verify that a system's watchdog trips and successfully reboots the
5+
system back to a working state if a lockup occurs.
6+
7+
This is tested by using the Linux kernel's `test_lockup` module to
8+
inject a hard lockup (i.e., blocking servicing of all interrupts) on
9+
all CPU cores that lasts for twice as long as the watchdog's reported
10+
timeout.
11+
12+
"""
13+
import base64
14+
import infamy
15+
import json
16+
import subprocess
17+
import time
18+
19+
with infamy.Test() as test:
20+
with test.step("Set up topology and attach to target DUT"):
21+
env = infamy.Env()
22+
target = env.attach("target", "mgmt")
23+
tgtssh = env.attach("target", "mgmt", "ssh")
24+
25+
with test.step("Verify the presence of a watchdog device"):
26+
wctl = tgtssh.run(["watchdogctl"], stdout=subprocess.PIPE)
27+
conf = json.loads(wctl.stdout)
28+
29+
dogs = [ dog for dog in conf.get("device", []) if dog.get("name", "") == "/dev/watchdog" ]
30+
if len(dogs) < 1:
31+
test.fail("No watchdog device available")
32+
else:
33+
dog = dogs[0]
34+
35+
print(f"Found {dog['name']} ({dog['identity']}), timeout:{dog['timeout']}s")
36+
37+
with test.step("Verify the presence of the test_lockup module"):
38+
if tgtssh.run(["modprobe", "-q", "-n", "test_lockup"]).returncode != 0:
39+
test.fail("test_lockup module is not available")
40+
41+
with test.step("Trigger a hard lockup on all CPU cores"):
42+
tgtssh.runsh(f"""
43+
lockup()
44+
{{
45+
# Give the SSH session some time to properly shut down
46+
sleep 3
47+
48+
sudo modprobe test_lockup \
49+
disable_irq=1 \
50+
all_cpus=1 \
51+
time_secs={dog['timeout'] * 2}
52+
}}
53+
54+
lockup </dev/null &>/dev/null &
55+
""")
56+
57+
with test.step("Wait for the watchdog to trip"):
58+
time.sleep(dog["timeout"])
59+
60+
with test.step("Verify that the system reboots"):
61+
infamy.util.wait_boot(target, env)
62+
63+
test.succeed()
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
graph "1x1" {
2+
layout="neato";
3+
overlap="false";
4+
esep="+80";
5+
6+
node [shape=record, fontname="DejaVu Sans Mono, Book"];
7+
edge [color="cornflowerblue", penwidth="2", fontname="DejaVu Serif, Book"];
8+
9+
host [
10+
label="host | { <mgmt> mgmt }",
11+
pos="0,12!",
12+
requires="controller",
13+
];
14+
15+
target [
16+
label="{ <mgmt> mgmt } | target",
17+
pos="10,12!",
18+
19+
requires="infix watchdog",
20+
];
21+
22+
host:mgmt -- target:mgmt [requires="mgmt", color="lightgray"]
23+
}
Lines changed: 33 additions & 0 deletions
Loading

test/virt/dual/topology.dot.in

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ graph "dual" {
2424
dut1 [
2525
label="{ <e1> e1 | <e2> e2 | <e3> e3 } | dut1 | { <e4> e4 | <e5> e5 | <e6> e6 }",
2626
pos="10,18!",
27-
provides="infix",
27+
provides="infix watchdog",
2828
expected_boot="primary",
2929
qn_console=9001,
3030
qn_mem="384M",
@@ -33,7 +33,7 @@ graph "dual" {
3333
dut2 [
3434
label="{ <e1> e1 | <e2> e2 | <e3> e3 } | dut2 | { <e4> e4 | <e5> e5 | <e6> e6 }",
3535
pos="10,12!",
36-
provides="infix",
36+
provides="infix watchdog",
3737
expected_boot="primary",
3838
qn_console=9002,
3939
qn_mem="384M",

test/virt/quad/topology.dot.in

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ graph "quad" {
2323
dut1 [
2424
label="{ <e1> e1 | <e2> e2 | <e3> e3 | <e4> e4 } | dut1 | { <e5> e5 | <e6> e6 | <e7> e7 | <e8> e8}",
2525
pos="10,30!",
26-
provides="infix",
26+
provides="infix watchdog",
2727
expected_boot="primary",
2828
qn_console=9001,
2929
qn_mem="384M",
@@ -32,7 +32,7 @@ graph "quad" {
3232
dut2 [
3333
label="{ <e1> e1 | <e2> e2 | <e3> e3 | <e4> e4 } | dut2 | { <e5> e5 | <e6> e6 | <e7> e7 | <e8> e8}",
3434
pos="0,20!",
35-
provides="infix",
35+
provides="infix watchdog",
3636
expected_boot="primary",
3737
qn_console=9002,
3838
qn_mem="384M",
@@ -41,7 +41,7 @@ graph "quad" {
4141
dut3 [
4242
label="{ <e1> e1 | <e2> e2 | <e3> e3 | <e4> e4 } | dut3 | { <e5> e5 | <e6> e6 | <e7> e7 | <e8> e8}",
4343
pos="0,10!",
44-
provides="infix",
44+
provides="infix watchdog",
4545
expected_boot="primary",
4646
qn_console=9003,
4747
qn_mem="384M",
@@ -51,7 +51,7 @@ graph "quad" {
5151
dut4 [
5252
label="{ <e1> e1 | <e2> e2 | <e3> e3 | <e4> e4 } | dut4 | { <e5> e5 | <e6> e6 | <e7> e7 | <e8> e8}",
5353
pos="10,0!",
54-
provides="infix",
54+
provides="infix watchdog",
5555
expected_boot="primary",
5656
qn_console=9004,
5757
qn_mem="384M",

0 commit comments