Skip to content

Commit 67817a1

Browse files
committed
board: common: Add a watchdogd configuration
Also increase the timeout and kick more often (required by RPI4)
1 parent 692fa16 commit 67817a1

File tree

1 file changed

+165
-0
lines changed

1 file changed

+165
-0
lines changed
Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
# /etc/watchdogd.conf sample
2+
# Commented out values are program defaults.
3+
#
4+
# The checker/monitor `warning` and `critical` levels are 0.00-1.00,
5+
# i.e. 0-100%, except for load average which can vary a lot between
6+
# systems and use-cases, not just because of the number of CPU cores.
7+
# Use the `script = ...` setting to call script when `warning` and
8+
# `critical` are reached for a monitor. In `critical` the monitor
9+
# otherwise triggers an unconditional reboot.
10+
#
11+
# NOTE: `critical` is optional, omitting it disables the reboot action.
12+
#
13+
14+
### Watchdogs ##########################################################
15+
# Global settings that can be overridden per watchdog
16+
17+
# Do not set WDT timeout and kick interval too low, the daemon runs at
18+
# SCHED_OTHER level with all other tasks, unless the process supervisor
19+
# is enabled. The monitor plugins (below) need CPU time as well.
20+
#timeout = 20
21+
#interval = 10
22+
23+
# With safe-exit enabled (true) the daemon will ask the driver to disable
24+
# the WDT before exiting (SIGINT). However, some WDT drivers (or HW)
25+
# may not support this.
26+
#safe-exit = true
27+
28+
# Multiple watchdogs can be kicked, the default, even if no .conf file
29+
# is found or device node given on the command line, is /dev/watchdog
30+
device /dev/watchdog {
31+
timeout = 60
32+
interval = 5
33+
safe-exit = true
34+
}
35+
36+
#device /dev/watchdog2 {
37+
# timeout = 20
38+
# interval = 10
39+
# safe-exit = true
40+
#}
41+
42+
### Supervisor #########################################################
43+
# Instrumented processes can have their main loop supervised. Processes
44+
# subscribe to this service using the libwdog API, see the docs for more
45+
# on this. When the supervisor is enabled and the priority is set to a
46+
# value > 0, watchdogd runs as a SCHED_RR process with elevated realtime
47+
# priority. When disabled, or the priority is set to zero (0), it runs
48+
# as a regular SCHED_OTHER process, this is the default.
49+
#
50+
# When a supervised process fails to meet its deadline, the daemon will
51+
# perform an unconditional reset having saved the reset reason. If a
52+
# script is provided in this section it will be called instead. The
53+
# script is called as:
54+
#
55+
# script.sh supervisor CODE PID LABEL
56+
#
57+
# Available CODEs for the reset reason are listed in wdog.h
58+
#
59+
#supervisor {
60+
# !!!REMEMBER TO ENABLE reset-reason (below) AS WELL!!!
61+
# enabled = true
62+
# priority = 98
63+
# script = "/path/to/supervisor-script.sh"
64+
#}
65+
66+
### Reset reason #######################################################
67+
# The following section controls if/how the reset reason & reset counter
68+
# are tracked. By default this is disabled, since not all systems allow
69+
# writing to disk, e.g. embedded systems using MTD devices with limited
70+
# number of write cycles.
71+
#
72+
# The default file setting is a non-volatile path, according to the FHS.
73+
# It can be changed to another location, but make sure that location is
74+
# writable first.
75+
reset-reason {
76+
enabled = true
77+
file = "/var/lib/misc/watchdogd.state"
78+
}
79+
80+
### Checkers/Monitors ##################################################
81+
#
82+
# Script or command to run instead of reboot when a monitor plugin
83+
# reaches either its critical or warning level.  Setting this will
84+
# disable the built-in reboot on critical, it is therefore up to the
85+
# script to perform reboot, if needed. The script is called as:
86+
#
87+
# script.sh {filenr, fsmon, loadavg, meminfo} {crit, warn} VALUE
88+
#
89+
#script = "/path/to/reboot-action.sh"
90+
91+
# Monitors file descriptor leaks based on /proc/sys/fs/file-nr
92+
filenr {
93+
# enabled = true
94+
interval = 300
95+
logmark = false
96+
warning = 0.9
97+
critical = 1.0
98+
# script = "/path/to/alt-reboot-action.sh"
99+
}
100+
101+
# Monitors a file system, blocks and inode usage against watermarks
102+
# The script is called with fsmon as the first argument and there
103+
# are two environment variables FSMON_NAME, for the monitored path,
104+
# and FSMON_TYPE indicating either 'blocks' or 'inodes'.
105+
#fsmon /var {
106+
# enabled = true
107+
# interval = 300
108+
# logmark = false
109+
# warning = 0.95
110+
# critical = 1.0
111+
# script = "/path/to/alt-reboot-action.sh"
112+
#}
113+
114+
# Monitors load average based on sysinfo() from /proc/loadavg
115+
# The level is composed from the average of the 1 and 5 min marks.
116+
loadavg {
117+
# enabled = true
118+
interval = 300
119+
logmark = false
120+
warning = 1.0
121+
critical = 2.0
122+
# script = "/path/to/alt-reboot-action.sh"
123+
}
124+
125+
# Monitors free RAM based on data from /proc/meminfo
126+
meminfo {
127+
# enabled = true
128+
interval = 300
129+
logmark = false
130+
warning = 0.9
131+
critical = 0.95
132+
# script = "/path/to/alt-reboot-action.sh"
133+
}
134+
135+
# Monitor temperature. The critical value is unset by default, so no
136+
# action is taken at that watermark (by default). Both the critical and
137+
# warning watermarks are relative to the trip/critical/max value from
138+
# sysfs. The warning is default 0.9, i.e., 90% of critical. Use script
139+
# to reset the fan controller or poweroff(8) the system.
140+
#
141+
# Each temp monitor caches the last 10 values, calculates the mean, and
142+
# compares that to the warning and critical levels. Logging is only
143+
# done every 10 x interval (if enabled).
144+
#tempmon /sys/class/hwmon/hwmon0/temp1_input {
145+
# enabled = true
146+
# interval = 30
147+
# warning = 0.9
148+
# critical = 0.95
149+
# logmark = true
150+
# script = "/script/to/log/and/poweroff.sh"
151+
#}
152+
153+
# Monitor a generic script, executes 'monitor-script' every 'interval'
154+
# seconds, with a max runtime of 'timeout' seconds. When the exit code
155+
# of the monitor script is above the critical level watchdogd either
156+
# starts the reboot, or calls the alternate 'script' to determine the
157+
# next course of action.
158+
#generic /path/to/monitor-script.sh {
159+
# enabled = true
160+
# interval = 300
161+
# timeout = 60
162+
# warning = 1
163+
# critical = 10
164+
# script = "/path/to/alt-reboot-action.sh"
165+
#}

0 commit comments

Comments
 (0)