Skip to content

Commit 1253b9b

Browse files
paulmckrcu authored and KAGA-KOKO committed
clocksource: Provide kernel module to test clocksource watchdog
When the clocksource watchdog marks a clock as unstable, this might be due to that clock being unstable or it might be due to delays that happen to occur between the reads of the two clocks. It would be good to have a way of testing the clocksource watchdog's ability to distinguish between these two causes of clock skew and instability.

Therefore, provide a new clocksource-wdtest module selected by a new TEST_CLOCKSOURCE_WATCHDOG Kconfig option. This module has a single module parameter named "holdoff" that provides the number of seconds of delay before testing should start, which defaults to zero when built as a module and to 10 seconds when built directly into the kernel. Very large systems that boot slowly may need to increase the value of this module parameter.

This module uses hand-crafted clocksource structures to do its testing, thus avoiding messing up timing for the rest of the kernel and for user applications. This module first verifies that the ->uncertainty_margin fields of the clocksource structures are set sanely. It then tests the delay-detection capability of the clocksource watchdog, increasing the number of consecutive delays injected, first provoking console messages complaining about the delays and finally forcing a clock-skew event. Unexpected test results cause at least one WARN_ON_ONCE() console splat. If there are no splats, the test has passed. Finally, it fuzzes the value returned from a clocksource to test the clocksource watchdog's ability to detect time skew.

This module checks the state of its clocksource after each test, and uses WARN_ON_ONCE() to emit a console splat if there are any failures. This should enable all types of test frameworks to detect any such failures.

This facility is intended for diagnostic use only, and should be avoided on production systems.

Reported-by: Chris Mason <[email protected]>
Suggested-by: Thomas Gleixner <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Feng Tang <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
1 parent 2e27e79 commit 1253b9b

File tree

6 files changed

+228
-2
lines changed

6 files changed

+228
-2
lines changed

Documentation/admin-guide/kernel-parameters.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -597,6 +597,12 @@
597597
The actual CPUs are chosen randomly, with
598598
no replacement if the same CPU is chosen twice.
599599

600+
clocksource-wdtest.holdoff= [KNL]
601+
Set the time in seconds that the clocksource
602+
watchdog test waits before commencing its tests.
603+
Defaults to zero when built as a module and to
604+
10 seconds when built into the kernel.
605+
600606
clearcpuid=BITNUM[,BITNUM...] [X86]
601607
Disable CPUID feature X for the kernel. See
602608
arch/x86/include/asm/cpufeatures.h for the valid bit

include/linux/clocksource.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,4 +291,7 @@ static inline void timer_probe(void) {}
291291
#define TIMER_ACPI_DECLARE(name, table_id, fn) \
292292
ACPI_DECLARE_PROBE_ENTRY(timer, name, table_id, 0, NULL, 0, fn)
293293

294+
extern ulong max_cswd_read_retries;
295+
void clocksource_verify_percpu(struct clocksource *cs);
296+
294297
#endif /* _LINUX_CLOCKSOURCE_H */

kernel/time/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,4 @@ obj-$(CONFIG_HAVE_GENERIC_VDSO) += vsyscall.o
2121
obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o
2222
obj-$(CONFIG_TEST_UDELAY) += test_udelay.o
2323
obj-$(CONFIG_TIME_NS) += namespace.o
24+
obj-$(CONFIG_TEST_CLOCKSOURCE_WATCHDOG) += clocksource-wdtest.o

kernel/time/clocksource-wdtest.c

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
// SPDX-License-Identifier: GPL-2.0+
2+
/*
3+
* Unit test for the clocksource watchdog.
4+
*
5+
* Copyright (C) 2021 Facebook, Inc.
6+
*
7+
* Author: Paul E. McKenney <[email protected]>
8+
*/
9+
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10+
11+
#include <linux/device.h>
12+
#include <linux/clocksource.h>
13+
#include <linux/init.h>
14+
#include <linux/module.h>
15+
#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
16+
#include <linux/tick.h>
17+
#include <linux/kthread.h>
18+
#include <linux/delay.h>
19+
#include <linux/prandom.h>
20+
#include <linux/cpu.h>
21+
22+
MODULE_LICENSE("GPL");
23+
MODULE_AUTHOR("Paul E. McKenney <[email protected]>");
24+
25+
static int holdoff = IS_BUILTIN(CONFIG_TEST_CLOCKSOURCE_WATCHDOG) ? 10 : 0;
26+
module_param(holdoff, int, 0444);
27+
MODULE_PARM_DESC(holdoff, "Time to wait to start test (s).");
28+
29+
/* Watchdog kthread's task_struct pointer for debug purposes. */
30+
static struct task_struct *wdtest_task;
31+
32+
static u64 wdtest_jiffies_read(struct clocksource *cs)
33+
{
34+
return (u64)jiffies;
35+
}
36+
37+
/* Assume HZ > 100. */
38+
#define JIFFIES_SHIFT 8
39+
40+
static struct clocksource clocksource_wdtest_jiffies = {
41+
.name = "wdtest-jiffies",
42+
.rating = 1, /* lowest valid rating*/
43+
.uncertainty_margin = TICK_NSEC,
44+
.read = wdtest_jiffies_read,
45+
.mask = CLOCKSOURCE_MASK(32),
46+
.flags = CLOCK_SOURCE_MUST_VERIFY,
47+
.mult = TICK_NSEC << JIFFIES_SHIFT, /* details above */
48+
.shift = JIFFIES_SHIFT,
49+
.max_cycles = 10,
50+
};
51+
52+
static int wdtest_ktime_read_ndelays;
53+
static bool wdtest_ktime_read_fuzz;
54+
55+
static u64 wdtest_ktime_read(struct clocksource *cs)
56+
{
57+
int wkrn = READ_ONCE(wdtest_ktime_read_ndelays);
58+
static int sign = 1;
59+
u64 ret;
60+
61+
if (wkrn) {
62+
udelay(cs->uncertainty_margin / 250);
63+
WRITE_ONCE(wdtest_ktime_read_ndelays, wkrn - 1);
64+
}
65+
ret = ktime_get_real_fast_ns();
66+
if (READ_ONCE(wdtest_ktime_read_fuzz)) {
67+
sign = -sign;
68+
ret = ret + sign * 100 * NSEC_PER_MSEC;
69+
}
70+
return ret;
71+
}
72+
73+
static void wdtest_ktime_cs_mark_unstable(struct clocksource *cs)
74+
{
75+
pr_info("--- Marking %s unstable due to clocksource watchdog.\n", cs->name);
76+
}
77+
78+
/*
 * Pristine ->flags value of the ktime-like test clocksource.  It is used
 * both for the initial definition and to clear state before re-registering.
 */
#define KTIME_FLAGS	(CLOCK_SOURCE_MUST_VERIFY	| \
			 CLOCK_SOURCE_VERIFY_PERCPU	| \
			 CLOCK_SOURCE_IS_CONTINUOUS	| \
			 CLOCK_SOURCE_VALID_FOR_HRES)
82+
83+
static struct clocksource clocksource_wdtest_ktime = {
84+
.name = "wdtest-ktime",
85+
.rating = 300,
86+
.read = wdtest_ktime_read,
87+
.mask = CLOCKSOURCE_MASK(64),
88+
.flags = KTIME_FLAGS,
89+
.mark_unstable = wdtest_ktime_cs_mark_unstable,
90+
.list = LIST_HEAD_INIT(clocksource_wdtest_ktime.list),
91+
};
92+
93+
/* Reset the clocksource if needed. */
94+
static void wdtest_ktime_clocksource_reset(void)
95+
{
96+
if (clocksource_wdtest_ktime.flags & CLOCK_SOURCE_UNSTABLE) {
97+
clocksource_unregister(&clocksource_wdtest_ktime);
98+
clocksource_wdtest_ktime.flags = KTIME_FLAGS;
99+
schedule_timeout_uninterruptible(HZ / 10);
100+
clocksource_register_khz(&clocksource_wdtest_ktime, 1000 * 1000);
101+
}
102+
}
103+
104+
/* Run the specified series of watchdog tests. */
105+
static int wdtest_func(void *arg)
106+
{
107+
unsigned long j1, j2;
108+
char *s;
109+
int i;
110+
111+
schedule_timeout_uninterruptible(holdoff * HZ);
112+
113+
/*
114+
* Verify that jiffies-like clocksources get the manually
115+
* specified uncertainty margin.
116+
*/
117+
pr_info("--- Verify jiffies-like uncertainty margin.\n");
118+
__clocksource_register(&clocksource_wdtest_jiffies);
119+
WARN_ON_ONCE(clocksource_wdtest_jiffies.uncertainty_margin != TICK_NSEC);
120+
121+
j1 = clocksource_wdtest_jiffies.read(&clocksource_wdtest_jiffies);
122+
schedule_timeout_uninterruptible(HZ);
123+
j2 = clocksource_wdtest_jiffies.read(&clocksource_wdtest_jiffies);
124+
WARN_ON_ONCE(j1 == j2);
125+
126+
clocksource_unregister(&clocksource_wdtest_jiffies);
127+
128+
/*
129+
* Verify that tsc-like clocksources are assigned a reasonable
130+
* uncertainty margin.
131+
*/
132+
pr_info("--- Verify tsc-like uncertainty margin.\n");
133+
clocksource_register_khz(&clocksource_wdtest_ktime, 1000 * 1000);
134+
WARN_ON_ONCE(clocksource_wdtest_ktime.uncertainty_margin < NSEC_PER_USEC);
135+
136+
j1 = clocksource_wdtest_ktime.read(&clocksource_wdtest_ktime);
137+
udelay(1);
138+
j2 = clocksource_wdtest_ktime.read(&clocksource_wdtest_ktime);
139+
pr_info("--- tsc-like times: %lu - %lu = %lu.\n", j2, j1, j2 - j1);
140+
WARN_ON_ONCE(time_before(j2, j1 + NSEC_PER_USEC));
141+
142+
/* Verify tsc-like stability with various numbers of errors injected. */
143+
for (i = 0; i <= max_cswd_read_retries + 1; i++) {
144+
if (i <= 1 && i < max_cswd_read_retries)
145+
s = "";
146+
else if (i <= max_cswd_read_retries)
147+
s = ", expect message";
148+
else
149+
s = ", expect clock skew";
150+
pr_info("--- Watchdog with %dx error injection, %lu retries%s.\n", i, max_cswd_read_retries, s);
151+
WRITE_ONCE(wdtest_ktime_read_ndelays, i);
152+
schedule_timeout_uninterruptible(2 * HZ);
153+
WARN_ON_ONCE(READ_ONCE(wdtest_ktime_read_ndelays));
154+
WARN_ON_ONCE((i <= max_cswd_read_retries) !=
155+
!(clocksource_wdtest_ktime.flags & CLOCK_SOURCE_UNSTABLE));
156+
wdtest_ktime_clocksource_reset();
157+
}
158+
159+
/* Verify tsc-like stability with clock-value-fuzz error injection. */
160+
pr_info("--- Watchdog clock-value-fuzz error injection, expect clock skew and per-CPU mismatches.\n");
161+
WRITE_ONCE(wdtest_ktime_read_fuzz, true);
162+
schedule_timeout_uninterruptible(2 * HZ);
163+
WARN_ON_ONCE(!(clocksource_wdtest_ktime.flags & CLOCK_SOURCE_UNSTABLE));
164+
clocksource_verify_percpu(&clocksource_wdtest_ktime);
165+
WRITE_ONCE(wdtest_ktime_read_fuzz, false);
166+
167+
clocksource_unregister(&clocksource_wdtest_ktime);
168+
169+
pr_info("--- Done with test.\n");
170+
return 0;
171+
}
172+
173+
static void wdtest_print_module_parms(void)
174+
{
175+
pr_alert("--- holdoff=%d\n", holdoff);
176+
}
177+
178+
/*
 * Module-exit hook.  Intentionally empty: it neither stops the test kthread
 * nor unregisters any clocksource.
 *
 * NOTE(review): presumably this relies on the module not being unloaded
 * while the test kthread is still running -- confirm.
 */
static void clocksource_wdtest_cleanup(void)
{
}
182+
183+
/*
 * Module-init hook: log the module parameters and start the kthread that
 * runs the tests.
 *
 * Returns 0 on success.  If the kthread cannot be created, logs a warning,
 * leaves wdtest_task NULL and returns the error code from kthread_run().
 */
static int __init clocksource_wdtest_init(void)
{
	struct task_struct *t;

	wdtest_print_module_parms();

	/* Create watchdog-test task. */
	t = kthread_run(wdtest_func, NULL, "wdtest");
	if (!IS_ERR(t)) {
		wdtest_task = t;
		return 0;
	}

	pr_warn("%s: Failed to create wdtest kthread.\n", __func__);
	wdtest_task = NULL;
	return PTR_ERR(t);
}
200+
201+
/*
 * Register the entry points: clocksource_wdtest_init() starts the test and
 * clocksource_wdtest_cleanup() is the (empty) exit handler.
 */
module_init(clocksource_wdtest_init);
202+
module_exit(clocksource_wdtest_cleanup);

kernel/time/clocksource.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -199,8 +199,9 @@ void clocksource_mark_unstable(struct clocksource *cs)
199199
spin_unlock_irqrestore(&watchdog_lock, flags);
200200
}
201201

202-
static ulong max_cswd_read_retries = 3;
202+
ulong max_cswd_read_retries = 3;
203203
module_param(max_cswd_read_retries, ulong, 0644);
204+
EXPORT_SYMBOL_GPL(max_cswd_read_retries);
204205
static int verify_n_cpus = 8;
205206
module_param(verify_n_cpus, int, 0644);
206207

@@ -294,7 +295,7 @@ static void clocksource_verify_one_cpu(void *csin)
294295
csnow_mid = cs->read(cs);
295296
}
296297

297-
static void clocksource_verify_percpu(struct clocksource *cs)
298+
void clocksource_verify_percpu(struct clocksource *cs)
298299
{
299300
int64_t cs_nsec, cs_nsec_max = 0, cs_nsec_min = LLONG_MAX;
300301
u64 csnow_begin, csnow_end;
@@ -347,6 +348,7 @@ static void clocksource_verify_percpu(struct clocksource *cs)
347348
pr_warn(" CPU %d check durations %lldns - %lldns for clocksource %s.\n",
348349
testcpu, cs_nsec_min, cs_nsec_max, cs->name);
349350
}
351+
EXPORT_SYMBOL_GPL(clocksource_verify_percpu);
350352

351353
static void clocksource_watchdog(struct timer_list *unused)
352354
{

lib/Kconfig.debug

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2571,6 +2571,18 @@ config TEST_FPU
25712571

25722572
If unsure, say N.
25732573

2574+
config TEST_CLOCKSOURCE_WATCHDOG
2575+
tristate "Test clocksource watchdog in kernel space"
2576+
depends on CLOCKSOURCE_WATCHDOG
2577+
help
2578+
Enable this option to create a kernel module that will trigger
2579+
a test of the clocksource watchdog. This module may be loaded
2580+
via modprobe or insmod in which case it will run upon being
2581+
loaded, or it may be built in, in which case it will run
2582+
shortly after boot.
2583+
2584+
If unsure, say N.
2585+
25742586
endif # RUNTIME_TESTING_MENU
25752587

25762588
config ARCH_USE_MEMTEST

0 commit comments

Comments
 (0)