Skip to content

Commit 8d0968c

Browse files
jgross1 (Juergen Gross), committed by Ingo Molnar
authored and committed
locking/csd_lock: Add boot parameter for controlling CSD lock debugging
Currently CSD lock debugging can be switched on and off via a kernel config option only. Unfortunately there is at least one problem with CSD lock handling pending for about 2 years now, which has been seen in different environments (mostly when running virtualized under KVM or Xen, at least once on bare metal). Multiple attempts to catch this issue have finally led to the introduction of CSD lock debug code, but this code is not in use in most distros as it has some impact on performance. In order to be able to ship kernels with CONFIG_CSD_LOCK_WAIT_DEBUG enabled even for production use, add a boot parameter for switching the debug functionality on. This will reduce any performance impact of the debug code to a bare minimum when not being used. Signed-off-by: Juergen Gross <[email protected]> [ Minor edits. ] Signed-off-by: Ingo Molnar <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent 50bf808 commit 8d0968c

File tree

2 files changed

+40
-4
lines changed

2 files changed

+40
-4
lines changed

Documentation/admin-guide/kernel-parameters.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -784,6 +784,12 @@
784784
cs89x0_media= [HW,NET]
785785
Format: { rj45 | aui | bnc }
786786

787+
csdlock_debug= [KNL] Enable debug add-ons of cross-CPU function call
788+
handling. When switched on, additional debug data is
789+
printed to the console in case a hanging CPU is
790+
detected, and that CPU is pinged again in order to try
791+
to resolve the hang situation.
792+
787793
dasd= [HW,NET]
788794
See header of drivers/s390/block/dasd_devmap.c.
789795

kernel/smp.c

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <linux/sched/clock.h>
2525
#include <linux/nmi.h>
2626
#include <linux/sched/debug.h>
27+
#include <linux/jump_label.h>
2728

2829
#include "smpboot.h"
2930
#include "sched/smp.h"
@@ -102,6 +103,20 @@ void __init call_function_init(void)
102103

103104
#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
104105

106+
static DEFINE_STATIC_KEY_FALSE(csdlock_debug_enabled);
107+
108+
static int __init csdlock_debug(char *str)
109+
{
110+
unsigned int val = 0;
111+
112+
get_option(&str, &val);
113+
if (val)
114+
static_branch_enable(&csdlock_debug_enabled);
115+
116+
return 0;
117+
}
118+
early_param("csdlock_debug", csdlock_debug);
119+
105120
static DEFINE_PER_CPU(call_single_data_t *, cur_csd);
106121
static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func);
107122
static DEFINE_PER_CPU(void *, cur_csd_info);
@@ -110,7 +125,7 @@ static DEFINE_PER_CPU(void *, cur_csd_info);
110125
static atomic_t csd_bug_count = ATOMIC_INIT(0);
111126

112127
/* Record current CSD work for current CPU, NULL to erase. */
113-
static void csd_lock_record(call_single_data_t *csd)
128+
static void __csd_lock_record(call_single_data_t *csd)
114129
{
115130
if (!csd) {
116131
smp_mb(); /* NULL cur_csd after unlock. */
@@ -125,7 +140,13 @@ static void csd_lock_record(call_single_data_t *csd)
125140
/* Or before unlock, as the case may be. */
126141
}
127142

128-
static __always_inline int csd_lock_wait_getcpu(call_single_data_t *csd)
143+
/*
 * Record CSD work only when CSD lock debugging has been enabled on the
 * command line; otherwise this compiles down to a static-branch no-op.
 */
static __always_inline void csd_lock_record(call_single_data_t *csd)
{
	if (!static_branch_unlikely(&csdlock_debug_enabled))
		return;

	__csd_lock_record(csd);
}
148+
149+
static int csd_lock_wait_getcpu(call_single_data_t *csd)
129150
{
130151
unsigned int csd_type;
131152

@@ -140,7 +161,7 @@ static __always_inline int csd_lock_wait_getcpu(call_single_data_t *csd)
140161
* the CSD_TYPE_SYNC/ASYNC types provide the destination CPU,
141162
* so waiting on other types gets much less information.
142163
*/
143-
static __always_inline bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, int *bug_id)
164+
static bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, int *bug_id)
144165
{
145166
int cpu = -1;
146167
int cpux;
@@ -204,7 +225,7 @@ static __always_inline bool csd_lock_wait_toolong(call_single_data_t *csd, u64 t
204225
* previous function call. For multi-cpu calls its even more interesting
205226
* as we'll have to ensure no other cpu is observing our csd.
206227
*/
207-
static __always_inline void csd_lock_wait(call_single_data_t *csd)
228+
static void __csd_lock_wait(call_single_data_t *csd)
208229
{
209230
int bug_id = 0;
210231
u64 ts0, ts1;
@@ -218,6 +239,15 @@ static __always_inline void csd_lock_wait(call_single_data_t *csd)
218239
smp_acquire__after_ctrl_dep();
219240
}
220241

242+
/*
 * Wait for the CSD lock to be released. With debugging disabled (the
 * common case) this is a plain acquire-ordered spin on the lock flag;
 * with "csdlock_debug" enabled, the instrumented slow path is taken.
 */
static __always_inline void csd_lock_wait(call_single_data_t *csd)
{
	if (!static_branch_unlikely(&csdlock_debug_enabled)) {
		smp_cond_load_acquire(&csd->node.u_flags, !(VAL & CSD_FLAG_LOCK));
		return;
	}

	__csd_lock_wait(csd);
}
221251
#else
222252
static void csd_lock_record(call_single_data_t *csd)
223253
{

0 commit comments

Comments
 (0)