Skip to content

Commit d6834d9

Browse files
Li Huafei authored and Ingo Molnar committed
watchdog/hardlockup/perf: Fix perf_event memory leak
During stress-testing, we found a kmemleak report for perf_event: unreferenced object 0xff110001410a33e0 (size 1328): comm "kworker/4:11", pid 288, jiffies 4294916004 hex dump (first 32 bytes): b8 be c2 3b 02 00 11 ff 22 01 00 00 00 00 ad de ...;...."....... f0 33 0a 41 01 00 11 ff f0 33 0a 41 01 00 11 ff .3.A.....3.A.... backtrace (crc 24eb7b3a): [<00000000e211b653>] kmem_cache_alloc_node_noprof+0x269/0x2e0 [<000000009d0985fa>] perf_event_alloc+0x5f/0xcf0 [<00000000084ad4a2>] perf_event_create_kernel_counter+0x38/0x1b0 [<00000000fde96401>] hardlockup_detector_event_create+0x50/0xe0 [<0000000051183158>] watchdog_hardlockup_enable+0x17/0x70 [<00000000ac89727f>] softlockup_start_fn+0x15/0x40 ... Our stress test includes CPU online and offline cycles, and updating the watchdog configuration. After reading the code, I found that there may be a race between cleaning up perf_event after updating watchdog and disabling event when the CPU goes offline: CPU0 CPU1 CPU2 (update watchdog) (hotplug offline CPU1) ... _cpu_down(CPU1) cpus_read_lock() // waiting for cpu lock softlockup_start_all smp_call_on_cpu(CPU1) softlockup_start_fn ... watchdog_hardlockup_enable(CPU1) perf create E1 watchdog_ev[CPU1] = E1 cpus_read_unlock() cpus_write_lock() cpuhp_kick_ap_work(CPU1) cpuhp_thread_fun ... watchdog_hardlockup_disable(CPU1) watchdog_ev[CPU1] = NULL dead_event[CPU1] = E1 __lockup_detector_cleanup for each dead_events_mask release each dead_event /* * CPU1 has not been added to * dead_events_mask, then E1 * will not be released */ CPU1 -> dead_events_mask cpumask_clear(&dead_events_mask) // dead_events_mask is cleared, E1 is leaked In this case, the leaked perf_event E1 matches the perf_event leak reported by kmemleak. Due to the low probability of problem recurrence (only reported once), I added some hack delays in the code: static void __lockup_detector_reconfigure(void) { ... 
	watchdog_hardlockup_start();
	cpus_read_unlock();
+	mdelay(100);
	/*
	 * Must be called outside the cpus locked section to prevent
	 * recursive locking in the perf code.
  ...
  }

  void watchdog_hardlockup_disable(unsigned int cpu)
  {
  ...
		perf_event_disable(event);
		this_cpu_write(watchdog_ev, NULL);
		this_cpu_write(dead_event, event);
+		mdelay(100);
		cpumask_set_cpu(smp_processor_id(), &dead_events_mask);
		atomic_dec(&watchdog_cpus);
  ...
  }

  void hardlockup_detector_perf_cleanup(void)
  {
  ...
			perf_event_release_kernel(event);
		per_cpu(dead_event, cpu) = NULL;
	}
+	mdelay(100);
	cpumask_clear(&dead_events_mask);
  }

Then, simultaneously performing CPU on/off and switching the watchdog, it
is almost certain to reproduce this leak.

The problem here is that releasing perf_event is not within the CPU
hotplug read-write lock. Commit:

  941154b ("watchdog/hardlockup/perf: Prevent CPU hotplug deadlock")

introduced deferred release to solve the deadlock caused by calling
get_online_cpus() when releasing perf_event. Later, commit:

  efe951d ("perf/x86: Fix perf,x86,cpuhp deadlock")

removed the get_online_cpus() call on the perf_event release path to
solve another deadlock problem.

Therefore, it is now possible to move the release of perf_event back
into the CPU hotplug read-write lock, and release the event immediately
after disabling it.

Fixes: 941154b ("watchdog/hardlockup/perf: Prevent CPU hotplug deadlock")
Signed-off-by: Li Huafei <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
1 parent 5e7adc8 commit d6834d9

File tree

4 files changed

+1
-61
lines changed

4 files changed

+1
-61
lines changed

include/linux/nmi.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
void lockup_detector_init(void);
1818
void lockup_detector_retry_init(void);
1919
void lockup_detector_soft_poweroff(void);
20-
void lockup_detector_cleanup(void);
2120

2221
extern int watchdog_user_enabled;
2322
extern int watchdog_thresh;
@@ -37,7 +36,6 @@ extern int sysctl_hardlockup_all_cpu_backtrace;
3736
static inline void lockup_detector_init(void) { }
3837
static inline void lockup_detector_retry_init(void) { }
3938
static inline void lockup_detector_soft_poweroff(void) { }
40-
static inline void lockup_detector_cleanup(void) { }
4139
#endif /* !CONFIG_LOCKUP_DETECTOR */
4240

4341
#ifdef CONFIG_SOFTLOCKUP_DETECTOR
@@ -104,12 +102,10 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs);
104102
#if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF)
105103
extern void hardlockup_detector_perf_stop(void);
106104
extern void hardlockup_detector_perf_restart(void);
107-
extern void hardlockup_detector_perf_cleanup(void);
108105
extern void hardlockup_config_perf_event(const char *str);
109106
#else
110107
static inline void hardlockup_detector_perf_stop(void) { }
111108
static inline void hardlockup_detector_perf_restart(void) { }
112-
static inline void hardlockup_detector_perf_cleanup(void) { }
113109
static inline void hardlockup_config_perf_event(const char *str) { }
114110
#endif
115111

kernel/cpu.c

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1453,11 +1453,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
14531453

14541454
out:
14551455
cpus_write_unlock();
1456-
/*
1457-
* Do post unplug cleanup. This is still protected against
1458-
* concurrent CPU hotplug via cpu_add_remove_lock.
1459-
*/
1460-
lockup_detector_cleanup();
14611456
arch_smt_update();
14621457
return ret;
14631458
}

kernel/watchdog.c

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -347,8 +347,6 @@ static int __init watchdog_thresh_setup(char *str)
347347
}
348348
__setup("watchdog_thresh=", watchdog_thresh_setup);
349349

350-
static void __lockup_detector_cleanup(void);
351-
352350
#ifdef CONFIG_SOFTLOCKUP_DETECTOR_INTR_STORM
353351
enum stats_per_group {
354352
STATS_SYSTEM,
@@ -886,11 +884,6 @@ static void __lockup_detector_reconfigure(void)
886884

887885
watchdog_hardlockup_start();
888886
cpus_read_unlock();
889-
/*
890-
* Must be called outside the cpus locked section to prevent
891-
* recursive locking in the perf code.
892-
*/
893-
__lockup_detector_cleanup();
894887
}
895888

896889
void lockup_detector_reconfigure(void)
@@ -940,24 +933,6 @@ static inline void lockup_detector_setup(void)
940933
}
941934
#endif /* !CONFIG_SOFTLOCKUP_DETECTOR */
942935

943-
static void __lockup_detector_cleanup(void)
944-
{
945-
lockdep_assert_held(&watchdog_mutex);
946-
hardlockup_detector_perf_cleanup();
947-
}
948-
949-
/**
950-
* lockup_detector_cleanup - Cleanup after cpu hotplug or sysctl changes
951-
*
952-
* Caller must not hold the cpu hotplug rwsem.
953-
*/
954-
void lockup_detector_cleanup(void)
955-
{
956-
mutex_lock(&watchdog_mutex);
957-
__lockup_detector_cleanup();
958-
mutex_unlock(&watchdog_mutex);
959-
}
960-
961936
/**
962937
* lockup_detector_soft_poweroff - Interface to stop lockup detector(s)
963938
*

kernel/watchdog_perf.c

Lines changed: 1 addition & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,6 @@
2121
#include <linux/perf_event.h>
2222

2323
static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
24-
static DEFINE_PER_CPU(struct perf_event *, dead_event);
25-
static struct cpumask dead_events_mask;
2624

2725
static atomic_t watchdog_cpus = ATOMIC_INIT(0);
2826

@@ -181,36 +179,12 @@ void watchdog_hardlockup_disable(unsigned int cpu)
181179

182180
if (event) {
183181
perf_event_disable(event);
182+
perf_event_release_kernel(event);
184183
this_cpu_write(watchdog_ev, NULL);
185-
this_cpu_write(dead_event, event);
186-
cpumask_set_cpu(smp_processor_id(), &dead_events_mask);
187184
atomic_dec(&watchdog_cpus);
188185
}
189186
}
190187

191-
/**
192-
* hardlockup_detector_perf_cleanup - Cleanup disabled events and destroy them
193-
*
194-
* Called from lockup_detector_cleanup(). Serialized by the caller.
195-
*/
196-
void hardlockup_detector_perf_cleanup(void)
197-
{
198-
int cpu;
199-
200-
for_each_cpu(cpu, &dead_events_mask) {
201-
struct perf_event *event = per_cpu(dead_event, cpu);
202-
203-
/*
204-
* Required because for_each_cpu() reports unconditionally
205-
* CPU0 as set on UP kernels. Sigh.
206-
*/
207-
if (event)
208-
perf_event_release_kernel(event);
209-
per_cpu(dead_event, cpu) = NULL;
210-
}
211-
cpumask_clear(&dead_events_mask);
212-
}
213-
214188
/**
215189
* hardlockup_detector_perf_stop - Globally stop watchdog events
216190
*

0 commit comments

Comments
 (0)