|
| 1 | +From df3bed9ea57603e62696a2f8aee9609d3500b7d1 Mon Sep 17 00:00:00 2001 |
| 2 | +From: Kan Liang < [email protected]> |
| 3 | +Date: Wed, 28 May 2025 10:58:32 -0700 |
| 4 | +Subject: [PATCH 3/3] perf: Fix the throttle error of some clock events |
| 5 | + |
| 6 | +The Arm CI reports an RCU stall, which can be reproduced by the perf |
| 7 | +command below. |
| 8 | + perf record -a -e cpu-clock -- sleep 2 |
| 9 | + |
| 10 | +The cpu-clock and task_clock are two special SW events that rely on an |
| 11 | +hrtimer. To stop the timer, HRTIMER_NORESTART is returned instead of |
| 12 | +invoking stop(), because the hrtimer interrupt handler cannot cancel |
| 13 | +itself; attempting to do so causes an infinite loop. |
| 14 | + |
| 15 | +There may be two ways to fix it. |
| 16 | +- Add a check of MAX_INTERRUPTS in the event_stop. Return immediately if |
| 17 | +the stop is invoked by the throttle. |
| 18 | +- Introduce a PMU flag to track the case. Avoid the event_stop in |
| 19 | +perf_event_throttle() if the flag is detected. |
| 20 | + |
| 21 | +The latter looks more generic. It can be reused later if other cases |
| 22 | +also need to avoid the stop. The latter is implemented. |
| 23 | + |
| 24 | +Reported-by: Leo Yan < [email protected]> |
| 25 | +Reported-by: Aishwarya TCV < [email protected]> |
| 26 | +Closes: https://lore.kernel.org/lkml/ [email protected]/ |
| 27 | +Tested-by: Leo Yan < [email protected]> |
| 28 | +Signed-off-by: Kan Liang < [email protected]> |
| 29 | +Link: https://lore.kernel.org/r/ [email protected] |
| 30 | +Signed-off-by: Alexei Starovoitov < [email protected]> |
| 31 | +--- |
| 32 | + include/linux/perf_event.h | 1 + |
| 33 | + kernel/events/core.c | 23 ++++++++++++++++++++--- |
| 34 | + 2 files changed, 21 insertions(+), 3 deletions(-) |
| 35 | + |
| 36 | +diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h |
| 37 | +index 52dc7cfab0e0..97a747a97a50 100644 |
| 38 | +--- a/include/linux/perf_event.h |
| 39 | ++++ b/include/linux/perf_event.h |
| 40 | +@@ -305,6 +305,7 @@ struct perf_event_pmu_context; |
| 41 | + #define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100 |
| 42 | + #define PERF_PMU_CAP_AUX_PAUSE 0x0200 |
| 43 | + #define PERF_PMU_CAP_AUX_PREFER_LARGE 0x0400 |
| 44 | ++#define PERF_PMU_CAP_NO_THROTTLE_STOP 0x0800 |
| 45 | + |
| 46 | + /** |
| 47 | + * pmu::scope |
| 48 | +diff --git a/kernel/events/core.c b/kernel/events/core.c |
| 49 | +index f34c99f8ce8f..abd19bb571e3 100644 |
| 50 | +--- a/kernel/events/core.c |
| 51 | ++++ b/kernel/events/core.c |
| 52 | +@@ -2656,7 +2656,22 @@ static void perf_event_unthrottle(struct perf_event *event, bool start) |
| 53 | + |
| 54 | + static void perf_event_throttle(struct perf_event *event) |
| 55 | + { |
| 56 | +- event->pmu->stop(event, 0); |
| 57 | ++ /* |
| 58 | ++ * Some PMUs, e.g., cpu-clock and task_clock, may rely on |
| 59 | ++ * a special mechanism (hrtimer) to manipulate counters. |
| 60 | ++ * The regular stop doesn't work, since the hrtimer interrupt |
| 61 | ++ * handler cannot cancel itself. |
| 62 | ++ * |
| 63 | ++ * The stop should be avoided for such cases. Let the |
| 64 | ++ * driver-specific code handle it. |
| 65 | ++ * |
| 66 | ++ * The counters will eventually be disabled in the driver-specific |
| 67 | ++ * code. In unthrottle, they still need to be re-enabled. |
| 68 | ++ * There is no handling for PERF_PMU_CAP_NO_THROTTLE_STOP in |
| 69 | ++ * the perf_event_unthrottle(). |
| 70 | ++ */ |
| 71 | ++ if (!(event->pmu->capabilities & PERF_PMU_CAP_NO_THROTTLE_STOP)) |
| 72 | ++ event->pmu->stop(event, 0); |
| 73 | + event->hw.interrupts = MAX_INTERRUPTS; |
| 74 | + if (event == event->group_leader) |
| 75 | + perf_log_throttle(event, 0); |
| 76 | +@@ -11848,7 +11863,8 @@ static int cpu_clock_event_init(struct perf_event *event) |
| 77 | + static struct pmu perf_cpu_clock = { |
| 78 | + .task_ctx_nr = perf_sw_context, |
| 79 | + |
| 80 | +- .capabilities = PERF_PMU_CAP_NO_NMI, |
| 81 | ++ .capabilities = PERF_PMU_CAP_NO_NMI | |
| 82 | ++ PERF_PMU_CAP_NO_THROTTLE_STOP, |
| 83 | + .dev = PMU_NULL_DEV, |
| 84 | + |
| 85 | + .event_init = cpu_clock_event_init, |
| 86 | +@@ -11930,7 +11946,8 @@ static int task_clock_event_init(struct perf_event *event) |
| 87 | + static struct pmu perf_task_clock = { |
| 88 | + .task_ctx_nr = perf_sw_context, |
| 89 | + |
| 90 | +- .capabilities = PERF_PMU_CAP_NO_NMI, |
| 91 | ++ .capabilities = PERF_PMU_CAP_NO_NMI | |
| 92 | ++ PERF_PMU_CAP_NO_THROTTLE_STOP, |
| 93 | + .dev = PMU_NULL_DEV, |
| 94 | + |
| 95 | + .event_init = task_clock_event_init, |
| 96 | +-- |
| 97 | +2.49.0 |
| 98 | + |
0 commit comments