Skip to content

Commit 87b940a

Browse files
sean-jc authored and Peter Zijlstra committed
perf/core: Use static_call to optimize perf_guest_info_callbacks
Use static_call to optimize perf's guest callbacks on arm64 and x86, which are now the only architectures that define the callbacks. Use DEFINE_STATIC_CALL_RET0 as the default/NULL for all guest callbacks, as the callback semantics are that a return value '0' means "not in guest".

static_call obviously avoids the overhead of CONFIG_RETPOLINE=y, but is also advantageous versus other solutions, e.g. per-cpu callbacks, in that a per-cpu memory load is not needed to detect the !guest case.

Based on code from Peter and Like.

Suggested-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Sean Christopherson <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Paolo Bonzini <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
1 parent 2aef6f3 commit 87b940a

File tree

2 files changed

+23
-26
lines changed

2 files changed

+23
-26
lines changed

include/linux/perf_event.h

Lines changed: 8 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -1244,40 +1244,22 @@ extern void perf_event_bpf_event(struct bpf_prog *prog,
12441244

12451245
#ifdef CONFIG_GUEST_PERF_EVENTS
12461246
extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs;
1247-
static inline struct perf_guest_info_callbacks *perf_get_guest_cbs(void)
1248-
{
1249-
/*
1250-
* Callbacks are RCU-protected and must be READ_ONCE to avoid reloading
1251-
* the callbacks between a !NULL check and dereferences, to ensure
1252-
* pending stores/changes to the callback pointers are visible before a
1253-
* non-NULL perf_guest_cbs is visible to readers, and to prevent a
1254-
* module from unloading callbacks while readers are active.
1255-
*/
1256-
return rcu_dereference(perf_guest_cbs);
1257-
}
1247+
1248+
DECLARE_STATIC_CALL(__perf_guest_state, *perf_guest_cbs->state);
1249+
DECLARE_STATIC_CALL(__perf_guest_get_ip, *perf_guest_cbs->get_ip);
1250+
DECLARE_STATIC_CALL(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr);
1251+
12581252
static inline unsigned int perf_guest_state(void)
12591253
{
1260-
struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
1261-
1262-
return guest_cbs ? guest_cbs->state() : 0;
1254+
return static_call(__perf_guest_state)();
12631255
}
12641256
static inline unsigned long perf_guest_get_ip(void)
12651257
{
1266-
struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
1267-
1268-
/*
1269-
* Arbitrarily return '0' in the unlikely scenario that the callbacks
1270-
* are unregistered between checking guest state and getting the IP.
1271-
*/
1272-
return guest_cbs ? guest_cbs->get_ip() : 0;
1258+
return static_call(__perf_guest_get_ip)();
12731259
}
12741260
static inline unsigned int perf_guest_handle_intel_pt_intr(void)
12751261
{
1276-
struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
1277-
1278-
if (guest_cbs && guest_cbs->handle_intel_pt_intr)
1279-
return guest_cbs->handle_intel_pt_intr();
1280-
return 0;
1262+
return static_call(__perf_guest_handle_intel_pt_intr)();
12811263
}
12821264
extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
12831265
extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);

kernel/events/core.c

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6524,12 +6524,23 @@ static void perf_pending_event(struct irq_work *entry)
65246524
#ifdef CONFIG_GUEST_PERF_EVENTS
65256525
struct perf_guest_info_callbacks __rcu *perf_guest_cbs;
65266526

6527+
DEFINE_STATIC_CALL_RET0(__perf_guest_state, *perf_guest_cbs->state);
6528+
DEFINE_STATIC_CALL_RET0(__perf_guest_get_ip, *perf_guest_cbs->get_ip);
6529+
DEFINE_STATIC_CALL_RET0(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr);
6530+
65276531
void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
65286532
{
65296533
if (WARN_ON_ONCE(rcu_access_pointer(perf_guest_cbs)))
65306534
return;
65316535

65326536
rcu_assign_pointer(perf_guest_cbs, cbs);
6537+
static_call_update(__perf_guest_state, cbs->state);
6538+
static_call_update(__perf_guest_get_ip, cbs->get_ip);
6539+
6540+
/* Implementing ->handle_intel_pt_intr is optional. */
6541+
if (cbs->handle_intel_pt_intr)
6542+
static_call_update(__perf_guest_handle_intel_pt_intr,
6543+
cbs->handle_intel_pt_intr);
65336544
}
65346545
EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks);
65356546

@@ -6539,6 +6550,10 @@ void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
65396550
return;
65406551

65416552
rcu_assign_pointer(perf_guest_cbs, NULL);
6553+
static_call_update(__perf_guest_state, (void *)&__static_call_return0);
6554+
static_call_update(__perf_guest_get_ip, (void *)&__static_call_return0);
6555+
static_call_update(__perf_guest_handle_intel_pt_intr,
6556+
(void *)&__static_call_return0);
65426557
synchronize_rcu();
65436558
}
65446559
EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);

0 commit comments

Comments
 (0)