Skip to content

Commit ba22540

Browse files
committed
Merge branch 'ftrace-bpf-use-single-direct-ops-for-bpf-trampolines'
Jiri Olsa says: ==================== ftrace,bpf: Use single direct ops for bpf trampolines hi, while poking the multi-tracing interface I ended up with just one ftrace_ops object to attach all trampolines. This change allows using fewer direct API calls during the attachment changes in the future code, so in effect speeding up the attachment. In current code we get a speed up from using just a single ftrace_ops object. - with current code: Performance counter stats for 'bpftrace -e fentry:vmlinux:ksys_* {} -c true': 6,364,157,902 cycles:k 828,728,902 cycles:u 1,064,803,824 instructions:u # 1.28 insn per cycle 23,797,500,067 instructions:k # 3.74 insn per cycle 4.416004987 seconds time elapsed 0.164121000 seconds user 1.289550000 seconds sys - with the fix: Performance counter stats for 'bpftrace -e fentry:vmlinux:ksys_* {} -c true': 6,535,857,905 cycles:k 810,809,429 cycles:u 1,064,594,027 instructions:u # 1.31 insn per cycle 23,962,552,894 instructions:k # 3.67 insn per cycle 1.666961239 seconds time elapsed 0.157412000 seconds user 1.283396000 seconds sys The speedup seems to be related to the fact that with a single ftrace_ops object we don't call ftrace_shutdown anymore (we use ftrace_update_ops instead) and we skip the synchronize rcu calls (each ~100ms) at the end of that function.
rfc: https://lore.kernel.org/bpf/[email protected]/ v1: https://lore.kernel.org/bpf/[email protected]/ v2: https://lore.kernel.org/bpf/[email protected]/ v3: https://lore.kernel.org/bpf/[email protected]/ v4: https://lore.kernel.org/bpf/[email protected]/ v5: https://lore.kernel.org/bpf/[email protected]/ v6 changes: - rename add_hash_entry_direct to add_ftrace_hash_entry_direct [Steven] - factor hash_add/hash_sub [Steven] - add kerneldoc header for update_ftrace_direct_* functions [Steven] - few assorted smaller fixes [Steven] - added missing direct_ops wrappers for !CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS case [Steven] v5 changes: - do not export ftrace_hash object [Steven] - fix update_ftrace_direct_add new_filter_hash leak [ci] v4 changes: - rebased on top of bpf-next/master (with jmp attach changes) added patch 1 to deal with that - added extra checks for update_ftrace_direct_del/mod to address the ci bot review v3 changes: - rebased on top of bpf-next/master - fixed update_ftrace_direct_del cleanup path - added missing inline to update_ftrace_direct_* stubs v2 changes: - rebased on top of bpf-next/master plus Song's livepatch fixes [1] - renamed the API functions [2] [Steven] - do not export the new api [Steven] - kept the original direct interface: I'm not sure if we want to melt both *_ftrace_direct and the new interface into a single one. It's a bit different in semantic (hence the name change as Steven suggested [2]) and I think the changes are not that big so we could easily keep both APIs. v1 changes: - make the change x86 specific, after discussing with Mark options for arm64 [Mark] thanks, jirka [1] https://lore.kernel.org/bpf/[email protected]/ [2] https://lore.kernel.org/bpf/[email protected]/ --- ==================== Link: https://patch.msgid.link/[email protected] Signed-off-by: Andrii Nakryiko <[email protected]>
2 parents ae23bc8 + 424f6a3 commit ba22540

File tree

6 files changed

+632
-75
lines changed

6 files changed

+632
-75
lines changed

arch/x86/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,7 @@ config X86
336336
select SCHED_SMT if SMP
337337
select ARCH_SUPPORTS_SCHED_CLUSTER if SMP
338338
select ARCH_SUPPORTS_SCHED_MC if SMP
339+
select HAVE_SINGLE_FTRACE_DIRECT_OPS if X86_64 && DYNAMIC_FTRACE_WITH_DIRECT_CALLS
339340

340341
config INSTRUCTION_DECODER
341342
def_bool y

include/linux/bpf.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1329,14 +1329,17 @@ struct bpf_tramp_image {
13291329
};
13301330

13311331
struct bpf_trampoline {
1332-
/* hlist for trampoline_table */
1333-
struct hlist_node hlist;
1332+
/* hlist for trampoline_key_table */
1333+
struct hlist_node hlist_key;
1334+
/* hlist for trampoline_ip_table */
1335+
struct hlist_node hlist_ip;
13341336
struct ftrace_ops *fops;
13351337
/* serializes access to fields of this trampoline */
13361338
struct mutex mutex;
13371339
refcount_t refcnt;
13381340
u32 flags;
13391341
u64 key;
1342+
unsigned long ip;
13401343
struct {
13411344
struct btf_func_model model;
13421345
void *addr;

include/linux/ftrace.h

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ static inline void early_trace_init(void) { }
8282

8383
struct module;
8484
struct ftrace_hash;
85+
struct ftrace_func_entry;
8586

8687
#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_MODULES) && \
8788
defined(CONFIG_DYNAMIC_FTRACE)
@@ -359,7 +360,6 @@ enum {
359360
FTRACE_OPS_FL_DIRECT = BIT(17),
360361
FTRACE_OPS_FL_SUBOP = BIT(18),
361362
FTRACE_OPS_FL_GRAPH = BIT(19),
362-
FTRACE_OPS_FL_JMP = BIT(20),
363363
};
364364

365365
#ifndef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
@@ -403,9 +403,17 @@ enum ftrace_ops_cmd {
403403
* Negative on failure. The return value is dependent on the
404404
* callback.
405405
*/
406-
typedef int (*ftrace_ops_func_t)(struct ftrace_ops *op, enum ftrace_ops_cmd cmd);
406+
typedef int (*ftrace_ops_func_t)(struct ftrace_ops *op, unsigned long ip, enum ftrace_ops_cmd cmd);
407407

408408
#ifdef CONFIG_DYNAMIC_FTRACE
409+
410+
#define FTRACE_HASH_DEFAULT_BITS 10
411+
412+
struct ftrace_hash *alloc_ftrace_hash(int size_bits);
413+
void free_ftrace_hash(struct ftrace_hash *hash);
414+
struct ftrace_func_entry *add_ftrace_hash_entry_direct(struct ftrace_hash *hash,
415+
unsigned long ip, unsigned long direct);
416+
409417
/* The hash used to know what functions callbacks trace */
410418
struct ftrace_ops_hash {
411419
struct ftrace_hash __rcu *notrace_hash;
@@ -535,6 +543,10 @@ int unregister_ftrace_direct(struct ftrace_ops *ops, unsigned long addr,
535543
int modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr);
536544
int modify_ftrace_direct_nolock(struct ftrace_ops *ops, unsigned long addr);
537545

546+
int update_ftrace_direct_add(struct ftrace_ops *ops, struct ftrace_hash *hash);
547+
int update_ftrace_direct_del(struct ftrace_ops *ops, struct ftrace_hash *hash);
548+
int update_ftrace_direct_mod(struct ftrace_ops *ops, struct ftrace_hash *hash, bool do_direct_lock);
549+
538550
void ftrace_stub_direct_tramp(void);
539551

540552
#else
@@ -561,6 +573,21 @@ static inline int modify_ftrace_direct_nolock(struct ftrace_ops *ops, unsigned l
561573
return -ENODEV;
562574
}
563575

576+
static inline int update_ftrace_direct_add(struct ftrace_ops *ops, struct ftrace_hash *hash)
577+
{
578+
return -ENODEV;
579+
}
580+
581+
static inline int update_ftrace_direct_del(struct ftrace_ops *ops, struct ftrace_hash *hash)
582+
{
583+
return -ENODEV;
584+
}
585+
586+
static inline int update_ftrace_direct_mod(struct ftrace_ops *ops, struct ftrace_hash *hash, bool do_direct_lock)
587+
{
588+
return -ENODEV;
589+
}
590+
564591
/*
565592
* This must be implemented by the architecture.
566593
* It is the way the ftrace direct_ops helper, when called

0 commit comments

Comments
 (0)