diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index 45cbc7c6fe490..21c70ae3296bb 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -1131,7 +1131,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
 	store_args(nr_arg_slots, args_off, ctx);
 
 	/* skip to actual body of traced function */
-	if (flags & BPF_TRAMP_F_SKIP_FRAME)
+	if (flags & BPF_TRAMP_F_ORIG_STACK)
 		orig_call += RV_FENTRY_NINSNS * 4;
 
 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index fa3b616af03a2..462250a20311d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -230,6 +230,7 @@ config X86
 	select HAVE_DYNAMIC_FTRACE_WITH_ARGS	if X86_64
 	select HAVE_FTRACE_REGS_HAVING_PT_REGS	if X86_64
 	select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+	select HAVE_DYNAMIC_FTRACE_WITH_JMP	if X86_64
 	select HAVE_SAMPLE_FTRACE_DIRECT	if X86_64
 	select HAVE_SAMPLE_FTRACE_DIRECT_MULTI	if X86_64
 	select HAVE_EBPF_JIT
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 4450acec93903..0543b57f54ee4 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -74,7 +74,12 @@ static const char *ftrace_call_replace(unsigned long ip, unsigned long addr)
 	 * No need to translate into a callthunk. The trampoline does
 	 * the depth accounting itself.
 	 */
-	return text_gen_insn(CALL_INSN_OPCODE, (void *)ip, (void *)addr);
+	if (ftrace_is_jmp(addr)) {
+		addr = ftrace_jmp_get(addr);
+		return text_gen_insn(JMP32_INSN_OPCODE, (void *)ip, (void *)addr);
+	} else {
+		return text_gen_insn(CALL_INSN_OPCODE, (void *)ip, (void *)addr);
+	}
 }
 
 static int ftrace_verify_code(unsigned long ip, const char *old_code)
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index 367da36381678..068242e9c857a 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -285,8 +285,18 @@ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL)
 	ANNOTATE_NOENDBR
 	RET
 
+1:
+	testb	$1, %al
+	jz	2f
+	andq	$0xfffffffffffffffe, %rax
+	movq	%rax, MCOUNT_REG_SIZE+8(%rsp)
+	restore_mcount_regs
+	/* Restore flags */
+	popfq
+	RET
+
 	/* Swap the flags with orig_rax */
-1:	movq MCOUNT_REG_SIZE(%rsp), %rdi
+2:	movq MCOUNT_REG_SIZE(%rsp), %rdi
 	movq %rdi, MCOUNT_REG_SIZE-8(%rsp)
 	movq %rax, MCOUNT_REG_SIZE(%rsp)
 
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 36a0d4db9f686..8aaa085db9072 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -597,8 +597,9 @@ static int emit_jump(u8 **pprog, void *func, void *ip)
 	return emit_patch(pprog, func, ip, 0xE9);
 }
 
-static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
-				void *old_addr, void *new_addr)
+static int ___bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
+				 enum bpf_text_poke_type new_t,
+				 void *old_addr, void *new_addr)
 {
 	const u8 *nop_insn = x86_nops[5];
 	u8 old_insn[X86_PATCH_SIZE];
@@ -609,7 +610,7 @@ static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
 	memcpy(old_insn, nop_insn, X86_PATCH_SIZE);
 	if (old_addr) {
 		prog = old_insn;
-		ret = t == BPF_MOD_CALL ?
+		ret = old_t == BPF_MOD_CALL ?
 		      emit_call(&prog, old_addr, ip) :
 		      emit_jump(&prog, old_addr, ip);
 		if (ret)
@@ -619,7 +620,7 @@ static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
 	memcpy(new_insn, nop_insn, X86_PATCH_SIZE);
 	if (new_addr) {
 		prog = new_insn;
-		ret = t == BPF_MOD_CALL ?
+		ret = new_t == BPF_MOD_CALL ?
 		      emit_call(&prog, new_addr, ip) :
 		      emit_jump(&prog, new_addr, ip);
 		if (ret)
@@ -640,8 +641,15 @@ static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
 	return ret;
 }
 
-int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
-		       void *old_addr, void *new_addr)
+static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
+				void *old_addr, void *new_addr)
+{
+	return ___bpf_arch_text_poke(ip, t, t, old_addr, new_addr);
+}
+
+int bpf_arch_text_poke_type(void *ip, enum bpf_text_poke_type old_t,
+			    enum bpf_text_poke_type new_t, void *old_addr,
+			    void *new_addr)
 {
 	if (!is_kernel_text((long)ip) &&
 	    !is_bpf_text_address((long)ip))
@@ -655,7 +663,13 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
 	if (is_endbr(ip))
 		ip += ENDBR_INSN_SIZE;
 
-	return __bpf_arch_text_poke(ip, t, old_addr, new_addr);
+	return ___bpf_arch_text_poke(ip, old_t, new_t, old_addr, new_addr);
+}
+
+int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
+		       void *old_addr, void *new_addr)
+{
+	return bpf_arch_text_poke_type(ip, t, t, old_addr, new_addr);
 }
 
 #define EMIT_LFENCE()	EMIT3(0x0F, 0xAE, 0xE8)
@@ -2847,7 +2861,7 @@ static int get_nr_used_regs(const struct btf_func_model *m)
 }
 
 static void save_args(const struct btf_func_model *m, u8 **prog,
-		      int stack_size, bool for_call_origin)
+		      int stack_size, bool for_call_origin, bool jmp)
 {
 	int arg_regs, first_off = 0, nr_regs = 0, nr_stack_slots = 0;
 	int i, j;
@@ -2890,7 +2904,7 @@ static void save_args(const struct btf_func_model *m, u8 **prog,
 			 */
 			for (j = 0; j < arg_regs; j++) {
 				emit_ldx(prog, BPF_DW, BPF_REG_0, BPF_REG_FP,
-					 nr_stack_slots * 8 + 0x18);
+					 nr_stack_slots * 8 + 16 + (!jmp) * 8);
 				emit_stx(prog, BPF_DW, BPF_REG_FP, BPF_REG_0,
 					 -stack_size);
 
@@ -3284,12 +3298,17 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
		 * should be 16-byte aligned. Following code depend on
		 * that stack_size is already 8-byte aligned.
		 */
-		stack_size += (stack_size % 16) ? 0 : 8;
+		if (bpf_trampoline_need_jmp(flags)) {
+			/* no rip in the "jmp" case */
+			stack_size += (stack_size % 16) ? 8 : 0;
+		} else {
+			stack_size += (stack_size % 16) ? 0 : 8;
+		}
 	}
 
 	arg_stack_off = stack_size;
 
-	if (flags & BPF_TRAMP_F_SKIP_FRAME) {
+	if (flags & BPF_TRAMP_F_CALL_ORIG) {
 		/* skip patched call instruction and point orig_call to actual
 		 * body of the kernel function.
 		 */
@@ -3344,7 +3363,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 		emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -ip_off);
 	}
 
-	save_args(m, &prog, regs_off, false);
+	save_args(m, &prog, regs_off, false, bpf_trampoline_need_jmp(flags));
 
 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
 		/* arg1: mov rdi, im */
@@ -3377,7 +3396,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 
 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
 		restore_regs(m, &prog, regs_off);
-		save_args(m, &prog, arg_stack_off, true);
+		save_args(m, &prog, arg_stack_off, true, bpf_trampoline_need_jmp(flags));
 
 		if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) {
 			/* Before calling the original function, load the
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 09d5dc541d1ce..bade56bc6c963 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1202,6 +1202,12 @@ struct btf_func_model {
  */
 #define BPF_TRAMP_F_INDIRECT		BIT(8)
 
+/*
+ * Indicate that the trampoline is using "jmp" instead of "call". This flag
+ * is only used in the !ftrace_managed case.
+ */
+#define BPF_TRAMP_F_JMPED		BIT(9)
+
 /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
  * bytes on x86.
  */
@@ -1264,6 +1270,18 @@ typedef void (*bpf_trampoline_exit_t)(struct bpf_prog *prog, u64 start,
 bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog);
 bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog);
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_JMP
+static inline bool bpf_trampoline_need_jmp(u64 flags)
+{
+	return flags & BPF_TRAMP_F_CALL_ORIG && !(flags & BPF_TRAMP_F_SKIP_FRAME);
+}
+#else
+static inline bool bpf_trampoline_need_jmp(u64 flags)
+{
+	return false;
+}
+#endif
+
 struct bpf_ksym {
 	unsigned long start;
 	unsigned long end;
@@ -3700,6 +3718,10 @@ enum bpf_text_poke_type {
 	BPF_MOD_JUMP,
 };
 
+int bpf_arch_text_poke_type(void *ip, enum bpf_text_poke_type old_t,
+			    enum bpf_text_poke_type new_t, void *addr1,
+			    void *addr2);
+
 int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *addr1,
		       void *addr2);
 
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 7ded7df6e9b50..14705dec1b08a 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -351,6 +351,7 @@ enum {
 	FTRACE_OPS_FL_DIRECT		= BIT(17),
 	FTRACE_OPS_FL_SUBOP		= BIT(18),
 	FTRACE_OPS_FL_GRAPH		= BIT(19),
+	FTRACE_OPS_FL_JMP		= BIT(20),
 };
 
 #ifndef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
@@ -569,6 +570,38 @@ static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs,
						 unsigned long addr) { }
 #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_JMP
+static inline bool ftrace_is_jmp(unsigned long addr)
+{
+	return addr & 1;
+}
+
+static inline unsigned long ftrace_jmp_set(unsigned long addr)
+{
+	return addr | 1UL;
+}
+
+static inline unsigned long ftrace_jmp_get(unsigned long addr)
+{
+	return addr & ~1UL;
+}
+#else
+static inline bool ftrace_is_jmp(unsigned long addr)
+{
+	return false;
+}
+
+static inline unsigned long ftrace_jmp_set(unsigned long addr)
+{
+	return addr;
+}
+
+static inline unsigned long ftrace_jmp_get(unsigned long addr)
+{
+	return addr;
+}
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_JMP */
+
 #ifdef CONFIG_STACK_TRACER
 int stack_trace_sysctl(const struct ctl_table *table, int write, void *buffer,
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index ef4448f18aad2..400eb70fd42e9 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -3156,6 +3156,16 @@ int __weak bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
 	return -ENOTSUPP;
 }
 
+int __weak bpf_arch_text_poke_type(void *ip, enum bpf_text_poke_type old_t,
+				   enum bpf_text_poke_type new_t, void *old_addr,
+				   void *new_addr)
+{
+	if (old_t == new_t)
+		return bpf_arch_text_poke(ip, old_t, old_addr, new_addr);
+
+	return -EOPNOTSUPP;
+}
+
 void * __weak bpf_arch_text_copy(void *dst, void *src, size_t len)
 {
 	return ERR_PTR(-ENOTSUPP);
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 5949095e51c3d..02a9f33d8f6c1 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -175,15 +175,37 @@ static struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
 	return tr;
 }
 
-static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr)
+static int bpf_text_poke(struct bpf_trampoline *tr, void *old_addr,
+			 void *new_addr)
 {
+	enum bpf_text_poke_type new_t = BPF_MOD_CALL, old_t = BPF_MOD_CALL;
 	void *ip = tr->func.addr;
 	int ret;
 
+	if (bpf_trampoline_need_jmp(tr->flags))
+		new_t = BPF_MOD_JUMP;
+	if (tr->flags & BPF_TRAMP_F_JMPED)
+		old_t = BPF_MOD_JUMP;
+
+	ret = bpf_arch_text_poke_type(ip, old_t, new_t,
+				      old_addr, new_addr);
+	if (!ret) {
+		if (new_t == BPF_MOD_JUMP)
+			tr->flags |= BPF_TRAMP_F_JMPED;
+		else
+			tr->flags &= ~BPF_TRAMP_F_JMPED;
+	}
+
+	return ret;
+}
+
+static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr)
+{
+	int ret;
+
 	if (tr->func.ftrace_managed)
 		ret = unregister_ftrace_direct(tr->fops, (long)old_addr, false);
 	else
-		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL);
+		ret = bpf_text_poke(tr, old_addr, NULL);
 
 	return ret;
 }
@@ -191,7 +213,6 @@ static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr)
 static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr,
			  bool lock_direct_mutex)
 {
-	void *ip = tr->func.addr;
 	int ret;
 
 	if (tr->func.ftrace_managed) {
@@ -200,7 +221,7 @@ static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_ad
 		else
 			ret = modify_ftrace_direct_nolock(tr->fops, (long)new_addr);
 	} else {
-		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, new_addr);
+		ret = bpf_text_poke(tr, old_addr, new_addr);
 	}
 	return ret;
 }
@@ -223,7 +244,7 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
 		ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 1);
 		ret = register_ftrace_direct(tr->fops, (long)new_addr);
 	} else {
-		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr);
+		ret = bpf_text_poke(tr, NULL, new_addr);
 	}
 
 	return ret;
@@ -415,7 +436,8 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
 	}
 
 	/* clear all bits except SHARE_IPMODIFY and TAIL_CALL_CTX */
-	tr->flags &= (BPF_TRAMP_F_SHARE_IPMODIFY | BPF_TRAMP_F_TAIL_CALL_CTX);
+	tr->flags &= (BPF_TRAMP_F_SHARE_IPMODIFY | BPF_TRAMP_F_TAIL_CALL_CTX |
+		      BPF_TRAMP_F_JMPED);
 
 	if (tlinks[BPF_TRAMP_FEXIT].nr_links ||
	    tlinks[BPF_TRAMP_MODIFY_RETURN].nr_links) {
@@ -432,9 +454,17 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
 
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
 again:
-	if ((tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY) &&
-	    (tr->flags & BPF_TRAMP_F_CALL_ORIG))
-		tr->flags |= BPF_TRAMP_F_ORIG_STACK;
+	if (tr->flags & BPF_TRAMP_F_CALL_ORIG) {
+		if (tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY) {
+			tr->flags |= BPF_TRAMP_F_ORIG_STACK;
+		} else if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_JMP)) {
+			/* Use "jmp" instead of "call" for the trampoline
+			 * in the origin call case, and we don't need to
+			 * skip the frame.
+			 */
+			tr->flags &= ~BPF_TRAMP_F_SKIP_FRAME;
+		}
+	}
 #endif
 
 	size = arch_bpf_trampoline_size(&tr->func.model, tr->flags,
@@ -465,6 +495,11 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
 	if (err)
 		goto out_free;
 
+	if (bpf_trampoline_need_jmp(tr->flags))
+		tr->fops->flags |= FTRACE_OPS_FL_JMP;
+	else
+		tr->fops->flags &= ~FTRACE_OPS_FL_JMP;
+
 	WARN_ON(tr->cur_image && total == 0);
 	if (tr->cur_image)
 		/* progs already running at this address */
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index d2c79da81e4f8..4661b9e606e00 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -80,6 +80,12 @@ config HAVE_DYNAMIC_FTRACE_NO_PATCHABLE
 	  If the architecture generates __patchable_function_entries sections
 	  but does not want them included in the ftrace locations.
 
+config HAVE_DYNAMIC_FTRACE_WITH_JMP
+	bool
+	help
+	  If the architecture supports replacing the __fentry__ call with a
+	  "jmp" instruction.
+
 config HAVE_SYSCALL_TRACEPOINTS
 	bool
 	help
@@ -330,6 +336,12 @@ config DYNAMIC_FTRACE_WITH_ARGS
 	depends on DYNAMIC_FTRACE
 	depends on HAVE_DYNAMIC_FTRACE_WITH_ARGS
 
+config DYNAMIC_FTRACE_WITH_JMP
+	def_bool y
+	depends on DYNAMIC_FTRACE
+	depends on DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+	depends on HAVE_DYNAMIC_FTRACE_WITH_JMP
+
 config FPROBE
 	bool "Kernel Function Probe (fprobe)"
 	depends on HAVE_FUNCTION_GRAPH_FREGS && HAVE_FTRACE_GRAPH_FUNC
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 42bd2ba68a821..b143a79e64bbb 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5938,7 +5938,8 @@ static void remove_direct_functions_hash(struct ftrace_hash *hash, unsigned long
 	for (i = 0; i < size; i++) {
 		hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
 			del = __ftrace_lookup_ip(direct_functions, entry->ip);
-			if (del && del->direct == addr) {
+			if (del && ftrace_jmp_get(del->direct) ==
+				   ftrace_jmp_get(addr)) {
 				remove_hash_entry(direct_functions, del);
 				kfree(del);
 			}
@@ -5994,6 +5995,9 @@ int register_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
 
 	mutex_lock(&direct_mutex);
 
+	if (ops->flags & FTRACE_OPS_FL_JMP)
+		addr = ftrace_jmp_set(addr);
+
 	/* Make sure requested entries are not already registered.. */
 	size = 1 << hash->size_bits;
 	for (i = 0; i < size; i++) {
@@ -6117,6 +6121,9 @@ __modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
 
 	lockdep_assert_held_once(&direct_mutex);
 
+	if (ops->flags & FTRACE_OPS_FL_JMP)
+		addr = ftrace_jmp_set(addr);
+
 	/* Enable the tmp_ops to have the same functions as the direct ops */
 	ftrace_ops_init(&tmp_ops);
 	tmp_ops.func_hash = ops->func_hash;
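A note on the tagging convention the patch relies on: function entry points
are at least 2-byte aligned, so bit 0 of a direct-call address is free to
carry the call-vs-jmp choice. That is exactly what ftrace_jmp_set(),
ftrace_is_jmp() and ftrace_jmp_get() implement, and what
ftrace_call_replace() consumes when it decides between CALL_INSN_OPCODE and
JMP32_INSN_OPCODE. For illustration only, here is a minimal standalone
userspace C sketch of the same low-bit tagging; the jmp_set()/is_jmp()/
jmp_get() names are placeholders mirroring the kernel helpers, not kernel
code:

#include <stdio.h>
#include <stdint.h>

/* Stash the "patch a jmp instead of a call" flag in bit 0 of a code
 * address; the address itself stays recoverable because function entry
 * points are always even.
 */
static inline uintptr_t jmp_set(uintptr_t addr) { return addr | 1UL; }
static inline int is_jmp(uintptr_t addr) { return addr & 1UL; }
static inline uintptr_t jmp_get(uintptr_t addr) { return addr & ~1UL; }

static void traced_function(void) { }

int main(void)
{
	/* Producer side: request a jmp by tagging the target address. */
	uintptr_t tagged = jmp_set((uintptr_t)traced_function);

	/* Consumer side (cf. ftrace_call_replace() above): pick the
	 * opcode from the tag, then strip it to get the real target.
	 */
	if (is_jmp(tagged))
		printf("emit JMP32 to %#lx\n", (unsigned long)jmp_get(tagged));
	else
		printf("emit CALL to %#lx\n", (unsigned long)jmp_get(tagged));

	return 0;
}

The same single-word encoding is what lets register_ftrace_direct() and
__modify_ftrace_direct() pass the flag through the existing direct-call
hash without widening any data structures.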