From 72ec978ef2276c8de7a41aa40a8d7642cb2c12e6 Mon Sep 17 00:00:00 2001
From: Chenghao Duan
Date: Wed, 30 Jul 2025 21:12:53 +0800
Subject: [PATCH 1/5] LoongArch: Add larch_insn_gen_{beq,bne} helpers

Add larch_insn_gen_beq() and larch_insn_gen_bne() helpers which will be
used in the BPF trampoline implementation.

Co-developed-by: George Guo
Signed-off-by: George Guo
Co-developed-by: Youling Tang
Signed-off-by: Youling Tang
Signed-off-by: Chenghao Duan
Reviewed-by: Hengqi Chen
---
 arch/loongarch/include/asm/inst.h |  2 ++
 arch/loongarch/kernel/inst.c      | 28 ++++++++++++++++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h
index 3089785ca97e7..2ae96a35d93c6 100644
--- a/arch/loongarch/include/asm/inst.h
+++ b/arch/loongarch/include/asm/inst.h
@@ -511,6 +511,8 @@ u32 larch_insn_gen_lu12iw(enum loongarch_gpr rd, int imm);
 u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm);
 u32 larch_insn_gen_lu52id(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm);
 u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm);
+u32 larch_insn_gen_beq(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm);
+u32 larch_insn_gen_bne(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm);
 
 static inline bool signed_imm_check(long val, unsigned int bit)
 {
diff --git a/arch/loongarch/kernel/inst.c b/arch/loongarch/kernel/inst.c
index 14d7d700bcb98..674e3b3222b62 100644
--- a/arch/loongarch/kernel/inst.c
+++ b/arch/loongarch/kernel/inst.c
@@ -336,3 +336,31 @@ u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm)
 
 	return insn.word;
 }
+
+u32 larch_insn_gen_beq(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm)
+{
+	union loongarch_instruction insn;
+
+	if ((imm & 3) || imm < -SZ_128K || imm >= SZ_128K) {
+		pr_warn("The generated beq instruction is out of range.\n");
+		return INSN_BREAK;
+	}
+
+	emit_beq(&insn, rj, rd, imm >> 2);
+
+	return insn.word;
+}
+
+u32 larch_insn_gen_bne(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm)
+{
+	union loongarch_instruction insn;
+
+	if ((imm & 3) || imm < -SZ_128K || imm >= SZ_128K) {
+		pr_warn("The generated bne instruction is out of range.\n");
+		return INSN_BREAK;
+	}
+
+	emit_bne(&insn, rj, rd, imm >> 2);
+
+	return insn.word;
+}

From da3ba3b46d8d5289194fe9ecb9e06cadc169cd22 Mon Sep 17 00:00:00 2001
From: Chenghao Duan
Date: Wed, 30 Jul 2025 21:12:54 +0800
Subject: [PATCH 2/5] LoongArch: BPF: Rename validate_code() to validate_ctx()

Rename the existing validate_code() to validate_ctx() and factor out the
code validation handling into a new helper validate_code():

* validate_code() is used to check the validity of the generated code.
* validate_ctx() is used to check both code validity and table entry
  correctness.

The new validate_code() will be used in subsequent changes.
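
For reference, the resulting two-level check looks like this (simplified
excerpt of the helper added by the hunk below, with descriptive comments
added here for illustration):

	static int validate_ctx(struct jit_ctx *ctx)
	{
		/* code validity only */
		if (validate_code(ctx))
			return -1;

		/* plus exception table consistency */
		if (WARN_ON_ONCE(ctx->num_exentries != ctx->prog->aux->num_exentries))
			return -1;

		return 0;
	}
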
Co-developed-by: George Guo
Signed-off-by: George Guo
Signed-off-by: Chenghao Duan
Reviewed-by: Hengqi Chen
---
 arch/loongarch/net/bpf_jit.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index fa1500d4aa3e3..7032f11d3a21b 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -1180,6 +1180,14 @@ static int validate_code(struct jit_ctx *ctx)
 			return -1;
 	}
 
+	return 0;
+}
+
+static int validate_ctx(struct jit_ctx *ctx)
+{
+	if (validate_code(ctx))
+		return -1;
+
 	if (WARN_ON_ONCE(ctx->num_exentries != ctx->prog->aux->num_exentries))
 		return -1;
 
@@ -1288,7 +1296,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	build_epilogue(&ctx);
 
 	/* 3. Extra pass to validate JITed code */
-	if (validate_code(&ctx)) {
+	if (validate_ctx(&ctx)) {
 		bpf_jit_binary_free(header);
 		prog = orig_prog;
 		goto out_offset;

From 4d2947d0cd628d381894b2a93d80028cf507a941 Mon Sep 17 00:00:00 2001
From: Chenghao Duan
Date: Wed, 30 Jul 2025 21:12:55 +0800
Subject: [PATCH 3/5] LoongArch: BPF: Implement dynamic code modification
 support

This commit adds support for BPF dynamic code modification on the
LoongArch architecture:

1. Implement bpf_arch_text_poke() for runtime instruction patching.
2. Add bpf_arch_text_copy() for instruction block copying.
3. Create bpf_arch_text_invalidate() for code invalidation.

On LoongArch, since symbol addresses in the direct mapping region cannot
be reached via relative jump instructions from the paged mapping region,
we use the move_imm+jirl instruction pair as an absolute jump. This pair
requires 2-5 instructions, so 5 NOP instructions are reserved in the
program as placeholders for function jumps.

larch_insn_text_copy() is used solely by BPF and requires page_size
alignment; currently, only the size of the trampoline is page-aligned.
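
For illustration, the absolute-jump sequence that later replaces the
reserved NOPs is emitted roughly as follows (sketch based on the
emit_jump_and_link() helper added below; T1 is the scratch register):

	/* Load the 64-bit target address into T1 (1-4 instructions). */
	move_imm(ctx, LOONGARCH_GPR_T1, target, false);
	/* Jump through T1; rd is RA for a call, ZERO for a plain jump. */
	emit_insn(ctx, jirl, rd, LOONGARCH_GPR_T1, 0);
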
Co-developed-by: George Guo
Signed-off-by: George Guo
Signed-off-by: Chenghao Duan
---
 arch/loongarch/include/asm/inst.h |   1 +
 arch/loongarch/kernel/inst.c      |  27 ++++
 arch/loongarch/net/bpf_jit.c      | 104 ++++++++++++++++++++++++++++++
 3 files changed, 132 insertions(+)

diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h
index 2ae96a35d93c6..88bb73e46f632 100644
--- a/arch/loongarch/include/asm/inst.h
+++ b/arch/loongarch/include/asm/inst.h
@@ -497,6 +497,7 @@ void arch_simulate_insn(union loongarch_instruction insn, struct pt_regs *regs);
 int larch_insn_read(void *addr, u32 *insnp);
 int larch_insn_write(void *addr, u32 insn);
 int larch_insn_patch_text(void *addr, u32 insn);
+int larch_insn_text_copy(void *dst, void *src, size_t len);
 
 u32 larch_insn_gen_nop(void);
 u32 larch_insn_gen_b(unsigned long pc, unsigned long dest);
diff --git a/arch/loongarch/kernel/inst.c b/arch/loongarch/kernel/inst.c
index 674e3b3222b62..7df63a950d571 100644
--- a/arch/loongarch/kernel/inst.c
+++ b/arch/loongarch/kernel/inst.c
@@ -4,6 +4,7 @@
  */
 #include
 #include
+#include
 
 #include
 #include
@@ -218,6 +219,32 @@ int larch_insn_patch_text(void *addr, u32 insn)
 	return ret;
 }
 
+int larch_insn_text_copy(void *dst, void *src, size_t len)
+{
+	int ret;
+	unsigned long flags;
+	unsigned long dst_start, dst_end, dst_len;
+
+	dst_start = round_down((unsigned long)dst, PAGE_SIZE);
+	dst_end = round_up((unsigned long)dst + len, PAGE_SIZE);
+	dst_len = dst_end - dst_start;
+
+	set_memory_rw(dst_start, dst_len / PAGE_SIZE);
+	raw_spin_lock_irqsave(&patch_lock, flags);
+
+	ret = copy_to_kernel_nofault(dst, src, len);
+	if (ret)
+		pr_err("%s: operation failed\n", __func__);
+
+	raw_spin_unlock_irqrestore(&patch_lock, flags);
+	set_memory_rox(dst_start, dst_len / PAGE_SIZE);
+
+	if (!ret)
+		flush_icache_range((unsigned long)dst, (unsigned long)dst + len);
+
+	return ret;
+}
+
 u32 larch_insn_gen_nop(void)
 {
 	return INSN_NOP;
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index 7032f11d3a21b..5e6ae7e0e6810 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -4,8 +4,12 @@
  *
  * Copyright (C) 2022 Loongson Technology Corporation Limited
  */
+#include
 #include "bpf_jit.h"
 
+#define LOONGARCH_LONG_JUMP_NINSNS 5
+#define LOONGARCH_LONG_JUMP_NBYTES (LOONGARCH_LONG_JUMP_NINSNS * 4)
+
 #define REG_TCC LOONGARCH_GPR_A6
 #define TCC_SAVED LOONGARCH_GPR_S5
 
@@ -88,6 +92,7 @@ static u8 tail_call_reg(struct jit_ctx *ctx)
  */
 static void build_prologue(struct jit_ctx *ctx)
 {
+	int i;
 	int stack_adjust = 0, store_offset, bpf_stack_adjust;
 
 	bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
@@ -98,6 +103,10 @@ static void build_prologue(struct jit_ctx *ctx)
 	stack_adjust = round_up(stack_adjust, 16);
 	stack_adjust += bpf_stack_adjust;
 
+	/* Reserve space for the move_imm + jirl instructions */
+	for (i = 0; i < LOONGARCH_LONG_JUMP_NINSNS; i++)
+		emit_insn(ctx, nop);
+
 	/*
 	 * First instruction initializes the tail call count (TCC).
	 * On tail call we skip this instruction, and the TCC is
@@ -1367,3 +1376,98 @@ bool bpf_jit_supports_subprog_tailcalls(void)
 {
 	return true;
 }
+
+static int emit_jump_and_link(struct jit_ctx *ctx, u8 rd, u64 target)
+{
+	if (!target) {
+		pr_err("bpf_jit: jump target address is error\n");
+		return -EFAULT;
+	}
+
+	move_imm(ctx, LOONGARCH_GPR_T1, target, false);
+	emit_insn(ctx, jirl, rd, LOONGARCH_GPR_T1, 0);
+
+	return 0;
+}
+
+static int gen_jump_or_nops(void *target, void *ip, u32 *insns, bool is_call)
+{
+	struct jit_ctx ctx;
+
+	ctx.idx = 0;
+	ctx.image = (union loongarch_instruction *)insns;
+
+	if (!target) {
+		emit_insn((&ctx), nop);
+		emit_insn((&ctx), nop);
+		return 0;
+	}
+
+	return emit_jump_and_link(&ctx, is_call ? LOONGARCH_GPR_T0 : LOONGARCH_GPR_ZERO,
+				  (unsigned long)target);
+}
+
+int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
+		       void *old_addr, void *new_addr)
+{
+	u32 old_insns[LOONGARCH_LONG_JUMP_NINSNS] = {[0 ... 4] = INSN_NOP};
+	u32 new_insns[LOONGARCH_LONG_JUMP_NINSNS] = {[0 ... 4] = INSN_NOP};
+	bool is_call = poke_type == BPF_MOD_CALL;
+	int ret;
+
+	if (!is_kernel_text((unsigned long)ip) &&
+	    !is_bpf_text_address((unsigned long)ip))
+		return -ENOTSUPP;
+
+	ret = gen_jump_or_nops(old_addr, ip, old_insns, is_call);
+	if (ret)
+		return ret;
+
+	if (memcmp(ip, old_insns, LOONGARCH_LONG_JUMP_NBYTES))
+		return -EFAULT;
+
+	ret = gen_jump_or_nops(new_addr, ip, new_insns, is_call);
+	if (ret)
+		return ret;
+
+	mutex_lock(&text_mutex);
+	if (memcmp(ip, new_insns, LOONGARCH_LONG_JUMP_NBYTES))
+		ret = larch_insn_text_copy(ip, new_insns, LOONGARCH_LONG_JUMP_NBYTES);
+	mutex_unlock(&text_mutex);
+	return ret;
+}
+
+int bpf_arch_text_invalidate(void *dst, size_t len)
+{
+	int i;
+	int ret = 0;
+	u32 *inst;
+
+	inst = kvmalloc(len, GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
+
+	for (i = 0; i < (len / sizeof(u32)); i++)
+		inst[i] = INSN_BREAK;
+
+	mutex_lock(&text_mutex);
+	if (larch_insn_text_copy(dst, inst, len))
+		ret = -EINVAL;
+	mutex_unlock(&text_mutex);
+
+	kvfree(inst);
+	return ret;
+}
+
+void *bpf_arch_text_copy(void *dst, void *src, size_t len)
+{
+	int ret;
+
+	mutex_lock(&text_mutex);
+	ret = larch_insn_text_copy(dst, src, len);
+	mutex_unlock(&text_mutex);
+	if (ret)
+		return ERR_PTR(-EINVAL);
+
+	return dst;
+}

From 34f770137ab4f01878cd973b37b35db0be0678be Mon Sep 17 00:00:00 2001
From: Chenghao Duan
Date: Wed, 30 Jul 2025 21:12:56 +0800
Subject: [PATCH 4/5] LoongArch: BPF: Add BPF trampoline support for LoongArch

The BPF trampoline is critical infrastructure of the BPF subsystem,
acting as a mediator between kernel functions and BPF programs. Numerous
important features, such as using BPF programs for zero-overhead kernel
introspection, rely on this key component.

The related tests have passed, covering the following attach types
(a condensed sketch of the generated call sequence follows this list):

1. fentry
2. fmod_ret
3. fexit
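
For a BPF_TRAMP_F_CALL_ORIG trampoline, the generated body roughly
follows this emission order (condensed sketch of
__arch_prepare_bpf_trampoline() below, not literal code):

	/* prologue: save RA/FP, store args, save S1 */
	emit_call(ctx, (const u64)__bpf_tramp_enter);	/* enter bookkeeping */
	/* invoke_bpf_prog() for each fentry and fmod_ret program */
	restore_args(ctx, m->nr_args, args_off);
	emit_call(ctx, (const u64)orig_call);		/* call the traced function */
	/* invoke_bpf_prog() for each fexit program */
	emit_call(ctx, (const u64)__bpf_tramp_exit);	/* exit bookkeeping */
	/* epilogue: restore return value and registers, then return */
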
The following related testcases passed on LoongArch:

  sudo ./test_progs -a fentry_test/fentry
  sudo ./test_progs -a fexit_test/fexit
  sudo ./test_progs -a fentry_fexit
  sudo ./test_progs -a modify_return
  sudo ./test_progs -a fexit_sleep
  sudo ./test_progs -a test_overhead
  sudo ./test_progs -a trampoline_count

Reported-by: kernel test robot
Closes: https://lore.kernel.org/oe-kbuild-all/202507100034.wXofj6VX-lkp@intel.com/
Reported-by: Geliang Tang
Co-developed-by: George Guo
Signed-off-by: George Guo
Signed-off-by: Chenghao Duan
Tested-by: Tiezhu Yang
Tested-by: Vincent Li
---
 arch/loongarch/net/bpf_jit.c | 390 +++++++++++++++++++++++++++++++++++
 arch/loongarch/net/bpf_jit.h |   6 +
 2 files changed, 396 insertions(+)

diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index 5e6ae7e0e6810..eddf582e409db 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -7,9 +7,15 @@
 #include
 #include "bpf_jit.h"
 
+#define LOONGARCH_MAX_REG_ARGS 8
+
 #define LOONGARCH_LONG_JUMP_NINSNS 5
 #define LOONGARCH_LONG_JUMP_NBYTES (LOONGARCH_LONG_JUMP_NINSNS * 4)
 
+#define LOONGARCH_FENTRY_NINSNS 2
+#define LOONGARCH_FENTRY_NBYTES (LOONGARCH_FENTRY_NINSNS * 4)
+#define LOONGARCH_BPF_FENTRY_NBYTES (LOONGARCH_LONG_JUMP_NINSNS * 4)
+
 #define REG_TCC LOONGARCH_GPR_A6
 #define TCC_SAVED LOONGARCH_GPR_S5
 
@@ -1407,6 +1413,11 @@ static int gen_jump_or_nops(void *target, void *ip, u32 *insns, bool is_call)
 				  (unsigned long)target);
 }
 
+static int emit_call(struct jit_ctx *ctx, u64 addr)
+{
+	return emit_jump_and_link(ctx, LOONGARCH_GPR_RA, addr);
+}
+
 int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
 		       void *old_addr, void *new_addr)
 {
@@ -1471,3 +1482,382 @@ void *bpf_arch_text_copy(void *dst, void *src, size_t len)
 
 	return dst;
 }
+
+static void store_args(struct jit_ctx *ctx, int nargs, int args_off)
+{
+	int i;
+
+	for (i = 0; i < nargs; i++) {
+		emit_insn(ctx, std, LOONGARCH_GPR_A0 + i, LOONGARCH_GPR_FP, -args_off);
+		args_off -= 8;
+	}
+}
+
+static void restore_args(struct jit_ctx *ctx, int nargs, int args_off)
+{
+	int i;
+
+	for (i = 0; i < nargs; i++) {
+		emit_insn(ctx, ldd, LOONGARCH_GPR_A0 + i, LOONGARCH_GPR_FP, -args_off);
+		args_off -= 8;
+	}
+}
+
+static int invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
+			   int args_off, int retval_off,
+			   int run_ctx_off, bool save_ret)
+{
+	int ret;
+	u32 *branch;
+	struct bpf_prog *p = l->link.prog;
+	int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
+
+	if (l->cookie) {
+		move_imm(ctx, LOONGARCH_GPR_T1, l->cookie, false);
+		emit_insn(ctx, std, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -run_ctx_off + cookie_off);
+	} else {
+		emit_insn(ctx, std, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_FP,
+			  -run_ctx_off + cookie_off);
+	}
+
+	/* arg1: prog */
+	move_imm(ctx, LOONGARCH_GPR_A0, (const s64)p, false);
+	/* arg2: &run_ctx */
+	emit_insn(ctx, addid, LOONGARCH_GPR_A1, LOONGARCH_GPR_FP, -run_ctx_off);
+	ret = emit_call(ctx, (const u64)bpf_trampoline_enter(p));
+	if (ret)
+		return ret;
+
+	/* store prog start time */
+	move_reg(ctx, LOONGARCH_GPR_S1, LOONGARCH_GPR_A0);
+
+	/* if (__bpf_prog_enter(prog) == 0)
+	 *	goto skip_exec_of_prog;
+	 *
+	 */
+	branch = (u32 *)ctx->image + ctx->idx;
+	/* nop reserved for conditional jump */
+	emit_insn(ctx, nop);
+
+	/* arg1: &args_off */
+	emit_insn(ctx, addid, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -args_off);
+	if (!p->jited)
+		move_imm(ctx, LOONGARCH_GPR_A1, (const s64)p->insnsi, false);
+	ret = emit_call(ctx, (const u64)p->bpf_func);
+	if (ret)
+		return ret;
+
+	if (save_ret) {
+		emit_insn(ctx, std, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -retval_off);
+		emit_insn(ctx, std, regmap[BPF_REG_0], LOONGARCH_GPR_FP, -(retval_off - 8));
+	}
+
+	/* update branch with beqz */
+	if (ctx->image) {
+		int offset = (void *)(&ctx->image[ctx->idx]) - (void *)branch;
+		*branch = larch_insn_gen_beq(LOONGARCH_GPR_A0, LOONGARCH_GPR_ZERO, offset);
+	}
+
+	/* arg1: prog */
+	move_imm(ctx, LOONGARCH_GPR_A0, (const s64)p, false);
+	/* arg2: prog start time */
+	move_reg(ctx, LOONGARCH_GPR_A1, LOONGARCH_GPR_S1);
+	/* arg3: &run_ctx */
+	emit_insn(ctx, addid, LOONGARCH_GPR_A2, LOONGARCH_GPR_FP, -run_ctx_off);
+	ret = emit_call(ctx, (const u64)bpf_trampoline_exit(p));
+
+	return ret;
+}
+
+static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
+			       int args_off, int retval_off, int run_ctx_off, u32 **branches)
+{
+	int i;
+
+	emit_insn(ctx, std, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_FP, -retval_off);
+	for (i = 0; i < tl->nr_links; i++) {
+		invoke_bpf_prog(ctx, tl->links[i], args_off, retval_off,
+				run_ctx_off, true);
+		emit_insn(ctx, ldd, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -retval_off);
+		branches[i] = (u32 *)ctx->image + ctx->idx;
+		emit_insn(ctx, nop);
+	}
+}
+
+u64 bpf_jit_alloc_exec_limit(void)
+{
+	return VMALLOC_END - VMALLOC_START;
+}
+
+void *arch_alloc_bpf_trampoline(unsigned int size)
+{
+	return bpf_prog_pack_alloc(size, jit_fill_hole);
+}
+
+void arch_free_bpf_trampoline(void *image, unsigned int size)
+{
+	bpf_prog_pack_free(image, size);
+}
+
+static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
+					 const struct btf_func_model *m,
+					 struct bpf_tramp_links *tlinks,
+					 void *func_addr, u32 flags)
+{
+	int i;
+	int stack_size = 0, nargs = 0;
+	int retval_off, args_off, nargs_off, ip_off, run_ctx_off, sreg_off;
+	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
+	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
+	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
+	int ret, save_ret;
+	void *orig_call = func_addr;
+	u32 **branches = NULL;
+
+	if (flags & (BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SHARE_IPMODIFY))
+		return -ENOTSUPP;
+
+	/*
+	 * FP + 8	    [ RA to parent func	 ] return address to parent
+	 *					   function
+	 * FP + 0	    [ FP of parent func  ] frame pointer of parent
+	 *					   function
+	 * FP - 8	    [ T0 to traced func	 ] return address of traced
+	 *					   function
+	 * FP - 16	    [ FP of traced func  ] frame pointer of traced
+	 *					   function
+	 *
+	 * FP - retval_off  [ return value	 ] BPF_TRAMP_F_CALL_ORIG or
+	 *					   BPF_TRAMP_F_RET_FENTRY_RET
+	 *		    [ argN		 ]
+	 *		    [ ...		 ]
+	 * FP - args_off    [ arg1		 ]
+	 *
+	 * FP - nargs_off   [ regs count	 ]
+	 *
+	 * FP - ip_off	    [ traced func	 ] BPF_TRAMP_F_IP_ARG
+	 *
+	 * FP - run_ctx_off [ bpf_tramp_run_ctx	 ]
+	 *
+	 * FP - sreg_off    [ callee saved reg	 ]
+	 *
+	 */
+
+	if (m->nr_args > LOONGARCH_MAX_REG_ARGS)
+		return -ENOTSUPP;
+
+	if (flags & (BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SHARE_IPMODIFY))
+		return -ENOTSUPP;
+
+	stack_size = 0;
+
+	/* room of trampoline frame to store return address and frame pointer */
+	stack_size += 16;
+
+	save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
+	if (save_ret) {
+		/* Save BPF R0 and A0 */
+		stack_size += 16;
+		retval_off = stack_size;
+	}
+
+	/* room of trampoline frame to store args */
+	nargs = m->nr_args;
+	stack_size += nargs * 8;
+	args_off = stack_size;
+
+	/* room of trampoline frame to store args number */
+	stack_size += 8;
+	nargs_off = stack_size;
+
+	/* room of trampoline frame to store ip address */
+	if (flags & BPF_TRAMP_F_IP_ARG) {
+		stack_size += 8;
+		ip_off = stack_size;
+	}
+
+	/* room of trampoline frame to store struct bpf_tramp_run_ctx */
+	stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8);
+	run_ctx_off = stack_size;
+
+	stack_size += 8;
+	sreg_off = stack_size;
+
+	stack_size = round_up(stack_size, 16);
+
+	/* For the trampoline called from function entry */
+	/* RA and FP for parent function */
+	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -16);
+	emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, 8);
+	emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 0);
+	emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 16);
+
+	/* RA and FP for traced function */
+	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_size);
+	emit_insn(ctx, std, LOONGARCH_GPR_T0, LOONGARCH_GPR_SP, stack_size - 8);
+	emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16);
+	emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size);
+
+	/* callee saved register S1 to pass start time */
+	emit_insn(ctx, std, LOONGARCH_GPR_S1, LOONGARCH_GPR_FP, -sreg_off);
+
+	/* store ip address of the traced function */
+	if (flags & BPF_TRAMP_F_IP_ARG) {
+		move_imm(ctx, LOONGARCH_GPR_T1, (const s64)func_addr, false);
+		emit_insn(ctx, std, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -ip_off);
+	}
+
+	/* store nargs number */
+	move_imm(ctx, LOONGARCH_GPR_T1, nargs, false);
+	emit_insn(ctx, std, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -nargs_off);
+
+	store_args(ctx, nargs, args_off);
+
+	/* To traced function */
+	/* Ftrace jump skips 2 NOP instructions */
+	if (is_kernel_text((unsigned long)orig_call))
+		orig_call += LOONGARCH_FENTRY_NBYTES;
+	/* Direct jump skips 5 NOP instructions */
+	else if (is_bpf_text_address((unsigned long)orig_call))
+		orig_call += LOONGARCH_BPF_FENTRY_NBYTES;
+
+	if (flags & BPF_TRAMP_F_CALL_ORIG) {
+		move_imm(ctx, LOONGARCH_GPR_A0, (const s64)im, false);
+		ret = emit_call(ctx, (const u64)__bpf_tramp_enter);
+		if (ret)
+			return ret;
+	}
+
+	for (i = 0; i < fentry->nr_links; i++) {
+		ret = invoke_bpf_prog(ctx, fentry->links[i], args_off, retval_off,
+				      run_ctx_off, flags & BPF_TRAMP_F_RET_FENTRY_RET);
+		if (ret)
+			return ret;
+	}
+	if (fmod_ret->nr_links) {
+		branches = kcalloc(fmod_ret->nr_links, sizeof(u32 *), GFP_KERNEL);
+		if (!branches)
+			return -ENOMEM;
+
+		invoke_bpf_mod_ret(ctx, fmod_ret, args_off, retval_off,
+				   run_ctx_off, branches);
+	}
+
+	if (flags & BPF_TRAMP_F_CALL_ORIG) {
+		restore_args(ctx, m->nr_args, args_off);
+		ret = emit_call(ctx, (const u64)orig_call);
+		if (ret)
+			goto out;
+		emit_insn(ctx, std, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -retval_off);
+		emit_insn(ctx, std, regmap[BPF_REG_0], LOONGARCH_GPR_FP, -(retval_off - 8));
+		im->ip_after_call = ctx->ro_image + ctx->idx;
+		/* Reserve space for the move_imm + jirl instructions */
+		for (i = 0; i < LOONGARCH_LONG_JUMP_NINSNS; i++)
+			emit_insn(ctx, nop);
+	}
+
+	for (i = 0; ctx->image && i < fmod_ret->nr_links; i++) {
+		int offset = (void *)(&ctx->image[ctx->idx]) - (void *)branches[i];
+		*branches[i] = larch_insn_gen_bne(LOONGARCH_GPR_T1, LOONGARCH_GPR_ZERO, offset);
+	}
+
+	for (i = 0; i < fexit->nr_links; i++) {
+		ret = invoke_bpf_prog(ctx, fexit->links[i], args_off, retval_off,
+				      run_ctx_off, false);
+		if (ret)
+			goto out;
+	}
+
+	if (flags & BPF_TRAMP_F_CALL_ORIG) {
+		im->ip_epilogue = ctx->ro_image + ctx->idx;
+		move_imm(ctx, LOONGARCH_GPR_A0, (const s64)im, false);
+		ret = emit_call(ctx, (const u64)__bpf_tramp_exit);
+		if (ret)
+			goto out;
+	}
+
+	if (flags & BPF_TRAMP_F_RESTORE_REGS)
+		restore_args(ctx, m->nr_args, args_off);
+
+	if (save_ret) {
+		emit_insn(ctx, ldd, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -retval_off);
+		emit_insn(ctx, ldd, regmap[BPF_REG_0], LOONGARCH_GPR_FP, -(retval_off - 8));
+	}
+
+	emit_insn(ctx, ldd, LOONGARCH_GPR_S1, LOONGARCH_GPR_FP, -sreg_off);
+
+	/* trampoline called from function entry */
+	emit_insn(ctx, ldd, LOONGARCH_GPR_T0, LOONGARCH_GPR_SP, stack_size - 8);
+	emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16);
+	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_size);
+
+	emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, 8);
+	emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 0);
+	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, 16);
+
+	if (flags & BPF_TRAMP_F_SKIP_FRAME)
+		/* return to parent function */
+		emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_RA, 0);
+	else
+		/* return to traced function */
+		emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_T0, 0);
+
+	ret = ctx->idx;
+out:
+	kfree(branches);
+
+	return ret;
+}
+
+int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
+				void *ro_image_end, const struct btf_func_model *m,
+				u32 flags, struct bpf_tramp_links *tlinks,
+				void *func_addr)
+{
+	int ret;
+	void *image, *tmp;
+	struct jit_ctx ctx;
+	u32 size = ro_image_end - ro_image;
+
+	image = kvmalloc(size, GFP_KERNEL);
+	if (!image)
+		return -ENOMEM;
+
+	ctx.image = (union loongarch_instruction *)image;
+	ctx.ro_image = (union loongarch_instruction *)ro_image;
+	ctx.idx = 0;
+
+	jit_fill_hole(image, (unsigned int)(ro_image_end - ro_image));
+	ret = __arch_prepare_bpf_trampoline(&ctx, im, m, tlinks, func_addr, flags);
+	if (ret > 0 && validate_code(&ctx) < 0) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	tmp = bpf_arch_text_copy(ro_image, image, size);
+	if (IS_ERR(tmp)) {
+		ret = PTR_ERR(tmp);
+		goto out;
+	}
+
+	bpf_flush_icache(ro_image, ro_image_end);
+out:
+	kvfree(image);
+	return ret < 0 ? ret : size;
+}
+
+int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
+			     struct bpf_tramp_links *tlinks, void *func_addr)
+{
+	struct bpf_tramp_image im;
+	struct jit_ctx ctx;
+	int ret;
+
+	ctx.image = NULL;
+	ctx.idx = 0;
+
+	ret = __arch_prepare_bpf_trampoline(&ctx, &im, m, tlinks, func_addr, flags);
+
+	/* Page align */
+	return ret < 0 ? ret : round_up(ret * LOONGARCH_INSN_SIZE, PAGE_SIZE);
+}
diff --git a/arch/loongarch/net/bpf_jit.h b/arch/loongarch/net/bpf_jit.h
index f9c569f539491..5697158fd1645 100644
--- a/arch/loongarch/net/bpf_jit.h
+++ b/arch/loongarch/net/bpf_jit.h
@@ -18,6 +18,7 @@ struct jit_ctx {
 	u32 *offset;
 	int num_exentries;
 	union loongarch_instruction *image;
+	union loongarch_instruction *ro_image;
 	u32 stack_size;
 };
 
@@ -308,3 +309,8 @@ static inline int emit_tailcall_jmp(struct jit_ctx *ctx, u8 cond, enum loongarch
 
 	return -EINVAL;
 }
+
+static inline void bpf_flush_icache(void *start, void *end)
+{
+	flush_icache_range((unsigned long)start, (unsigned long)end);
+}

From a5071d04c50b1d09c09e66fa10ebcff93c99018d Mon Sep 17 00:00:00 2001
From: Tiezhu Yang
Date: Wed, 30 Jul 2025 21:12:57 +0800
Subject: [PATCH 5/5] LoongArch: BPF: Add struct ops support for trampoline

Use the BPF_TRAMP_F_INDIRECT flag to detect struct_ops and emit the
proper prologue and epilogue for this case.

With this patch, all of the struct_ops related testcases (except
struct_ops_multi_pages) pass on LoongArch. The struct_ops_multi_pages
testcase fails because the actual image_pages_cnt is 40, which is
bigger than MAX_TRAMP_IMAGE_PAGES.

Before:

  $ sudo ./test_progs -t struct_ops -d struct_ops_multi_pages
  ...
  WATCHDOG: test case struct_ops_module/struct_ops_load executes for 10 seconds...

After:

  $ sudo ./test_progs -t struct_ops -d struct_ops_multi_pages
  ...
  #15      bad_struct_ops:OK
  ...
  #399     struct_ops_autocreate:OK
  ...
  #400     struct_ops_kptr_return:OK
  ...
  #401     struct_ops_maybe_null:OK
  ...
  #402     struct_ops_module:OK
  ...
  #404     struct_ops_no_cfi:OK
  ...
  #405     struct_ops_private_stack:SKIP
  ...
  #406     struct_ops_refcounted:OK
  Summary: 8/25 PASSED, 3 SKIPPED, 0 FAILED
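
For illustration, the struct_ops (BPF_TRAMP_F_INDIRECT) prologue only
sets up the trampoline's own frame, since there is no traced function
underneath (sketch excerpted from the hunk below):

	/* For the trampoline called directly, just handle its own frame. */
	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_size);
	emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, stack_size - 8);
	emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16);
	emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size);
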
Signed-off-by: Tiezhu Yang
---
 arch/loongarch/net/bpf_jit.c | 71 ++++++++++++++++++++++++------------
 1 file changed, 47 insertions(+), 24 deletions(-)

diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index eddf582e409db..725c2d5ee82b9 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -1610,6 +1610,7 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i
 	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
 	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
 	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
+	bool is_struct_ops = flags & BPF_TRAMP_F_INDIRECT;
 	int ret, save_ret;
 	void *orig_call = func_addr;
 	u32 **branches = NULL;
@@ -1685,18 +1686,31 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i
 
 	stack_size = round_up(stack_size, 16);
 
-	/* For the trampoline called from function entry */
-	/* RA and FP for parent function */
-	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -16);
-	emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, 8);
-	emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 0);
-	emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 16);
-
-	/* RA and FP for traced function */
-	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_size);
-	emit_insn(ctx, std, LOONGARCH_GPR_T0, LOONGARCH_GPR_SP, stack_size - 8);
-	emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16);
-	emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size);
+	if (!is_struct_ops) {
+		/*
+		 * For the trampoline called from function entry,
+		 * the frame of traced function and the frame of
+		 * trampoline need to be considered.
+		 */
+		emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -16);
+		emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, 8);
+		emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 0);
+		emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 16);
+
+		emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_size);
+		emit_insn(ctx, std, LOONGARCH_GPR_T0, LOONGARCH_GPR_SP, stack_size - 8);
+		emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16);
+		emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size);
+	} else {
+		/*
+		 * For the trampoline called directly, just handle
+		 * the frame of trampoline.
+		 */
+		emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_size);
+		emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, stack_size - 8);
+		emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16);
+		emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size);
+	}
 
 	/* callee saved register S1 to pass start time */
 	emit_insn(ctx, std, LOONGARCH_GPR_S1, LOONGARCH_GPR_FP, -sreg_off);
@@ -1786,21 +1800,30 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i
 
 	emit_insn(ctx, ldd, LOONGARCH_GPR_S1, LOONGARCH_GPR_FP, -sreg_off);
 
-	/* trampoline called from function entry */
-	emit_insn(ctx, ldd, LOONGARCH_GPR_T0, LOONGARCH_GPR_SP, stack_size - 8);
-	emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16);
-	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_size);
+	if (!is_struct_ops) {
+		/* trampoline called from function entry */
+		emit_insn(ctx, ldd, LOONGARCH_GPR_T0, LOONGARCH_GPR_SP, stack_size - 8);
+		emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16);
+		emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_size);
+
+		emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, 8);
+		emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 0);
+		emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, 16);
 
-	emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, 8);
-	emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 0);
-	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, 16);
+		if (flags & BPF_TRAMP_F_SKIP_FRAME)
+			/* return to parent function */
+			emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_RA, 0);
+		else
+			/* return to traced function */
+			emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_T0, 0);
+	} else {
+		/* trampoline called directly */
+		emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, stack_size - 8);
+		emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16);
+		emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_size);
 
-	if (flags & BPF_TRAMP_F_SKIP_FRAME)
-		/* return to parent function */
 		emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_RA, 0);
-	else
-		/* return to traced function */
-		emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_T0, 0);
+	}
 
 	ret = ctx->idx;
 out: