diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 52ffe115a8c47..5083886d6e66b 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1066,19 +1066,53 @@ static void build_epilogue(struct jit_ctx *ctx, bool was_classic) emit(A64_RET(A64_LR), ctx); } -#define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0) +/* + * Metadata encoding for exception handling in JITed code. + * + * Format of `fixup` field in `struct exception_table_entry`: + * + * Bit layout of `fixup` (32-bit): + * + * +-----------+--------+-----------+-----------+----------+ + * | 31-27 | 26-22 | 21 | 20-16 | 15-0 | + * | | | | | | + * | FIXUP_REG | Unused | ARENA_ACC | ARENA_REG | OFFSET | + * +-----------+--------+-----------+-----------+----------+ + * + * - OFFSET (16 bits): Offset used to compute address for Load/Store instruction. + * - ARENA_REG (5 bits): Register that is used to calculate the address for load/store when + * accessing the arena region. + * - ARENA_ACCESS (1 bit): This bit is set when the faulting instruction accessed the arena region. + * - FIXUP_REG (5 bits): Destination register for the load instruction (cleared on fault) or set to + * DONT_CLEAR if it is a store instruction. + */ + +#define BPF_FIXUP_OFFSET_MASK GENMASK(15, 0) +#define BPF_FIXUP_ARENA_REG_MASK GENMASK(20, 16) +#define BPF_ARENA_ACCESS BIT(21) #define BPF_FIXUP_REG_MASK GENMASK(31, 27) #define DONT_CLEAR 5 /* Unused ARM64 register from BPF's POV */ bool ex_handler_bpf(const struct exception_table_entry *ex, struct pt_regs *regs) { - off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup); int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup); + s16 off = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup); + int arena_reg = FIELD_GET(BPF_FIXUP_ARENA_REG_MASK, ex->fixup); + bool is_arena = !!(ex->fixup & BPF_ARENA_ACCESS); + bool is_write = (dst_reg == DONT_CLEAR); + unsigned long addr; if (dst_reg != DONT_CLEAR) regs->regs[dst_reg] = 0; - regs->pc = (unsigned long)&ex->fixup - offset; + /* Skip the faulting instruction */ + regs->pc += AARCH64_INSN_SIZE; + + if (is_arena) { + addr = regs->regs[arena_reg] + off; + bpf_prog_report_arena_violation(is_write, addr); + } + return true; } @@ -1088,7 +1122,9 @@ static int add_exception_handler(const struct bpf_insn *insn, int dst_reg) { off_t ins_offset; - off_t fixup_offset; + s16 off = insn->off; + bool is_arena; + int arena_reg; unsigned long pc; struct exception_table_entry *ex; @@ -1102,6 +1138,9 @@ static int add_exception_handler(const struct bpf_insn *insn, BPF_MODE(insn->code) != BPF_PROBE_ATOMIC) return 0; + is_arena = (BPF_MODE(insn->code) == BPF_PROBE_MEM32) || + (BPF_MODE(insn->code) == BPF_PROBE_ATOMIC); + if (!ctx->prog->aux->extable || WARN_ON_ONCE(ctx->exentry_idx >= ctx->prog->aux->num_exentries)) return -EINVAL; @@ -1119,22 +1158,6 @@ static int add_exception_handler(const struct bpf_insn *insn, if (WARN_ON_ONCE(ins_offset >= 0 || ins_offset < INT_MIN)) return -ERANGE; - /* - * Since the extable follows the program, the fixup offset is always - * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value - * to keep things simple, and put the destination register in the upper - * bits. We don't need to worry about buildtime or runtime sort - * modifying the upper bits because the table is already sorted, and - * isn't part of the main exception table. - * - * The fixup_offset is set to the next instruction from the instruction - * that may fault. The execution will jump to this after handling the - * fault. - */ - fixup_offset = (long)&ex->fixup - (pc + AARCH64_INSN_SIZE); - if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, fixup_offset)) - return -ERANGE; - /* * The offsets above have been calculated using the RO buffer but we * need to use the R/W buffer for writes. @@ -1147,8 +1170,18 @@ static int add_exception_handler(const struct bpf_insn *insn, if (BPF_CLASS(insn->code) != BPF_LDX) dst_reg = DONT_CLEAR; - ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) | - FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg); + ex->fixup = FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg); + + if (is_arena) { + ex->fixup |= BPF_ARENA_ACCESS; + if (BPF_CLASS(insn->code) == BPF_LDX) + arena_reg = bpf2a64[insn->src_reg]; + else + arena_reg = bpf2a64[insn->dst_reg]; + + ex->fixup |= FIELD_PREP(BPF_FIXUP_OFFSET_MASK, off) | + FIELD_PREP(BPF_FIXUP_ARENA_REG_MASK, arena_reg); + } ex->type = EX_TYPE_BPF; diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 7e3fca1646203..b75dea55df5a2 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -1388,16 +1389,67 @@ static int emit_atomic_ld_st_index(u8 **pprog, u32 atomic_op, u32 size, return 0; } +/* + * Metadata encoding for exception handling in JITed code. + * + * Format of `fixup` and `data` fields in `struct exception_table_entry`: + * + * Bit layout of `fixup` (32-bit): + * + * +-----------+--------+-----------+---------+----------+ + * | 31 | 30-24 | 23-16 | 15-8 | 7-0 | + * | | | | | | + * | ARENA_ACC | Unused | ARENA_REG | DST_REG | INSN_LEN | + * +-----------+--------+-----------+---------+----------+ + * + * - INSN_LEN (8 bits): Length of faulting insn (max x86 insn = 15 bytes (fits in 8 bits)). + * - DST_REG (8 bits): Offset of dst_reg from reg2pt_regs[] (max offset = 112 (fits in 8 bits)). + * This is set to DONT_CLEAR if the insn is a store. + * - ARENA_REG (8 bits): Offset of the register that is used to calculate the + * address for load/store when accessing the arena region. + * - ARENA_ACCESS (1 bit): This bit is set when the faulting instruction accessed the arena region. + * + * Bit layout of `data` (32-bit): + * + * +--------------+--------+--------------+ + * | 31-16 | 15-8 | 7-0 | + * | | | | + * | ARENA_OFFSET | Unused | EX_TYPE_BPF | + * +--------------+--------+--------------+ + * + * - ARENA_OFFSET (16 bits): Offset used to calculate the address for load/store when + * accessing the arena region. + */ + #define DONT_CLEAR 1 +#define FIXUP_INSN_LEN_MASK GENMASK(7, 0) +#define FIXUP_REG_MASK GENMASK(15, 8) +#define FIXUP_ARENA_REG_MASK GENMASK(23, 16) +#define FIXUP_ARENA_ACCESS BIT(31) +#define DATA_ARENA_OFFSET_MASK GENMASK(31, 16) bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs) { - u32 reg = x->fixup >> 8; + u32 reg = FIELD_GET(FIXUP_REG_MASK, x->fixup); + u32 insn_len = FIELD_GET(FIXUP_INSN_LEN_MASK, x->fixup); + bool is_arena = !!(x->fixup & FIXUP_ARENA_ACCESS); + bool is_write = (reg == DONT_CLEAR); + unsigned long addr; + s16 off; + u32 arena_reg; /* jump over faulting load and clear dest register */ if (reg != DONT_CLEAR) *(unsigned long *)((void *)regs + reg) = 0; - regs->ip += x->fixup & 0xff; + regs->ip += insn_len; + + if (is_arena) { + arena_reg = FIELD_GET(FIXUP_ARENA_REG_MASK, x->fixup); + off = FIELD_GET(DATA_ARENA_OFFSET_MASK, x->data); + addr = *(unsigned long *)((void *)regs + arena_reg) + off; + bpf_prog_report_arena_violation(is_write, addr); + } + return true; } @@ -2070,6 +2122,8 @@ st: if (is_imm8(insn->off)) { struct exception_table_entry *ex; u8 *_insn = image + proglen + (start_of_ldx - temp); + u32 arena_reg, fixup_reg; + bool is_arena; s64 delta; if (!bpf_prog->aux->extable) @@ -2089,8 +2143,25 @@ st: if (is_imm8(insn->off)) ex->data = EX_TYPE_BPF; - ex->fixup = (prog - start_of_ldx) | - ((BPF_CLASS(insn->code) == BPF_LDX ? reg2pt_regs[dst_reg] : DONT_CLEAR) << 8); + is_arena = (BPF_MODE(insn->code) == BPF_PROBE_MEM32) || + (BPF_MODE(insn->code) == BPF_PROBE_ATOMIC); + + fixup_reg = (BPF_CLASS(insn->code) == BPF_LDX) ? + reg2pt_regs[dst_reg] : DONT_CLEAR; + + ex->fixup = FIELD_PREP(FIXUP_INSN_LEN_MASK, prog - start_of_ldx) | + FIELD_PREP(FIXUP_REG_MASK, fixup_reg); + + if (is_arena) { + ex->fixup |= FIXUP_ARENA_ACCESS; + if (BPF_CLASS(insn->code) == BPF_LDX) + arena_reg = reg2pt_regs[src_reg]; + else + arena_reg = reg2pt_regs[dst_reg]; + + ex->fixup |= FIELD_PREP(FIXUP_ARENA_REG_MASK, arena_reg); + ex->data |= FIELD_PREP(DATA_ARENA_OFFSET_MASK, insn->off); + } } break; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8f6e87f0f3a89..bf42a2cfc635e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3659,6 +3659,7 @@ int bpf_stream_stage_printk(struct bpf_stream_stage *ss, const char *fmt, ...); int bpf_stream_stage_commit(struct bpf_stream_stage *ss, struct bpf_prog *prog, enum bpf_stream_id stream_id); int bpf_stream_stage_dump_stack(struct bpf_stream_stage *ss); +void bpf_prog_report_arena_violation(bool write, unsigned long addr); #define bpf_stream_printk(ss, ...) bpf_stream_stage_printk(&ss, __VA_ARGS__) #define bpf_stream_dump_stack(ss) bpf_stream_stage_dump_stack(&ss) diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c index 5b37753799d20..a1653d1c04ca5 100644 --- a/kernel/bpf/arena.c +++ b/kernel/bpf/arena.c @@ -633,3 +633,23 @@ static int __init kfunc_init(void) return register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &common_kfunc_set); } late_initcall(kfunc_init); + +void bpf_prog_report_arena_violation(bool write, unsigned long addr) +{ + struct bpf_stream_stage ss; + struct bpf_prog *prog; + u64 user_vm_start; + + prog = bpf_prog_find_from_stack(); + if (!prog) + return; + + user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena); + addr += (user_vm_start >> 32) << 32; + + bpf_stream_stage(ss, prog, BPF_STDERR, ({ + bpf_stream_printk(ss, "ERROR: Arena %s access at unmapped address 0x%lx\n", + write ? "WRITE" : "READ", addr); + bpf_stream_dump_stack(ss); + })); +} diff --git a/tools/testing/selftests/bpf/prog_tests/stream.c b/tools/testing/selftests/bpf/prog_tests/stream.c index 36a1a1ebde692..8fdc83260ea14 100644 --- a/tools/testing/selftests/bpf/prog_tests/stream.c +++ b/tools/testing/selftests/bpf/prog_tests/stream.c @@ -41,6 +41,22 @@ struct { "([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n" "|[ \t]+[^\n]+\n)*", }, + { + offsetof(struct stream, progs.stream_arena_read_fault), + "ERROR: Arena READ access at unmapped address 0x.*\n" + "CPU: [0-9]+ UID: 0 PID: [0-9]+ Comm: .*\n" + "Call trace:\n" + "([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n" + "|[ \t]+[^\n]+\n)*", + }, + { + offsetof(struct stream, progs.stream_arena_write_fault), + "ERROR: Arena WRITE access at unmapped address 0x.*\n" + "CPU: [0-9]+ UID: 0 PID: [0-9]+ Comm: .*\n" + "Call trace:\n" + "([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n" + "|[ \t]+[^\n]+\n)*", + }, }; static int match_regex(const char *pattern, const char *string) @@ -63,6 +79,7 @@ void test_stream_errors(void) struct stream *skel; int ret, prog_fd; char buf[1024]; + char fault_addr[64] = {0}; skel = stream__open_and_load(); if (!ASSERT_OK_PTR(skel, "stream__open_and_load")) @@ -85,6 +102,14 @@ void test_stream_errors(void) continue; } #endif +#if !defined(__x86_64__) && !defined(__aarch64__) + ASSERT_TRUE(1, "Arena fault reporting unsupported, skip."); + if (i == 2 || i == 3) { + ret = bpf_prog_stream_read(prog_fd, 2, buf, sizeof(buf), &ropts); + ASSERT_EQ(ret, 0, "stream read"); + continue; + } +#endif ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDERR, buf, sizeof(buf), &ropts); ASSERT_GT(ret, 0, "stream read"); @@ -92,8 +117,14 @@ void test_stream_errors(void) buf[ret] = '\0'; ret = match_regex(stream_error_arr[i].errstr, buf); - if (!ASSERT_TRUE(ret == 1, "regex match")) + if (ret && (i == 2 || i == 3)) { + sprintf(fault_addr, "0x%lx", skel->bss->fault_addr); + ret = match_regex(fault_addr, buf); + } + if (!ASSERT_TRUE(ret == 1, "regex match")) { fprintf(stderr, "Output from stream:\n%s\n", buf); + fprintf(stderr, "Fault Addr: 0x%lx\n", skel->bss->fault_addr); + } } stream__destroy(skel); diff --git a/tools/testing/selftests/bpf/progs/stream.c b/tools/testing/selftests/bpf/progs/stream.c index 35790897dc879..9de015ac3ced5 100644 --- a/tools/testing/selftests/bpf/progs/stream.c +++ b/tools/testing/selftests/bpf/progs/stream.c @@ -5,6 +5,7 @@ #include #include "bpf_misc.h" #include "bpf_experimental.h" +#include "bpf_arena_common.h" struct arr_elem { struct bpf_res_spin_lock lock; @@ -17,10 +18,17 @@ struct { __type(value, struct arr_elem); } arrmap SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_ARENA); + __uint(map_flags, BPF_F_MMAPABLE); + __uint(max_entries, 1); /* number of pages */ +} arena SEC(".maps"); + #define ENOSPC 28 #define _STR "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" int size; +u64 fault_addr; SEC("syscall") __success __retval(0) @@ -76,4 +84,35 @@ int stream_syscall(void *ctx) return 0; } +SEC("syscall") +__success __retval(0) +int stream_arena_write_fault(void *ctx) +{ + struct bpf_arena *ptr = (void *)&arena; + u64 user_vm_start; + + barrier_var(ptr); + user_vm_start = ptr->user_vm_start; + + fault_addr = user_vm_start + 0xbeef; + *(u32 __arena *)(user_vm_start + 0xbeef) = 1; + + return 0; +} + +SEC("syscall") +__success __retval(0) +int stream_arena_read_fault(void *ctx) +{ + struct bpf_arena *ptr = (void *)&arena; + u64 user_vm_start; + + barrier_var(ptr); + user_vm_start = ptr->user_vm_start; + + fault_addr = user_vm_start + 0xbeef; + + return *(u32 __arena *)(user_vm_start + 0xbeef); +} + char _license[] SEC("license") = "GPL";