Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 55 additions & 22 deletions arch/arm64/net/bpf_jit_comp.c
Original file line number Diff line number Diff line change
Expand Up @@ -1066,19 +1066,53 @@ static void build_epilogue(struct jit_ctx *ctx, bool was_classic)
emit(A64_RET(A64_LR), ctx);
}

#define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0)
/*
* Metadata encoding for exception handling in JITed code.
*
* Format of `fixup` field in `struct exception_table_entry`:
*
* Bit layout of `fixup` (32-bit):
*
* +-----------+--------+-----------+-----------+----------+
* | 31-27 | 26-22 | 21 | 20-16 | 15-0 |
* | | | | | |
* | FIXUP_REG | Unused | ARENA_ACC | ARENA_REG | OFFSET |
* +-----------+--------+-----------+-----------+----------+
*
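* - OFFSET (16 bits): Signed offset of the BPF load/store instruction (insn->off). It is added to
* ARENA_REG to rebuild the faulting address and is only populated for arena accesses.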
* - ARENA_REG (5 bits): Register that is used to calculate the address for load/store when
* accessing the arena region.
* - ARENA_ACCESS (1 bit): This bit is set when the faulting instruction accessed the arena region.
* - FIXUP_REG (5 bits): Destination register for the load instruction (cleared on fault) or set to
* DONT_CLEAR if it is a store instruction.
*/

/* Bits 15-0 of ex->fixup: 16-bit offset field. */
#define BPF_FIXUP_OFFSET_MASK GENMASK(15, 0)
/* Bits 20-16 of ex->fixup: register number used to form the arena address. */
#define BPF_FIXUP_ARENA_REG_MASK GENMASK(20, 16)
/* Bit 21 of ex->fixup: set when the entry describes an arena access. */
#define BPF_ARENA_ACCESS BIT(21)
/* Bits 31-27 of ex->fixup: register to zero on fault, or DONT_CLEAR. */
#define BPF_FIXUP_REG_MASK GENMASK(31, 27)
#define DONT_CLEAR 5 /* Unused ARM64 register from BPF's POV */

/*
 * arm64 exception-table handler for faulting JITed BPF load/store
 * instructions.
 *
 * Decodes the metadata packed into ex->fixup, zeroes the destination
 * register unless the entry carries DONT_CLEAR, adjusts regs->pc, and for
 * entries flagged with BPF_ARENA_ACCESS passes the reconstructed address to
 * bpf_prog_report_arena_violation(). Always returns true.
 *
 * NOTE(review): this listing was captured from a diff view, so superseded
 * and replacement lines appear side by side (see the notes below). Confirm
 * against the real tree which of them survive.
 */
bool ex_handler_bpf(const struct exception_table_entry *ex,
struct pt_regs *regs)
{
/* NOTE(review): decoded from the same mask as 'off' below; likely the pre-change line - confirm */
off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
/* 16-bit field read back into an s16 so that it is treated as a signed offset */
s16 off = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
int arena_reg = FIELD_GET(BPF_FIXUP_ARENA_REG_MASK, ex->fixup);
bool is_arena = !!(ex->fixup & BPF_ARENA_ACCESS);
/* DONT_CLEAR in the register field is what marks the access as a write here */
bool is_write = (dst_reg == DONT_CLEAR);
unsigned long addr;

if (dst_reg != DONT_CLEAR)
regs->regs[dst_reg] = 0;
/* NOTE(review): uses 'offset' from above; likely the pre-change PC computation - confirm */
regs->pc = (unsigned long)&ex->fixup - offset;
/* Skip the faulting instruction */
regs->pc += AARCH64_INSN_SIZE;

if (is_arena) {
/* address = value of the recorded base register plus the signed offset */
addr = regs->regs[arena_reg] + off;
bpf_prog_report_arena_violation(is_write, addr);
}

return true;
}

Expand All @@ -1088,7 +1122,9 @@ static int add_exception_handler(const struct bpf_insn *insn,
int dst_reg)
{
off_t ins_offset;
off_t fixup_offset;
s16 off = insn->off;
bool is_arena;
int arena_reg;
unsigned long pc;
struct exception_table_entry *ex;

Expand All @@ -1102,6 +1138,9 @@ static int add_exception_handler(const struct bpf_insn *insn,
BPF_MODE(insn->code) != BPF_PROBE_ATOMIC)
return 0;

is_arena = (BPF_MODE(insn->code) == BPF_PROBE_MEM32) ||
(BPF_MODE(insn->code) == BPF_PROBE_ATOMIC);

if (!ctx->prog->aux->extable ||
WARN_ON_ONCE(ctx->exentry_idx >= ctx->prog->aux->num_exentries))
return -EINVAL;
Expand All @@ -1119,22 +1158,6 @@ static int add_exception_handler(const struct bpf_insn *insn,
if (WARN_ON_ONCE(ins_offset >= 0 || ins_offset < INT_MIN))
return -ERANGE;

/*
* Since the extable follows the program, the fixup offset is always
* negative and limited to BPF_JIT_REGION_SIZE. Store a positive value
* to keep things simple, and put the destination register in the upper
* bits. We don't need to worry about buildtime or runtime sort
* modifying the upper bits because the table is already sorted, and
* isn't part of the main exception table.
*
* The fixup_offset is set to the next instruction from the instruction
* that may fault. The execution will jump to this after handling the
* fault.
*/
fixup_offset = (long)&ex->fixup - (pc + AARCH64_INSN_SIZE);
if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, fixup_offset))
return -ERANGE;

/*
* The offsets above have been calculated using the RO buffer but we
* need to use the R/W buffer for writes.
Expand All @@ -1147,8 +1170,18 @@ static int add_exception_handler(const struct bpf_insn *insn,
if (BPF_CLASS(insn->code) != BPF_LDX)
dst_reg = DONT_CLEAR;

ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) |
FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
ex->fixup = FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);

if (is_arena) {
ex->fixup |= BPF_ARENA_ACCESS;
if (BPF_CLASS(insn->code) == BPF_LDX)
arena_reg = bpf2a64[insn->src_reg];
else
arena_reg = bpf2a64[insn->dst_reg];

ex->fixup |= FIELD_PREP(BPF_FIXUP_OFFSET_MASK, off) |
FIELD_PREP(BPF_FIXUP_ARENA_REG_MASK, arena_reg);
}

ex->type = EX_TYPE_BPF;

Expand Down
79 changes: 75 additions & 4 deletions arch/x86/net/bpf_jit_comp.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <linux/bitfield.h>
#include <linux/bpf.h>
#include <linux/memory.h>
#include <linux/sort.h>
Expand Down Expand Up @@ -1388,16 +1389,67 @@ static int emit_atomic_ld_st_index(u8 **pprog, u32 atomic_op, u32 size,
return 0;
}

/*
* Metadata encoding for exception handling in JITed code.
*
* Format of `fixup` and `data` fields in `struct exception_table_entry`:
*
* Bit layout of `fixup` (32-bit):
*
* +-----------+--------+-----------+---------+----------+
* | 31 | 30-24 | 23-16 | 15-8 | 7-0 |
* | | | | | |
* | ARENA_ACC | Unused | ARENA_REG | DST_REG | INSN_LEN |
* +-----------+--------+-----------+---------+----------+
*
* - INSN_LEN (8 bits): Length of faulting insn (max x86 insn = 15 bytes (fits in 8 bits)).
* - DST_REG (8 bits): Byte offset of dst_reg within struct pt_regs, as looked up in reg2pt_regs[]
* (max offset = 112, which fits in 8 bits).
* This is set to DONT_CLEAR if the insn is a store.
* - ARENA_REG (8 bits): Offset of the register that is used to calculate the
* address for load/store when accessing the arena region.
* - ARENA_ACCESS (1 bit): This bit is set when the faulting instruction accessed the arena region.
*
* Bit layout of `data` (32-bit):
*
* +--------------+--------+--------------+
* | 31-16 | 15-8 | 7-0 |
* | | | |
* | ARENA_OFFSET | Unused | EX_TYPE_BPF |
* +--------------+--------+--------------+
*
* - ARENA_OFFSET (16 bits): Offset used to calculate the address for load/store when
* accessing the arena region.
*/

/*
 * Value stored in the register field of ex->fixup when no register must be
 * zeroed on fault.
 * NOTE(review): presumably 1 was picked because it can never be a real
 * reg2pt_regs[] offset - confirm.
 */
#define DONT_CLEAR 1
/* Bits 7-0 of ex->fixup: length of the faulting instruction in bytes. */
#define FIXUP_INSN_LEN_MASK GENMASK(7, 0)
/* Bits 15-8 of ex->fixup: pt_regs offset of the register to zero, or DONT_CLEAR. */
#define FIXUP_REG_MASK GENMASK(15, 8)
/* Bits 23-16 of ex->fixup: pt_regs offset of the register used to form the arena address. */
#define FIXUP_ARENA_REG_MASK GENMASK(23, 16)
/* Bit 31 of ex->fixup: set when the entry describes an arena access. */
#define FIXUP_ARENA_ACCESS BIT(31)
/* Bits 31-16 of ex->data: 16-bit offset added to the arena register value. */
#define DATA_ARENA_OFFSET_MASK GENMASK(31, 16)

/*
 * x86 exception-table handler for faulting JITed BPF load/store
 * instructions.
 *
 * Decodes x->fixup, zeroes the destination register (addressed as a byte
 * offset into *regs) unless the entry carries DONT_CLEAR, advances regs->ip,
 * and for entries flagged with FIXUP_ARENA_ACCESS passes the reconstructed
 * address to bpf_prog_report_arena_violation(). Always returns true.
 *
 * NOTE(review): this listing was captured from a diff view, so superseded
 * and replacement lines appear side by side (for example, 'reg' is declared
 * twice). Confirm against the real tree which of them survive.
 */
bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs)
{
/* NOTE(review): open-coded shift; likely the pre-change form of the FIELD_GET() line below - confirm */
u32 reg = x->fixup >> 8;
u32 reg = FIELD_GET(FIXUP_REG_MASK, x->fixup);
u32 insn_len = FIELD_GET(FIXUP_INSN_LEN_MASK, x->fixup);
bool is_arena = !!(x->fixup & FIXUP_ARENA_ACCESS);
/* DONT_CLEAR in the register field is what marks the access as a write here */
bool is_write = (reg == DONT_CLEAR);
unsigned long addr;
s16 off;
u32 arena_reg;

/* jump over faulting load and clear dest register */
if (reg != DONT_CLEAR)
*(unsigned long *)((void *)regs + reg) = 0;
/* NOTE(review): open-coded mask; likely the pre-change form of the 'insn_len' line below - confirm */
regs->ip += x->fixup & 0xff;
regs->ip += insn_len;

if (is_arena) {
arena_reg = FIELD_GET(FIXUP_ARENA_REG_MASK, x->fixup);
/* 16-bit field from x->data read back into an s16 so it is treated as signed */
off = FIELD_GET(DATA_ARENA_OFFSET_MASK, x->data);
/* arena_reg is a byte offset into *regs; load that register and add the offset */
addr = *(unsigned long *)((void *)regs + arena_reg) + off;
bpf_prog_report_arena_violation(is_write, addr);
}

return true;
}

Expand Down Expand Up @@ -2070,6 +2122,8 @@ st: if (is_imm8(insn->off))
{
struct exception_table_entry *ex;
u8 *_insn = image + proglen + (start_of_ldx - temp);
u32 arena_reg, fixup_reg;
bool is_arena;
s64 delta;

if (!bpf_prog->aux->extable)
Expand All @@ -2089,8 +2143,25 @@ st: if (is_imm8(insn->off))

ex->data = EX_TYPE_BPF;

ex->fixup = (prog - start_of_ldx) |
((BPF_CLASS(insn->code) == BPF_LDX ? reg2pt_regs[dst_reg] : DONT_CLEAR) << 8);
is_arena = (BPF_MODE(insn->code) == BPF_PROBE_MEM32) ||
(BPF_MODE(insn->code) == BPF_PROBE_ATOMIC);

fixup_reg = (BPF_CLASS(insn->code) == BPF_LDX) ?
reg2pt_regs[dst_reg] : DONT_CLEAR;

ex->fixup = FIELD_PREP(FIXUP_INSN_LEN_MASK, prog - start_of_ldx) |
FIELD_PREP(FIXUP_REG_MASK, fixup_reg);

if (is_arena) {
ex->fixup |= FIXUP_ARENA_ACCESS;
if (BPF_CLASS(insn->code) == BPF_LDX)
arena_reg = reg2pt_regs[src_reg];
else
arena_reg = reg2pt_regs[dst_reg];

ex->fixup |= FIELD_PREP(FIXUP_ARENA_REG_MASK, arena_reg);
ex->data |= FIELD_PREP(DATA_ARENA_OFFSET_MASK, insn->off);
}
}
break;

Expand Down
1 change: 1 addition & 0 deletions include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -3659,6 +3659,7 @@ int bpf_stream_stage_printk(struct bpf_stream_stage *ss, const char *fmt, ...);
int bpf_stream_stage_commit(struct bpf_stream_stage *ss, struct bpf_prog *prog,
enum bpf_stream_id stream_id);
int bpf_stream_stage_dump_stack(struct bpf_stream_stage *ss);
void bpf_prog_report_arena_violation(bool write, unsigned long addr);

#define bpf_stream_printk(ss, ...) bpf_stream_stage_printk(&ss, __VA_ARGS__)
#define bpf_stream_dump_stack(ss) bpf_stream_stage_dump_stack(&ss)
Expand Down
20 changes: 20 additions & 0 deletions kernel/bpf/arena.c
Original file line number Diff line number Diff line change
Expand Up @@ -633,3 +633,23 @@ static int __init kfunc_init(void)
return register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &common_kfunc_set);
}
late_initcall(kfunc_init);

/*
 * Report a faulting arena access on the BPF_STDERR stream of the program
 * found by bpf_prog_find_from_stack().
 *
 * @write: true to label the access "WRITE", false to label it "READ".
 * @addr:  address value supplied by the exception handler; the upper 32 bits
 *         of the arena's user_vm_start are added to it before printing.
 *
 * Does nothing when no program is found. Otherwise one message line and a
 * stack dump are staged and committed to the stream.
 */
void bpf_prog_report_arena_violation(bool write, unsigned long addr)
{
	const char *access = write ? "WRITE" : "READ";
	struct bpf_prog *faulting_prog = bpf_prog_find_from_stack();
	struct bpf_stream_stage ss;
	u64 vm_base;

	if (!faulting_prog)
		return;

	/* Keep only the upper 32 bits of the user mapping's start address. */
	vm_base = bpf_arena_get_user_vm_start(faulting_prog->aux->arena);
	vm_base &= ~0xffffffffULL;
	addr += vm_base;

	bpf_stream_stage(ss, faulting_prog, BPF_STDERR, ({
		bpf_stream_printk(ss, "ERROR: Arena %s access at unmapped address 0x%lx\n",
				  access, addr);
		bpf_stream_dump_stack(ss);
	}));
}
33 changes: 32 additions & 1 deletion tools/testing/selftests/bpf/prog_tests/stream.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,22 @@ struct {
"([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
"|[ \t]+[^\n]+\n)*",
},
{
offsetof(struct stream, progs.stream_arena_read_fault),
"ERROR: Arena READ access at unmapped address 0x.*\n"
"CPU: [0-9]+ UID: 0 PID: [0-9]+ Comm: .*\n"
"Call trace:\n"
"([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
"|[ \t]+[^\n]+\n)*",
},
{
offsetof(struct stream, progs.stream_arena_write_fault),
"ERROR: Arena WRITE access at unmapped address 0x.*\n"
"CPU: [0-9]+ UID: 0 PID: [0-9]+ Comm: .*\n"
"Call trace:\n"
"([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
"|[ \t]+[^\n]+\n)*",
},
};

static int match_regex(const char *pattern, const char *string)
Expand All @@ -63,6 +79,7 @@ void test_stream_errors(void)
struct stream *skel;
int ret, prog_fd;
char buf[1024];
char fault_addr[64] = {0};

skel = stream__open_and_load();
if (!ASSERT_OK_PTR(skel, "stream__open_and_load"))
Expand All @@ -85,15 +102,29 @@ void test_stream_errors(void)
continue;
}
#endif
#if !defined(__x86_64__) && !defined(__aarch64__)
ASSERT_TRUE(1, "Arena fault reporting unsupported, skip.");
if (i == 2 || i == 3) {
ret = bpf_prog_stream_read(prog_fd, 2, buf, sizeof(buf), &ropts);
ASSERT_EQ(ret, 0, "stream read");
continue;
}
#endif

ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDERR, buf, sizeof(buf), &ropts);
ASSERT_GT(ret, 0, "stream read");
ASSERT_LE(ret, 1023, "len for buf");
buf[ret] = '\0';

ret = match_regex(stream_error_arr[i].errstr, buf);
if (!ASSERT_TRUE(ret == 1, "regex match"))
if (ret && (i == 2 || i == 3)) {
sprintf(fault_addr, "0x%lx", skel->bss->fault_addr);
ret = match_regex(fault_addr, buf);
}
if (!ASSERT_TRUE(ret == 1, "regex match")) {
fprintf(stderr, "Output from stream:\n%s\n", buf);
fprintf(stderr, "Fault Addr: 0x%lx\n", skel->bss->fault_addr);
}
}

stream__destroy(skel);
Expand Down
39 changes: 39 additions & 0 deletions tools/testing/selftests/bpf/progs/stream.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
#include "bpf_experimental.h"
#include "bpf_arena_common.h"

struct arr_elem {
struct bpf_res_spin_lock lock;
Expand All @@ -17,10 +18,17 @@ struct {
__type(value, struct arr_elem);
} arrmap SEC(".maps");

/*
 * Arena map used by the arena fault programs in this file. It is mmap-able
 * and sized to a single page.
 */
struct {
__uint(type, BPF_MAP_TYPE_ARENA);
__uint(map_flags, BPF_F_MMAPABLE);
__uint(max_entries, 1); /* number of pages */
} arena SEC(".maps");

#define ENOSPC 28
#define _STR "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"

int size;
u64 fault_addr;

SEC("syscall")
__success __retval(0)
Expand Down Expand Up @@ -76,4 +84,35 @@ int stream_syscall(void *ctx)
return 0;
}

/*
 * Store to an arena address at user_vm_start + 0xbeef and publish that
 * address in the global fault_addr. Returns 0.
 */
SEC("syscall")
__success __retval(0)
int stream_arena_write_fault(void *ctx)
{
	struct bpf_arena *arena_map = (void *)&arena;
	u64 target;

	/* NOTE(review): presumably keeps the compiler from reasoning about the &arena cast - confirm */
	barrier_var(arena_map);

	target = arena_map->user_vm_start + 0xbeef;
	fault_addr = target;

	*(u32 __arena *)target = 1;

	return 0;
}

/*
 * Load from an arena address at user_vm_start + 0xbeef and publish that
 * address in the global fault_addr. Returns the loaded value.
 */
SEC("syscall")
__success __retval(0)
int stream_arena_read_fault(void *ctx)
{
	struct bpf_arena *arena_map = (void *)&arena;
	u64 target;

	/* NOTE(review): presumably keeps the compiler from reasoning about the &arena cast - confirm */
	barrier_var(arena_map);

	target = arena_map->user_vm_start + 0xbeef;
	fault_addr = target;

	return *(u32 __arena *)target;
}

char _license[] SEC("license") = "GPL";
Loading