Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 55 additions & 22 deletions arch/arm64/net/bpf_jit_comp.c
Original file line number Diff line number Diff line change
Expand Up @@ -1066,19 +1066,53 @@ static void build_epilogue(struct jit_ctx *ctx, bool was_classic)
emit(A64_RET(A64_LR), ctx);
}

/*
 * Metadata encoding for exception handling in JITed code.
 *
 * Format of `fixup` field in `struct exception_table_entry`:
 *
 * Bit layout of `fixup` (32-bit):
 *
 * +-----------+--------+-----------+-----------+----------+
 * |   31-27   | 26-22  |    21     |   20-16   |   15-0   |
 * |           |        |           |           |          |
 * | FIXUP_REG | Unused | ARENA_ACC | ARENA_REG |  OFFSET  |
 * +-----------+--------+-----------+-----------+----------+
 *
 * - OFFSET (16 bits): Offset used to compute address for Load/Store instruction.
 * - ARENA_REG (5 bits): Register that is used to calculate the address for load/store when
 *   accessing the arena region.
 * - ARENA_ACCESS (1 bit): This bit is set when the faulting instruction accessed the arena region.
 * - FIXUP_REG (5 bits): Destination register for the load instruction (cleared on fault) or set to
 *   DONT_CLEAR if it is a store instruction.
 */

#define BPF_FIXUP_OFFSET_MASK		GENMASK(15, 0)
#define BPF_FIXUP_ARENA_REG_MASK	GENMASK(20, 16)
#define BPF_ARENA_ACCESS		BIT(21)
#define BPF_FIXUP_REG_MASK		GENMASK(31, 27)
#define DONT_CLEAR 5 /* Unused ARM64 register from BPF's POV */

/*
 * ex_handler_bpf - fix up a faulting load/store in arm64 JITed BPF code.
 * @ex:   extable entry whose `fixup` field encodes the metadata above
 * @regs: faulting CPU register state; pc and (for loads) the dest reg
 *        are modified in place
 *
 * For a faulting load, the destination register is zeroed; a store is
 * marked with FIXUP_REG == DONT_CLEAR and clears nothing. If the access
 * targeted the arena region, the violation is reported first. Execution
 * then resumes at the instruction after the faulting one.
 *
 * Always returns true (the fault is considered handled).
 */
bool ex_handler_bpf(const struct exception_table_entry *ex,
		    struct pt_regs *regs)
{
	int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
	s16 off = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
	int arena_reg = FIELD_GET(BPF_FIXUP_ARENA_REG_MASK, ex->fixup);
	bool is_arena = !!(ex->fixup & BPF_ARENA_ACCESS);
	bool is_write = (dst_reg == DONT_CLEAR);
	unsigned long addr;

	if (is_arena) {
		/* Reconstruct the faulting address: base register + insn offset. */
		addr = regs->regs[arena_reg] + off;
		bpf_prog_report_arena_violation(is_write, addr, regs->pc);
	}

	/* Loads clear their destination; stores (DONT_CLEAR) clear nothing. */
	if (dst_reg != DONT_CLEAR)
		regs->regs[dst_reg] = 0;

	/* Skip the faulting instruction */
	regs->pc += AARCH64_INSN_SIZE;

	return true;
}

Expand All @@ -1088,7 +1122,9 @@ static int add_exception_handler(const struct bpf_insn *insn,
int dst_reg)
{
off_t ins_offset;
off_t fixup_offset;
s16 off = insn->off;
bool is_arena;
int arena_reg;
unsigned long pc;
struct exception_table_entry *ex;

Expand All @@ -1102,6 +1138,9 @@ static int add_exception_handler(const struct bpf_insn *insn,
BPF_MODE(insn->code) != BPF_PROBE_ATOMIC)
return 0;

is_arena = (BPF_MODE(insn->code) == BPF_PROBE_MEM32) ||
(BPF_MODE(insn->code) == BPF_PROBE_ATOMIC);

if (!ctx->prog->aux->extable ||
WARN_ON_ONCE(ctx->exentry_idx >= ctx->prog->aux->num_exentries))
return -EINVAL;
Expand All @@ -1119,22 +1158,6 @@ static int add_exception_handler(const struct bpf_insn *insn,
if (WARN_ON_ONCE(ins_offset >= 0 || ins_offset < INT_MIN))
return -ERANGE;

/*
* Since the extable follows the program, the fixup offset is always
* negative and limited to BPF_JIT_REGION_SIZE. Store a positive value
* to keep things simple, and put the destination register in the upper
* bits. We don't need to worry about buildtime or runtime sort
* modifying the upper bits because the table is already sorted, and
* isn't part of the main exception table.
*
* The fixup_offset is set to the next instruction from the instruction
* that may fault. The execution will jump to this after handling the
* fault.
*/
fixup_offset = (long)&ex->fixup - (pc + AARCH64_INSN_SIZE);
if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, fixup_offset))
return -ERANGE;

/*
* The offsets above have been calculated using the RO buffer but we
* need to use the R/W buffer for writes.
Expand All @@ -1147,8 +1170,18 @@ static int add_exception_handler(const struct bpf_insn *insn,
if (BPF_CLASS(insn->code) != BPF_LDX)
dst_reg = DONT_CLEAR;

ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) |
FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
ex->fixup = FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);

if (is_arena) {
ex->fixup |= BPF_ARENA_ACCESS;
if (BPF_CLASS(insn->code) == BPF_LDX)
arena_reg = bpf2a64[insn->src_reg];
else
arena_reg = bpf2a64[insn->dst_reg];

ex->fixup |= FIELD_PREP(BPF_FIXUP_OFFSET_MASK, off) |
FIELD_PREP(BPF_FIXUP_ARENA_REG_MASK, arena_reg);
}

ex->type = EX_TYPE_BPF;

Expand Down
76 changes: 71 additions & 5 deletions arch/x86/net/bpf_jit_comp.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <linux/bitfield.h>
#include <linux/bpf.h>
#include <linux/memory.h>
#include <linux/sort.h>
Expand Down Expand Up @@ -1388,16 +1389,67 @@ static int emit_atomic_ld_st_index(u8 **pprog, u32 atomic_op, u32 size,
return 0;
}

/*
* Metadata encoding for exception handling in JITed code.
*
* Format of `fixup` and `data` fields in `struct exception_table_entry`:
*
* Bit layout of `fixup` (32-bit):
*
* +-----------+--------+-----------+---------+----------+
* | 31 | 30-24 | 23-16 | 15-8 | 7-0 |
* | | | | | |
* | ARENA_ACC | Unused | ARENA_REG | DST_REG | INSN_LEN |
* +-----------+--------+-----------+---------+----------+
*
* - INSN_LEN (8 bits): Length of faulting insn (max x86 insn = 15 bytes (fits in 8 bits)).
* - DST_REG (8 bits): Offset of dst_reg from reg2pt_regs[] (max offset = 112 (fits in 8 bits)).
* This is set to DONT_CLEAR if the insn is a store.
* - ARENA_REG (8 bits): Offset of the register that is used to calculate the
* address for load/store when accessing the arena region.
* - ARENA_ACCESS (1 bit): This bit is set when the faulting instruction accessed the arena region.
*
* Bit layout of `data` (32-bit):
*
* +--------------+--------+--------------+
* | 31-16 | 15-8 | 7-0 |
* | | | |
* | ARENA_OFFSET | Unused | EX_TYPE_BPF |
* +--------------+--------+--------------+
*
* - ARENA_OFFSET (16 bits): Offset used to calculate the address for load/store when
* accessing the arena region.
*/

#define DONT_CLEAR 1
#define FIXUP_INSN_LEN_MASK GENMASK(7, 0)
#define FIXUP_REG_MASK GENMASK(15, 8)
#define FIXUP_ARENA_REG_MASK GENMASK(23, 16)
#define FIXUP_ARENA_ACCESS BIT(31)
#define DATA_ARENA_OFFSET_MASK GENMASK(31, 16)

/*
 * ex_handler_bpf - fix up a faulting load/store in x86 JITed BPF code.
 * @x:    extable entry; `fixup`/`data` encode the metadata documented above
 * @regs: faulting CPU register state, modified in place
 *
 * Registers are addressed as byte offsets into struct pt_regs (the
 * reg2pt_regs[] encoding used at JIT time). For an arena access, the
 * faulting address is reconstructed from the base register plus the
 * insn offset stashed in `data`, and reported before the fixup. A load
 * has its destination zeroed; a store is marked DONT_CLEAR. Finally the
 * saved IP is advanced past the faulting instruction.
 *
 * Always returns true (the fault is considered handled).
 */
bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs)
{
	u32 reg = FIELD_GET(FIXUP_REG_MASK, x->fixup);
	u32 insn_len = FIELD_GET(FIXUP_INSN_LEN_MASK, x->fixup);
	bool is_arena = !!(x->fixup & FIXUP_ARENA_ACCESS);
	bool is_write = (reg == DONT_CLEAR);
	unsigned long addr;
	s16 off;
	u32 arena_reg;

	if (is_arena) {
		arena_reg = FIELD_GET(FIXUP_ARENA_REG_MASK, x->fixup);
		off = FIELD_GET(DATA_ARENA_OFFSET_MASK, x->data);
		/* arena_reg is a byte offset into pt_regs, not a register number. */
		addr = *(unsigned long *)((void *)regs + arena_reg) + off;
		bpf_prog_report_arena_violation(is_write, addr, regs->ip);
	}

	/* jump over faulting load and clear dest register */
	if (reg != DONT_CLEAR)
		*(unsigned long *)((void *)regs + reg) = 0;
	regs->ip += insn_len;

	return true;
}

Expand Down Expand Up @@ -2070,6 +2122,7 @@ st: if (is_imm8(insn->off))
{
struct exception_table_entry *ex;
u8 *_insn = image + proglen + (start_of_ldx - temp);
u32 arena_reg, fixup_reg;
s64 delta;

if (!bpf_prog->aux->extable)
Expand All @@ -2089,8 +2142,20 @@ st: if (is_imm8(insn->off))

ex->data = EX_TYPE_BPF;

ex->fixup = (prog - start_of_ldx) |
((BPF_CLASS(insn->code) == BPF_LDX ? reg2pt_regs[dst_reg] : DONT_CLEAR) << 8);
if (BPF_CLASS(insn->code) == BPF_LDX) {
arena_reg = reg2pt_regs[src_reg];
fixup_reg = reg2pt_regs[dst_reg];
} else {
arena_reg = reg2pt_regs[dst_reg];
fixup_reg = DONT_CLEAR;
}

ex->fixup = FIELD_PREP(FIXUP_INSN_LEN_MASK, prog - start_of_ldx) |
FIELD_PREP(FIXUP_ARENA_REG_MASK, arena_reg) |
FIELD_PREP(FIXUP_REG_MASK, fixup_reg);
ex->fixup |= FIXUP_ARENA_ACCESS;

ex->data |= FIELD_PREP(DATA_ARENA_OFFSET_MASK, insn->off);
}
break;

Expand Down Expand Up @@ -2208,7 +2273,8 @@ st: if (is_imm8(insn->off))
* End result: x86 insn "mov rbx, qword ptr [rax+0x14]"
* of 4 bytes will be ignored and rbx will be zero inited.
*/
ex->fixup = (prog - start_of_ldx) | (reg2pt_regs[dst_reg] << 8);
ex->fixup = FIELD_PREP(FIXUP_INSN_LEN_MASK, prog - start_of_ldx) |
FIELD_PREP(FIXUP_REG_MASK, reg2pt_regs[dst_reg]);
}
break;

Expand Down
7 changes: 7 additions & 0 deletions include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -1633,6 +1633,7 @@ struct bpf_prog_aux {
/* function name for valid attach_btf_id */
const char *attach_func_name;
struct bpf_prog **func;
struct bpf_prog_aux *main_prog_aux;
void *jit_data; /* JIT specific data. arch dependent */
struct bpf_jit_poke_descriptor *poke_tab;
struct bpf_kfunc_desc_tab *kfunc_tab;
Expand Down Expand Up @@ -2880,6 +2881,7 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
enum bpf_dynptr_type type, u32 offset, u32 size);
void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr);
void bpf_prog_report_arena_violation(bool write, unsigned long addr, unsigned long fault_ip);

#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
Expand Down Expand Up @@ -3167,6 +3169,11 @@ static inline void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
static inline void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
{
}

/*
 * No-op stub for !CONFIG_BPF_SYSCALL builds: with the BPF syscall (and
 * hence arenas) compiled out, there is nothing to report.
 */
static inline void bpf_prog_report_arena_violation(bool write, unsigned long addr,
						   unsigned long fault_ip)
{
}
#endif /* CONFIG_BPF_SYSCALL */

static __always_inline int
Expand Down
30 changes: 30 additions & 0 deletions kernel/bpf/arena.c
Original file line number Diff line number Diff line change
Expand Up @@ -633,3 +633,33 @@ static int __init kfunc_init(void)
return register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &common_kfunc_set);
}
late_initcall(kfunc_init);

/*
 * bpf_prog_report_arena_violation - log a faulting arena access to the
 * owning program's BPF_STDERR stream.
 * @write:    true if the faulting access was a store
 * @addr:     faulting address as seen by the JITed code (lower 32 bits
 *            of the arena mapping; the upper half is restored below)
 * @fault_ip: address of the faulting JITed instruction, used to look up
 *            the program that contains it
 *
 * Called from the arch exception-table fixup handlers. If @fault_ip does
 * not resolve to a BPF program, the report is silently dropped.
 */
void bpf_prog_report_arena_violation(bool write, unsigned long addr, unsigned long fault_ip)
{
	struct bpf_stream_stage ss;
	struct bpf_prog *prog;
	u64 user_vm_start;

	/*
	 * The RCU read lock is held to safely traverse the latch tree, but we
	 * don't need its protection when accessing the prog, since it will not
	 * disappear while we are handling the fault.
	 */
	rcu_read_lock();
	prog = bpf_prog_ksym_find(fault_ip);
	rcu_read_unlock();
	if (!prog)
		return;

	/* Use main prog for stream access */
	prog = prog->aux->main_prog_aux->prog;

	/* Rebuild the full user-visible address: JITed code only carried the
	 * low 32 bits; splice the arena's upper 32 bits back on top. */
	user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena);
	addr += (user_vm_start >> 32) << 32;

	bpf_stream_stage(ss, prog, BPF_STDERR, ({
		bpf_stream_printk(ss, "ERROR: Arena %s access at unmapped address 0x%lx\n",
				  write ? "WRITE" : "READ", addr);
		bpf_stream_dump_stack(ss);
	}));
}
6 changes: 3 additions & 3 deletions kernel/bpf/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag

fp->pages = size / PAGE_SIZE;
fp->aux = aux;
fp->aux->main_prog_aux = aux;
fp->aux->prog = fp;
fp->jit_requested = ebpf_jit_enabled();
fp->blinding_requested = bpf_jit_blinding_enabled(fp);
Expand Down Expand Up @@ -3292,9 +3293,8 @@ static bool find_from_stack_cb(void *cookie, u64 ip, u64 sp, u64 bp)
rcu_read_unlock();
if (!prog)
return true;
if (bpf_is_subprog(prog))
return true;
ctxp->prog = prog;
/* Make sure we return the main prog if we found a subprog */
ctxp->prog = prog->aux->main_prog_aux->prog;
return false;
}

Expand Down
1 change: 1 addition & 0 deletions kernel/bpf/verifier.c
Original file line number Diff line number Diff line change
Expand Up @@ -21597,6 +21597,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
func[i]->aux->func_info_cnt = prog->aux->func_info_cnt;
func[i]->aux->poke_tab = prog->aux->poke_tab;
func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
func[i]->aux->main_prog_aux = prog->aux;

for (j = 0; j < prog->aux->size_poke_tab; j++) {
struct bpf_jit_poke_descriptor *poke;
Expand Down
Loading
Loading