diff --git a/Documentation/bpf/linux-notes.rst b/Documentation/bpf/linux-notes.rst index 00d2693de025..64ac146a926f 100644 --- a/Documentation/bpf/linux-notes.rst +++ b/Documentation/bpf/linux-notes.rst @@ -12,14 +12,6 @@ Byte swap instructions ``BPF_FROM_LE`` and ``BPF_FROM_BE`` exist as aliases for ``BPF_TO_LE`` and ``BPF_TO_BE`` respectively. -Jump instructions -================= - -``BPF_CALL | BPF_X | BPF_JMP`` (0x8d), where the helper function -integer would be read from a specified register, is not currently supported -by the verifier. Any programs with this instruction will fail to load -until such support is added. - Maps ==== diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index d4c93d9e73e4..b6e646039224 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -660,24 +660,38 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, #define EMIT_LFENCE() EMIT3(0x0F, 0xAE, 0xE8) -static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip) +static void __emit_indirect_jump(u8 **pprog, int reg, bool ereg) { u8 *prog = *pprog; + if (ereg) + EMIT1(0x41); + + EMIT2(0xFF, 0xE0 + reg); + + *pprog = prog; +} + +static void emit_indirect_jump(u8 **pprog, int bpf_reg, u8 *ip) +{ + u8 *prog = *pprog; + int reg = reg2hex[bpf_reg]; + bool ereg = is_ereg(bpf_reg); + if (cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) { OPTIMIZER_HIDE_VAR(reg); emit_jump(&prog, its_static_thunk(reg), ip); } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { EMIT_LFENCE(); - EMIT2(0xFF, 0xE0 + reg); + __emit_indirect_jump(&prog, reg, ereg); } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) { OPTIMIZER_HIDE_VAR(reg); if (cpu_feature_enabled(X86_FEATURE_CALL_DEPTH)) - emit_jump(&prog, &__x86_indirect_jump_thunk_array[reg], ip); + emit_jump(&prog, &__x86_indirect_jump_thunk_array[reg + 8*ereg], ip); else - emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip); + emit_jump(&prog, &__x86_indirect_thunk_array[reg + 8*ereg], ip); } else { - EMIT2(0xFF, 0xE0 + reg); /* jmp *%\reg */ + __emit_indirect_jump(&prog, reg, ereg); if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) || IS_ENABLED(CONFIG_MITIGATION_SLS)) EMIT1(0xCC); /* int3 */ } @@ -797,7 +811,7 @@ static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog, * rdi == ctx (1st arg) * rcx == prog->bpf_func + X86_TAIL_CALL_OFFSET */ - emit_indirect_jump(&prog, 1 /* rcx */, ip + (prog - start)); + emit_indirect_jump(&prog, BPF_REG_4 /* R4 -> rcx */, ip + (prog - start)); /* out: */ ctx->tail_call_indirect_label = prog - start; @@ -1691,6 +1705,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image prog = temp; for (i = 1; i <= insn_cnt; i++, insn++) { + u32 abs_xlated_off = bpf_prog->aux->subprog_start + i - 1; const s32 imm32 = insn->imm; u32 dst_reg = insn->dst_reg; u32 src_reg = insn->src_reg; @@ -2614,6 +2629,9 @@ st: if (is_imm8(insn->off)) break; + case BPF_JMP | BPF_JA | BPF_X: + emit_indirect_jump(&prog, insn->dst_reg, image + addrs[i - 1]); + break; case BPF_JMP | BPF_JA: case BPF_JMP32 | BPF_JA: if (BPF_CLASS(insn->code) == BPF_JMP) { @@ -2751,6 +2769,13 @@ st: if (is_imm8(insn->off)) return -EFAULT; } memcpy(rw_image + proglen, temp, ilen); + + /* + * Instruction arrays need to know how xlated code + * maps to jitted code + */ + bpf_prog_update_insn_ptr(bpf_prog, abs_xlated_off, proglen, + image + proglen); } proglen += ilen; addrs[i] = proglen; @@ -3543,7 +3568,7 @@ static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs, u8 *image, if (err) return err; - emit_indirect_jump(&prog, 2 /* rdx */, image + (prog - buf)); + emit_indirect_jump(&prog, BPF_REG_3 /* R3 -> rdx */, image + (prog - buf)); *pprog = prog; return 0; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 204f9c759a41..7c86043468e8 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -995,6 +995,7 @@ enum bpf_reg_type { PTR_TO_ARENA, PTR_TO_BUF, /* reg points to a read/write buffer */ PTR_TO_FUNC, /* reg points to a bpf program function */ + PTR_TO_INSN, /* reg points to a bpf program instruction */ CONST_PTR_TO_DYNPTR, /* reg points to a const struct bpf_dynptr */ __BPF_REG_TYPE_MAX, @@ -1623,6 +1624,7 @@ struct bpf_prog_aux { u32 ctx_arg_info_size; u32 max_rdonly_access; u32 max_rdwr_access; + u32 subprog_start; struct btf *attach_btf; struct bpf_ctx_arg_aux *ctx_arg_info; void __percpu *priv_stack_ptr; @@ -2106,6 +2108,12 @@ struct bpf_array { }; }; +/* + * The bpf_array_get_next_key() function may be used for all array-like + * maps, i.e., maps with u32 keys with range [0 ,..., max_entries) + */ +int bpf_array_get_next_key(struct bpf_map *map, void *key, void *next_key); + #define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */ #define MAX_TAIL_CALL_CNT 33 @@ -3782,4 +3790,40 @@ int bpf_prog_get_file_line(struct bpf_prog *prog, unsigned long ip, const char * const char **linep, int *nump); struct bpf_prog *bpf_prog_find_from_stack(void); +int bpf_insn_array_init(struct bpf_map *map, const struct bpf_prog *prog); +int bpf_insn_array_ready(struct bpf_map *map); +void bpf_insn_array_release(struct bpf_map *map); +void bpf_insn_array_adjust(struct bpf_map *map, u32 off, u32 len); +void bpf_insn_array_adjust_after_remove(struct bpf_map *map, u32 off, u32 len); + +/* + * The struct bpf_insn_ptr structure describes a pointer to a + * particular instruction in a loaded BPF program. Initially + * it is initialised from userspace via user_value.xlated_off. + * During the program verification all other fields are populated + * accordingly: + * + * jitted_ip: address of the instruction in the jitted image + * user_value: user-visible original, xlated, and jitted offsets + */ +struct bpf_insn_ptr { + void *jitted_ip; + struct bpf_insn_array_value user_value; +}; + +#ifdef CONFIG_BPF_SYSCALL +void bpf_prog_update_insn_ptr(struct bpf_prog *prog, + u32 xlated_off, + u32 jitted_off, + void *jitted_ip); +#else +static inline void +bpf_prog_update_insn_ptr(struct bpf_prog *prog, + u32 xlated_off, + u32 jitted_off, + void *jitted_ip) +{ +} +#endif + #endif /* _LINUX_BPF_H */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index fa78f49d4a9a..b13de31e163f 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -133,6 +133,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_RINGBUF, ringbuf_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_BLOOM_FILTER, bloom_filter_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_USER_RINGBUF, user_ringbuf_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_ARENA, arena_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_INSN_ARRAY, insn_array_map_ops) BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint) BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index b57222a25a4a..5441341f1ab9 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -509,6 +509,15 @@ struct bpf_map_ptr_state { #define BPF_ALU_SANITIZE (BPF_ALU_SANITIZE_SRC | \ BPF_ALU_SANITIZE_DST) +/* + * An array of BPF instructions. + * Primary usage: return value of bpf_insn_successors. + */ +struct bpf_iarray { + int cnt; + u32 items[]; +}; + struct bpf_insn_aux_data { union { enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ @@ -518,6 +527,7 @@ struct bpf_insn_aux_data { struct { u32 map_index; /* index into used_maps[] */ u32 map_off; /* offset from value base address */ + struct bpf_iarray *jt; /* jump table for gotox instruction */ }; struct { enum bpf_reg_type reg_type; /* type of pseudo_btf_id */ @@ -745,8 +755,10 @@ struct bpf_verifier_env { struct list_head free_list; /* list of struct bpf_verifier_state_list */ struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ struct btf_mod_pair used_btfs[MAX_USED_BTFS]; /* array of BTF's used by BPF program */ + struct bpf_map *insn_array_maps[MAX_USED_MAPS]; /* array of INSN_ARRAY map's to be relocated */ u32 used_map_cnt; /* number of used maps */ u32 used_btf_cnt; /* number of used BTF objects */ + u32 insn_array_map_cnt; /* number of used maps of type BPF_MAP_TYPE_INSN_ARRAY */ u32 id_gen; /* used to generate unique reg IDs */ u32 hidden_subprog_cnt; /* number of hidden subprogs */ int exception_callback_subprog; @@ -828,6 +840,8 @@ struct bpf_verifier_env { /* array of pointers to bpf_scc_info indexed by SCC id */ struct bpf_scc_info **scc_info; u32 scc_cnt; + struct bpf_iarray *succ; + struct bpf_iarray *gotox_tmp_buf; }; static inline struct bpf_func_info_aux *subprog_aux(struct bpf_verifier_env *env, int subprog) @@ -1038,6 +1052,13 @@ static inline bool bpf_stack_narrow_access_ok(int off, int fill_size, int spill_ return !(off % BPF_REG_SIZE); } +static inline bool insn_is_gotox(struct bpf_insn *insn) +{ + return BPF_CLASS(insn->code) == BPF_JMP && + BPF_OP(insn->code) == BPF_JA && + BPF_SRC(insn->code) == BPF_X; +} + const char *reg_type_str(struct bpf_verifier_env *env, enum bpf_reg_type type); const char *dynptr_type_str(enum bpf_dynptr_type type); const char *iter_type_str(const struct btf *btf, u32 btf_id); @@ -1050,7 +1071,7 @@ void print_insn_state(struct bpf_verifier_env *env, const struct bpf_verifier_st struct bpf_subprog_info *bpf_find_containing_subprog(struct bpf_verifier_env *env, int off); int bpf_jmp_offset(struct bpf_insn *insn); -int bpf_insn_successors(struct bpf_prog *prog, u32 idx, u32 succ[2]); +struct bpf_iarray *bpf_insn_successors(struct bpf_verifier_env *env, u32 idx); void bpf_fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask); bool bpf_calls_callback(struct bpf_verifier_env *env, int insn_idx); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6829936d33f5..805d441363cd 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1026,6 +1026,7 @@ enum bpf_map_type { BPF_MAP_TYPE_USER_RINGBUF, BPF_MAP_TYPE_CGRP_STORAGE, BPF_MAP_TYPE_ARENA, + BPF_MAP_TYPE_INSN_ARRAY, __MAX_BPF_MAP_TYPE }; @@ -7645,4 +7646,24 @@ enum bpf_kfunc_flags { BPF_F_PAD_ZEROS = (1ULL << 0), }; +/* + * Values of a BPF_MAP_TYPE_INSN_ARRAY entry must be of this type. + * + * Before the map is used the orig_off field should point to an + * instruction inside the program being loaded. The other fields + * must be set to 0. + * + * After the program is loaded, the xlated_off will be adjusted + * by the verifier to point to the index of the original instruction + * in the xlated program. If the instruction is deleted, it will + * be set to (u32)-1. The jitted_off will be set to the corresponding + * offset in the jitted image of the program. + */ +struct bpf_insn_array_value { + __u32 orig_off; + __u32 xlated_off; + __u32 jitted_off; + __u32 :32; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 7fd0badfacb1..232cbc97434d 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -9,7 +9,7 @@ CFLAGS_core.o += -Wno-override-init $(cflags-nogcse-yy) obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o token.o liveness.o obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o -obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o +obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o bpf_insn_array.o obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o obj-$(CONFIG_BPF_SYSCALL) += disasm.o mprog.o diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 0ba790c2d2e5..1eeb31c5b317 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -335,18 +335,17 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) } /* Called from syscall */ -static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key) +int bpf_array_get_next_key(struct bpf_map *map, void *key, void *next_key) { - struct bpf_array *array = container_of(map, struct bpf_array, map); u32 index = key ? *(u32 *)key : U32_MAX; u32 *next = (u32 *)next_key; - if (index >= array->map.max_entries) { + if (index >= map->max_entries) { *next = 0; return 0; } - if (index == array->map.max_entries - 1) + if (index == map->max_entries - 1) return -ENOENT; *next = index + 1; @@ -789,7 +788,7 @@ const struct bpf_map_ops array_map_ops = { .map_alloc_check = array_map_alloc_check, .map_alloc = array_map_alloc, .map_free = array_map_free, - .map_get_next_key = array_map_get_next_key, + .map_get_next_key = bpf_array_get_next_key, .map_release_uref = array_map_free_internal_structs, .map_lookup_elem = array_map_lookup_elem, .map_update_elem = array_map_update_elem, @@ -815,7 +814,7 @@ const struct bpf_map_ops percpu_array_map_ops = { .map_alloc_check = array_map_alloc_check, .map_alloc = array_map_alloc, .map_free = array_map_free, - .map_get_next_key = array_map_get_next_key, + .map_get_next_key = bpf_array_get_next_key, .map_lookup_elem = percpu_array_map_lookup_elem, .map_gen_lookup = percpu_array_map_gen_lookup, .map_update_elem = array_map_update_elem, @@ -1204,7 +1203,7 @@ const struct bpf_map_ops prog_array_map_ops = { .map_poke_track = prog_array_map_poke_track, .map_poke_untrack = prog_array_map_poke_untrack, .map_poke_run = prog_array_map_poke_run, - .map_get_next_key = array_map_get_next_key, + .map_get_next_key = bpf_array_get_next_key, .map_lookup_elem = fd_array_map_lookup_elem, .map_delete_elem = fd_array_map_delete_elem, .map_fd_get_ptr = prog_fd_array_get_ptr, @@ -1308,7 +1307,7 @@ const struct bpf_map_ops perf_event_array_map_ops = { .map_alloc_check = fd_array_map_alloc_check, .map_alloc = array_map_alloc, .map_free = perf_event_fd_array_map_free, - .map_get_next_key = array_map_get_next_key, + .map_get_next_key = bpf_array_get_next_key, .map_lookup_elem = fd_array_map_lookup_elem, .map_delete_elem = fd_array_map_delete_elem, .map_fd_get_ptr = perf_event_fd_array_get_ptr, @@ -1344,7 +1343,7 @@ const struct bpf_map_ops cgroup_array_map_ops = { .map_alloc_check = fd_array_map_alloc_check, .map_alloc = array_map_alloc, .map_free = cgroup_fd_array_free, - .map_get_next_key = array_map_get_next_key, + .map_get_next_key = bpf_array_get_next_key, .map_lookup_elem = fd_array_map_lookup_elem, .map_delete_elem = fd_array_map_delete_elem, .map_fd_get_ptr = cgroup_fd_array_get_ptr, @@ -1429,7 +1428,7 @@ const struct bpf_map_ops array_of_maps_map_ops = { .map_alloc_check = fd_array_map_alloc_check, .map_alloc = array_of_map_alloc, .map_free = array_of_map_free, - .map_get_next_key = array_map_get_next_key, + .map_get_next_key = bpf_array_get_next_key, .map_lookup_elem = array_of_map_lookup_elem, .map_delete_elem = fd_array_map_delete_elem, .map_fd_get_ptr = bpf_map_fd_get_ptr, diff --git a/kernel/bpf/bpf_insn_array.c b/kernel/bpf/bpf_insn_array.c new file mode 100644 index 000000000000..71813ce734e9 --- /dev/null +++ b/kernel/bpf/bpf_insn_array.c @@ -0,0 +1,303 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2025 Isovalent */ + +#include + +#define MAX_INSN_ARRAY_ENTRIES 256 + +struct bpf_insn_array { + struct bpf_map map; + atomic_t used; + long *ips; + DECLARE_FLEX_ARRAY(struct bpf_insn_ptr, ptrs); +}; + +#define cast_insn_array(MAP_PTR) \ + container_of((MAP_PTR), struct bpf_insn_array, map) + +#define INSN_DELETED ((u32)-1) + +static inline u32 insn_array_alloc_size(u32 max_entries) +{ + const u32 base_size = sizeof(struct bpf_insn_array); + const u32 entry_size = sizeof(struct bpf_insn_ptr); + + return base_size + entry_size * max_entries; +} + +static int insn_array_alloc_check(union bpf_attr *attr) +{ + u32 value_size = sizeof(struct bpf_insn_array_value); + + if (attr->max_entries == 0 || attr->key_size != 4 || + attr->value_size != value_size || attr->map_flags != 0) + return -EINVAL; + + if (attr->max_entries > MAX_INSN_ARRAY_ENTRIES) + return -E2BIG; + + return 0; +} + +static void insn_array_free(struct bpf_map *map) +{ + struct bpf_insn_array *insn_array = cast_insn_array(map); + + kfree(insn_array->ips); + bpf_map_area_free(insn_array); +} + +static struct bpf_map *insn_array_alloc(union bpf_attr *attr) +{ + u64 size = insn_array_alloc_size(attr->max_entries); + struct bpf_insn_array *insn_array; + + insn_array = bpf_map_area_alloc(size, NUMA_NO_NODE); + if (!insn_array) + return ERR_PTR(-ENOMEM); + + insn_array->ips = kcalloc(attr->max_entries, sizeof(long), GFP_KERNEL); + if (!insn_array->ips) { + insn_array_free(&insn_array->map); + return ERR_PTR(-ENOMEM); + } + + bpf_map_init_from_attr(&insn_array->map, attr); + + return &insn_array->map; +} + +static void *insn_array_lookup_elem(struct bpf_map *map, void *key) +{ + struct bpf_insn_array *insn_array = cast_insn_array(map); + u32 index = *(u32 *)key; + + if (unlikely(index >= insn_array->map.max_entries)) + return NULL; + + return &insn_array->ptrs[index].user_value; +} + +static long insn_array_update_elem(struct bpf_map *map, void *key, void *value, u64 map_flags) +{ + struct bpf_insn_array *insn_array = cast_insn_array(map); + u32 index = *(u32 *)key; + struct bpf_insn_array_value val = {}; + + if (unlikely(index >= insn_array->map.max_entries)) + return -E2BIG; + + if (unlikely(map_flags & BPF_NOEXIST)) + return -EEXIST; + + copy_map_value(map, &val, value); + if (val.jitted_off || val.xlated_off) + return -EINVAL; + + insn_array->ptrs[index].user_value.orig_off = val.orig_off; + + return 0; +} + +static long insn_array_delete_elem(struct bpf_map *map, void *key) +{ + return -EINVAL; +} + +static int insn_array_check_btf(const struct bpf_map *map, + const struct btf *btf, + const struct btf_type *key_type, + const struct btf_type *value_type) +{ + if (!btf_type_is_i32(key_type)) + return -EINVAL; + + if (!btf_type_is_i64(value_type)) + return -EINVAL; + + return 0; +} + +static u64 insn_array_mem_usage(const struct bpf_map *map) +{ + u64 extra_size = 0; + + extra_size += sizeof(long) * map->max_entries; /* insn_array->ips */ + + return insn_array_alloc_size(map->max_entries) + extra_size; +} + +static int insn_array_map_direct_value_addr(const struct bpf_map *map, u64 *imm, u32 off) +{ + struct bpf_insn_array *insn_array = cast_insn_array(map); + + if ((off % sizeof(long)) != 0 || + (off / sizeof(long)) >= map->max_entries) + return -EINVAL; + + /* from BPF's point of view, this map is a jump table */ + *imm = (unsigned long)insn_array->ips + off; + + return 0; +} + +BTF_ID_LIST_SINGLE(insn_array_btf_ids, struct, bpf_insn_array) + +const struct bpf_map_ops insn_array_map_ops = { + .map_alloc_check = insn_array_alloc_check, + .map_alloc = insn_array_alloc, + .map_free = insn_array_free, + .map_get_next_key = bpf_array_get_next_key, + .map_lookup_elem = insn_array_lookup_elem, + .map_update_elem = insn_array_update_elem, + .map_delete_elem = insn_array_delete_elem, + .map_check_btf = insn_array_check_btf, + .map_mem_usage = insn_array_mem_usage, + .map_direct_value_addr = insn_array_map_direct_value_addr, + .map_btf_id = &insn_array_btf_ids[0], +}; + +static inline bool is_frozen(struct bpf_map *map) +{ + guard(mutex)(&map->freeze_mutex); + + return map->frozen; +} + +static bool is_insn_array(const struct bpf_map *map) +{ + return map->map_type == BPF_MAP_TYPE_INSN_ARRAY; +} + +static inline bool valid_offsets(const struct bpf_insn_array *insn_array, + const struct bpf_prog *prog) +{ + u32 off; + int i; + + for (i = 0; i < insn_array->map.max_entries; i++) { + off = insn_array->ptrs[i].user_value.orig_off; + + if (off >= prog->len) + return false; + + if (off > 0) { + if (prog->insnsi[off-1].code == (BPF_LD | BPF_DW | BPF_IMM)) + return false; + } + } + + return true; +} + +int bpf_insn_array_init(struct bpf_map *map, const struct bpf_prog *prog) +{ + struct bpf_insn_array *insn_array = cast_insn_array(map); + struct bpf_insn_array_value *val; + int i; + + if (!is_frozen(map)) + return -EINVAL; + + if (!valid_offsets(insn_array, prog)) + return -EINVAL; + + /* + * There can be only one program using the map + */ + if (atomic_xchg(&insn_array->used, 1)) + return -EBUSY; + + /* + * Reset all the map indexes to the original values. This is needed, + * e.g., when a replay of verification with different log level should + * be performed. + */ + for (i = 0; i < map->max_entries; i++) { + val = &insn_array->ptrs[i].user_value; + val->xlated_off = val->orig_off; + } + + return 0; +} + +int bpf_insn_array_ready(struct bpf_map *map) +{ + struct bpf_insn_array *insn_array = cast_insn_array(map); + int i; + + for (i = 0; i < map->max_entries; i++) { + if (insn_array->ptrs[i].user_value.xlated_off == INSN_DELETED) + continue; + if (!insn_array->ips[i]) + return -EFAULT; + } + + return 0; +} + +void bpf_insn_array_release(struct bpf_map *map) +{ + struct bpf_insn_array *insn_array = cast_insn_array(map); + + atomic_set(&insn_array->used, 0); +} + +void bpf_insn_array_adjust(struct bpf_map *map, u32 off, u32 len) +{ + struct bpf_insn_array *insn_array = cast_insn_array(map); + int i; + + if (len <= 1) + return; + + for (i = 0; i < map->max_entries; i++) { + if (insn_array->ptrs[i].user_value.xlated_off <= off) + continue; + if (insn_array->ptrs[i].user_value.xlated_off == INSN_DELETED) + continue; + insn_array->ptrs[i].user_value.xlated_off += len - 1; + } +} + +void bpf_insn_array_adjust_after_remove(struct bpf_map *map, u32 off, u32 len) +{ + struct bpf_insn_array *insn_array = cast_insn_array(map); + int i; + + for (i = 0; i < map->max_entries; i++) { + if (insn_array->ptrs[i].user_value.xlated_off < off) + continue; + if (insn_array->ptrs[i].user_value.xlated_off == INSN_DELETED) + continue; + if (insn_array->ptrs[i].user_value.xlated_off < off + len) + insn_array->ptrs[i].user_value.xlated_off = INSN_DELETED; + else + insn_array->ptrs[i].user_value.xlated_off -= len; + } +} + +void bpf_prog_update_insn_ptr(struct bpf_prog *prog, + u32 xlated_off, + u32 jitted_off, + void *jitted_ip) +{ + struct bpf_insn_array *insn_array; + struct bpf_map *map; + int i, j; + + for (i = 0; i < prog->aux->used_map_cnt; i++) { + map = prog->aux->used_maps[i]; + if (!is_insn_array(map)) + continue; + + insn_array = cast_insn_array(map); + for (j = 0; j < map->max_entries; j++) { + if (insn_array->ptrs[j].user_value.xlated_off == xlated_off) { + insn_array->ips[j] = (long)jitted_ip; + insn_array->ptrs[j].jitted_ip = jitted_ip; + insn_array->ptrs[j].user_value.jitted_off = jitted_off; + } + } + } +} diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index d595fe512498..ef4448f18aad 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1450,6 +1450,23 @@ void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other) bpf_prog_clone_free(fp_other); } +static void adjust_insn_arrays(struct bpf_prog *prog, u32 off, u32 len) +{ +#ifdef CONFIG_BPF_SYSCALL + struct bpf_map *map; + int i; + + if (len <= 1) + return; + + for (i = 0; i < prog->aux->used_map_cnt; i++) { + map = prog->aux->used_maps[i]; + if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY) + bpf_insn_array_adjust(map, off, len); + } +#endif +} + struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog) { struct bpf_insn insn_buff[16], aux[2]; @@ -1505,6 +1522,9 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog) clone = tmp; insn_delta = rewritten - 1; + /* Instructions arrays must be updated using absolute xlated offsets */ + adjust_insn_arrays(clone, prog->aux->subprog_start + i, rewritten); + /* Walk new program and skip insns we just inserted. */ insn = clone->insnsi + i + insn_delta; insn_cnt += insn_delta; @@ -1688,6 +1708,7 @@ bool bpf_opcode_in_insntable(u8 code) [BPF_LD | BPF_IND | BPF_B] = true, [BPF_LD | BPF_IND | BPF_H] = true, [BPF_LD | BPF_IND | BPF_W] = true, + [BPF_JMP | BPF_JA | BPF_X] = true, [BPF_JMP | BPF_JCOND] = true, }; #undef BPF_INSN_3_TBL diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c index 20883c6b1546..f8a3c7eb451e 100644 --- a/kernel/bpf/disasm.c +++ b/kernel/bpf/disasm.c @@ -358,6 +358,9 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs, } else if (insn->code == (BPF_JMP | BPF_JA)) { verbose(cbs->private_data, "(%02x) goto pc%+d\n", insn->code, insn->off); + } else if (insn->code == (BPF_JMP | BPF_JA | BPF_X)) { + verbose(cbs->private_data, "(%02x) gotox r%d\n", + insn->code, insn->dst_reg); } else if (insn->code == (BPF_JMP | BPF_JCOND) && insn->src_reg == BPF_MAY_GOTO) { verbose(cbs->private_data, "(%02x) may_goto pc%+d\n", diff --git a/kernel/bpf/liveness.c b/kernel/bpf/liveness.c index 1e6538f59a78..b73e5c9ab61f 100644 --- a/kernel/bpf/liveness.c +++ b/kernel/bpf/liveness.c @@ -34,7 +34,7 @@ * - read and write marks propagation. * - The propagation phase is a textbook live variable data flow analysis: * - * state[cc, i].live_after = U [state[cc, s].live_before for s in insn_successors(i)] + * state[cc, i].live_after = U [state[cc, s].live_before for s in bpf_insn_successors(i)] * state[cc, i].live_before = * (state[cc, i].live_after / state[cc, i].must_write) U state[i].may_read * @@ -54,7 +54,7 @@ * The equation for "must_write_acc" propagation looks as follows: * * state[cc, i].must_write_acc = - * ∩ [state[cc, s].must_write_acc for s in insn_successors(i)] + * ∩ [state[cc, s].must_write_acc for s in bpf_insn_successors(i)] * U state[cc, i].must_write * * (An intersection of all "must_write_acc" for instruction successors @@ -447,7 +447,12 @@ int bpf_jmp_offset(struct bpf_insn *insn) __diag_push(); __diag_ignore_all("-Woverride-init", "Allow field initialization overrides for opcode_info_tbl"); -inline int bpf_insn_successors(struct bpf_prog *prog, u32 idx, u32 succ[2]) +/* + * Returns an array of instructions succ, with succ->items[0], ..., + * succ->items[n-1] with successor instructions, where n=succ->cnt + */ +inline struct bpf_iarray * +bpf_insn_successors(struct bpf_verifier_env *env, u32 idx) { static const struct opcode_info { bool can_jump; @@ -474,19 +479,28 @@ inline int bpf_insn_successors(struct bpf_prog *prog, u32 idx, u32 succ[2]) _J(BPF_JSET, {.can_jump = true, .can_fallthrough = true}), #undef _J }; + struct bpf_prog *prog = env->prog; struct bpf_insn *insn = &prog->insnsi[idx]; const struct opcode_info *opcode_info; - int i = 0, insn_sz; + struct bpf_iarray *succ; + int insn_sz; + + if (unlikely(insn_is_gotox(insn))) + return env->insn_aux_data[idx].jt; + + /* pre-allocated array of size up to 2; reset cnt, as it may have been used already */ + succ = env->succ; + succ->cnt = 0; opcode_info = &opcode_info_tbl[BPF_CLASS(insn->code) | BPF_OP(insn->code)]; insn_sz = bpf_is_ldimm64(insn) ? 2 : 1; if (opcode_info->can_fallthrough) - succ[i++] = idx + insn_sz; + succ->items[succ->cnt++] = idx + insn_sz; if (opcode_info->can_jump) - succ[i++] = idx + bpf_jmp_offset(insn) + 1; + succ->items[succ->cnt++] = idx + bpf_jmp_offset(insn) + 1; - return i; + return succ; } __diag_pop(); @@ -546,11 +560,12 @@ static inline bool update_insn(struct bpf_verifier_env *env, struct bpf_insn_aux_data *aux = env->insn_aux_data; u64 new_before, new_after, must_write_acc; struct per_frame_masks *insn, *succ_insn; - u32 succ_num, s, succ[2]; + struct bpf_iarray *succ; + u32 s; bool changed; - succ_num = bpf_insn_successors(env->prog, insn_idx, succ); - if (unlikely(succ_num == 0)) + succ = bpf_insn_successors(env, insn_idx); + if (succ->cnt == 0) return false; changed = false; @@ -562,8 +577,8 @@ static inline bool update_insn(struct bpf_verifier_env *env, * of successors plus all "must_write" slots of instruction itself. */ must_write_acc = U64_MAX; - for (s = 0; s < succ_num; ++s) { - succ_insn = get_frame_masks(instance, frame, succ[s]); + for (s = 0; s < succ->cnt; ++s) { + succ_insn = get_frame_masks(instance, frame, succ->items[s]); new_after |= succ_insn->live_before; must_write_acc &= succ_insn->must_write_acc; } diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index f50533169cc3..3fb56617af36 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -461,6 +461,7 @@ const char *reg_type_str(struct bpf_verifier_env *env, enum bpf_reg_type type) [PTR_TO_ARENA] = "arena", [PTR_TO_BUF] = "buf", [PTR_TO_FUNC] = "func", + [PTR_TO_INSN] = "insn", [PTR_TO_MAP_KEY] = "map_key", [CONST_PTR_TO_DYNPTR] = "dynptr_ptr", }; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 8a129746bd6c..f62d61b6730a 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1493,6 +1493,7 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr) case BPF_MAP_TYPE_STRUCT_OPS: case BPF_MAP_TYPE_CPUMAP: case BPF_MAP_TYPE_ARENA: + case BPF_MAP_TYPE_INSN_ARRAY: if (!bpf_token_capable(token, CAP_BPF)) goto put_token; break; @@ -2853,6 +2854,23 @@ static int bpf_prog_verify_signature(struct bpf_prog *prog, union bpf_attr *attr return err; } +static int bpf_prog_mark_insn_arrays_ready(struct bpf_prog *prog) +{ + int err; + int i; + + for (i = 0; i < prog->aux->used_map_cnt; i++) { + if (prog->aux->used_maps[i]->map_type != BPF_MAP_TYPE_INSN_ARRAY) + continue; + + err = bpf_insn_array_ready(prog->aux->used_maps[i]); + if (err) + return err; + } + + return 0; +} + /* last field in 'union bpf_attr' used by this command */ #define BPF_PROG_LOAD_LAST_FIELD keyring_id @@ -3082,6 +3100,10 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) if (err < 0) goto free_used_maps; + err = bpf_prog_mark_insn_arrays_ready(prog); + if (err < 0) + goto free_used_maps; + err = bpf_prog_alloc_id(prog); if (err) goto free_used_maps; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9b4f6920f79b..d2df21fde118 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2109,7 +2109,7 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL_ACCOUNT); if (!elem) - return NULL; + return ERR_PTR(-ENOMEM); elem->insn_idx = insn_idx; elem->prev_insn_idx = prev_insn_idx; @@ -2119,12 +2119,12 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, env->stack_size++; err = copy_verifier_state(&elem->st, cur); if (err) - return NULL; + return ERR_PTR(-ENOMEM); elem->st.speculative |= speculative; if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) { verbose(env, "The sequence of %d jumps is too complex.\n", env->stack_size); - return NULL; + return ERR_PTR(-E2BIG); } if (elem->st.parent) { ++elem->st.parent->branches; @@ -2919,7 +2919,7 @@ static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env, elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL_ACCOUNT); if (!elem) - return NULL; + return ERR_PTR(-ENOMEM); elem->insn_idx = insn_idx; elem->prev_insn_idx = prev_insn_idx; @@ -2931,7 +2931,7 @@ static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env, verbose(env, "The sequence of %d jumps is too complex for async cb.\n", env->stack_size); - return NULL; + return ERR_PTR(-E2BIG); } /* Unlike push_stack() do not copy_verifier_state(). * The caller state doesn't matter. @@ -2942,7 +2942,7 @@ static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env, elem->st.in_sleepable = is_sleepable; frame = kzalloc(sizeof(*frame), GFP_KERNEL_ACCOUNT); if (!frame) - return NULL; + return ERR_PTR(-ENOMEM); init_func_state(env, frame, BPF_MAIN_FUNC /* callsite */, 0 /* frameno within this callchain */, @@ -6003,6 +6003,18 @@ static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno, return 0; } +/* + * Return the size of the memory region accessible from a pointer to map value. + * For INSN_ARRAY maps whole bpf_insn_array->ips array is accessible. + */ +static u32 map_mem_size(const struct bpf_map *map) +{ + if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY) + return map->max_entries * sizeof(long); + + return map->value_size; +} + /* check read/write into a map element with possible variable offset */ static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off, int size, bool zero_size_allowed, @@ -6012,11 +6024,11 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, struct bpf_func_state *state = vstate->frame[vstate->curframe]; struct bpf_reg_state *reg = &state->regs[regno]; struct bpf_map *map = reg->map_ptr; + u32 mem_size = map_mem_size(map); struct btf_record *rec; int err, i; - err = check_mem_region_access(env, regno, off, size, map->value_size, - zero_size_allowed); + err = check_mem_region_access(env, regno, off, size, mem_size, zero_size_allowed); if (err) return err; @@ -7478,6 +7490,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn { struct bpf_reg_state *regs = cur_regs(env); struct bpf_reg_state *reg = regs + regno; + bool insn_array = reg->type == PTR_TO_MAP_VALUE && + reg->map_ptr->map_type == BPF_MAP_TYPE_INSN_ARRAY; int size, err = 0; size = bpf_size_to_bytes(bpf_size); @@ -7485,7 +7499,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn return size; /* alignment checks will add in reg->off themselves */ - err = check_ptr_alignment(env, reg, off, size, strict_alignment_once); + err = check_ptr_alignment(env, reg, off, size, strict_alignment_once || insn_array); if (err) return err; @@ -7512,6 +7526,11 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn verbose(env, "R%d leaks addr into map\n", value_regno); return -EACCES; } + if (t == BPF_WRITE && insn_array) { + verbose(env, "writes into insn_array not allowed\n"); + return -EACCES; + } + err = check_map_access_type(env, regno, off, size, t); if (err) return err; @@ -7540,6 +7559,14 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn regs[value_regno].type = SCALAR_VALUE; __mark_reg_known(®s[value_regno], val); + } else if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY) { + if (bpf_size != BPF_DW) { + verbose(env, "Invalid read of %d bytes from insn_array\n", + size); + return -EACCES; + } + copy_register_state(®s[value_regno], reg); + regs[value_regno].type = PTR_TO_INSN; } else { mark_reg_unknown(env, regs, value_regno); } @@ -9045,8 +9072,8 @@ static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx, prev_st = find_prev_entry(env, cur_st->parent, insn_idx); /* branch out active iter state */ queued_st = push_stack(env, insn_idx + 1, insn_idx, false); - if (!queued_st) - return -ENOMEM; + if (IS_ERR(queued_st)) + return PTR_ERR(queued_st); queued_iter = get_iter_from_state(queued_st, meta); queued_iter->iter.state = BPF_ITER_STATE_ACTIVE; @@ -10083,6 +10110,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, func_id != BPF_FUNC_map_push_elem) goto error; break; + case BPF_MAP_TYPE_INSN_ARRAY: + goto error; default: break; } @@ -10616,8 +10645,8 @@ static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *ins async_cb = push_async_cb(env, env->subprog_info[subprog].start, insn_idx, subprog, is_async_cb_sleepable(env, insn)); - if (!async_cb) - return -EFAULT; + if (IS_ERR(async_cb)) + return PTR_ERR(async_cb); callee = async_cb->frame[0]; callee->async_entry_cnt = caller->async_entry_cnt + 1; @@ -10633,8 +10662,8 @@ static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *ins * proceed with next instruction within current frame. */ callback_state = push_stack(env, env->subprog_info[subprog].start, insn_idx, false); - if (!callback_state) - return -ENOMEM; + if (IS_ERR(callback_state)) + return PTR_ERR(callback_state); err = setup_func_entry(env, subprog, insn_idx, set_callee_state_cb, callback_state); @@ -13859,9 +13888,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, struct bpf_reg_state *regs; branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false); - if (!branch) { + if (IS_ERR(branch)) { verbose(env, "failed to push state for failed lock acquisition\n"); - return -ENOMEM; + return PTR_ERR(branch); } regs = branch->frame[branch->curframe]->regs; @@ -14316,16 +14345,15 @@ struct bpf_sanitize_info { bool mask_to_left; }; -static struct bpf_verifier_state * -sanitize_speculative_path(struct bpf_verifier_env *env, - const struct bpf_insn *insn, - u32 next_idx, u32 curr_idx) +static int sanitize_speculative_path(struct bpf_verifier_env *env, + const struct bpf_insn *insn, + u32 next_idx, u32 curr_idx) { struct bpf_verifier_state *branch; struct bpf_reg_state *regs; branch = push_stack(env, next_idx, curr_idx, true); - if (branch && insn) { + if (!IS_ERR(branch) && insn) { regs = branch->frame[branch->curframe]->regs; if (BPF_SRC(insn->code) == BPF_K) { mark_reg_unknown(env, regs, insn->dst_reg); @@ -14334,7 +14362,7 @@ sanitize_speculative_path(struct bpf_verifier_env *env, mark_reg_unknown(env, regs, insn->src_reg); } } - return branch; + return PTR_ERR_OR_ZERO(branch); } static int sanitize_ptr_alu(struct bpf_verifier_env *env, @@ -14353,7 +14381,6 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, u8 opcode = BPF_OP(insn->code); u32 alu_state, alu_limit; struct bpf_reg_state tmp; - bool ret; int err; if (can_skip_alu_sanitation(env, insn)) @@ -14426,11 +14453,12 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, tmp = *dst_reg; copy_register_state(dst_reg, ptr_reg); } - ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1, - env->insn_idx); - if (!ptr_is_dst_reg && ret) + err = sanitize_speculative_path(env, NULL, env->insn_idx + 1, env->insn_idx); + if (err < 0) + return REASON_STACK; + if (!ptr_is_dst_reg) *dst_reg = tmp; - return !ret ? REASON_STACK : 0; + return 0; } static void sanitize_mark_insn_seen(struct bpf_verifier_env *env) @@ -16750,8 +16778,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, /* branch out 'fallthrough' insn as a new state to explore */ queued_st = push_stack(env, idx + 1, idx, false); - if (!queued_st) - return -ENOMEM; + if (IS_ERR(queued_st)) + return PTR_ERR(queued_st); queued_st->may_goto_depth++; if (prev_st) @@ -16829,10 +16857,11 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, * the fall-through branch for simulation under speculative * execution. */ - if (!env->bypass_spec_v1 && - !sanitize_speculative_path(env, insn, *insn_idx + 1, - *insn_idx)) - return -EFAULT; + if (!env->bypass_spec_v1) { + err = sanitize_speculative_path(env, insn, *insn_idx + 1, *insn_idx); + if (err < 0) + return err; + } if (env->log.level & BPF_LOG_LEVEL) print_insn_state(env, this_branch, this_branch->curframe); *insn_idx += insn->off; @@ -16842,11 +16871,12 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, * program will go. If needed, push the goto branch for * simulation under speculative execution. */ - if (!env->bypass_spec_v1 && - !sanitize_speculative_path(env, insn, - *insn_idx + insn->off + 1, - *insn_idx)) - return -EFAULT; + if (!env->bypass_spec_v1) { + err = sanitize_speculative_path(env, insn, *insn_idx + insn->off + 1, + *insn_idx); + if (err < 0) + return err; + } if (env->log.level & BPF_LOG_LEVEL) print_insn_state(env, this_branch, this_branch->curframe); return 0; @@ -16867,10 +16897,9 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, return err; } - other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx, - false); - if (!other_branch) - return -EFAULT; + other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx, false); + if (IS_ERR(other_branch)) + return PTR_ERR(other_branch); other_branch_regs = other_branch->frame[other_branch->curframe]->regs; if (BPF_SRC(insn->code) == BPF_X) { @@ -17053,7 +17082,8 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) } dst_reg->type = PTR_TO_MAP_VALUE; dst_reg->off = aux->map_off; - WARN_ON_ONCE(map->max_entries != 1); + WARN_ON_ONCE(map->map_type != BPF_MAP_TYPE_INSN_ARRAY && + map->max_entries != 1); /* We want reg->id to be same (0) as map_value is not distinct */ } else if (insn->src_reg == BPF_PSEUDO_MAP_FD || insn->src_reg == BPF_PSEUDO_MAP_IDX) { @@ -17805,6 +17835,212 @@ static int mark_fastcall_patterns(struct bpf_verifier_env *env) return 0; } +static struct bpf_iarray *iarray_realloc(struct bpf_iarray *old, size_t n_elem) +{ + size_t new_size = sizeof(struct bpf_iarray) + n_elem * sizeof(old->items[0]); + struct bpf_iarray *new; + + new = kvrealloc(old, new_size, GFP_KERNEL_ACCOUNT); + if (!new) { + /* this is what callers always want, so simplify the call site */ + kvfree(old); + return NULL; + } + + new->cnt = n_elem; + return new; +} + +static int copy_insn_array(struct bpf_map *map, u32 start, u32 end, u32 *items) +{ + struct bpf_insn_array_value *value; + u32 i; + + for (i = start; i <= end; i++) { + value = map->ops->map_lookup_elem(map, &i); + if (!value) + return -EINVAL; + items[i - start] = value->xlated_off; + } + return 0; +} + +static int cmp_ptr_to_u32(const void *a, const void *b) +{ + return *(u32 *)a - *(u32 *)b; +} + +static int sort_insn_array_uniq(u32 *items, int cnt) +{ + int unique = 1; + int i; + + sort(items, cnt, sizeof(items[0]), cmp_ptr_to_u32, NULL); + + for (i = 1; i < cnt; i++) + if (items[i] != items[unique - 1]) + items[unique++] = items[i]; + + return unique; +} + +/* + * sort_unique({map[start], ..., map[end]}) into off + */ +static int copy_insn_array_uniq(struct bpf_map *map, u32 start, u32 end, u32 *off) +{ + u32 n = end - start + 1; + int err; + + err = copy_insn_array(map, start, end, off); + if (err) + return err; + + return sort_insn_array_uniq(off, n); +} + +/* + * Copy all unique offsets from the map + */ +static struct bpf_iarray *jt_from_map(struct bpf_map *map) +{ + struct bpf_iarray *jt; + int n; + + jt = iarray_realloc(NULL, map->max_entries); + if (!jt) + return ERR_PTR(-ENOMEM); + + n = copy_insn_array_uniq(map, 0, map->max_entries - 1, jt->items); + if (n < 0) { + kvfree(jt); + return ERR_PTR(n); + } + + return jt; +} + +/* + * Find and collect all maps which fit in the subprog. Return the result as one + * combined jump table in jt->items (allocated with kvcalloc) + */ +static struct bpf_iarray *jt_from_subprog(struct bpf_verifier_env *env, + int subprog_start, int subprog_end) +{ + struct bpf_iarray *jt = NULL; + struct bpf_map *map; + struct bpf_iarray *jt_cur; + int i; + + for (i = 0; i < env->insn_array_map_cnt; i++) { + /* + * TODO (when needed): collect only jump tables, not static keys + * or maps for indirect calls + */ + map = env->insn_array_maps[i]; + + jt_cur = jt_from_map(map); + if (IS_ERR(jt_cur)) { + kvfree(jt); + return jt_cur; + } + + /* + * This is enough to check one element. The full table is + * checked to fit inside the subprog later in create_jt() + */ + if (jt_cur->items[0] >= subprog_start && jt_cur->items[0] < subprog_end) { + u32 old_cnt = jt ? jt->cnt : 0; + jt = iarray_realloc(jt, old_cnt + jt_cur->cnt); + if (!jt) { + kvfree(jt_cur); + return ERR_PTR(-ENOMEM); + } + memcpy(jt->items + old_cnt, jt_cur->items, jt_cur->cnt << 2); + } + + kvfree(jt_cur); + } + + if (!jt) { + verbose(env, "no jump tables found for subprog starting at %u\n", subprog_start); + return ERR_PTR(-EINVAL); + } + + jt->cnt = sort_insn_array_uniq(jt->items, jt->cnt); + return jt; +} + +static struct bpf_iarray * +create_jt(int t, struct bpf_verifier_env *env, int fd) +{ + static struct bpf_subprog_info *subprog; + int subprog_start, subprog_end; + struct bpf_iarray *jt; + int i; + + subprog = bpf_find_containing_subprog(env, t); + subprog_start = subprog->start; + subprog_end = (subprog + 1)->start; + jt = jt_from_subprog(env, subprog_start, subprog_end); + if (IS_ERR(jt)) + return jt; + + /* Check that the every element of the jump table fits within the given subprogram */ + for (i = 0; i < jt->cnt; i++) { + if (jt->items[i] < subprog_start || jt->items[i] >= subprog_end) { + verbose(env, "jump table for insn %d points outside of the subprog [%u,%u]", + t, subprog_start, subprog_end); + return ERR_PTR(-EINVAL); + } + } + + return jt; +} + +/* "conditional jump with N edges" */ +static int visit_gotox_insn(int t, struct bpf_verifier_env *env, int fd) +{ + int *insn_stack = env->cfg.insn_stack; + int *insn_state = env->cfg.insn_state; + bool keep_exploring = false; + struct bpf_iarray *jt; + int i, w; + + jt = env->insn_aux_data[t].jt; + if (!jt) { + jt = create_jt(t, env, fd); + if (IS_ERR(jt)) + return PTR_ERR(jt); + + env->insn_aux_data[t].jt = jt; + } + + mark_prune_point(env, t); + for (i = 0; i < jt->cnt; i++) { + w = jt->items[i]; + if (w < 0 || w >= env->prog->len) { + verbose(env, "indirect jump out of range from insn %d to %d\n", t, w); + return -EINVAL; + } + + mark_jmp_point(env, w); + + /* EXPLORED || DISCOVERED */ + if (insn_state[w]) + continue; + + if (env->cfg.cur_stack >= env->prog->len) + return -E2BIG; + + insn_stack[env->cfg.cur_stack++] = w; + insn_state[w] |= DISCOVERED; + keep_exploring = true; + } + + return keep_exploring ? KEEP_EXPLORING : DONE_EXPLORING; +} + /* Visits the instruction at index t and returns one of the following: * < 0 - an error occurred * DONE_EXPLORING - the instruction was fully explored @@ -17897,8 +18133,8 @@ static int visit_insn(int t, struct bpf_verifier_env *env) return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL); case BPF_JA: - if (BPF_SRC(insn->code) != BPF_K) - return -EINVAL; + if (BPF_SRC(insn->code) == BPF_X) + return visit_gotox_insn(t, env, insn->imm); if (BPF_CLASS(insn->code) == BPF_JMP) off = insn->off; @@ -18025,8 +18261,9 @@ static int check_cfg(struct bpf_verifier_env *env) */ static int compute_postorder(struct bpf_verifier_env *env) { - u32 cur_postorder, i, top, stack_sz, s, succ_cnt, succ[2]; + u32 cur_postorder, i, top, stack_sz, s; int *stack = NULL, *postorder = NULL, *state = NULL; + struct bpf_iarray *succ; postorder = kvcalloc(env->prog->len, sizeof(int), GFP_KERNEL_ACCOUNT); state = kvcalloc(env->prog->len, sizeof(int), GFP_KERNEL_ACCOUNT); @@ -18050,11 +18287,11 @@ static int compute_postorder(struct bpf_verifier_env *env) stack_sz--; continue; } - succ_cnt = bpf_insn_successors(env->prog, top, succ); - for (s = 0; s < succ_cnt; ++s) { - if (!state[succ[s]]) { - stack[stack_sz++] = succ[s]; - state[succ[s]] |= DISCOVERED; + succ = bpf_insn_successors(env, top); + for (s = 0; s < succ->cnt; ++s) { + if (!state[succ->items[s]]) { + stack[stack_sz++] = succ->items[s]; + state[succ->items[s]] |= DISCOVERED; } } state[top] |= EXPLORED; @@ -18826,6 +19063,9 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, return regs_exact(rold, rcur, idmap) && rold->frameno == rcur->frameno; case PTR_TO_ARENA: return true; + case PTR_TO_INSN: + return (rold->off == rcur->off && range_within(rold, rcur) && + tnum_in(rold->var_off, rcur->var_off)); default: return regs_exact(rold, rcur, idmap); } @@ -19835,6 +20075,99 @@ static int process_bpf_exit_full(struct bpf_verifier_env *env, return PROCESS_BPF_EXIT; } +static int indirect_jump_min_max_index(struct bpf_verifier_env *env, + int regno, + struct bpf_map *map, + u32 *pmin_index, u32 *pmax_index) +{ + struct bpf_reg_state *reg = reg_state(env, regno); + u64 min_index, max_index; + const u32 size = 8; + + if (check_add_overflow(reg->umin_value, reg->off, &min_index) || + (min_index > (u64) U32_MAX * size)) { + verbose(env, "the sum of R%u umin_value %llu and off %u is too big\n", + regno, reg->umin_value, reg->off); + return -ERANGE; + } + if (check_add_overflow(reg->umax_value, reg->off, &max_index) || + (max_index > (u64) U32_MAX * size)) { + verbose(env, "the sum of R%u umax_value %llu and off %u is too big\n", + regno, reg->umax_value, reg->off); + return -ERANGE; + } + + min_index /= size; + max_index /= size; + + if (max_index >= map->max_entries) { + verbose(env, "R%u points to outside of jump table: [%llu,%llu] max_entries %u\n", + regno, min_index, max_index, map->max_entries); + return -EINVAL; + } + + *pmin_index = min_index; + *pmax_index = max_index; + return 0; +} + +/* gotox *dst_reg */ +static int check_indirect_jump(struct bpf_verifier_env *env, struct bpf_insn *insn) +{ + struct bpf_verifier_state *other_branch; + struct bpf_reg_state *dst_reg; + struct bpf_map *map; + u32 min_index, max_index; + int err = 0; + int n; + int i; + + dst_reg = reg_state(env, insn->dst_reg); + if (dst_reg->type != PTR_TO_INSN) { + verbose(env, "R%d has type %d, expected PTR_TO_INSN\n", + insn->dst_reg, dst_reg->type); + return -EINVAL; + } + + map = dst_reg->map_ptr; + if (verifier_bug_if(!map, env, "R%d has an empty map pointer", insn->dst_reg)) + return -EFAULT; + + if (verifier_bug_if(map->map_type != BPF_MAP_TYPE_INSN_ARRAY, env, + "R%d has incorrect map type %d", insn->dst_reg, map->map_type)) + return -EFAULT; + + err = indirect_jump_min_max_index(env, insn->dst_reg, map, &min_index, &max_index); + if (err) + return err; + + /* Ensure that the buffer is large enough */ + if (!env->gotox_tmp_buf || env->gotox_tmp_buf->cnt < max_index - min_index + 1) { + env->gotox_tmp_buf = iarray_realloc(env->gotox_tmp_buf, + max_index - min_index + 1); + if (!env->gotox_tmp_buf) + return -ENOMEM; + } + + n = copy_insn_array_uniq(map, min_index, max_index, env->gotox_tmp_buf->items); + if (n < 0) + return n; + if (n == 0) { + verbose(env, "register R%d doesn't point to any offset in map id=%d\n", + insn->dst_reg, map->id); + return -EINVAL; + } + + for (i = 0; i < n - 1; i++) { + other_branch = push_stack(env, env->gotox_tmp_buf->items[i], + env->insn_idx, env->cur_state->speculative); + if (IS_ERR(other_branch)) + return PTR_ERR(other_branch); + } + env->insn_idx = env->gotox_tmp_buf->items[n-1]; + return 0; +} + static int do_check_insn(struct bpf_verifier_env *env, bool *do_print_state) { int err; @@ -19937,6 +20270,15 @@ static int do_check_insn(struct bpf_verifier_env *env, bool *do_print_state) mark_reg_scratched(env, BPF_REG_0); } else if (opcode == BPF_JA) { + if (BPF_SRC(insn->code) == BPF_X) { + if (insn->src_reg != BPF_REG_0 || + insn->imm != 0 || insn->off != 0) { + verbose(env, "BPF_JA|BPF_X uses reserved fields\n"); + return -EINVAL; + } + return check_indirect_jump(env, insn); + } + if (BPF_SRC(insn->code) != BPF_K || insn->src_reg != BPF_REG_0 || insn->dst_reg != BPF_REG_0 || @@ -20453,6 +20795,7 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, case BPF_MAP_TYPE_QUEUE: case BPF_MAP_TYPE_STACK: case BPF_MAP_TYPE_ARENA: + case BPF_MAP_TYPE_INSN_ARRAY: break; default: verbose(env, @@ -20524,6 +20867,15 @@ static int __add_used_map(struct bpf_verifier_env *env, struct bpf_map *map) env->used_maps[env->used_map_cnt++] = map; + if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY) { + err = bpf_insn_array_init(map, env->prog); + if (err) { + verbose(env, "Failed to properly initialize insn array\n"); + return err; + } + env->insn_array_maps[env->insn_array_map_cnt++] = map; + } + return env->used_map_cnt - 1; } @@ -20770,6 +21122,33 @@ static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len } } +static void release_insn_arrays(struct bpf_verifier_env *env) +{ + int i; + + for (i = 0; i < env->insn_array_map_cnt; i++) + bpf_insn_array_release(env->insn_array_maps[i]); +} + +static void adjust_insn_arrays(struct bpf_verifier_env *env, u32 off, u32 len) +{ + int i; + + if (len == 1) + return; + + for (i = 0; i < env->insn_array_map_cnt; i++) + bpf_insn_array_adjust(env->insn_array_maps[i], off, len); +} + +static void adjust_insn_arrays_after_remove(struct bpf_verifier_env *env, u32 off, u32 len) +{ + int i; + + for (i = 0; i < env->insn_array_map_cnt; i++) + bpf_insn_array_adjust_after_remove(env->insn_array_maps[i], off, len); +} + static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len) { struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab; @@ -20811,6 +21190,7 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of } adjust_insn_aux_data(env, new_prog, off, len); adjust_subprog_starts(env, off, len); + adjust_insn_arrays(env, off, len); adjust_poke_descs(new_prog, off, len); return new_prog; } @@ -20973,6 +21353,27 @@ static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off, return 0; } +/* + * Clean up dynamically allocated fields of aux data for instructions [start, ...] + */ +static void clear_insn_aux_data(struct bpf_verifier_env *env, int start, int len) +{ + struct bpf_insn_aux_data *aux_data = env->insn_aux_data; + struct bpf_insn *insns = env->prog->insnsi; + int end = start + len; + int i; + + for (i = start; i < end; i++) { + if (insn_is_gotox(&insns[i])) { + kvfree(aux_data[i].jt); + aux_data[i].jt = NULL; + } + + if (bpf_is_ldimm64(&insns[i])) + i++; + } +} + static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt) { struct bpf_insn_aux_data *aux_data = env->insn_aux_data; @@ -20994,6 +21395,10 @@ static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt) if (err) return err; + adjust_insn_arrays_after_remove(env, off, cnt); + + clear_insn_aux_data(env, off, cnt); + memmove(aux_data + off, aux_data + off + cnt, sizeof(*aux_data) * (orig_prog_len - off - cnt)); @@ -21533,6 +21938,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) struct bpf_insn *insn; void *old_bpf_func; int err, num_exentries; + int old_len, subprog_start_adjustment = 0; if (env->subprog_cnt <= 1) return 0; @@ -21607,6 +22013,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) func[i]->aux->func_idx = i; /* Below members will be freed only at prog->aux */ func[i]->aux->btf = prog->aux->btf; + func[i]->aux->subprog_start = subprog_start + subprog_start_adjustment; func[i]->aux->func_info = prog->aux->func_info; func[i]->aux->func_info_cnt = prog->aux->func_info_cnt; func[i]->aux->poke_tab = prog->aux->poke_tab; @@ -21636,6 +22043,8 @@ static int jit_subprogs(struct bpf_verifier_env *env) func[i]->aux->jited_linfo = prog->aux->jited_linfo; func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx; func[i]->aux->arena = prog->aux->arena; + func[i]->aux->used_maps = env->used_maps; + func[i]->aux->used_map_cnt = env->used_map_cnt; num_exentries = 0; insn = func[i]->insnsi; for (j = 0; j < func[i]->len; j++, insn++) { @@ -21660,7 +22069,15 @@ static int jit_subprogs(struct bpf_verifier_env *env) func[i]->aux->might_sleep = env->subprog_info[i].might_sleep; if (!i) func[i]->aux->exception_boundary = env->seen_exception; + + /* + * To properly pass the absolute subprog start to jit + * all instruction adjustments should be accumulated + */ + old_len = func[i]->len; func[i] = bpf_int_jit_compile(func[i]); + subprog_start_adjustment += func[i]->len - old_len; + if (!func[i]->jited) { err = -ENOTSUPP; goto out_free; @@ -24312,14 +24729,13 @@ static int compute_live_registers(struct bpf_verifier_env *env) for (i = 0; i < env->cfg.cur_postorder; ++i) { int insn_idx = env->cfg.insn_postorder[i]; struct insn_live_regs *live = &state[insn_idx]; - int succ_num; - u32 succ[2]; + struct bpf_iarray *succ; u16 new_out = 0; u16 new_in = 0; - succ_num = bpf_insn_successors(env->prog, insn_idx, succ); - for (int s = 0; s < succ_num; ++s) - new_out |= state[succ[s]].in; + succ = bpf_insn_successors(env, insn_idx); + for (int s = 0; s < succ->cnt; ++s) + new_out |= state[succ->items[s]].in; new_in = (new_out & ~live->def) | live->use; if (new_out != live->out || new_in != live->in) { live->in = new_in; @@ -24372,11 +24788,11 @@ static int compute_scc(struct bpf_verifier_env *env) const u32 insn_cnt = env->prog->len; int stack_sz, dfs_sz, err = 0; u32 *stack, *pre, *low, *dfs; - u32 succ_cnt, i, j, t, w; + u32 i, j, t, w; u32 next_preorder_num; u32 next_scc_id; bool assign_scc; - u32 succ[2]; + struct bpf_iarray *succ; next_preorder_num = 1; next_scc_id = 1; @@ -24483,12 +24899,12 @@ static int compute_scc(struct bpf_verifier_env *env) stack[stack_sz++] = w; } /* Visit 'w' successors */ - succ_cnt = bpf_insn_successors(env->prog, w, succ); - for (j = 0; j < succ_cnt; ++j) { - if (pre[succ[j]]) { - low[w] = min(low[w], low[succ[j]]); + succ = bpf_insn_successors(env, w); + for (j = 0; j < succ->cnt; ++j) { + if (pre[succ->items[j]]) { + low[w] = min(low[w], low[succ->items[j]]); } else { - dfs[dfs_sz++] = succ[j]; + dfs[dfs_sz++] = succ->items[j]; goto dfs_continue; } } @@ -24505,8 +24921,8 @@ static int compute_scc(struct bpf_verifier_env *env) * or if component has a self reference. */ assign_scc = stack[stack_sz - 1] != w; - for (j = 0; j < succ_cnt; ++j) { - if (succ[j] == w) { + for (j = 0; j < succ->cnt; ++j) { + if (succ->items[j] == w) { assign_scc = true; break; } @@ -24568,6 +24984,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 goto err_free_env; for (i = 0; i < len; i++) env->insn_aux_data[i].orig_idx = i; + env->succ = iarray_realloc(NULL, 2); + if (!env->succ) + goto err_free_env; env->prog = *prog; env->ops = bpf_verifier_ops[env->prog->type]; @@ -24791,6 +25210,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 adjust_btf_func(env); err_release_maps: + if (ret) + release_insn_arrays(env); if (!env->prog->aux->used_maps) /* if we didn't copy map pointers into bpf_prog_info, release * them now. Otherwise free_used_maps() will release them. @@ -24811,11 +25232,14 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 err_unlock: if (!is_priv) mutex_unlock(&bpf_verifier_lock); + clear_insn_aux_data(env, 0, env->prog->len); vfree(env->insn_aux_data); err_free_env: bpf_stack_liveness_free(env); kvfree(env->cfg.insn_postorder); kvfree(env->scc_info); + kvfree(env->succ); + kvfree(env->gotox_tmp_buf); kvfree(env); return ret; } diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 252e4c538edb..1af3305ea2b2 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -55,7 +55,8 @@ MAP COMMANDS | | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash** | | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage** | | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage** -| | **task_storage** | **bloom_filter** | **user_ringbuf** | **cgrp_storage** | **arena** } +| | **task_storage** | **bloom_filter** | **user_ringbuf** | **cgrp_storage** | **arena** +| | **insn_array** } DESCRIPTION =========== diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index c9de44a45778..7ebf7dbcfba4 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -1477,7 +1477,8 @@ static int do_help(int argc, char **argv) " devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n" " cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n" " queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n" - " task_storage | bloom_filter | user_ringbuf | cgrp_storage | arena }\n" + " task_storage | bloom_filter | user_ringbuf | cgrp_storage | arena |\n" + " insn_array }\n" " " HELP_SPEC_OPTIONS " |\n" " {-f|--bpffs} | {-n|--nomount} }\n" "", diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 6829936d33f5..805d441363cd 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1026,6 +1026,7 @@ enum bpf_map_type { BPF_MAP_TYPE_USER_RINGBUF, BPF_MAP_TYPE_CGRP_STORAGE, BPF_MAP_TYPE_ARENA, + BPF_MAP_TYPE_INSN_ARRAY, __MAX_BPF_MAP_TYPE }; @@ -7645,4 +7646,24 @@ enum bpf_kfunc_flags { BPF_F_PAD_ZEROS = (1ULL << 0), }; +/* + * Values of a BPF_MAP_TYPE_INSN_ARRAY entry must be of this type. + * + * Before the map is used the orig_off field should point to an + * instruction inside the program being loaded. The other fields + * must be set to 0. + * + * After the program is loaded, the xlated_off will be adjusted + * by the verifier to point to the index of the original instruction + * in the xlated program. If the instruction is deleted, it will + * be set to (u32)-1. The jitted_off will be set to the corresponding + * offset in the jitted image of the program. + */ +struct bpf_insn_array_value { + __u32 orig_off; + __u32 xlated_off; + __u32 jitted_off; + __u32 :32; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index dd3b2f57082d..ee44bc49a3ba 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -190,6 +190,7 @@ static const char * const map_type_name[] = { [BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf", [BPF_MAP_TYPE_CGRP_STORAGE] = "cgrp_storage", [BPF_MAP_TYPE_ARENA] = "arena", + [BPF_MAP_TYPE_INSN_ARRAY] = "insn_array", }; static const char * const prog_type_name[] = { @@ -369,6 +370,7 @@ enum reloc_type { RELO_EXTERN_CALL, RELO_SUBPROG_ADDR, RELO_CORE, + RELO_INSN_ARRAY, }; struct reloc_desc { @@ -379,7 +381,16 @@ struct reloc_desc { struct { int map_idx; int sym_off; - int ext_idx; + /* + * The following two fields can be unionized, as the + * ext_idx field is used for extern symbols, and the + * sym_size is used for jump tables, which are never + * extern + */ + union { + int ext_idx; + int sym_size; + }; }; }; }; @@ -421,6 +432,11 @@ struct bpf_sec_def { libbpf_prog_attach_fn_t prog_attach_fn; }; +struct bpf_light_subprog { + __u32 sec_insn_off; + __u32 sub_insn_off; +}; + /* * bpf_prog should be a better name but it has been used in * linux/filter.h. @@ -494,6 +510,9 @@ struct bpf_program { __u32 line_info_cnt; __u32 prog_flags; __u8 hash[SHA256_DIGEST_LENGTH]; + + struct bpf_light_subprog *subprogs; + __u32 subprog_cnt; }; struct bpf_struct_ops { @@ -523,6 +542,7 @@ struct bpf_struct_ops { #define STRUCT_OPS_SEC ".struct_ops" #define STRUCT_OPS_LINK_SEC ".struct_ops.link" #define ARENA_SEC ".addr_space.1" +#define JUMPTABLES_SEC ".jumptables" enum libbpf_map_type { LIBBPF_MAP_UNSPEC, @@ -667,6 +687,7 @@ struct elf_state { int symbols_shndx; bool has_st_ops; int arena_data_shndx; + int jumptables_data_shndx; }; struct usdt_manager; @@ -738,6 +759,16 @@ struct bpf_object { void *arena_data; size_t arena_data_sz; + void *jumptables_data; + size_t jumptables_data_sz; + + struct { + struct bpf_program *prog; + int sym_off; + int fd; + } *jumptable_maps; + size_t jumptable_map_cnt; + struct kern_feature_cache *feat_cache; char *token_path; int token_fd; @@ -764,6 +795,7 @@ void bpf_program__unload(struct bpf_program *prog) zfree(&prog->func_info); zfree(&prog->line_info); + zfree(&prog->subprogs); } static void bpf_program__exit(struct bpf_program *prog) @@ -3942,6 +3974,13 @@ static int bpf_object__elf_collect(struct bpf_object *obj) } else if (strcmp(name, ARENA_SEC) == 0) { obj->efile.arena_data = data; obj->efile.arena_data_shndx = idx; + } else if (strcmp(name, JUMPTABLES_SEC) == 0) { + obj->jumptables_data = malloc(data->d_size); + if (!obj->jumptables_data) + return -ENOMEM; + memcpy(obj->jumptables_data, data->d_buf, data->d_size); + obj->jumptables_data_sz = data->d_size; + obj->efile.jumptables_data_shndx = idx; } else { pr_info("elf: skipping unrecognized data section(%d) %s\n", idx, name); @@ -4634,6 +4673,16 @@ static int bpf_program__record_reloc(struct bpf_program *prog, return 0; } + /* jump table data relocation */ + if (shdr_idx == obj->efile.jumptables_data_shndx) { + reloc_desc->type = RELO_INSN_ARRAY; + reloc_desc->insn_idx = insn_idx; + reloc_desc->map_idx = -1; + reloc_desc->sym_off = sym->st_value; + reloc_desc->sym_size = sym->st_size; + return 0; + } + /* generic map reference relocation */ if (type == LIBBPF_MAP_UNSPEC) { if (!bpf_object__shndx_is_maps(obj, shdr_idx)) { @@ -6144,6 +6193,144 @@ static void poison_kfunc_call(struct bpf_program *prog, int relo_idx, insn->imm = POISON_CALL_KFUNC_BASE + ext_idx; } +static int find_jt_map(struct bpf_object *obj, struct bpf_program *prog, int sym_off) +{ + size_t i; + + for (i = 0; i < obj->jumptable_map_cnt; i++) { + /* + * This might happen that same offset is used for two different + * programs (as jump tables can be the same). However, for + * different programs different maps should be created. + */ + if (obj->jumptable_maps[i].sym_off == sym_off && + obj->jumptable_maps[i].prog == prog) + return obj->jumptable_maps[i].fd; + } + + return -ENOENT; +} + +static int add_jt_map(struct bpf_object *obj, struct bpf_program *prog, int sym_off, int map_fd) +{ + size_t new_cnt = obj->jumptable_map_cnt + 1; + size_t size = sizeof(obj->jumptable_maps[0]); + void *tmp; + + tmp = libbpf_reallocarray(obj->jumptable_maps, new_cnt, size); + if (!tmp) + return -ENOMEM; + + obj->jumptable_maps = tmp; + obj->jumptable_maps[new_cnt - 1].prog = prog; + obj->jumptable_maps[new_cnt - 1].sym_off = sym_off; + obj->jumptable_maps[new_cnt - 1].fd = map_fd; + obj->jumptable_map_cnt = new_cnt; + + return 0; +} + +static int create_jt_map(struct bpf_object *obj, struct bpf_program *prog, + int sym_off, int jt_size, int adjust_off) +{ + const __u32 jt_entry_size = 8; + const __u32 max_entries = jt_size / jt_entry_size; + const __u32 value_size = sizeof(struct bpf_insn_array_value); + struct bpf_insn_array_value val = {}; + int map_fd, err; + __u64 insn_off; + __u64 *jt; + __u32 i; + + map_fd = find_jt_map(obj, prog, sym_off); + if (map_fd >= 0) + return map_fd; + + if (sym_off % jt_entry_size) { + pr_warn("jumptable start %d should be multiple of %u\n", + sym_off, jt_entry_size); + return -EINVAL; + } + + if (jt_size % jt_entry_size) { + pr_warn("jumptable size %d should be multiple of %u\n", + jt_size, jt_entry_size); + return -EINVAL; + } + + map_fd = bpf_map_create(BPF_MAP_TYPE_INSN_ARRAY, ".jumptables", + 4, value_size, max_entries, NULL); + if (map_fd < 0) + return map_fd; + + if (!obj->jumptables_data) { + pr_warn("map '.jumptables': ELF file is missing jump table data\n"); + err = -EINVAL; + goto err_close; + } + if (sym_off + jt_size > obj->jumptables_data_sz) { + pr_warn("jumptables_data size is %zd, trying to access %d\n", + obj->jumptables_data_sz, sym_off + jt_size); + err = -EINVAL; + goto err_close; + } + + jt = (__u64 *)(obj->jumptables_data + sym_off); + for (i = 0; i < max_entries; i++) { + /* + * LLVM-generated jump tables contain u64 records, however + * should contain values that fit in u32. + * The adjust_off provided by the caller adjusts the offset to + * be relative to the beginning of the main function + */ + insn_off = jt[i]/sizeof(struct bpf_insn) + adjust_off; + if (insn_off > UINT32_MAX) { + pr_warn("invalid jump table value %llx at offset %d (adjust_off %d)\n", + jt[i], sym_off + i, adjust_off); + err = -EINVAL; + goto err_close; + } + + val.orig_off = insn_off; + err = bpf_map_update_elem(map_fd, &i, &val, 0); + if (err) + goto err_close; + } + + err = bpf_map_freeze(map_fd); + if (err) + goto err_close; + + err = add_jt_map(obj, prog, sym_off, map_fd); + if (err) + goto err_close; + + return map_fd; + +err_close: + close(map_fd); + return err; +} + +/* + * In LLVM the .jumptables section contains jump tables entries relative to the + * section start. The BPF kernel-side code expects jump table offsets relative + * to the beginning of the program (passed in bpf(BPF_PROG_LOAD)). This helper + * computes a delta to be added when creating a map. + */ +static int jt_adjust_off(struct bpf_program *prog, int insn_idx) +{ + int i; + + for (i = prog->subprog_cnt - 1; i >= 0; i--) { + if (insn_idx >= prog->subprogs[i].sub_insn_off) + return prog->subprogs[i].sub_insn_off - prog->subprogs[i].sec_insn_off; + } + + return -prog->sec_insn_off; +} + + /* Relocate data references within program code: * - map references; * - global variable references; @@ -6235,6 +6422,21 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) case RELO_CORE: /* will be handled by bpf_program_record_relos() */ break; + case RELO_INSN_ARRAY: { + int map_fd; + + map_fd = create_jt_map(obj, prog, relo->sym_off, relo->sym_size, + jt_adjust_off(prog, relo->insn_idx)); + if (map_fd < 0) { + pr_warn("prog '%s': relo #%d: can't create jump table: sym_off %u\n", + prog->name, i, relo->sym_off); + return map_fd; + } + insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; + insn->imm = map_fd; + insn->off = 0; + } + break; default: pr_warn("prog '%s': relo #%d: bad relo type %d\n", prog->name, i, relo->type); @@ -6432,36 +6634,63 @@ static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_progra return 0; } +static int save_subprog_offsets(struct bpf_program *main_prog, struct bpf_program *subprog) +{ + size_t size = sizeof(main_prog->subprogs[0]); + int new_cnt = main_prog->subprog_cnt + 1; + void *tmp; + + tmp = libbpf_reallocarray(main_prog->subprogs, new_cnt, size); + if (!tmp) + return -ENOMEM; + + main_prog->subprogs = tmp; + main_prog->subprogs[new_cnt - 1].sec_insn_off = subprog->sec_insn_off; + main_prog->subprogs[new_cnt - 1].sub_insn_off = subprog->sub_insn_off; + main_prog->subprog_cnt = new_cnt; + + return 0; +} + static int bpf_object__append_subprog_code(struct bpf_object *obj, struct bpf_program *main_prog, struct bpf_program *subprog) { - struct bpf_insn *insns; - size_t new_cnt; - int err; + struct bpf_insn *insns; + size_t new_cnt; + int err; - subprog->sub_insn_off = main_prog->insns_cnt; + subprog->sub_insn_off = main_prog->insns_cnt; - new_cnt = main_prog->insns_cnt + subprog->insns_cnt; - insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns)); - if (!insns) { - pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name); - return -ENOMEM; - } - main_prog->insns = insns; - main_prog->insns_cnt = new_cnt; + new_cnt = main_prog->insns_cnt + subprog->insns_cnt; + insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns)); + if (!insns) { + pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name); + return -ENOMEM; + } + main_prog->insns = insns; + main_prog->insns_cnt = new_cnt; - memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns, - subprog->insns_cnt * sizeof(*insns)); + memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns, + subprog->insns_cnt * sizeof(*insns)); - pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n", - main_prog->name, subprog->insns_cnt, subprog->name); + pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n", + main_prog->name, subprog->insns_cnt, subprog->name); - /* The subprog insns are now appended. Append its relos too. */ - err = append_subprog_relos(main_prog, subprog); - if (err) - return err; - return 0; + /* The subprog insns are now appended. Append its relos too. */ + err = append_subprog_relos(main_prog, subprog); + if (err) + return err; + + /* Save subprogram offsets */ + err = save_subprog_offsets(main_prog, subprog); + if (err) { + pr_warn("prog '%s': failed to add subprog offsets: %s\n", + main_prog->name, errstr(err)); + return err; + } + + return 0; } static int @@ -9228,6 +9457,15 @@ void bpf_object__close(struct bpf_object *obj) zfree(&obj->arena_data); + zfree(&obj->jumptables_data); + obj->jumptables_data_sz = 0; + + if (obj->jumptable_maps && obj->jumptable_map_cnt) { + for (i = 0; i < obj->jumptable_map_cnt; i++) + close(obj->jumptable_maps[i].fd); + } + zfree(&obj->jumptable_maps); + free(obj); } diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 9dfbe7750f56..bccf4bb747e1 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -364,6 +364,10 @@ static int probe_map_create(enum bpf_map_type map_type) case BPF_MAP_TYPE_SOCKHASH: case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: break; + case BPF_MAP_TYPE_INSN_ARRAY: + key_size = sizeof(__u32); + value_size = sizeof(struct bpf_insn_array_value); + break; case BPF_MAP_TYPE_UNSPEC: default: return -EOPNOTSUPP; diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 56ae77047bc3..3defd4bc9154 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -27,6 +27,8 @@ #include "strset.h" #define BTF_EXTERN_SEC ".extern" +#define JUMPTABLES_SEC ".jumptables" +#define JUMPTABLES_REL_SEC ".rel.jumptables" struct src_sec { const char *sec_name; @@ -2025,6 +2027,9 @@ static int linker_append_elf_sym(struct bpf_linker *linker, struct src_obj *obj, obj->sym_map[src_sym_idx] = dst_sec->sec_sym_idx; return 0; } + + if (strcmp(src_sec->sec_name, JUMPTABLES_SEC) == 0) + goto add_sym; } if (sym_bind == STB_LOCAL) @@ -2271,8 +2276,9 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob insn->imm += sec->dst_off / sizeof(struct bpf_insn); else insn->imm += sec->dst_off; - } else { - pr_warn("relocation against STT_SECTION in non-exec section is not supported!\n"); + } else if (strcmp(src_sec->sec_name, JUMPTABLES_REL_SEC) != 0) { + pr_warn("relocation against STT_SECTION in section %s is not supported!\n", + src_sec->sec_name); return -EINVAL; } } diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 7437c325179e..a897cb31fe6d 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -454,7 +454,9 @@ BPF_CFLAGS = -g -Wall -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ -I$(abspath $(OUTPUT)/../usr/include) \ -std=gnu11 \ -fno-strict-aliasing \ - -Wno-compare-distinct-pointer-types + -Wno-compare-distinct-pointer-types \ + -Wno-initializer-overrides \ + # # TODO: enable me -Wsign-compare CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c b/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c new file mode 100644 index 000000000000..4394654ac75a --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c @@ -0,0 +1,185 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "bpf_gotox.skel.h" + +/* Disable tests for now, as CI runs with LLVM-20 */ +#if 0 +static void __test_run(struct bpf_program *prog, void *ctx_in, size_t ctx_size_in) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts, + .ctx_in = ctx_in, + .ctx_size_in = ctx_size_in, + ); + int err, prog_fd; + + prog_fd = bpf_program__fd(prog); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run_opts err"); +} + +static void check_simple(struct bpf_gotox *skel, + struct bpf_program *prog, + __u64 ctx_in, + __u64 expected) +{ + skel->bss->ret_user = 0; + + __test_run(prog, &ctx_in, sizeof(ctx_in)); + + if (!ASSERT_EQ(skel->bss->ret_user, expected, "skel->bss->ret_user")) + return; +} + +static void check_simple_fentry(struct bpf_gotox *skel, + struct bpf_program *prog, + __u64 ctx_in, + __u64 expected) +{ + skel->bss->in_user = ctx_in; + skel->bss->ret_user = 0; + + /* trigger */ + usleep(1); + + if (!ASSERT_EQ(skel->bss->ret_user, expected, "skel->bss->ret_user")) + return; +} + +/* validate that for two loads of the same jump table libbpf generates only one map */ +static void check_one_map_two_jumps(struct bpf_gotox *skel) +{ + struct bpf_prog_info prog_info; + struct bpf_map_info map_info; + __u32 len; + __u32 map_ids[16]; + int prog_fd, map_fd; + int ret; + int i; + bool seen = false; + + memset(&prog_info, 0, sizeof(prog_info)); + prog_info.map_ids = (long)map_ids; + prog_info.nr_map_ids = ARRAY_SIZE(map_ids); + prog_fd = bpf_program__fd(skel->progs.one_map_two_jumps); + if (!ASSERT_GE(prog_fd, 0, "bpf_program__fd(one_map_two_jumps)")) + return; + + len = sizeof(prog_info); + ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &len); + if (!ASSERT_OK(ret, "bpf_obj_get_info_by_fd(prog_fd)")) + return; + + for (i = 0; i < prog_info.nr_map_ids; i++) { + map_fd = bpf_map_get_fd_by_id(map_ids[i]); + if (!ASSERT_GE(map_fd, 0, "bpf_program__fd(one_map_two_jumps)")) + return; + + len = sizeof(map_info); + memset(&map_info, 0, len); + ret = bpf_obj_get_info_by_fd(map_fd, &map_info, &len); + if (!ASSERT_OK(ret, "bpf_obj_get_info_by_fd(map_fd)")) { + close(map_fd); + return; + } + + if (map_info.type == BPF_MAP_TYPE_INSN_ARRAY) { + if (!ASSERT_EQ(seen, false, "more than one INSN_ARRAY map")) { + close(map_fd); + return; + } + seen = true; + } + close(map_fd); + } + + ASSERT_EQ(seen, true, "no INSN_ARRAY map"); +} + +static void check_gotox_skel(struct bpf_gotox *skel) +{ + int i; + __u64 in[] = {0, 1, 2, 3, 4, 5, 77}; + __u64 out[] = {2, 3, 4, 5, 7, 19, 19}; + __u64 out2[] = {103, 104, 107, 205, 115, 1019, 1019}; + __u64 in3[] = {0, 11, 27, 31, 22, 45, 99}; + __u64 out3[] = {2, 3, 4, 5, 19, 19, 19}; + __u64 in4[] = {0, 1, 2, 3, 4, 5, 77}; + __u64 out4[] = {12, 15, 7 , 15, 12, 15, 15}; + + for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple(skel, skel->progs.simple_test, in[i], out[i]); + + for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple(skel, skel->progs.simple_test2, in[i], out[i]); + + for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple(skel, skel->progs.two_switches, in[i], out2[i]); + + if (0) for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple(skel, skel->progs.big_jump_table, in3[i], out3[i]); + + if (0) for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple(skel, skel->progs.one_jump_two_maps, in4[i], out4[i]); + + for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple(skel, skel->progs.use_static_global1, in[i], out[i]); + + for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple(skel, skel->progs.use_static_global2, in[i], out[i]); + + for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple(skel, skel->progs.use_nonstatic_global1, in[i], out[i]); + + for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple(skel, skel->progs.use_nonstatic_global2, in[i], out[i]); + + bpf_program__attach(skel->progs.simple_test_other_sec); + for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple_fentry(skel, skel->progs.simple_test_other_sec, in[i], out[i]); + + bpf_program__attach(skel->progs.use_static_global_other_sec); + for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple_fentry(skel, skel->progs.use_static_global_other_sec, in[i], out[i]); + + bpf_program__attach(skel->progs.use_nonstatic_global_other_sec); + for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple_fentry(skel, skel->progs.use_nonstatic_global_other_sec, in[i], out[i]); + + if (0) check_one_map_two_jumps(skel); +} + +void test_bpf_gotox(void) +{ + struct bpf_gotox *skel; + int ret; + + skel = bpf_gotox__open(); + if (!ASSERT_NEQ(skel, NULL, "bpf_gotox__open")) + return; + + ret = bpf_gotox__load(skel); + if (!ASSERT_OK(ret, "bpf_gotox__load")) + return; + + check_gotox_skel(skel); + + bpf_gotox__destroy(skel); +} +#else +void test_bpf_gotox(void) +{ +} +#endif diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_insn_array.c b/tools/testing/selftests/bpf/prog_tests/bpf_insn_array.c new file mode 100644 index 000000000000..bab55ae7687e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bpf_insn_array.c @@ -0,0 +1,499 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include + +#ifdef __x86_64__ +static int map_create(__u32 map_type, __u32 max_entries) +{ + const char *map_name = "insn_array"; + __u32 key_size = 4; + __u32 value_size = sizeof(struct bpf_insn_array_value); + + return bpf_map_create(map_type, map_name, key_size, value_size, max_entries, NULL); +} + +static int prog_load(struct bpf_insn *insns, __u32 insn_cnt, int *fd_array, __u32 fd_array_cnt) +{ + LIBBPF_OPTS(bpf_prog_load_opts, opts); + + opts.fd_array = fd_array; + opts.fd_array_cnt = fd_array_cnt; + + return bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, &opts); +} + +static void __check_success(struct bpf_insn *insns, __u32 insn_cnt, __u32 *map_in, __u32 *map_out) +{ + struct bpf_insn_array_value val = {}; + int prog_fd = -1, map_fd, i; + + map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, insn_cnt); + if (!ASSERT_GE(map_fd, 0, "map_create")) + return; + + for (i = 0; i < insn_cnt; i++) { + val.orig_off = map_in[i]; + if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &val, 0), 0, "bpf_map_update_elem")) + goto cleanup; + } + + if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze")) + goto cleanup; + + prog_fd = prog_load(insns, insn_cnt, &map_fd, 1); + if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)")) + goto cleanup; + + for (i = 0; i < insn_cnt; i++) { + char buf[64]; + + if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &i, &val), 0, "bpf_map_lookup_elem")) + goto cleanup; + + snprintf(buf, sizeof(buf), "val.xlated_off should be equal map_out[%d]", i); + ASSERT_EQ(val.xlated_off, map_out[i], buf); + } + +cleanup: + close(prog_fd); + close(map_fd); +} + +/* + * Load a program, which will not be anyhow mangled by the verifier. Add an + * insn_array map pointing to every instruction. Check that it hasn't changed + * after the program load. + */ +static void check_one_to_one_mapping(void) +{ + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 4), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + __u32 map_in[] = {0, 1, 2, 3, 4, 5}; + __u32 map_out[] = {0, 1, 2, 3, 4, 5}; + + __check_success(insns, ARRAY_SIZE(insns), map_in, map_out); +} + +/* + * Load a program with two patches (get jiffies, for simplicity). Add an + * insn_array map pointing to every instruction. Check how it was changed + * after the program load. + */ +static void check_simple(void) +{ + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + __u32 map_in[] = {0, 1, 2, 3, 4, 5}; + __u32 map_out[] = {0, 1, 4, 5, 8, 9}; + + __check_success(insns, ARRAY_SIZE(insns), map_in, map_out); +} + +/* + * Verifier can delete code in two cases: nops & dead code. From insn + * array's point of view, the two cases are the same, so test using + * the simplest method: by loading some nops + */ +static void check_deletions(void) +{ + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + __u32 map_in[] = {0, 1, 2, 3, 4, 5}; + __u32 map_out[] = {0, -1, 1, -1, 2, 3}; + + __check_success(insns, ARRAY_SIZE(insns), map_in, map_out); +} + +/* + * Same test as check_deletions, but also add code which adds instructions + */ +static void check_deletions_with_functions(void) +{ + struct bpf_insn insns[] = { + BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */ + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }; + __u32 map_in[] = { 0, 1, 2, 3, 4, 5, /* func */ 6, 7, 8, 9, 10}; + __u32 map_out[] = {-1, 0, -1, 3, 4, 5, /* func */ -1, 6, -1, 9, 10}; + + __check_success(insns, ARRAY_SIZE(insns), map_in, map_out); +} + +/* + * Try to load a program with a map which points to outside of the program + */ +static void check_out_of_bounds_index(void) +{ + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 4), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int prog_fd, map_fd; + struct bpf_insn_array_value val = {}; + int key; + + map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, 1); + if (!ASSERT_GE(map_fd, 0, "map_create")) + return; + + key = 0; + val.orig_off = ARRAY_SIZE(insns); /* too big */ + if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &key, &val, 0), 0, "bpf_map_update_elem")) + goto cleanup; + + if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze")) + goto cleanup; + + prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1); + if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != -EINVAL)")) { + close(prog_fd); + goto cleanup; + } + +cleanup: + close(map_fd); +} + +/* + * Try to load a program with a map which points to the middle of 16-bit insn + */ +static void check_mid_insn_index(void) +{ + struct bpf_insn insns[] = { + BPF_LD_IMM64(BPF_REG_0, 0), /* 2 x 8 */ + BPF_EXIT_INSN(), + }; + int prog_fd, map_fd; + struct bpf_insn_array_value val = {}; + int key; + + map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, 1); + if (!ASSERT_GE(map_fd, 0, "map_create")) + return; + + key = 0; + val.orig_off = 1; /* middle of 16-byte instruction */ + if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &key, &val, 0), 0, "bpf_map_update_elem")) + goto cleanup; + + if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze")) + goto cleanup; + + prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1); + if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != -EINVAL)")) { + close(prog_fd); + goto cleanup; + } + +cleanup: + close(map_fd); +} + +static void check_incorrect_index(void) +{ + check_out_of_bounds_index(); + check_mid_insn_index(); +} + +static int set_bpf_jit_harden(char *level) +{ + char old_level; + int err = -1; + int fd = -1; + + fd = open("/proc/sys/net/core/bpf_jit_harden", O_RDWR | O_NONBLOCK); + if (fd < 0) { + ASSERT_FAIL("open .../bpf_jit_harden returned %d (errno=%d)", fd, errno); + return -1; + } + + err = read(fd, &old_level, 1); + if (err != 1) { + ASSERT_FAIL("read from .../bpf_jit_harden returned %d (errno=%d)", err, errno); + err = -1; + goto end; + } + + lseek(fd, 0, SEEK_SET); + + err = write(fd, level, 1); + if (err != 1) { + ASSERT_FAIL("write to .../bpf_jit_harden returned %d (errno=%d)", err, errno); + err = -1; + goto end; + } + + err = 0; + *level = old_level; +end: + if (fd >= 0) + close(fd); + return err; +} + +static void check_blindness(void) +{ + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 4), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }; + int prog_fd = -1, map_fd; + struct bpf_insn_array_value val = {}; + char bpf_jit_harden = '@'; /* non-exizsting value */ + int i; + + map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, ARRAY_SIZE(insns)); + if (!ASSERT_GE(map_fd, 0, "map_create")) + return; + + for (i = 0; i < ARRAY_SIZE(insns); i++) { + val.orig_off = i; + if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &val, 0), 0, "bpf_map_update_elem")) + goto cleanup; + } + + if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze")) + goto cleanup; + + bpf_jit_harden = '2'; + if (set_bpf_jit_harden(&bpf_jit_harden)) { + bpf_jit_harden = '@'; /* open, read or write failed => no write was done */ + goto cleanup; + } + + prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1); + if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)")) + goto cleanup; + + for (i = 0; i < ARRAY_SIZE(insns); i++) { + char fmt[32]; + + if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &i, &val), 0, "bpf_map_lookup_elem")) + goto cleanup; + + snprintf(fmt, sizeof(fmt), "val should be equal 3*%d", i); + ASSERT_EQ(val.xlated_off, i * 3, fmt); + } + +cleanup: + /* restore the old one */ + if (bpf_jit_harden != '@') + set_bpf_jit_harden(&bpf_jit_harden); + + close(prog_fd); + close(map_fd); +} + +/* Once map was initialized, it should be frozen */ +static void check_load_unfrozen_map(void) +{ + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int prog_fd = -1, map_fd; + struct bpf_insn_array_value val = {}; + int i; + + map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, ARRAY_SIZE(insns)); + if (!ASSERT_GE(map_fd, 0, "map_create")) + return; + + for (i = 0; i < ARRAY_SIZE(insns); i++) { + val.orig_off = i; + if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &val, 0), 0, "bpf_map_update_elem")) + goto cleanup; + } + + prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1); + if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != -EINVAL)")) + goto cleanup; + + /* correctness: now freeze the map, the program should load fine */ + + if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze")) + goto cleanup; + + prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1); + if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)")) + goto cleanup; + + for (i = 0; i < ARRAY_SIZE(insns); i++) { + if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &i, &val), 0, "bpf_map_lookup_elem")) + goto cleanup; + + ASSERT_EQ(val.xlated_off, i, "val should be equal i"); + } + +cleanup: + close(prog_fd); + close(map_fd); +} + +/* Map can be used only by one BPF program */ +static void check_no_map_reuse(void) +{ + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int prog_fd = -1, map_fd, extra_fd = -1; + struct bpf_insn_array_value val = {}; + int i; + + map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, ARRAY_SIZE(insns)); + if (!ASSERT_GE(map_fd, 0, "map_create")) + return; + + for (i = 0; i < ARRAY_SIZE(insns); i++) { + val.orig_off = i; + if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &val, 0), 0, "bpf_map_update_elem")) + goto cleanup; + } + + if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze")) + goto cleanup; + + prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1); + if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)")) + goto cleanup; + + for (i = 0; i < ARRAY_SIZE(insns); i++) { + if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &i, &val), 0, "bpf_map_lookup_elem")) + goto cleanup; + + ASSERT_EQ(val.xlated_off, i, "val should be equal i"); + } + + extra_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1); + if (!ASSERT_EQ(extra_fd, -EBUSY, "program should have been rejected (extra_fd != -EBUSY)")) + goto cleanup; + + /* correctness: check that prog is still loadable without fd_array */ + extra_fd = prog_load(insns, ARRAY_SIZE(insns), NULL, 0); + if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD): expected no error")) + goto cleanup; + +cleanup: + close(extra_fd); + close(prog_fd); + close(map_fd); +} + +static void check_bpf_no_lookup(void) +{ + struct bpf_insn insns[] = { + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }; + int prog_fd = -1, map_fd; + + map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, 1); + if (!ASSERT_GE(map_fd, 0, "map_create")) + return; + + insns[0].imm = map_fd; + + if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze")) + goto cleanup; + + prog_fd = prog_load(insns, ARRAY_SIZE(insns), NULL, 0); + if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != -EINVAL)")) + goto cleanup; + + /* correctness: check that prog is still loadable with normal map */ + close(map_fd); + map_fd = map_create(BPF_MAP_TYPE_ARRAY, 1); + insns[0].imm = map_fd; + prog_fd = prog_load(insns, ARRAY_SIZE(insns), NULL, 0); + if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)")) + goto cleanup; + +cleanup: + close(prog_fd); + close(map_fd); +} + +static void check_bpf_side(void) +{ + check_bpf_no_lookup(); +} + +void test_bpf_insn_array(void) +{ + /* Test if offsets are adjusted properly */ + + if (test__start_subtest("one2one")) + check_one_to_one_mapping(); + + if (test__start_subtest("simple")) + check_simple(); + + if (test__start_subtest("deletions")) + check_deletions(); + + if (test__start_subtest("deletions-with-functions")) + check_deletions_with_functions(); + + if (test__start_subtest("blindness")) + check_blindness(); + + /* Check all kinds of operations and related restrictions */ + + if (test__start_subtest("incorrect-index")) + check_incorrect_index(); + + if (test__start_subtest("load-unfrozen-map")) + check_load_unfrozen_map(); + + if (test__start_subtest("no-map-reuse")) + check_no_map_reuse(); + + if (test__start_subtest("bpf-side-ops")) + check_bpf_side(); +} +#else +void test_bpf_insn_array(void) +{ + +} +#endif diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index c0e8ffdaa484..4b4b081b46cc 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -35,6 +35,7 @@ #include "verifier_global_subprogs.skel.h" #include "verifier_global_ptr_args.skel.h" #include "verifier_gotol.skel.h" +#include "verifier_gotox.skel.h" #include "verifier_helper_access_var_len.skel.h" #include "verifier_helper_packet_access.skel.h" #include "verifier_helper_restricted.skel.h" @@ -173,6 +174,7 @@ void test_verifier_div_overflow(void) { RUN(verifier_div_overflow); } void test_verifier_global_subprogs(void) { RUN(verifier_global_subprogs); } void test_verifier_global_ptr_args(void) { RUN(verifier_global_ptr_args); } void test_verifier_gotol(void) { RUN(verifier_gotol); } +void test_verifier_gotox(void) { RUN(verifier_gotox); } void test_verifier_helper_access_var_len(void) { RUN(verifier_helper_access_var_len); } void test_verifier_helper_packet_access(void) { RUN(verifier_helper_packet_access); } void test_verifier_helper_restricted(void) { RUN(verifier_helper_restricted); } diff --git a/tools/testing/selftests/bpf/progs/bpf_gotox.c b/tools/testing/selftests/bpf/progs/bpf_gotox.c new file mode 100644 index 000000000000..a867d2dbcf48 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_gotox.c @@ -0,0 +1,414 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include +#include +#include +#include "bpf_misc.h" + +/* Disable tests for now, as CI runs with LLVM-20 */ +#if 0 +__u64 in_user; +__u64 ret_user; + +struct simple_ctx { + __u64 x; +}; + +__u64 some_var; + +/* + * This function adds code which will be replaced by a different + * number of instructions by the verifier. This adds additional + * stress on testing the insn_array maps corresponding to indirect jumps. + */ +static __always_inline void adjust_insns(__u64 x) +{ + some_var ^= x + bpf_jiffies64(); +} + +SEC("syscall") +int simple_test(struct simple_ctx *ctx) +{ + switch (ctx->x) { + case 0: + adjust_insns(ctx->x + 1); + ret_user = 2; + break; + case 1: + adjust_insns(ctx->x + 7); + ret_user = 3; + break; + case 2: + adjust_insns(ctx->x + 9); + ret_user = 4; + break; + case 3: + adjust_insns(ctx->x + 11); + ret_user = 5; + break; + case 4: + adjust_insns(ctx->x + 17); + ret_user = 7; + break; + default: + adjust_insns(ctx->x + 177); + ret_user = 19; + break; + } + + return 0; +} + +SEC("syscall") +int simple_test2(struct simple_ctx *ctx) +{ + switch (ctx->x) { + case 0: + adjust_insns(ctx->x + 1); + ret_user = 2; + break; + case 1: + adjust_insns(ctx->x + 7); + ret_user = 3; + break; + case 2: + adjust_insns(ctx->x + 9); + ret_user = 4; + break; + case 3: + adjust_insns(ctx->x + 11); + ret_user = 5; + break; + case 4: + adjust_insns(ctx->x + 17); + ret_user = 7; + break; + default: + adjust_insns(ctx->x + 177); + ret_user = 19; + break; + } + + return 0; +} + +SEC("fentry/" SYS_PREFIX "sys_nanosleep") +int simple_test_other_sec(struct pt_regs *ctx) +{ + __u64 x = in_user; + + switch (x) { + case 0: + adjust_insns(x + 1); + ret_user = 2; + break; + case 1: + adjust_insns(x + 7); + ret_user = 3; + break; + case 2: + adjust_insns(x + 9); + ret_user = 4; + break; + case 3: + adjust_insns(x + 11); + ret_user = 5; + break; + case 4: + adjust_insns(x + 17); + ret_user = 7; + break; + default: + adjust_insns(x + 177); + ret_user = 19; + break; + } + + return 0; +} + +SEC("syscall") +int two_switches(struct simple_ctx *ctx) +{ + switch (ctx->x) { + case 0: + adjust_insns(ctx->x + 1); + ret_user = 2; + break; + case 1: + adjust_insns(ctx->x + 7); + ret_user = 3; + break; + case 2: + adjust_insns(ctx->x + 9); + ret_user = 4; + break; + case 3: + adjust_insns(ctx->x + 11); + ret_user = 5; + break; + case 4: + adjust_insns(ctx->x + 17); + ret_user = 7; + break; + default: + adjust_insns(ctx->x + 177); + ret_user = 19; + break; + } + + switch (ctx->x + !!ret_user) { + case 1: + adjust_insns(ctx->x + 7); + ret_user = 103; + break; + case 2: + adjust_insns(ctx->x + 9); + ret_user = 104; + break; + case 3: + adjust_insns(ctx->x + 11); + ret_user = 107; + break; + case 4: + adjust_insns(ctx->x + 11); + ret_user = 205; + break; + case 5: + adjust_insns(ctx->x + 11); + ret_user = 115; + break; + default: + adjust_insns(ctx->x + 177); + ret_user = 1019; + break; + } + + return 0; +} + +SEC("syscall") +int big_jump_table(struct simple_ctx *ctx __attribute__((unused))) +{ +#if 0 + const void *const jt[256] = { + [0 ... 255] = &&default_label, + [0] = &&l0, + [11] = &&l11, + [27] = &&l27, + [31] = &&l31, + }; + + goto *jt[ctx->x & 0xff]; + +l0: + adjust_insns(ctx->x + 1); + ret_user = 2; + return 0; + +l11: + adjust_insns(ctx->x + 7); + ret_user = 3; + return 0; + +l27: + adjust_insns(ctx->x + 9); + ret_user = 4; + return 0; + +l31: + adjust_insns(ctx->x + 11); + ret_user = 5; + return 0; + +default_label: + adjust_insns(ctx->x + 177); + ret_user = 19; + return 0; +#else + return 0; +#endif +} + +SEC("syscall") +int one_jump_two_maps(struct simple_ctx *ctx __attribute__((unused))) +{ +#if 0 + __label__ l1, l2, l3, l4; + void *jt1[2] = { &&l1, &&l2 }; + void *jt2[2] = { &&l3, &&l4 }; + unsigned int a = ctx->x % 2; + unsigned int b = (ctx->x / 2) % 2; + volatile int ret = 0; + + if (!(a < 2 && b < 2)) + return 19; + + if (ctx->x % 2) + goto *jt1[a]; + else + goto *jt2[b]; + + l1: ret += 1; + l2: ret += 3; + l3: ret += 5; + l4: ret += 7; + + ret_user = ret; + return ret; +#else + return 0; +#endif +} + +SEC("syscall") +int one_map_two_jumps(struct simple_ctx *ctx __attribute__((unused))) +{ +#if 0 + __label__ l1, l2, l3; + void *jt[3] = { &&l1, &&l2, &&l3 }; + unsigned int a = (ctx->x >> 2) & 1; + unsigned int b = (ctx->x >> 3) & 1; + volatile int ret = 0; + + if (ctx->x % 2) + goto *jt[a]; + + if (ctx->x % 3) + goto *jt[a + b]; + + l1: ret += 3; + l2: ret += 5; + l3: ret += 7; + + ret_user = ret; + return ret; +#else + return 0; +#endif +} + +/* Just to introduce some non-zero offsets in .text */ +static __noinline int f0(volatile struct simple_ctx *ctx __arg_ctx) +{ + if (ctx) + return 1; + else + return 13; +} + +SEC("syscall") int f1(struct simple_ctx *ctx) +{ + ret_user = 0; + return f0(ctx); +} + +static __noinline int __static_global(__u64 x) +{ + switch (x) { + case 0: + adjust_insns(x + 1); + ret_user = 2; + break; + case 1: + adjust_insns(x + 7); + ret_user = 3; + break; + case 2: + adjust_insns(x + 9); + ret_user = 4; + break; + case 3: + adjust_insns(x + 11); + ret_user = 5; + break; + case 4: + adjust_insns(x + 17); + ret_user = 7; + break; + default: + adjust_insns(x + 177); + ret_user = 19; + break; + } + + return 0; +} + +SEC("syscall") +int use_static_global1(struct simple_ctx *ctx) +{ + ret_user = 0; + return __static_global(ctx->x); +} + +SEC("syscall") +int use_static_global2(struct simple_ctx *ctx) +{ + ret_user = 0; + adjust_insns(ctx->x + 1); + return __static_global(ctx->x); +} + +SEC("fentry/" SYS_PREFIX "sys_nanosleep") +int use_static_global_other_sec(void *ctx) +{ + return __static_global(in_user); +} + +__noinline int __nonstatic_global(__u64 x) +{ + switch (x) { + case 0: + adjust_insns(x + 1); + ret_user = 2; + break; + case 1: + adjust_insns(x + 7); + ret_user = 3; + break; + case 2: + adjust_insns(x + 9); + ret_user = 4; + break; + case 3: + adjust_insns(x + 11); + ret_user = 5; + break; + case 4: + adjust_insns(x + 17); + ret_user = 7; + break; + default: + adjust_insns(x + 177); + ret_user = 19; + break; + } + + return 0; +} + +SEC("syscall") +int use_nonstatic_global1(struct simple_ctx *ctx) +{ + ret_user = 0; + return __nonstatic_global(ctx->x); +} + +SEC("syscall") +int use_nonstatic_global2(struct simple_ctx *ctx) +{ + ret_user = 0; + adjust_insns(ctx->x + 1); + return __nonstatic_global(ctx->x); +} + +SEC("fentry/" SYS_PREFIX "sys_nanosleep") +int use_nonstatic_global_other_sec(void *ctx) +{ + return __nonstatic_global(in_user); +} +#endif + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_gotox.c b/tools/testing/selftests/bpf/progs/verifier_gotox.c new file mode 100644 index 000000000000..1a92e4d321e8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_gotox.c @@ -0,0 +1,277 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Isovalent */ + +#include +#include +#include "bpf_misc.h" +#include "../../../include/linux/filter.h" + +#ifdef __TARGET_ARCH_x86 + +#define DEFINE_SIMPLE_JUMP_TABLE_PROG(NAME, SRC_REG, OFF, IMM, OUTCOME) \ + \ + SEC("socket") \ + OUTCOME \ + __naked void jump_table_ ## NAME(void) \ + { \ + asm volatile (" \ + .pushsection .jumptables,\"\",@progbits; \ + jt0_%=: \ + .quad ret0_%=; \ + .quad ret1_%=; \ + .size jt0_%=, 16; \ + .global jt0_%=; \ + .popsection; \ + \ + r0 = jt0_%= ll; \ + r0 += 8; \ + r0 = *(u64 *)(r0 + 0); \ + .8byte %[gotox_r0]; \ + ret0_%=: \ + r0 = 0; \ + exit; \ + ret1_%=: \ + r0 = 1; \ + exit; \ + " : \ + : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, (SRC_REG), (OFF) , (IMM))) \ + : __clobber_all); \ + } + +/* + * The first program which doesn't use reserved fields + * loads and works properly. The rest fail to load. + */ +DEFINE_SIMPLE_JUMP_TABLE_PROG(ok, BPF_REG_0, 0, 0, __success __retval(1)) +DEFINE_SIMPLE_JUMP_TABLE_PROG(reserved_field_src_reg, BPF_REG_1, 0, 0, __failure __msg("BPF_JA|BPF_X uses reserved fields")) +DEFINE_SIMPLE_JUMP_TABLE_PROG(reserved_field_non_zero_off, BPF_REG_0, 1, 0, __failure __msg("BPF_JA|BPF_X uses reserved fields")) +DEFINE_SIMPLE_JUMP_TABLE_PROG(reserved_field_non_zero_imm, BPF_REG_0, 0, 1, __failure __msg("BPF_JA|BPF_X uses reserved fields")) + +/* + * Gotox is forbidden when there is no jump table loaded + * which points to the sub-function where the gotox is used + */ +SEC("socket") +__failure __msg("no jump tables found for subprog starting at 0") +__naked void jump_table_no_jump_table(void) +{ + asm volatile (" \ + .8byte %[gotox_r0]; \ + r0 = 1; \ + exit; \ +" : \ + : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0)) + : __clobber_all); +} + +/* + * Incorrect type of the target register, only PTR_TO_INSN allowed + */ +SEC("socket") +__failure __msg("R1 has type 1, expected PTR_TO_INSN") +__naked void jump_table_incorrect_dst_reg_type(void) +{ + asm volatile (" \ + .pushsection .jumptables,\"\",@progbits; \ +jt0_%=: \ + .quad ret0_%=; \ + .quad ret1_%=; \ + .size jt0_%=, 16; \ + .global jt0_%=; \ + .popsection; \ + \ + r0 = jt0_%= ll; \ + r0 += 8; \ + r0 = *(u64 *)(r0 + 0); \ + r1 = 42; \ + .8byte %[gotox_r1]; \ + ret0_%=: \ + r0 = 0; \ + exit; \ + ret1_%=: \ + r0 = 1; \ + exit; \ +" : \ + : __imm_insn(gotox_r1, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_1, 0, 0 , 0)) + : __clobber_all); +} + +#define DEFINE_INVALID_SIZE_PROG(READ_SIZE, OUTCOME) \ + \ + SEC("socket") \ + OUTCOME \ + __naked void jump_table_invalid_read_size_ ## READ_SIZE(void) \ + { \ + asm volatile (" \ + .pushsection .jumptables,\"\",@progbits; \ + jt0_%=: \ + .quad ret0_%=; \ + .quad ret1_%=; \ + .size jt0_%=, 16; \ + .global jt0_%=; \ + .popsection; \ + \ + r0 = jt0_%= ll; \ + r0 += 8; \ + r0 = *(" #READ_SIZE " *)(r0 + 0); \ + .8byte %[gotox_r0]; \ + ret0_%=: \ + r0 = 0; \ + exit; \ + ret1_%=: \ + r0 = 1; \ + exit; \ + " : \ + : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0)) \ + : __clobber_all); \ + } + +DEFINE_INVALID_SIZE_PROG(u32, __failure __msg("Invalid read of 4 bytes from insn_array")) +DEFINE_INVALID_SIZE_PROG(u16, __failure __msg("Invalid read of 2 bytes from insn_array")) +DEFINE_INVALID_SIZE_PROG(u8, __failure __msg("Invalid read of 1 bytes from insn_array")) + +SEC("socket") +__failure __msg("misaligned value access off 0+1+0 size 8") +__naked void jump_table_misaligned_access(void) +{ + asm volatile (" \ + .pushsection .jumptables,\"\",@progbits; \ +jt0_%=: \ + .quad ret0_%=; \ + .quad ret1_%=; \ + .size jt0_%=, 16; \ + .global jt0_%=; \ + .popsection; \ + \ + r0 = jt0_%= ll; \ + r0 += 1; \ + r0 = *(u64 *)(r0 + 0); \ + .8byte %[gotox_r0]; \ + ret0_%=: \ + r0 = 0; \ + exit; \ + ret1_%=: \ + r0 = 1; \ + exit; \ +" : \ + : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0)) + : __clobber_all); +} + +SEC("socket") +__failure __msg("invalid access to map value, value_size=16 off=24 size=8") +__naked void jump_table_invalid_mem_acceess_pos(void) +{ + asm volatile (" \ + .pushsection .jumptables,\"\",@progbits; \ +jt0_%=: \ + .quad ret0_%=; \ + .quad ret1_%=; \ + .size jt0_%=, 16; \ + .global jt0_%=; \ + .popsection; \ + \ + r0 = jt0_%= ll; \ + r0 += 24; \ + r0 = *(u64 *)(r0 + 0); \ + .8byte %[gotox_r0]; \ + ret0_%=: \ + r0 = 0; \ + exit; \ + ret1_%=: \ + r0 = 1; \ + exit; \ +" : \ + : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0)) + : __clobber_all); +} + +SEC("socket") +__failure __msg("invalid access to map value, value_size=16 off=-24 size=8") +__naked void jump_table_invalid_mem_acceess_neg(void) +{ + asm volatile (" \ + .pushsection .jumptables,\"\",@progbits; \ +jt0_%=: \ + .quad ret0_%=; \ + .quad ret1_%=; \ + .size jt0_%=, 16; \ + .global jt0_%=; \ + .popsection; \ + \ + r0 = jt0_%= ll; \ + r0 -= 24; \ + r0 = *(u64 *)(r0 + 0); \ + .8byte %[gotox_r0]; \ + ret0_%=: \ + r0 = 0; \ + exit; \ + ret1_%=: \ + r0 = 1; \ + exit; \ +" : \ + : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0)) + : __clobber_all); +} + +SEC("socket") +__success __retval(1) +__naked void jump_table_add_sub_ok(void) +{ + asm volatile (" \ + .pushsection .jumptables,\"\",@progbits; \ +jt0_%=: \ + .quad ret0_%=; \ + .quad ret1_%=; \ + .size jt0_%=, 16; \ + .global jt0_%=; \ + .popsection; \ + \ + r0 = jt0_%= ll; \ + r0 -= 24; \ + r0 += 32; \ + r0 = *(u64 *)(r0 + 0); \ + .8byte %[gotox_r0]; \ + ret0_%=: \ + r0 = 0; \ + exit; \ + ret1_%=: \ + r0 = 1; \ + exit; \ +" : \ + : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0)) + : __clobber_all); +} + +SEC("socket") +__failure __msg("writes into insn_array not allowed") +__naked void jump_table_no_writes(void) +{ + asm volatile (" \ + .pushsection .jumptables,\"\",@progbits; \ +jt0_%=: \ + .quad ret0_%=; \ + .quad ret1_%=; \ + .size jt0_%=, 16; \ + .global jt0_%=; \ + .popsection; \ + \ + r0 = jt0_%= ll; \ + r0 += 8; \ + r1 = 0xbeef; \ + *(u64 *)(r0 + 0) = r1; \ + .8byte %[gotox_r0]; \ + ret0_%=: \ + r0 = 0; \ + exit; \ + ret1_%=: \ + r0 = 1; \ + exit; \ +" : \ + : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0)) + : __clobber_all); +} + +#endif /* __TARGET_ARCH_x86 */ + +char _license[] SEC("license") = "GPL";