Skip to content

Commit 6cf4995

Browse files
aspsk authored and Kernel Patches Daemon committed
bpf, x86: add new map type: instructions array
On bpf(BPF_PROG_LOAD) syscall user-supplied BPF programs are translated by the verifier into "xlated" BPF programs. During this process the original instruction offsets might be adjusted and/or individual instructions might be replaced by new sets of instructions, or deleted. Add a new BPF map type which is aimed at keeping track of how, for a given program, the original instructions were relocated during the verification. Also, besides keeping track of the original -> xlated mapping, make the x86 JIT build the xlated -> jitted mapping for every instruction listed in an instruction array. This is required for every future application of instruction arrays: static keys, indirect jumps and indirect calls. A map of the BPF_MAP_TYPE_INSN_ARRAY type must be created with u32 keys and values of sizeof(struct bpf_insn_array_value). The values have different semantics for userspace and for BPF space. For userspace a value holds the original, xlated and jitted offsets of the tracked instruction. For the BPF side the value is a real pointer to a jitted instruction. On map creation/initialization, before loading the program, each element of the map should be initialized to point to an instruction offset within the program. Before the program load such maps should be made frozen. After the program verification xlated and jitted offsets can be read via the bpf(2) syscall. If a tracked instruction is removed by the verifier, then the xlated offset is set to (u32)-1, which is considered too big for a valid BPF program offset. One such map can, obviously, be used to track one and only one BPF program. If the verification process was unsuccessful, then the same map can be re-used to verify the program with a different log level. However, if the program was loaded fine, then such a map, being frozen in any case, can't be reused by other programs even after the program release. Example.
Consider the following original and xlated programs: Original prog: Xlated prog: 0: r1 = 0x0 0: r1 = 0 1: *(u32 *)(r10 - 0x4) = r1 1: *(u32 *)(r10 -4) = r1 2: r2 = r10 2: r2 = r10 3: r2 += -0x4 3: r2 += -4 4: r1 = 0x0 ll 4: r1 = map[id:88] 6: call 0x1 6: r1 += 272 7: r0 = *(u32 *)(r2 +0) 8: if r0 >= 0x1 goto pc+3 9: r0 <<= 3 10: r0 += r1 11: goto pc+1 12: r0 = 0 7: r6 = r0 13: r6 = r0 8: if r6 == 0x0 goto +0x2 14: if r6 == 0x0 goto pc+4 9: call 0x76 15: r0 = 0xffffffff8d2079c0 17: r0 = *(u64 *)(r0 +0) 10: *(u64 *)(r6 + 0x0) = r0 18: *(u64 *)(r6 +0) = r0 11: r0 = 0x0 19: r0 = 0x0 12: exit 20: exit An instruction array map, containing, e.g., instructions [0,4,7,12] will be translated by the verifier to [0,4,13,20]. A map with index 5 (the middle of 16-byte instruction) or indexes greater than 12 (outside the program boundaries) would be rejected. The functionality provided by this patch will be extended in consequent patches to implement BPF Static Keys, indirect jumps, and indirect calls. Signed-off-by: Anton Protopopov <[email protected]> Reviewed-by: Eduard Zingerman <[email protected]>
1 parent e224139 commit 6cf4995

File tree

10 files changed

+418
-1
lines changed

10 files changed

+418
-1
lines changed

arch/x86/net/bpf_jit_comp.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3827,6 +3827,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
38273827
jit_data->header = header;
38283828
jit_data->rw_header = rw_header;
38293829
}
3830+
3831+
bpf_prog_update_insn_ptrs(prog, addrs, image);
3832+
38303833
/*
38313834
* ctx.prog_offset is used when CFI preambles put code *before*
38323835
* the function. See emit_cfi(). For FineIBT specifically this code

include/linux/bpf.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3797,4 +3797,19 @@ int bpf_prog_get_file_line(struct bpf_prog *prog, unsigned long ip, const char *
37973797
const char **linep, int *nump);
37983798
struct bpf_prog *bpf_prog_find_from_stack(void);
37993799

3800+
int bpf_insn_array_init(struct bpf_map *map, const struct bpf_prog *prog);
3801+
int bpf_insn_array_ready(struct bpf_map *map);
3802+
void bpf_insn_array_release(struct bpf_map *map);
3803+
void bpf_insn_array_adjust(struct bpf_map *map, u32 off, u32 len);
3804+
void bpf_insn_array_adjust_after_remove(struct bpf_map *map, u32 off, u32 len);
3805+
3806+
#ifdef CONFIG_BPF_SYSCALL
3807+
void bpf_prog_update_insn_ptrs(struct bpf_prog *prog, u32 *offsets, void *image);
3808+
#else
3809+
static inline void
3810+
bpf_prog_update_insn_ptrs(struct bpf_prog *prog, u32 *offsets, void *image)
3811+
{
3812+
}
3813+
#endif
3814+
38003815
#endif /* _LINUX_BPF_H */

include/linux/bpf_types.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_RINGBUF, ringbuf_map_ops)
133133
BPF_MAP_TYPE(BPF_MAP_TYPE_BLOOM_FILTER, bloom_filter_map_ops)
134134
BPF_MAP_TYPE(BPF_MAP_TYPE_USER_RINGBUF, user_ringbuf_map_ops)
135135
BPF_MAP_TYPE(BPF_MAP_TYPE_ARENA, arena_map_ops)
136+
BPF_MAP_TYPE(BPF_MAP_TYPE_INSN_ARRAY, insn_array_map_ops)
136137

137138
BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint)
138139
BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing)

include/linux/bpf_verifier.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -754,8 +754,10 @@ struct bpf_verifier_env {
754754
struct list_head free_list; /* list of struct bpf_verifier_state_list */
755755
struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
756756
struct btf_mod_pair used_btfs[MAX_USED_BTFS]; /* array of BTF's used by BPF program */
757+
struct bpf_map *insn_array_maps[MAX_USED_MAPS]; /* array of INSN_ARRAY map's to be relocated */
757758
u32 used_map_cnt; /* number of used maps */
758759
u32 used_btf_cnt; /* number of used BTF objects */
760+
u32 insn_array_map_cnt; /* number of used maps of type BPF_MAP_TYPE_INSN_ARRAY */
759761
u32 id_gen; /* used to generate unique reg IDs */
760762
u32 hidden_subprog_cnt; /* number of hidden subprogs */
761763
int exception_callback_subprog;

include/uapi/linux/bpf.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1026,6 +1026,7 @@ enum bpf_map_type {
10261026
BPF_MAP_TYPE_USER_RINGBUF,
10271027
BPF_MAP_TYPE_CGRP_STORAGE,
10281028
BPF_MAP_TYPE_ARENA,
1029+
BPF_MAP_TYPE_INSN_ARRAY,
10291030
__MAX_BPF_MAP_TYPE
10301031
};
10311032

@@ -7649,4 +7650,24 @@ enum bpf_kfunc_flags {
76497650
BPF_F_PAD_ZEROS = (1ULL << 0),
76507651
};
76517652

7653+
/*
7654+
* Values of a BPF_MAP_TYPE_INSN_ARRAY entry must be of this type.
7655+
*
7656+
* Before the map is used the orig_off field should point to an
7657+
* instruction inside the program being loaded. The other fields
7658+
* must be set to 0.
7659+
*
7660+
* After the program is loaded, the xlated_off will be adjusted
7661+
* by the verifier to point to the index of the original instruction
7662+
* in the xlated program. If the instruction is deleted, it will
7663+
* be set to (u32)-1. The jitted_off will be set to the corresponding
7664+
* offset in the jitted image of the program.
7665+
*/
7666+
struct bpf_insn_array_value {
7667+
__u32 orig_off;
7668+
__u32 xlated_off;
7669+
__u32 jitted_off;
7670+
__u32 :32;
7671+
};
7672+
76527673
#endif /* _UAPI__LINUX_BPF_H__ */

kernel/bpf/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ CFLAGS_core.o += -Wno-override-init $(cflags-nogcse-yy)
99
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o token.o liveness.o
1010
obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o
1111
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o
12-
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
12+
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o bpf_insn_array.o
1313
obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o
1414
obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o
1515
obj-$(CONFIG_BPF_SYSCALL) += disasm.o mprog.o

kernel/bpf/bpf_insn_array.c

Lines changed: 289 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,289 @@
1+
// SPDX-License-Identifier: GPL-2.0-only
2+
/* Copyright (c) 2025 Isovalent */
3+
4+
#include <linux/bpf.h>
5+
6+
/* Hard cap on the number of tracked instructions per map. */
#define MAX_INSN_ARRAY_ENTRIES 256

/*
 * An instruction array map: for one BPF program, tracks how the original
 * instruction offsets map to xlated and jitted offsets.
 */
struct bpf_insn_array {
	struct bpf_map map;
	atomic_t used;		/* non-zero while a program owns this map */
	long *ips;		/* jitted instruction addresses, one per entry;
				 * carved out right after values[], see
				 * insn_array_alloc()
				 */
	DECLARE_FLEX_ARRAY(struct bpf_insn_array_value, values);
};

/* Convert a generic map pointer to the containing instruction array. */
#define cast_insn_array(MAP_PTR) \
	container_of((MAP_PTR), struct bpf_insn_array, map)

/* Sentinel xlated offset: the tracked instruction was removed by the verifier. */
#define INSN_DELETED ((u32)-1)
19+
20+
static inline u32 insn_array_alloc_size(u32 max_entries)
21+
{
22+
const u32 base_size = sizeof(struct bpf_insn_array);
23+
const u32 entry_size = sizeof(struct bpf_insn_array_value);
24+
25+
return base_size + max_entries * (entry_size + sizeof(long));
26+
}
27+
28+
static int insn_array_alloc_check(union bpf_attr *attr)
29+
{
30+
u32 value_size = sizeof(struct bpf_insn_array_value);
31+
32+
if (attr->max_entries == 0 || attr->key_size != 4 ||
33+
attr->value_size != value_size || attr->map_flags != 0)
34+
return -EINVAL;
35+
36+
if (attr->max_entries > MAX_INSN_ARRAY_ENTRIES)
37+
return -E2BIG;
38+
39+
return 0;
40+
}
41+
42+
/*
 * Free the whole map; values[] and ips[] live in the same allocation,
 * so a single bpf_map_area_free() releases everything.
 */
static void insn_array_free(struct bpf_map *map)
{
	bpf_map_area_free(cast_insn_array(map));
}
48+
49+
static struct bpf_map *insn_array_alloc(union bpf_attr *attr)
50+
{
51+
u64 size = insn_array_alloc_size(attr->max_entries);
52+
struct bpf_insn_array *insn_array;
53+
54+
insn_array = bpf_map_area_alloc(size, NUMA_NO_NODE);
55+
if (!insn_array)
56+
return ERR_PTR(-ENOMEM);
57+
58+
/* ips are allocated right after the insn_array->values[] array */
59+
insn_array->ips = (void *)&insn_array->values[attr->max_entries];
60+
61+
bpf_map_init_from_attr(&insn_array->map, attr);
62+
63+
return &insn_array->map;
64+
}
65+
66+
static void *insn_array_lookup_elem(struct bpf_map *map, void *key)
67+
{
68+
struct bpf_insn_array *insn_array = cast_insn_array(map);
69+
u32 index = *(u32 *)key;
70+
71+
if (unlikely(index >= insn_array->map.max_entries))
72+
return NULL;
73+
74+
return &insn_array->values[index];
75+
}
76+
77+
/*
 * Syscall-side update: userspace may set only the orig_off field.
 * xlated_off and jitted_off are output fields owned by the kernel and
 * must be zero in the supplied value. As in other array maps, every
 * element always exists, so BPF_NOEXIST can never be satisfied.
 */
static long insn_array_update_elem(struct bpf_map *map, void *key, void *value, u64 map_flags)
{
	struct bpf_insn_array *insn_array = cast_insn_array(map);
	struct bpf_insn_array_value val = {};
	u32 idx = *(u32 *)key;

	if (unlikely(idx >= map->max_entries))
		return -E2BIG;
	if (unlikely(map_flags & BPF_NOEXIST))
		return -EEXIST;

	copy_map_value(map, &val, value);
	if (val.xlated_off || val.jitted_off)
		return -EINVAL;

	insn_array->values[idx].orig_off = val.orig_off;
	return 0;
}
97+
98+
static long insn_array_delete_elem(struct bpf_map *map, void *key)
99+
{
100+
return -EINVAL;
101+
}
102+
103+
static int insn_array_check_btf(const struct bpf_map *map,
104+
const struct btf *btf,
105+
const struct btf_type *key_type,
106+
const struct btf_type *value_type)
107+
{
108+
if (!btf_type_is_i32(key_type))
109+
return -EINVAL;
110+
111+
if (!btf_type_is_i64(value_type))
112+
return -EINVAL;
113+
114+
return 0;
115+
}
116+
117+
static u64 insn_array_mem_usage(const struct bpf_map *map)
118+
{
119+
return insn_array_alloc_size(map->max_entries);
120+
}
121+
122+
BTF_ID_LIST_SINGLE(insn_array_btf_ids, struct, bpf_insn_array)

/* Map operations backing BPF_MAP_TYPE_INSN_ARRAY (wired up in bpf_types.h). */
const struct bpf_map_ops insn_array_map_ops = {
	.map_alloc_check = insn_array_alloc_check,
	.map_alloc = insn_array_alloc,
	.map_free = insn_array_free,
	/* key iteration is identical to regular array maps */
	.map_get_next_key = bpf_array_get_next_key,
	.map_lookup_elem = insn_array_lookup_elem,
	.map_update_elem = insn_array_update_elem,
	.map_delete_elem = insn_array_delete_elem,
	.map_check_btf = insn_array_check_btf,
	.map_mem_usage = insn_array_mem_usage,
	.map_btf_id = &insn_array_btf_ids[0],
};
136+
137+
/*
 * Read map->frozen under map->freeze_mutex; guard(mutex) releases the
 * lock automatically when the function returns.
 */
static inline bool is_frozen(struct bpf_map *map)
{
	guard(mutex)(&map->freeze_mutex);

	return map->frozen;
}

/* True iff @map is an instruction array map. */
static bool is_insn_array(const struct bpf_map *map)
{
	return map->map_type == BPF_MAP_TYPE_INSN_ARRAY;
}
148+
149+
static inline bool valid_offsets(const struct bpf_insn_array *insn_array,
150+
const struct bpf_prog *prog)
151+
{
152+
u32 off;
153+
int i;
154+
155+
for (i = 0; i < insn_array->map.max_entries; i++) {
156+
off = insn_array->values[i].orig_off;
157+
158+
if (off >= prog->len)
159+
return false;
160+
161+
if (off > 0) {
162+
if (prog->insnsi[off-1].code == (BPF_LD | BPF_DW | BPF_IMM))
163+
return false;
164+
}
165+
}
166+
167+
return true;
168+
}
169+
170+
int bpf_insn_array_init(struct bpf_map *map, const struct bpf_prog *prog)
171+
{
172+
struct bpf_insn_array *insn_array = cast_insn_array(map);
173+
struct bpf_insn_array_value *values = insn_array->values;
174+
int i;
175+
176+
if (!is_frozen(map))
177+
return -EINVAL;
178+
179+
if (!valid_offsets(insn_array, prog))
180+
return -EINVAL;
181+
182+
/*
183+
* There can be only one program using the map
184+
*/
185+
if (atomic_xchg(&insn_array->used, 1))
186+
return -EBUSY;
187+
188+
/*
189+
* Reset all the map indexes to the original values. This is needed,
190+
* e.g., when a replay of verification with different log level should
191+
* be performed.
192+
*/
193+
for (i = 0; i < map->max_entries; i++)
194+
values[i].xlated_off = values[i].orig_off;
195+
196+
return 0;
197+
}
198+
199+
int bpf_insn_array_ready(struct bpf_map *map)
200+
{
201+
struct bpf_insn_array *insn_array = cast_insn_array(map);
202+
int i;
203+
204+
for (i = 0; i < map->max_entries; i++) {
205+
if (insn_array->values[i].xlated_off == INSN_DELETED)
206+
continue;
207+
if (!insn_array->ips[i])
208+
return -EFAULT;
209+
}
210+
211+
return 0;
212+
}
213+
214+
void bpf_insn_array_release(struct bpf_map *map)
215+
{
216+
struct bpf_insn_array *insn_array = cast_insn_array(map);
217+
218+
atomic_set(&insn_array->used, 0);
219+
}
220+
221+
/*
 * The instruction at @off was patched into @len instructions: shift every
 * tracked xlated offset located after @off by (len - 1). Deleted entries
 * and entries at or before @off are left untouched. A patch of length
 * 0 or 1 moves nothing.
 */
void bpf_insn_array_adjust(struct bpf_map *map, u32 off, u32 len)
{
	struct bpf_insn_array *insn_array = cast_insn_array(map);
	u32 i, xoff;

	if (len <= 1)
		return;

	for (i = 0; i < map->max_entries; i++) {
		xoff = insn_array->values[i].xlated_off;
		if (xoff == INSN_DELETED || xoff <= off)
			continue;
		insn_array->values[i].xlated_off = xoff + len - 1;
	}
}
237+
238+
/*
 * @len instructions starting at @off were removed: entries pointing into
 * the removed range [off, off + len) become INSN_DELETED, entries after
 * it are shifted down by @len, and entries before it (or already
 * deleted) are untouched.
 */
void bpf_insn_array_adjust_after_remove(struct bpf_map *map, u32 off, u32 len)
{
	struct bpf_insn_array *insn_array = cast_insn_array(map);
	u32 i, xoff;

	for (i = 0; i < map->max_entries; i++) {
		xoff = insn_array->values[i].xlated_off;
		if (xoff == INSN_DELETED || xoff < off)
			continue;
		insn_array->values[i].xlated_off =
			(xoff < off + len) ? INSN_DELETED : xoff - len;
	}
}
254+
255+
/*
256+
* This function is called by JITs. The image is the real program
257+
* image, the offsets array set up the xlated -> jitted mapping.
258+
*/
259+
void bpf_prog_update_insn_ptrs(struct bpf_prog *prog, u32 *offsets, void *image)
260+
{
261+
struct bpf_insn_array *insn_array;
262+
struct bpf_map *map;
263+
u32 xlated_off;
264+
int i, j;
265+
266+
if (!offsets || !image)
267+
return;
268+
269+
for (i = 0; i < prog->aux->used_map_cnt; i++) {
270+
map = prog->aux->used_maps[i];
271+
if (!is_insn_array(map))
272+
continue;
273+
274+
insn_array = cast_insn_array(map);
275+
for (j = 0; j < map->max_entries; j++) {
276+
xlated_off = insn_array->values[j].xlated_off;
277+
if (xlated_off == INSN_DELETED)
278+
continue;
279+
if (xlated_off < prog->aux->subprog_start)
280+
continue;
281+
xlated_off -= prog->aux->subprog_start;
282+
if (xlated_off >= prog->len)
283+
continue;
284+
285+
insn_array->values[j].jitted_off = offsets[xlated_off];
286+
insn_array->ips[j] = (long)(image + offsets[xlated_off]);
287+
}
288+
}
289+
}

0 commit comments

Comments
 (0)