Commit 2b1fd82
Author: Alexei Starovoitov (committed)

Merge branch 'bpf-arena-add-kfunc-for-reserving-arena-memory'

Emil Tsalapatis says:

====================
bpf/arena: Add kfunc for reserving arena memory

Add a new kfunc for BPF arenas that reserves a region of the mapping to
prevent it from being mapped. These regions serve as guards against
out-of-bounds accesses and are useful for debugging arena-related code.

From v3 ([email protected])
------------------------------------------------------
- Added Acked-by tags by Yonghong.
- Replaced hardcoded error numbers in selftests (Yonghong).
- Fixed selftest for partially freeing a reserved region (Yonghong).

From v2 ([email protected])
------------------------------------------------------
- Removed -EALREADY and replaced it with -EINVAL to bring error handling
  in line with the rest of the BPF code (Alexei).

From v1 ([email protected])
------------------------------------------------------
- Removed the additional guard range tree and adjusted the tests
  accordingly. Reserved regions now behave like allocated regions and can
  be unreserved using bpf_arena_free_pages(). They can also be allocated
  from userspace through minor faults. It is up to the user to prevent
  erroneous frees and/or use the BPF_F_SEGV_ON_FAULT flag to catch stray
  userspace accesses (Alexei).
- Changed terminology from guard pages to reserved pages (Alexei,
  Kartikeya).

Signed-off-by: Emil Tsalapatis <[email protected]>
====================

Link: https://patch.msgid.link/[email protected]
Signed-off-by: Alexei Starovoitov <[email protected]>
2 parents ad97cb2 + 9f9559f commit 2b1fd82
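As a rough illustration of the intended use, here is a minimal guard-page sketch, not part of this commit. It assumes an arena map named "arena" declared as in the selftests below:

	/* Back one page with real memory... */
	char __arena *buf;

	buf = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
	if (!buf)
		return 1;

	/* ...and reserve the page right after it. bpf_arena_alloc_pages()
	 * can no longer hand out this range, so an out-of-bounds access
	 * past buf[__PAGE_SIZE - 1] lands on unmapped memory.
	 */
	if (bpf_arena_reserve_pages(&arena, buf + __PAGE_SIZE, 1))
		return 2;

Per the v1 notes above, such a reservation is undone with bpf_arena_free_pages(), and BPF_F_SEGV_ON_FAULT can be set on the arena so stray userspace accesses to the region fault loudly.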

4 files changed, +250 −0 lines

kernel/bpf/arena.c

Lines changed: 43 additions & 0 deletions

@@ -550,6 +550,34 @@ static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt)
 	}
 }
 
+/*
+ * Reserve an arena virtual address range without populating it. This call stops
+ * bpf_arena_alloc_pages from adding pages to this range.
+ */
+static int arena_reserve_pages(struct bpf_arena *arena, long uaddr, u32 page_cnt)
+{
+	long page_cnt_max = (arena->user_vm_end - arena->user_vm_start) >> PAGE_SHIFT;
+	long pgoff;
+	int ret;
+
+	if (uaddr & ~PAGE_MASK)
+		return 0;
+
+	pgoff = compute_pgoff(arena, uaddr);
+	if (pgoff + page_cnt > page_cnt_max)
+		return -EINVAL;
+
+	guard(mutex)(&arena->lock);
+
+	/* Cannot guard already allocated pages. */
+	ret = is_range_tree_set(&arena->rt, pgoff, page_cnt);
+	if (ret)
+		return -EBUSY;
+
+	/* "Allocate" the region to prevent it from being allocated. */
+	return range_tree_clear(&arena->rt, pgoff, page_cnt);
+}
+
 __bpf_kfunc_start_defs();
 
 __bpf_kfunc void *bpf_arena_alloc_pages(void *p__map, void *addr__ign, u32 page_cnt,
@@ -573,11 +601,26 @@ __bpf_kfunc void bpf_arena_free_pages(void *p__map, void *ptr__ign, u32 page_cnt
 		return;
 	arena_free_pages(arena, (long)ptr__ign, page_cnt);
 }
+
+__bpf_kfunc int bpf_arena_reserve_pages(void *p__map, void *ptr__ign, u32 page_cnt)
+{
+	struct bpf_map *map = p__map;
+	struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
+
+	if (map->map_type != BPF_MAP_TYPE_ARENA)
+		return -EINVAL;
+
+	if (!page_cnt)
+		return 0;
+
+	return arena_reserve_pages(arena, (long)ptr__ign, page_cnt);
+}
 __bpf_kfunc_end_defs();
 
 BTF_KFUNCS_START(arena_kfuncs)
 BTF_ID_FLAGS(func, bpf_arena_alloc_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE | KF_ARENA_RET | KF_ARENA_ARG2)
 BTF_ID_FLAGS(func, bpf_arena_free_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE | KF_ARENA_ARG2)
+BTF_ID_FLAGS(func, bpf_arena_reserve_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE | KF_ARENA_ARG2)
 BTF_KFUNCS_END(arena_kfuncs)
 
 static const struct btf_kfunc_id_set common_kfunc_set = {

tools/testing/selftests/bpf/bpf_arena_common.h

Lines changed: 3 additions & 0 deletions

@@ -46,8 +46,11 @@
 
 void __arena* bpf_arena_alloc_pages(void *map, void __arena *addr, __u32 page_cnt,
 				    int node_id, __u64 flags) __ksym __weak;
+int bpf_arena_reserve_pages(void *map, void __arena *addr, __u32 page_cnt) __ksym __weak;
 void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt) __ksym __weak;
 
+#define arena_base(map) ((void __arena *)((struct bpf_arena *)(map))->user_vm_start)
+
 #else /* when compiled as user space code */
 
 #define __arena
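The arena_base() helper added here lets BPF-side test code name an absolute position inside the arena mapping. A hypothetical fragment (not from this commit) showing the pattern the selftests below rely on, again assuming an arena map named "arena":

	/* Address the arena's first page directly and reserve it, so no
	 * later allocation, explicit or implicit, can be placed there.
	 */
	char __arena *start = arena_base(&arena);

	if (bpf_arena_reserve_pages(&arena, start, 1))
		return 1;

Per the kernel code above, bpf_arena_reserve_pages() returns 0 on success, -EINVAL for a range that falls outside the arena, and -EBUSY when any page in the range is already allocated or reserved.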

tools/testing/selftests/bpf/progs/verifier_arena.c

Lines changed: 106 additions & 0 deletions

@@ -3,6 +3,7 @@
 
 #define BPF_NO_KFUNC_PROTOTYPES
 #include <vmlinux.h>
+#include <errno.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 #include "bpf_misc.h"
@@ -114,6 +115,111 @@ int basic_alloc3(void *ctx)
 	return 0;
 }
 
+SEC("syscall")
+__success __retval(0)
+int basic_reserve1(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+	char __arena *page;
+	int ret;
+
+	page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+	if (!page)
+		return 1;
+
+	page += __PAGE_SIZE;
+
+	/* Reserve the second page */
+	ret = bpf_arena_reserve_pages(&arena, page, 1);
+	if (ret)
+		return 2;
+
+	/* Try to explicitly allocate the reserved page. */
+	page = bpf_arena_alloc_pages(&arena, page, 1, NUMA_NO_NODE, 0);
+	if (page)
+		return 3;
+
+	/* Try to implicitly allocate the page (since there's only 2 of them). */
+	page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+	if (page)
+		return 4;
+#endif
+	return 0;
+}
+
+SEC("syscall")
+__success __retval(0)
+int basic_reserve2(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+	char __arena *page;
+	int ret;
+
+	page = arena_base(&arena);
+	ret = bpf_arena_reserve_pages(&arena, page, 1);
+	if (ret)
+		return 1;
+
+	page = bpf_arena_alloc_pages(&arena, page, 1, NUMA_NO_NODE, 0);
+	if ((u64)page)
+		return 2;
+#endif
+	return 0;
+}
+
+/* Reserve the same page twice, should return -EBUSY. */
+SEC("syscall")
+__success __retval(0)
+int reserve_twice(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+	char __arena *page;
+	int ret;
+
+	page = arena_base(&arena);
+
+	ret = bpf_arena_reserve_pages(&arena, page, 1);
+	if (ret)
+		return 1;
+
+	ret = bpf_arena_reserve_pages(&arena, page, 1);
+	if (ret != -EBUSY)
+		return 2;
+#endif
+	return 0;
+}
+
+/* Try to reserve past the end of the arena. */
+SEC("syscall")
+__success __retval(0)
+int reserve_invalid_region(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+	char __arena *page;
+	int ret;
+
+	/* Try a NULL pointer. */
+	ret = bpf_arena_reserve_pages(&arena, NULL, 3);
+	if (ret != -EINVAL)
+		return 1;
+
+	page = arena_base(&arena);
+
+	ret = bpf_arena_reserve_pages(&arena, page, 3);
+	if (ret != -EINVAL)
+		return 2;
+
+	ret = bpf_arena_reserve_pages(&arena, page, 4096);
+	if (ret != -EINVAL)
+		return 3;
+
+	ret = bpf_arena_reserve_pages(&arena, page, (1ULL << 32) - 1);
+	if (ret != -EINVAL)
+		return 4;
+#endif
+	return 0;
+}
+
 SEC("iter.s/bpf_map")
 __success __log_level(2)
 int iter_maps1(struct bpf_iter__bpf_map *ctx)
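These SEC("syscall") programs are normally driven by the selftest harness. As a rough, hypothetical sketch of how one could be exercised from userspace with libbpf (the skeleton type and program handle follow the usual bpftool-generated naming conventions and are assumptions, not part of this commit):

	LIBBPF_OPTS(bpf_test_run_opts, opts);
	struct verifier_arena *skel;	/* hypothetical generated skeleton */
	int err;

	skel = verifier_arena__open_and_load();
	if (!skel)
		return 1;

	err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.basic_reserve1),
				     &opts);
	/* __retval(0) above means opts.retval is expected to be 0 */
	verifier_arena__destroy(skel);
	return err ?: opts.retval;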

tools/testing/selftests/bpf/progs/verifier_arena_large.c

Lines changed: 98 additions & 0 deletions

@@ -67,6 +67,104 @@ int big_alloc1(void *ctx)
 	return 0;
 }
 
+/* Try to access a reserved page. Behavior should be identical to accessing unallocated pages. */
+SEC("syscall")
+__success __retval(0)
+int access_reserved(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+	volatile char __arena *page;
+	char __arena *base;
+	const size_t len = 4;
+	int ret, i;
+
+	/* Get a separate region of the arena. */
+	page = base = arena_base(&arena) + 16384 * PAGE_SIZE;
+
+	ret = bpf_arena_reserve_pages(&arena, base, len);
+	if (ret)
+		return 1;
+
+	/* Try to dirty reserved memory. */
+	for (i = 0; i < len && can_loop; i++)
+		*page = 0x5a;
+
+	for (i = 0; i < len && can_loop; i++) {
+		page = (volatile char __arena *)(base + i * PAGE_SIZE);
+
+		/*
+		 * Error out in case either the write went through,
+		 * or the address has random garbage.
+		 */
+		if (*page == 0x5a)
+			return 2 + 2 * i;
+
+		if (*page)
+			return 2 + 2 * i + 1;
+	}
+#endif
+	return 0;
+}
+
+/* Try to allocate a region overlapping with a reservation. */
+SEC("syscall")
+__success __retval(0)
+int request_partially_reserved(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+	volatile char __arena *page;
+	char __arena *base;
+	int ret;
+
+	/* Add an arbitrary page offset. */
+	page = base = arena_base(&arena) + 4096 * __PAGE_SIZE;
+
+	ret = bpf_arena_reserve_pages(&arena, base + 3 * __PAGE_SIZE, 4);
+	if (ret)
+		return 1;
+
+	page = bpf_arena_alloc_pages(&arena, base, 5, NUMA_NO_NODE, 0);
+	if ((u64)page != 0ULL)
+		return 2;
+#endif
+	return 0;
+}
+
+SEC("syscall")
+__success __retval(0)
+int free_reserved(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+	char __arena *addr;
+	char __arena *page;
+	int ret;
+
+	/* Add an arbitrary page offset. */
+	addr = arena_base(&arena) + 32768 * __PAGE_SIZE;
+
+	page = bpf_arena_alloc_pages(&arena, addr, 2, NUMA_NO_NODE, 0);
+	if (!page)
+		return 1;
+
+	ret = bpf_arena_reserve_pages(&arena, addr + 2 * __PAGE_SIZE, 2);
+	if (ret)
+		return 2;
+
+	/*
+	 * Reserved and allocated pages should be interchangeable for
+	 * bpf_arena_free_pages(). Free a reserved and an allocated
+	 * page with a single call.
+	 */
+	bpf_arena_free_pages(&arena, addr + __PAGE_SIZE, 2);
+
+	/* The free call above should have succeeded, so this allocation should too. */
+	page = bpf_arena_alloc_pages(&arena, addr + __PAGE_SIZE, 2, NUMA_NO_NODE, 0);
+	if (!page)
+		return 3;
+#endif
+	return 0;
+}
+
 #if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
 #define PAGE_CNT 100
 __u8 __arena * __arena page[PAGE_CNT]; /* occupies the first page */
