
Commit 24dd464

puranjaymohan authored and Kernel Patches Daemon committed
bpf: arena: use kmalloc_nolock() in place of kvcalloc()
To make arena_alloc_pages() safe to call from any context, replace kvcalloc() with kmalloc_nolock(), which neither sleeps nor takes any locks.

kmalloc_nolock() returns NULL for allocations larger than KMALLOC_MAX_CACHE_SIZE, which is (PAGE_SIZE * 2) = 8KB on systems with 4KB pages. So, cap the array allocated with kmalloc_nolock() at 1024 * 8 bytes (1024 page pointers) and reuse it in a loop.

Signed-off-by: Puranjay Mohan <[email protected]>
1 parent aa87a85 commit 24dd464
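
For orientation, the batching scheme the patch adopts can be modeled in a few lines of plain C. This is a simplified userspace sketch, not the kernel code: CACHE_CAP stands in for KMALLOC_MAX_CACHE_SIZE, map_one_batch() stands in for bpf_map_alloc_pages() plus apply_to_page_range(), and alloc_in_batches() is a made-up name. On 64-bit with 4KB pages the cap works out to 1024 page pointers per batch, so mapping a full 4GB arena (1,048,576 pages) takes about 1024 passes over the same array.

#include <stdlib.h>
#include <string.h>

/* Stand-in for KMALLOC_MAX_CACHE_SIZE: (PAGE_SIZE * 2) = 8KB with 4KB pages. */
#define CACHE_CAP      8192UL
#define PTRS_PER_BATCH (CACHE_CAP / sizeof(void *))	/* 1024 entries on 64-bit */

/* Hypothetical per-batch worker standing in for bpf_map_alloc_pages() +
 * apply_to_page_range(); here it simply reports success. */
static int map_one_batch(void **pages, long n)
{
	(void)pages;
	(void)n;
	return 0;
}

/* Map page_cnt pages while never allocating more than CACHE_CAP bytes of
 * pointer array: one array is allocated up front and reused per batch. */
static long alloc_in_batches(long page_cnt)
{
	long batch_cap = page_cnt < (long)PTRS_PER_BATCH ? page_cnt : (long)PTRS_PER_BATCH;
	void **pages = calloc(batch_cap, sizeof(void *));
	long remaining = page_cnt, mapped = 0;

	if (!pages)
		return 0;

	while (remaining) {
		long this_batch = remaining < batch_cap ? remaining : batch_cap;

		/* the array is reused, so clear the slots for this batch */
		memset(pages, 0, this_batch * sizeof(void *));
		if (map_one_batch(pages, this_batch))
			break;		/* a real caller would unwind 'mapped' pages */
		mapped += this_batch;
		remaining -= this_batch;
	}
	free(pages);
	return mapped;
}

int main(void)
{
	/* 4GB worth of 4KB pages = 1,048,576 pages -> 1024 batches of 1024 */
	return alloc_in_batches(1048576) == 1048576 ? 0 : 1;
}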


kernel/bpf/arena.c

Lines changed: 52 additions & 24 deletions
@@ -43,6 +43,8 @@
 #define GUARD_SZ round_up(1ull << sizeof_field(struct bpf_insn, off) * 8, PAGE_SIZE << 1)
 #define KERN_VM_SZ (SZ_4G + GUARD_SZ)
 
+static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt);
+
 struct bpf_arena {
 	struct bpf_map map;
 	u64 user_vm_start;
@@ -491,7 +493,10 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
 	/* user_vm_end/start are fixed before bpf prog runs */
 	long page_cnt_max = (arena->user_vm_end - arena->user_vm_start) >> PAGE_SHIFT;
 	u64 kern_vm_start = bpf_arena_get_kern_vm_start(arena);
+	struct apply_range_data data;
 	struct page **pages = NULL;
+	long remaining, mapped = 0;
+	long alloc_pages;
 	long pgoff = 0;
 	u32 uaddr32;
 	int ret, i;
@@ -508,12 +513,16 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
 		return 0;
 	}
 
-	/* zeroing is needed, since alloc_pages_bulk() only fills in non-zero entries */
-	pages = kvcalloc(page_cnt, sizeof(struct page *), GFP_KERNEL);
+	/*
+	 * Cap allocation size to KMALLOC_MAX_CACHE_SIZE so kmalloc_nolock() can succeed.
+	 */
+	alloc_pages = min(page_cnt, KMALLOC_MAX_CACHE_SIZE / sizeof(struct page *));
+	pages = kmalloc_nolock(alloc_pages * sizeof(struct page *), 0, NUMA_NO_NODE);
 	if (!pages)
 		return 0;
+	data.pages = pages;
 
-	guard(mutex)(&arena->lock);
+	mutex_lock(&arena->lock);
 
 	if (uaddr) {
 		ret = is_range_tree_set(&arena->rt, pgoff, page_cnt);
@@ -528,32 +537,51 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
 	if (ret)
 		goto out_free_pages;
 
-	struct apply_range_data data = { .pages = pages, .i = 0 };
-	ret = bpf_map_alloc_pages(&arena->map, node_id, page_cnt, pages);
-	if (ret)
-		goto out;
-
+	remaining = page_cnt;
 	uaddr32 = (u32)(arena->user_vm_start + pgoff * PAGE_SIZE);
-	/* Earlier checks made sure that uaddr32 + page_cnt * PAGE_SIZE - 1
-	 * will not overflow 32-bit. Lower 32-bit need to represent
-	 * contiguous user address range.
-	 * Map these pages at kern_vm_start base.
-	 * kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE - 1 can overflow
-	 * lower 32-bit and it's ok.
-	 */
-	ret = apply_to_page_range(&init_mm, kern_vm_start + uaddr32,
-				  page_cnt << PAGE_SHIFT, apply_range_set_cb, &data);
-	if (ret) {
-		for (i = 0; i < page_cnt; i++)
-			__free_page(pages[i]);
-		goto out;
+
+	while(remaining) {
+		long this_batch = min(remaining, alloc_pages);
+		/* zeroing is needed, since alloc_pages_bulk() only fills in non-zero entries */
+		memset(pages, 0, this_batch * sizeof(struct page *));
+		data.i = 0;
+
+		ret = bpf_map_alloc_pages(&arena->map, node_id, this_batch, pages);
+		if (ret)
+			goto out;
+
+		/* Earlier checks made sure that uaddr32 + page_cnt * PAGE_SIZE - 1
+		 * will not overflow 32-bit. Lower 32-bit need to represent
+		 * contiguous user address range.
+		 * Map these pages at kern_vm_start base.
+		 * kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE - 1 can overflow
+		 * lower 32-bit and it's ok.
+		 */
+		ret = apply_to_page_range(&init_mm,
+					  kern_vm_start + uaddr32 + (mapped << PAGE_SHIFT),
+					  this_batch << PAGE_SHIFT, apply_range_set_cb, &data);
+		if (ret) {
+			/* data.i pages were mapped, account them and free the remaining */
+			mapped += data.i;
+			for (i = data.i; i < this_batch; i++)
+				__free_page(pages[i]);
+			goto out;
+		}
+
+		mapped += this_batch;
+		remaining -= this_batch;
 	}
-	kvfree(pages);
+	mutex_unlock(&arena->lock);
+	kfree_nolock(pages);
 	return clear_lo32(arena->user_vm_start) + uaddr32;
 out:
-	range_tree_set(&arena->rt, pgoff, page_cnt);
+	range_tree_set(&arena->rt, pgoff + mapped, page_cnt - mapped);
+	mutex_unlock(&arena->lock);
+	if (mapped)
+		arena_free_pages(arena, clear_lo32(arena->user_vm_start) + uaddr32, mapped);
 out_free_pages:
-	kvfree(pages);
+	mutex_unlock(&arena->lock);
+	kfree_nolock(pages);
 	return 0;
 }

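For context, a request large enough to exceed one 1024-pointer batch is what exercises the new loop. Below is a minimal sketch of such a caller, modeled on the BPF arena selftests and assuming the bpf_arena_alloc_pages() kfunc; the map sizing, section names, and page counts are illustrative and are not part of this patch.

// SPDX-License-Identifier: GPL-2.0
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

#ifndef NUMA_NO_NODE
#define NUMA_NO_NODE (-1)
#endif

/* kfunc provided by kernel/bpf/arena.c; declaration follows the selftests' pattern */
void *bpf_arena_alloc_pages(void *map, void *addr, __u32 page_cnt,
			    int node_id, __u64 flags) __ksym;

/* Arena with room for 4096 pages, i.e. several pointer-array batches. */
struct {
	__uint(type, BPF_MAP_TYPE_ARENA);
	__uint(map_flags, BPF_F_MMAPABLE);
	__uint(max_entries, 4096);
} arena SEC(".maps");

SEC("syscall")
int alloc_many_pages(void *ctx)
{
	/* 3000 pages: with a 1024-pointer array this is served in three batches */
	void *p = bpf_arena_alloc_pages(&arena, NULL, 3000, NUMA_NO_NODE, 0);

	return p ? 0 : 1;
}

char _license[] SEC("license") = "GPL";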