
Commit 7694ff8

Author: Alexei Starovoitov (committed)

Merge branch 'memcg-accounting-for-bpf-arena'

Puranjay Mohan says:

====================
memcg accounting for BPF arena

v4: https://lore.kernel.org/all/20260102181333.3033679-1-puranjay@kernel.org/
Changes in v4->v5:
- Remove unused variables from bpf_map_alloc_pages() (CI)

v3: https://lore.kernel.org/all/20260102151852.570285-1-puranjay@kernel.org/
Changes in v3->v4:
- Do memcg set/recover in arena_reserve_pages() rather than bpf_arena_reserve_pages() for symmetry with other kfuncs (Alexei)

v2: https://lore.kernel.org/all/20251231141434.3416822-1-puranjay@kernel.org/
Changes in v2->v3:
- Remove memcg accounting from bpf_map_alloc_pages() as the caller does it already. (Alexei)
- Do memcg set/recover in arena_alloc/free_pages() rather than bpf_arena_alloc/free_pages(); it reduces copy-pasting in the sleepable/non-sleepable functions.

v1: https://lore.kernel.org/all/20251230153006.1347742-1-puranjay@kernel.org/
Changes in v1->v2:
- Return both pointers through arguments from bpf_map_memcg_enter and make it return void. (Alexei)
- Add memcg accounting in arena_free_worker (AI)

This set adds memcg accounting logic into the arena kfuncs and other places that do allocations in arena.c.
====================

Link: https://patch.msgid.link/20260102200230.25168-1-puranjay@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

2 parents e40030a + e66fe1b · commit 7694ff8
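At a glance, the pattern this series introduces is an enter/exit pair around any allocation made on behalf of a map: bpf_map_memcg_enter() makes the map's memory cgroup the active one, the allocation itself passes __GFP_ACCOUNT so it is charged to that cgroup, and bpf_map_memcg_exit() restores the previous cgroup and drops the reference. Below is a minimal sketch of a caller, mirroring bpf_map_kmalloc_nolock() in the kernel/bpf/syscall.c diff further down; example_map_alloc() is a hypothetical wrapper, not part of the patch.

/* Sketch of the enter/exit pattern; example_map_alloc() is hypothetical. */
static void *example_map_alloc(const struct bpf_map *map, size_t size)
{
        struct mem_cgroup *memcg, *old_memcg;
        void *ptr;

        /* Make the map's memcg the active one and remember the old one. */
        bpf_map_memcg_enter(map, &old_memcg, &memcg);
        /* __GFP_ACCOUNT charges the allocation to the active memcg. */
        ptr = kmalloc_nolock(size, __GFP_ACCOUNT, NUMA_NO_NODE);
        /* Restore the previous memcg and drop the reference taken by enter(). */
        bpf_map_memcg_exit(old_memcg, memcg);

        return ptr;
}

Under !CONFIG_MEMCG the enter/exit helpers become inline no-op stubs (see the include/linux/bpf.h hunk), so callers do not need their own #ifdef CONFIG_MEMCG blocks.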

File tree

4 files changed: +68 −34 lines changed


include/linux/bpf.h

Lines changed: 15 additions & 0 deletions
@@ -2608,6 +2608,10 @@ struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id);
 int bpf_map_alloc_pages(const struct bpf_map *map, int nid,
                         unsigned long nr_pages, struct page **page_array);
 #ifdef CONFIG_MEMCG
+void bpf_map_memcg_enter(const struct bpf_map *map, struct mem_cgroup **old_memcg,
+                         struct mem_cgroup **new_memcg);
+void bpf_map_memcg_exit(struct mem_cgroup *old_memcg,
+                        struct mem_cgroup *memcg);
 void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
                            int node);
 void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags,
@@ -2632,6 +2636,17 @@ void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
         kvcalloc(_n, _size, _flags)
 #define bpf_map_alloc_percpu(_map, _size, _align, _flags) \
         __alloc_percpu_gfp(_size, _align, _flags)
+static inline void bpf_map_memcg_enter(const struct bpf_map *map, struct mem_cgroup **old_memcg,
+                                       struct mem_cgroup **new_memcg)
+{
+        *new_memcg = NULL;
+        *old_memcg = NULL;
+}
+
+static inline void bpf_map_memcg_exit(struct mem_cgroup *old_memcg,
+                                      struct mem_cgroup *memcg)
+{
+}
 #endif
 
 static inline int

kernel/bpf/arena.c

Lines changed: 26 additions & 3 deletions
@@ -360,6 +360,7 @@ static vm_fault_t arena_vm_fault(struct vm_fault *vmf)
 {
         struct bpf_map *map = vmf->vma->vm_file->private_data;
         struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
+        struct mem_cgroup *new_memcg, *old_memcg;
         struct page *page;
         long kbase, kaddr;
         unsigned long flags;
@@ -377,6 +378,8 @@ static vm_fault_t arena_vm_fault(struct vm_fault *vmf)
                 /* already have a page vmap-ed */
                 goto out;
 
+        bpf_map_memcg_enter(&arena->map, &old_memcg, &new_memcg);
+
         if (arena->map.map_flags & BPF_F_SEGV_ON_FAULT)
                 /* User space requested to segfault when page is not allocated by bpf prog */
                 goto out_unlock_sigsegv;
@@ -400,12 +403,14 @@ static vm_fault_t arena_vm_fault(struct vm_fault *vmf)
                 goto out_unlock_sigsegv;
         }
         flush_vmap_cache(kaddr, PAGE_SIZE);
+        bpf_map_memcg_exit(old_memcg, new_memcg);
 out:
         page_ref_add(page, 1);
         raw_res_spin_unlock_irqrestore(&arena->spinlock, flags);
         vmf->page = page;
         return 0;
 out_unlock_sigsegv:
+        bpf_map_memcg_exit(old_memcg, new_memcg);
         raw_res_spin_unlock_irqrestore(&arena->spinlock, flags);
         return VM_FAULT_SIGSEGV;
 }
@@ -534,6 +539,7 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
         /* user_vm_end/start are fixed before bpf prog runs */
         long page_cnt_max = (arena->user_vm_end - arena->user_vm_start) >> PAGE_SHIFT;
         u64 kern_vm_start = bpf_arena_get_kern_vm_start(arena);
+        struct mem_cgroup *new_memcg, *old_memcg;
         struct apply_range_data data;
         struct page **pages = NULL;
         long remaining, mapped = 0;
@@ -555,11 +561,14 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
                 return 0;
         }
 
+        bpf_map_memcg_enter(&arena->map, &old_memcg, &new_memcg);
         /* Cap allocation size to KMALLOC_MAX_CACHE_SIZE so kmalloc_nolock() can succeed. */
         alloc_pages = min(page_cnt, KMALLOC_MAX_CACHE_SIZE / sizeof(struct page *));
-        pages = kmalloc_nolock(alloc_pages * sizeof(struct page *), 0, NUMA_NO_NODE);
-        if (!pages)
+        pages = kmalloc_nolock(alloc_pages * sizeof(struct page *), __GFP_ACCOUNT, NUMA_NO_NODE);
+        if (!pages) {
+                bpf_map_memcg_exit(old_memcg, new_memcg);
                 return 0;
+        }
         data.pages = pages;
 
         if (raw_res_spin_lock_irqsave(&arena->spinlock, flags))
@@ -617,6 +626,7 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
         flush_vmap_cache(kern_vm_start + uaddr32, mapped << PAGE_SHIFT);
         raw_res_spin_unlock_irqrestore(&arena->spinlock, flags);
         kfree_nolock(pages);
+        bpf_map_memcg_exit(old_memcg, new_memcg);
         return clear_lo32(arena->user_vm_start) + uaddr32;
 out:
         range_tree_set(&arena->rt, pgoff + mapped, page_cnt - mapped);
@@ -630,6 +640,7 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
         raw_res_spin_unlock_irqrestore(&arena->spinlock, flags);
 out_free_pages:
         kfree_nolock(pages);
+        bpf_map_memcg_exit(old_memcg, new_memcg);
         return 0;
 }
 
@@ -651,6 +662,7 @@ static void zap_pages(struct bpf_arena *arena, long uaddr, long page_cnt)
 
 static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt, bool sleepable)
 {
+        struct mem_cgroup *new_memcg, *old_memcg;
         u64 full_uaddr, uaddr_end;
         long kaddr, pgoff;
         struct page *page;
@@ -671,6 +683,7 @@ static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt,
 
         page_cnt = (uaddr_end - full_uaddr) >> PAGE_SHIFT;
         pgoff = compute_pgoff(arena, uaddr);
+        bpf_map_memcg_enter(&arena->map, &old_memcg, &new_memcg);
 
         if (!sleepable)
                 goto defer;
@@ -709,11 +722,13 @@ static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt,
                 zap_pages(arena, full_uaddr, 1);
                 __free_page(page);
         }
+        bpf_map_memcg_exit(old_memcg, new_memcg);
 
         return;
 
 defer:
-        s = kmalloc_nolock(sizeof(struct arena_free_span), 0, -1);
+        s = kmalloc_nolock(sizeof(struct arena_free_span), __GFP_ACCOUNT, -1);
+        bpf_map_memcg_exit(old_memcg, new_memcg);
         if (!s)
                 /*
                  * If allocation fails in non-sleepable context, pages are intentionally left
@@ -735,6 +750,7 @@ static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt,
 static int arena_reserve_pages(struct bpf_arena *arena, long uaddr, u32 page_cnt)
 {
         long page_cnt_max = (arena->user_vm_end - arena->user_vm_start) >> PAGE_SHIFT;
+        struct mem_cgroup *new_memcg, *old_memcg;
         unsigned long flags;
         long pgoff;
         int ret;
@@ -757,7 +773,9 @@ static int arena_reserve_pages(struct bpf_arena *arena, long uaddr, u32 page_cnt
         }
 
         /* "Allocate" the region to prevent it from being allocated. */
+        bpf_map_memcg_enter(&arena->map, &old_memcg, &new_memcg);
         ret = range_tree_clear(&arena->rt, pgoff, page_cnt);
+        bpf_map_memcg_exit(old_memcg, new_memcg);
 out:
         raw_res_spin_unlock_irqrestore(&arena->spinlock, flags);
         return ret;
@@ -766,6 +784,7 @@ static int arena_reserve_pages(struct bpf_arena *arena, long uaddr, u32 page_cnt
 static void arena_free_worker(struct work_struct *work)
 {
         struct bpf_arena *arena = container_of(work, struct bpf_arena, free_work);
+        struct mem_cgroup *new_memcg, *old_memcg;
         struct llist_node *list, *pos, *t;
         struct arena_free_span *s;
         u64 arena_vm_start, user_vm_start;
@@ -780,6 +799,8 @@ static void arena_free_worker(struct work_struct *work)
                 return;
         }
 
+        bpf_map_memcg_enter(&arena->map, &old_memcg, &new_memcg);
+
         init_llist_head(&free_pages);
         arena_vm_start = bpf_arena_get_kern_vm_start(arena);
         user_vm_start = bpf_arena_get_user_vm_start(arena);
@@ -820,6 +841,8 @@ static void arena_free_worker(struct work_struct *work)
                 page = llist_entry(pos, struct page, pcp_llist);
                 __free_page(page);
         }
+
+        bpf_map_memcg_exit(old_memcg, new_memcg);
 }
 
 static void arena_free_irq(struct irq_work *iw)

kernel/bpf/range_tree.c

Lines changed: 3 additions & 2 deletions
@@ -149,7 +149,8 @@ int range_tree_clear(struct range_tree *rt, u32 start, u32 len)
                 range_it_insert(rn, rt);
 
                 /* Add a range */
-                new_rn = kmalloc_nolock(sizeof(struct range_node), 0, NUMA_NO_NODE);
+                new_rn = kmalloc_nolock(sizeof(struct range_node), __GFP_ACCOUNT,
+                                        NUMA_NO_NODE);
                 if (!new_rn)
                         return -ENOMEM;
                 new_rn->rn_start = last + 1;
@@ -234,7 +235,7 @@ int range_tree_set(struct range_tree *rt, u32 start, u32 len)
                 right->rn_start = start;
                 range_it_insert(right, rt);
         } else {
-                left = kmalloc_nolock(sizeof(struct range_node), 0, NUMA_NO_NODE);
+                left = kmalloc_nolock(sizeof(struct range_node), __GFP_ACCOUNT, NUMA_NO_NODE);
                 if (!left)
                         return -ENOMEM;
                 left->rn_start = start;

kernel/bpf/syscall.c

Lines changed: 24 additions & 29 deletions
@@ -505,17 +505,29 @@ static struct mem_cgroup *bpf_map_get_memcg(const struct bpf_map *map)
         return root_mem_cgroup;
 }
 
+void bpf_map_memcg_enter(const struct bpf_map *map, struct mem_cgroup **old_memcg,
+                         struct mem_cgroup **new_memcg)
+{
+        *new_memcg = bpf_map_get_memcg(map);
+        *old_memcg = set_active_memcg(*new_memcg);
+}
+
+void bpf_map_memcg_exit(struct mem_cgroup *old_memcg,
+                        struct mem_cgroup *new_memcg)
+{
+        set_active_memcg(old_memcg);
+        mem_cgroup_put(new_memcg);
+}
+
 void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
                            int node)
 {
         struct mem_cgroup *memcg, *old_memcg;
         void *ptr;
 
-        memcg = bpf_map_get_memcg(map);
-        old_memcg = set_active_memcg(memcg);
+        bpf_map_memcg_enter(map, &old_memcg, &memcg);
         ptr = kmalloc_node(size, flags | __GFP_ACCOUNT, node);
-        set_active_memcg(old_memcg);
-        mem_cgroup_put(memcg);
+        bpf_map_memcg_exit(old_memcg, memcg);
 
         return ptr;
 }
@@ -526,11 +538,9 @@ void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags
         struct mem_cgroup *memcg, *old_memcg;
         void *ptr;
 
-        memcg = bpf_map_get_memcg(map);
-        old_memcg = set_active_memcg(memcg);
+        bpf_map_memcg_enter(map, &old_memcg, &memcg);
         ptr = kmalloc_nolock(size, flags | __GFP_ACCOUNT, node);
-        set_active_memcg(old_memcg);
-        mem_cgroup_put(memcg);
+        bpf_map_memcg_exit(old_memcg, memcg);
 
         return ptr;
 }
@@ -540,11 +550,9 @@ void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags)
         struct mem_cgroup *memcg, *old_memcg;
         void *ptr;
 
-        memcg = bpf_map_get_memcg(map);
-        old_memcg = set_active_memcg(memcg);
+        bpf_map_memcg_enter(map, &old_memcg, &memcg);
         ptr = kzalloc(size, flags | __GFP_ACCOUNT);
-        set_active_memcg(old_memcg);
-        mem_cgroup_put(memcg);
+        bpf_map_memcg_exit(old_memcg, memcg);
 
         return ptr;
 }
@@ -555,11 +563,9 @@ void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size,
         struct mem_cgroup *memcg, *old_memcg;
         void *ptr;
 
-        memcg = bpf_map_get_memcg(map);
-        old_memcg = set_active_memcg(memcg);
+        bpf_map_memcg_enter(map, &old_memcg, &memcg);
         ptr = kvcalloc(n, size, flags | __GFP_ACCOUNT);
-        set_active_memcg(old_memcg);
-        mem_cgroup_put(memcg);
+        bpf_map_memcg_exit(old_memcg, memcg);
 
         return ptr;
 }
@@ -570,11 +576,9 @@ void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
         struct mem_cgroup *memcg, *old_memcg;
         void __percpu *ptr;
 
-        memcg = bpf_map_get_memcg(map);
-        old_memcg = set_active_memcg(memcg);
+        bpf_map_memcg_enter(map, &old_memcg, &memcg);
         ptr = __alloc_percpu_gfp(size, align, flags | __GFP_ACCOUNT);
-        set_active_memcg(old_memcg);
-        mem_cgroup_put(memcg);
+        bpf_map_memcg_exit(old_memcg, memcg);
 
         return ptr;
 }
@@ -612,12 +616,7 @@ int bpf_map_alloc_pages(const struct bpf_map *map, int nid,
         unsigned long i, j;
         struct page *pg;
         int ret = 0;
-#ifdef CONFIG_MEMCG
-        struct mem_cgroup *memcg, *old_memcg;
 
-        memcg = bpf_map_get_memcg(map);
-        old_memcg = set_active_memcg(memcg);
-#endif
         for (i = 0; i < nr_pages; i++) {
                 pg = __bpf_alloc_page(nid);
 
@@ -631,10 +630,6 @@ int bpf_map_alloc_pages(const struct bpf_map *map, int nid,
                 break;
         }
 
-#ifdef CONFIG_MEMCG
-        set_active_memcg(old_memcg);
-        mem_cgroup_put(memcg);
-#endif
         return ret;
 }
 