 #include <linux/btf_ids.h>
 #include <linux/vmalloc.h>
 #include <linux/pagemap.h>
+#include <asm/tlbflush.h>
 #include "range_tree.h"
 
 /*
@@ -92,6 +93,62 @@ static long compute_pgoff(struct bpf_arena *arena, long uaddr)
 	return (u32)(uaddr - (u32)arena->user_vm_start) >> PAGE_SHIFT;
 }
 
+struct apply_range_data {
+	struct page **pages;
+	int i;
+};
+
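+/*
+ * Install the next page from @data's batch into this pte. With a NULL
+ * @data the pte is left untouched, so only the page-table levels above
+ * it end up allocated (see populate_pgtable_except_pte() below).
+ */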
+static int apply_range_set_cb(pte_t *pte, unsigned long addr, void *data)
+{
+	struct apply_range_data *d = data;
+	struct page *page;
+
+	if (!data)
+		return 0;
+	/* sanity check */
+	if (unlikely(!pte_none(ptep_get(pte))))
+		return -EBUSY;
+
+	page = d->pages[d->i++];
+	/* paranoia, similar to vmap_pages_pte_range() */
+	if (WARN_ON_ONCE(!pfn_valid(page_to_pfn(page))))
+		return -EINVAL;
+
+	set_pte_at(&init_mm, addr, pte, mk_pte(page, PAGE_KERNEL));
+	return 0;
+}
+
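+/* Tear down one mapping: clear the pte, flush the TLB and free the page */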
+static int apply_range_clear_cb(pte_t *pte, unsigned long addr, void *data)
+{
+	pte_t old_pte;
+	struct page *page;
+
+	/* sanity check */
+	old_pte = ptep_get(pte);
+	if (pte_none(old_pte) || !pte_present(old_pte))
+		return 0; /* nothing to do */
+
+	/* get page and free it */
+	page = pte_page(old_pte);
+	if (WARN_ON_ONCE(!page))
+		return -EINVAL;
+
+	pte_clear(&init_mm, addr, pte);
+
+	/* ensure no stale TLB entries */
+	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+
+	__free_page(page);
+
+	return 0;
+}
+
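+/*
+ * Pre-allocate every page-table level for the arena's kernel VM range.
+ * Passing NULL data makes apply_range_set_cb() return early, so the
+ * ptes themselves stay empty until pages are actually mapped.
+ */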
+static int populate_pgtable_except_pte(struct bpf_arena *arena)
+{
+	return apply_to_page_range(&init_mm, bpf_arena_get_kern_vm_start(arena),
+				   KERN_VM_SZ - GUARD_SZ, apply_range_set_cb, NULL);
+}
+
 static struct bpf_map *arena_map_alloc(union bpf_attr *attr)
 {
 	struct vm_struct *kern_vm;
@@ -144,6 +201,11 @@ static struct bpf_map *arena_map_alloc(union bpf_attr *attr)
 		goto err;
 	}
 	mutex_init(&arena->lock);
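+	/* pre-allocate page-table levels for the whole arena kernel range */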
+	err = populate_pgtable_except_pte(arena);
+	if (err) {
+		bpf_map_area_free(arena);
+		goto err;
+	}
 
 	return &arena->map;
 err:
@@ -286,14 +348,15 @@ static vm_fault_t arena_vm_fault(struct vm_fault *vmf)
 	if (ret)
 		return VM_FAULT_SIGSEGV;
 
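+	/* single-page batch consumed by apply_range_set_cb() below */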
+	struct apply_range_data data = { .pages = &page, .i = 0 };
 	/* Account into memcg of the process that created bpf_arena */
 	ret = bpf_map_alloc_pages(map, NUMA_NO_NODE, 1, &page);
 	if (ret) {
 		range_tree_set(&arena->rt, vmf->pgoff, 1);
 		return VM_FAULT_SIGSEGV;
 	}
 
-	ret = vm_area_map_pages(arena->kern_vm, kaddr, kaddr + PAGE_SIZE, &page);
+	ret = apply_to_page_range(&init_mm, kaddr, PAGE_SIZE, apply_range_set_cb, &data);
 	if (ret) {
 		range_tree_set(&arena->rt, vmf->pgoff, 1);
 		__free_page(page);
@@ -428,7 +491,7 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
 	/* user_vm_end/start are fixed before bpf prog runs */
 	long page_cnt_max = (arena->user_vm_end - arena->user_vm_start) >> PAGE_SHIFT;
 	u64 kern_vm_start = bpf_arena_get_kern_vm_start(arena);
-	struct page **pages;
+	struct page **pages = NULL;
 	long pgoff = 0;
 	u32 uaddr32;
 	int ret, i;
@@ -465,6 +528,7 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
 	if (ret)
 		goto out_free_pages;
 
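+	/* batch of page_cnt pages, installed in order by apply_range_set_cb() */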
+	struct apply_range_data data = { .pages = pages, .i = 0 };
 	ret = bpf_map_alloc_pages(&arena->map, node_id, page_cnt, pages);
 	if (ret)
 		goto out;
@@ -477,8 +541,8 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
 	 * kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE - 1 can overflow
 	 * lower 32-bit and it's ok.
 	 */
-	ret = vm_area_map_pages(arena->kern_vm, kern_vm_start + uaddr32,
-				kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE, pages);
+	ret = apply_to_page_range(&init_mm, kern_vm_start + uaddr32,
+				  page_cnt << PAGE_SHIFT, apply_range_set_cb, &data);
 	if (ret) {
 		for (i = 0; i < page_cnt; i++)
 			__free_page(pages[i]);
@@ -545,8 +609,8 @@ static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt)
 			 * page_cnt is big it's faster to do the batched zap.
 			 */
 			zap_pages(arena, full_uaddr, 1);
-		vm_area_unmap_pages(arena->kern_vm, kaddr, kaddr + PAGE_SIZE);
-		__free_page(page);
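+		/* apply_range_clear_cb() clears the pte, flushes the TLB and frees the page */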
+		apply_to_existing_page_range(&init_mm, kaddr, PAGE_SIZE, apply_range_clear_cb,
+					     NULL);
 	}
 }
 