 #define GUARD_SZ round_up(1ull << sizeof_field(struct bpf_insn, off) * 8, PAGE_SIZE << 1)
 #define KERN_VM_SZ (SZ_4G + GUARD_SZ)
 
+static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt);
+
 struct bpf_arena {
 	struct bpf_map map;
 	u64 user_vm_start;
@@ -491,7 +493,10 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
 	/* user_vm_end/start are fixed before bpf prog runs */
 	long page_cnt_max = (arena->user_vm_end - arena->user_vm_start) >> PAGE_SHIFT;
 	u64 kern_vm_start = bpf_arena_get_kern_vm_start(arena);
+	struct apply_range_data data;
 	struct page **pages = NULL;
+	long remaining, mapped = 0;
+	long alloc_pages;
 	long pgoff = 0;
 	u32 uaddr32;
 	int ret, i;
@@ -508,12 +513,16 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
 			return 0;
 	}
 
-	/* zeroing is needed, since alloc_pages_bulk() only fills in non-zero entries */
-	pages = kvcalloc(page_cnt, sizeof(struct page *), GFP_KERNEL);
+	/*
+	 * Cap allocation size to KMALLOC_MAX_CACHE_SIZE so kmalloc_nolock() can succeed.
+	 */
+	alloc_pages = min(page_cnt, KMALLOC_MAX_CACHE_SIZE / sizeof(struct page *));
+	pages = kmalloc_nolock(alloc_pages * sizeof(struct page *), 0, NUMA_NO_NODE);
 	if (!pages)
 		return 0;
+	data.pages = pages;
 
-	guard(mutex)(&arena->lock);
+	mutex_lock(&arena->lock);
 
 	if (uaddr) {
 		ret = is_range_tree_set(&arena->rt, pgoff, page_cnt);
@@ -528,32 +537,51 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
 	if (ret)
 		goto out_free_pages;
 
-	struct apply_range_data data = { .pages = pages, .i = 0 };
-	ret = bpf_map_alloc_pages(&arena->map, node_id, page_cnt, pages);
-	if (ret)
-		goto out;
-
+	remaining = page_cnt;
 	uaddr32 = (u32)(arena->user_vm_start + pgoff * PAGE_SIZE);
-	/* Earlier checks made sure that uaddr32 + page_cnt * PAGE_SIZE - 1
-	 * will not overflow 32-bit. Lower 32-bit need to represent
-	 * contiguous user address range.
-	 * Map these pages at kern_vm_start base.
-	 * kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE - 1 can overflow
-	 * lower 32-bit and it's ok.
-	 */
-	ret = apply_to_page_range(&init_mm, kern_vm_start + uaddr32,
-				  page_cnt << PAGE_SHIFT, apply_range_set_cb, &data);
-	if (ret) {
-		for (i = 0; i < page_cnt; i++)
-			__free_page(pages[i]);
-		goto out;
+
+	while (remaining) {
+		long this_batch = min(remaining, alloc_pages);
+		/* zeroing is needed, since alloc_pages_bulk() only fills in non-zero entries */
+		memset(pages, 0, this_batch * sizeof(struct page *));
+		data.i = 0;
+
+		ret = bpf_map_alloc_pages(&arena->map, node_id, this_batch, pages);
+		if (ret)
+			goto out;
+
+		/* Earlier checks made sure that uaddr32 + page_cnt * PAGE_SIZE - 1
+		 * will not overflow 32-bit. Lower 32-bit need to represent
+		 * contiguous user address range.
+		 * Map these pages at kern_vm_start base.
+		 * kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE - 1 can overflow
+		 * lower 32-bit and it's ok.
+		 */
+		ret = apply_to_page_range(&init_mm,
+					  kern_vm_start + uaddr32 + (mapped << PAGE_SHIFT),
+					  this_batch << PAGE_SHIFT, apply_range_set_cb, &data);
+		if (ret) {
+			/* data.i pages were mapped, account them and free the remaining */
+			mapped += data.i;
+			for (i = data.i; i < this_batch; i++)
+				__free_page(pages[i]);
+			goto out;
+		}
+
+		mapped += this_batch;
+		remaining -= this_batch;
 	}
-	kvfree(pages);
+	mutex_unlock(&arena->lock);
+	kfree_nolock(pages);
 	return clear_lo32(arena->user_vm_start) + uaddr32;
 out:
-	range_tree_set(&arena->rt, pgoff, page_cnt);
+	range_tree_set(&arena->rt, pgoff + mapped, page_cnt - mapped);
 out_free_pages:
-	kvfree(pages);
+	mutex_unlock(&arena->lock);
+	/* mapped == 0 when the range tree checks above failed */
+	if (mapped)
+		arena_free_pages(arena, clear_lo32(arena->user_vm_start) + uaddr32, mapped);
+	kfree_nolock(pages);
 	return 0;
 }
 
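For readers following the control flow: the patch replaces one bulk allocation sized by page_cnt with a loop over fixed-size batches, so the temporary page-pointer array never exceeds KMALLOC_MAX_CACHE_SIZE and kmalloc_nolock() can serve it. The stand-alone user-space sketch below mirrors only that pattern (a capped scratch array reused per batch, plus rollback of already-mapped batches on a mid-loop failure). It is an illustration under assumed names: BATCH_CAP, mapping[], rollback() and alloc_pages_batched() are hypothetical stand-ins, not kernel APIs.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Cap on the scratch array; stands in for KMALLOC_MAX_CACHE_SIZE / sizeof(struct page *). */
#define BATCH_CAP	4
#define MAX_PAGES	64

/*
 * 'mapping' plays the role of the kernel page table that apply_to_page_range()
 * populates, and rollback() the role of arena_free_pages(). Hypothetical names.
 */
static void *mapping[MAX_PAGES];

static void rollback(long mapped)
{
	while (mapped-- > 0)
		free(mapping[mapped]);
}

static long alloc_pages_batched(long page_cnt)
{
	long remaining = page_cnt, mapped = 0, this_batch, i;
	void **pages;

	if (page_cnt <= 0 || page_cnt > MAX_PAGES)
		return -1;

	/* small scratch array, reused for every batch, like 'pages' in the patch */
	pages = calloc(BATCH_CAP, sizeof(*pages));
	if (!pages)
		return -1;

	while (remaining) {
		this_batch = remaining < BATCH_CAP ? remaining : BATCH_CAP;
		memset(pages, 0, this_batch * sizeof(*pages));

		for (i = 0; i < this_batch; i++) {
			pages[i] = malloc(4096);	/* "allocate one page" */
			if (!pages[i]) {
				while (i-- > 0)		/* drop this batch's not-yet-mapped pages */
					free(pages[i]);
				rollback(mapped);	/* undo the batches mapped so far */
				free(pages);
				return -1;
			}
		}
		for (i = 0; i < this_batch; i++)	/* "map" the batch */
			mapping[mapped++] = pages[i];
		remaining -= this_batch;
	}

	free(pages);
	return mapped;
}

int main(void)
{
	long n = alloc_pages_batched(10);

	printf("mapped %ld pages\n", n);
	if (n > 0)
		rollback(n);	/* tear the demo mapping down again */
	return 0;
}

In the kernel patch the corresponding roles are played by the pages scratch array, the arena mapping installed via apply_range_set_cb(), and arena_free_pages() for the rollback of pages that were already mapped before the failure.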