 #define GUARD_SZ round_up(1ull << sizeof_field(struct bpf_insn, off) * 8, PAGE_SIZE << 1)
 #define KERN_VM_SZ (SZ_4G + GUARD_SZ)
 
+static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt);
+
 struct bpf_arena {
 	struct bpf_map map;
 	u64 user_vm_start;
@@ -492,7 +494,10 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
 	/* user_vm_end/start are fixed before bpf prog runs */
 	long page_cnt_max = (arena->user_vm_end - arena->user_vm_start) >> PAGE_SHIFT;
 	u64 kern_vm_start = bpf_arena_get_kern_vm_start(arena);
+	struct apply_range_data data;
 	struct page **pages = NULL;
+	long remaining, mapped = 0;
+	long alloc_pages;
 	long pgoff = 0;
 	u32 uaddr32;
 	int ret, i;
@@ -509,52 +514,78 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
 			return 0;
 	}
 
-	/* zeroing is needed, since alloc_pages_bulk() only fills in non-zero entries */
-	pages = kvcalloc(page_cnt, sizeof(struct page *), GFP_KERNEL);
+	/*
+	 * Cap allocation size to KMALLOC_MAX_CACHE_SIZE so kmalloc_nolock() can succeed.
+	 */
+	alloc_pages = min(page_cnt, KMALLOC_MAX_CACHE_SIZE / sizeof(struct page *));
+	pages = kmalloc_nolock(alloc_pages * sizeof(struct page *), 0, NUMA_NO_NODE);
 	if (!pages)
 		return 0;
+	data.pages = pages;
 
-	guard(mutex)(&arena->lock);
+	mutex_lock(&arena->lock);
 
 	if (uaddr) {
 		ret = is_range_tree_set(&arena->rt, pgoff, page_cnt);
 		if (ret)
-			goto out_free_pages;
+			goto out_unlock_free_pages;
 		ret = range_tree_clear(&arena->rt, pgoff, page_cnt);
 	} else {
 		ret = pgoff = range_tree_find(&arena->rt, page_cnt);
 		if (pgoff >= 0)
 			ret = range_tree_clear(&arena->rt, pgoff, page_cnt);
 	}
 	if (ret)
-		goto out_free_pages;
-
-	struct apply_range_data data = { .pages = pages, .i = 0 };
-	ret = bpf_map_alloc_pages(&arena->map, node_id, page_cnt, pages);
-	if (ret)
-		goto out;
+		goto out_unlock_free_pages;
 
+	remaining = page_cnt;
 	uaddr32 = (u32)(arena->user_vm_start + pgoff * PAGE_SIZE);
-	/* Earlier checks made sure that uaddr32 + page_cnt * PAGE_SIZE - 1
-	 * will not overflow 32-bit. Lower 32-bit need to represent
-	 * contiguous user address range.
-	 * Map these pages at kern_vm_start base.
-	 * kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE - 1 can overflow
-	 * lower 32-bit and it's ok.
-	 */
-	ret = apply_to_page_range(&init_mm, kern_vm_start + uaddr32,
-				  page_cnt << PAGE_SHIFT, apply_range_set_cb, &data);
-	if (ret) {
-		for (i = 0; i < page_cnt; i++)
-			__free_page(pages[i]);
-		goto out;
+
+	while (remaining) {
+		long this_batch = min(remaining, alloc_pages);
+
+		/* zeroing is needed, since alloc_pages_bulk() only fills in non-zero entries */
+		memset(pages, 0, this_batch * sizeof(struct page *));
+		data.i = 0;
+
+		ret = bpf_map_alloc_pages(&arena->map, node_id, this_batch, pages);
+		if (ret)
+			goto out;
+
+		/* Earlier checks made sure that uaddr32 + page_cnt * PAGE_SIZE - 1
+		 * will not overflow 32-bit. Lower 32-bit need to represent
+		 * contiguous user address range.
+		 * Map these pages at kern_vm_start base.
+		 * kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE - 1 can overflow
+		 * lower 32-bit and it's ok.
+		 */
+		ret = apply_to_page_range(&init_mm,
+					  kern_vm_start + uaddr32 + (mapped << PAGE_SHIFT),
+					  this_batch << PAGE_SHIFT, apply_range_set_cb, &data);
+		if (ret) {
+			/* data.i pages were mapped, account them and free the remaining */
+			mapped += data.i;
+			for (i = data.i; i < this_batch; i++)
+				__free_page(pages[i]);
+			goto out;
+		}
+
+		mapped += this_batch;
+		remaining -= this_batch;
 	}
-	kvfree(pages);
+	mutex_unlock(&arena->lock);
+	kfree_nolock(pages);
 	return clear_lo32(arena->user_vm_start) + uaddr32;
 out:
-	range_tree_set(&arena->rt, pgoff, page_cnt);
+	range_tree_set(&arena->rt, pgoff + mapped, page_cnt - mapped);
+	mutex_unlock(&arena->lock);
+	if (mapped)
+		arena_free_pages(arena, clear_lo32(arena->user_vm_start) + uaddr32, mapped);
+	goto out_free_pages;
+out_unlock_free_pages:
+	mutex_unlock(&arena->lock);
 out_free_pages:
-	kvfree(pages);
+	kfree_nolock(pages);
 	return 0;
 }
 
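Because the patch replaces kvcalloc() with kmalloc_nolock() and caps the page-pointer array at KMALLOC_MAX_CACHE_SIZE, arena_alloc_pages() now has to cover the requested range in batches and to track how much of it is already mapped when a batch fails part-way. The userspace sketch below only illustrates that batching and unwind accounting; the 8 KiB cache cap, the 8-byte pointer size, and the map_batch()/budget stand-ins are assumptions for illustration, not values or helpers from the patch.

```c
/*
 * batching_sketch.c - illustrative only, not kernel code.
 *
 * Models the loop above: the pointer array is capped at what a single
 * kmalloc_nolock() allocation can hold, the range is covered in batches,
 * and on a mid-way failure only the unmapped tail is rolled back while
 * the already-mapped prefix ('mapped') is accounted separately.
 */
#include <stdio.h>
#include <stdbool.h>

#define PTR_SIZE	8		/* assumed sizeof(struct page *) on 64-bit */
#define CACHE_CAP	8192UL		/* assumed stand-in for KMALLOC_MAX_CACHE_SIZE */

/* Stand-in for bpf_map_alloc_pages() + apply_to_page_range(): pretend the
 * allocator runs dry once 'budget' pages have been handed out.
 */
static bool map_batch(long this_batch, long *budget)
{
	if (*budget < this_batch)
		return false;
	*budget -= this_batch;
	return true;
}

static void alloc_pages_batched(long page_cnt, long budget)
{
	long alloc_pages = CACHE_CAP / PTR_SIZE;	/* array entries per batch */
	long remaining = page_cnt, mapped = 0;

	while (remaining) {
		long this_batch = remaining < alloc_pages ? remaining : alloc_pages;

		if (!map_batch(this_batch, &budget))
			break;				/* partial failure */
		mapped += this_batch;
		remaining -= this_batch;
	}
	/* Mirrors the 'out:' path: the unmapped tail goes back to the range
	 * tree, the mapped prefix would be torn down via arena_free_pages().
	 */
	printf("requested %ld: mapped %ld, returned to range tree %ld\n",
	       page_cnt, mapped, page_cnt - mapped);
}

int main(void)
{
	alloc_pages_batched(1L << 20, 1L << 20);	/* 4 GiB of 4 KiB pages, succeeds */
	alloc_pages_batched(1L << 20, 300000);		/* allocator fails part-way through */
	return 0;
}
```

With a 4 KiB page size the cap works out to 1024 array entries per batch, i.e. 4 MiB of arena mapped per loop iteration; the real value depends on the slab configuration.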