@@ -595,83 +595,106 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
 	struct vhost_dev *dev = &v->vdev;
 	struct vhost_iotlb *iotlb = dev->iotlb;
 	struct page **page_list;
-	unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
+	struct vm_area_struct **vmas;
 	unsigned int gup_flags = FOLL_LONGTERM;
-	unsigned long npages, cur_base, map_pfn, last_pfn = 0;
-	unsigned long locked, lock_limit, pinned, i;
+	unsigned long map_pfn, last_pfn = 0;
+	unsigned long npages, lock_limit;
+	unsigned long i, nmap = 0;
 	u64 iova = msg->iova;
+	long pinned;
 	int ret = 0;
 
 	if (vhost_iotlb_itree_first(iotlb, msg->iova,
 				    msg->iova + msg->size - 1))
 		return -EEXIST;
 
-	page_list = (struct page **) __get_free_page(GFP_KERNEL);
-	if (!page_list)
-		return -ENOMEM;
-
 	if (msg->perm & VHOST_ACCESS_WO)
 		gup_flags |= FOLL_WRITE;
 
 	npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT;
 	if (!npages)
 		return -EINVAL;
 
+	page_list = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
+	vmas = kvmalloc_array(npages, sizeof(struct vm_area_struct *),
+			      GFP_KERNEL);
+	if (!page_list || !vmas) {
+		ret = -ENOMEM;
+		goto free;
+	}
+
 	mmap_read_lock(dev->mm);
 
-	locked = atomic64_add_return(npages, &dev->mm->pinned_vm);
 	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
-	if (locked > lock_limit) {
+	if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
 		ret = -ENOMEM;
-		goto out;
+		goto unlock;
 	}
 
-	cur_base = msg->uaddr & PAGE_MASK;
-	iova &= PAGE_MASK;
+	pinned = pin_user_pages(msg->uaddr & PAGE_MASK, npages, gup_flags,
+				page_list, vmas);
+	if (npages != pinned) {
+		if (pinned < 0) {
+			ret = pinned;
+		} else {
+			unpin_user_pages(page_list, pinned);
+			ret = -ENOMEM;
+		}
+		goto unlock;
+	}
 
-	while (npages) {
-		pinned = min_t(unsigned long, npages, list_size);
-		ret = pin_user_pages(cur_base, pinned,
-				     gup_flags, page_list, NULL);
-		if (ret != pinned)
-			goto out;
-
-		if (!last_pfn)
-			map_pfn = page_to_pfn(page_list[0]);
-
-		for (i = 0; i < ret; i++) {
-			unsigned long this_pfn = page_to_pfn(page_list[i]);
-			u64 csize;
-
-			if (last_pfn && (this_pfn != last_pfn + 1)) {
-				/* Pin a contiguous chunk of memory */
-				csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
-				if (vhost_vdpa_map(v, iova, csize,
-						   map_pfn << PAGE_SHIFT,
-						   msg->perm))
-					goto out;
-				map_pfn = this_pfn;
-				iova += csize;
+	iova &= PAGE_MASK;
+	map_pfn = page_to_pfn(page_list[0]);
+
+	/* One more iteration to avoid extra vdpa_map() call out of loop. */
+	for (i = 0; i <= npages; i++) {
+		unsigned long this_pfn;
+		u64 csize;
+
+		/* The last chunk may have no valid PFN next to it */
+		this_pfn = i < npages ? page_to_pfn(page_list[i]) : -1UL;
+
+		if (last_pfn && (this_pfn == -1UL ||
+				 this_pfn != last_pfn + 1)) {
+			/* Pin a contiguous chunk of memory */
+			csize = last_pfn - map_pfn + 1;
+			ret = vhost_vdpa_map(v, iova, csize << PAGE_SHIFT,
+					     map_pfn << PAGE_SHIFT,
+					     msg->perm);
+			if (ret) {
+				/*
+				 * Unpin the remaining chunks of memory in
+				 * flight with no corresponding vdpa_map()
+				 * calls having been made yet. On the other
+				 * hand, vdpa_unmap() in the failure path
+				 * is in charge of accounting the number of
+				 * pinned pages on its own.
+				 * This asymmetrical pattern of accounting
+				 * is for efficiency: all pages are pinned at
+				 * once, while there is no other callsite
+				 * of vdpa_map() than here above.
+				 */
+				unpin_user_pages(&page_list[nmap],
+						 npages - nmap);
+				goto out;
 			}
-
-			last_pfn = this_pfn;
+			atomic64_add(csize, &dev->mm->pinned_vm);
+			nmap += csize;
+			iova += csize << PAGE_SHIFT;
+			map_pfn = this_pfn;
 		}
-
-		cur_base += ret << PAGE_SHIFT;
-		npages -= ret;
+		last_pfn = this_pfn;
 	}
 
-	/* Pin the rest chunk */
-	ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT,
-			     map_pfn << PAGE_SHIFT, msg->perm);
+	WARN_ON(nmap != npages);
 out:
-	if (ret) {
+	if (ret)
 		vhost_vdpa_unmap(v, msg->iova, msg->size);
-		atomic64_sub(npages, &dev->mm->pinned_vm);
-	}
+unlock:
 	mmap_read_unlock(dev->mm);
-	free_page((unsigned long)page_list);
+free:
+	kvfree(vmas);
+	kvfree(page_list);
 	return ret;
 }
 
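Note on the new mapping loop, for reference: the sketch below is a minimal user-space model (not kernel code) of the PFN-coalescing technique this change switches to. The hypothetical map_range() stands in for vhost_vdpa_map(), a plain pfns[] array stands in for page_to_pfn() over page_list[], and PAGE_SHIFT is assumed to be 12. The extra trailing iteration with the -1UL sentinel is what flushes the last contiguous run without needing a second map call after the loop.

/*
 * Minimal sketch of the coalescing loop; map_range() is a hypothetical
 * placeholder for vhost_vdpa_map(), pfns[] plays the role of
 * page_to_pfn(page_list[i]).  Assumes npages >= 1, as the real code
 * does after its !npages check.
 */
#include <stdio.h>

#define PAGE_SHIFT 12	/* assumption for the sketch: 4 KiB pages */

static void map_range(unsigned long iova, unsigned long size,
		      unsigned long pa)
{
	/* Stand-in for vhost_vdpa_map(v, iova, size, pa, perm). */
	printf("map iova=0x%lx size=0x%lx pa=0x%lx\n", iova, size, pa);
}

static void map_contiguous_runs(unsigned long iova,
				const unsigned long *pfns,
				unsigned long npages)
{
	unsigned long map_pfn = pfns[0], last_pfn = 0;
	unsigned long i;

	/* One extra iteration flushes the final run, as in the patch. */
	for (i = 0; i <= npages; i++) {
		/* -1UL is a sentinel meaning "no next PFN". */
		unsigned long this_pfn = i < npages ? pfns[i] : -1UL;

		if (last_pfn && (this_pfn == -1UL ||
				 this_pfn != last_pfn + 1)) {
			unsigned long csize = last_pfn - map_pfn + 1;

			/* Map one physically contiguous run of csize pages. */
			map_range(iova, csize << PAGE_SHIFT,
				  map_pfn << PAGE_SHIFT);
			iova += csize << PAGE_SHIFT;
			map_pfn = this_pfn;
		}
		last_pfn = this_pfn;
	}
}

int main(void)
{
	/* Two contiguous PFNs, then a gap: expect exactly two map calls. */
	unsigned long pfns[] = { 0x100, 0x101, 0x200 };

	map_contiguous_runs(0, pfns, 3);
	return 0;
}

In the same spirit as the patch's own comment, the kernel version also advances nmap and pinned_vm beside each successful map call, so a failure mid-loop only has to unpin the not-yet-mapped tail while vhost_vdpa_unmap() in the common error path takes care of the already-mapped runs.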