
Commit 7ed9e3d

siwliu-kernel authored and mstsirkin committed
vhost-vdpa: fix page pinning leakage in error path
Pinned pages are not properly accounted, particularly when a mapping error occurs on an IOTLB update. Clean up dangling pinned pages in the error path. The inflight pinned pages, specifically for a memory region that strides across multiple chunks, would need more than one free page for bookkeeping and accounting. For simplicity, pin the pages for all memory in the IOVA range in one go rather than making multiple pin_user_pages calls to cover the entire region. This way it is easier to track and account the pages already mapped, particularly for clean-up in the error path.

Fixes: 4c8cf31 ("vhost: introduce vDPA-based backend")
Signed-off-by: Si-Wei Liu <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Michael S. Tsirkin <[email protected]>
1 parent 1477c8a commit 7ed9e3d
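
The commit message above describes the new strategy: pin the whole IOVA range with one pin_user_pages() call, then walk the resulting PFN array and issue one map call per physically contiguous run, so that on failure only the not-yet-mapped tail needs unpinning. Below is a minimal userspace sketch of that coalescing loop; it is an illustration only, with map_chunk() as a hypothetical stand-in for vhost_vdpa_map() and a hard-coded PFN array standing in for the pages returned by pin_user_pages().

/*
 * Standalone illustration (not kernel code) of the chunk-coalescing loop
 * introduced by this patch: walk an array of pinned PFNs, emit one "map"
 * call per physically contiguous run, and on failure roll back only the
 * pages that were never mapped.
 */
#include <stdio.h>

/* Hypothetical stand-in for vhost_vdpa_map(); returns 0 on success. */
static int map_chunk(unsigned long iova, unsigned long first_pfn,
                     unsigned long npfns)
{
        printf("map iova=0x%lx pfn=0x%lx pages=%lu\n", iova, first_pfn, npfns);
        return 0;
}

int main(void)
{
        /* Pretend these PFNs came back from one big pin_user_pages() call. */
        unsigned long pfns[] = { 100, 101, 102, 200, 201, 300 };
        unsigned long npages = sizeof(pfns) / sizeof(pfns[0]);
        unsigned long iova = 0x1000, page_shift = 12;
        unsigned long map_pfn = pfns[0], last_pfn = 0, nmap = 0, i;

        /* One extra iteration flushes the final chunk without a tail call. */
        for (i = 0; i <= npages; i++) {
                /* Sentinel: the last chunk has no valid PFN following it. */
                unsigned long this_pfn = i < npages ? pfns[i] : -1UL;

                if (last_pfn && (this_pfn == -1UL || this_pfn != last_pfn + 1)) {
                        unsigned long csize = last_pfn - map_pfn + 1;

                        if (map_chunk(iova, map_pfn, csize)) {
                                /* Only pages [nmap, npages) still need unpinning. */
                                printf("unpin %lu trailing pages\n", npages - nmap);
                                return 1;
                        }
                        nmap += csize;
                        iova += csize << page_shift;
                        map_pfn = this_pfn;
                }
                last_pfn = this_pfn;
        }
        printf("mapped %lu of %lu pages\n", nmap, npages);
        return 0;
}

The extra iteration with i == npages uses -1UL as a sentinel PFN so the final chunk is flushed inside the same loop, mirroring the patch below.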

File tree: 1 file changed (+71, -48)

drivers/vhost/vdpa.c

Lines changed: 71 additions & 48 deletions
@@ -595,83 +595,106 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
         struct vhost_dev *dev = &v->vdev;
         struct vhost_iotlb *iotlb = dev->iotlb;
         struct page **page_list;
-        unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
+        struct vm_area_struct **vmas;
         unsigned int gup_flags = FOLL_LONGTERM;
-        unsigned long npages, cur_base, map_pfn, last_pfn = 0;
-        unsigned long locked, lock_limit, pinned, i;
+        unsigned long map_pfn, last_pfn = 0;
+        unsigned long npages, lock_limit;
+        unsigned long i, nmap = 0;
         u64 iova = msg->iova;
+        long pinned;
         int ret = 0;
 
         if (vhost_iotlb_itree_first(iotlb, msg->iova,
                                     msg->iova + msg->size - 1))
                 return -EEXIST;
 
-        page_list = (struct page **) __get_free_page(GFP_KERNEL);
-        if (!page_list)
-                return -ENOMEM;
-
         if (msg->perm & VHOST_ACCESS_WO)
                 gup_flags |= FOLL_WRITE;
 
         npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT;
         if (!npages)
                 return -EINVAL;
 
+        page_list = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
+        vmas = kvmalloc_array(npages, sizeof(struct vm_area_struct *),
+                              GFP_KERNEL);
+        if (!page_list || !vmas) {
+                ret = -ENOMEM;
+                goto free;
+        }
+
         mmap_read_lock(dev->mm);
 
-        locked = atomic64_add_return(npages, &dev->mm->pinned_vm);
         lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
-        if (locked > lock_limit) {
+        if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
                 ret = -ENOMEM;
-                goto out;
+                goto unlock;
         }
 
-        cur_base = msg->uaddr & PAGE_MASK;
-        iova &= PAGE_MASK;
+        pinned = pin_user_pages(msg->uaddr & PAGE_MASK, npages, gup_flags,
+                                page_list, vmas);
+        if (npages != pinned) {
+                if (pinned < 0) {
+                        ret = pinned;
+                } else {
+                        unpin_user_pages(page_list, pinned);
+                        ret = -ENOMEM;
+                }
+                goto unlock;
+        }
 
-        while (npages) {
-                pinned = min_t(unsigned long, npages, list_size);
-                ret = pin_user_pages(cur_base, pinned,
-                                     gup_flags, page_list, NULL);
-                if (ret != pinned)
-                        goto out;
-
-                if (!last_pfn)
-                        map_pfn = page_to_pfn(page_list[0]);
-
-                for (i = 0; i < ret; i++) {
-                        unsigned long this_pfn = page_to_pfn(page_list[i]);
-                        u64 csize;
-
-                        if (last_pfn && (this_pfn != last_pfn + 1)) {
-                                /* Pin a contiguous chunk of memory */
-                                csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
-                                if (vhost_vdpa_map(v, iova, csize,
-                                                   map_pfn << PAGE_SHIFT,
-                                                   msg->perm))
-                                        goto out;
-                                map_pfn = this_pfn;
-                                iova += csize;
+        iova &= PAGE_MASK;
+        map_pfn = page_to_pfn(page_list[0]);
+
+        /* One more iteration to avoid extra vdpa_map() call out of loop. */
+        for (i = 0; i <= npages; i++) {
+                unsigned long this_pfn;
+                u64 csize;
+
+                /* The last chunk may have no valid PFN next to it */
+                this_pfn = i < npages ? page_to_pfn(page_list[i]) : -1UL;
+
+                if (last_pfn && (this_pfn == -1UL ||
+                                 this_pfn != last_pfn + 1)) {
+                        /* Pin a contiguous chunk of memory */
+                        csize = last_pfn - map_pfn + 1;
+                        ret = vhost_vdpa_map(v, iova, csize << PAGE_SHIFT,
+                                             map_pfn << PAGE_SHIFT,
+                                             msg->perm);
+                        if (ret) {
+                                /*
+                                 * Unpin the rest chunks of memory on the
+                                 * flight with no corresponding vdpa_map()
+                                 * calls having been made yet. On the other
+                                 * hand, vdpa_unmap() in the failure path
+                                 * is in charge of accounting the number of
+                                 * pinned pages for its own.
+                                 * This asymmetrical pattern of accounting
+                                 * is for efficiency to pin all pages at
+                                 * once, while there is no other callsite
+                                 * of vdpa_map() than here above.
+                                 */
+                                unpin_user_pages(&page_list[nmap],
+                                                 npages - nmap);
+                                goto out;
                         }
-
-                        last_pfn = this_pfn;
+                        atomic64_add(csize, &dev->mm->pinned_vm);
+                        nmap += csize;
+                        iova += csize << PAGE_SHIFT;
+                        map_pfn = this_pfn;
                 }
-
-                cur_base += ret << PAGE_SHIFT;
-                npages -= ret;
+                last_pfn = this_pfn;
         }
 
-        /* Pin the rest chunk */
-        ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT,
-                             map_pfn << PAGE_SHIFT, msg->perm);
+        WARN_ON(nmap != npages);
 out:
-        if (ret) {
+        if (ret)
                 vhost_vdpa_unmap(v, msg->iova, msg->size);
-                atomic64_sub(npages, &dev->mm->pinned_vm);
-        }
+unlock:
         mmap_read_unlock(dev->mm);
-        free_page((unsigned long)page_list);
+free:
+        kvfree(vmas);
+        kvfree(page_list);
         return ret;
 }
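
A note on the accounting change visible in the diff: the old code charged all npages to dev->mm->pinned_vm up front via atomic64_add_return() and subtracted them again on failure, while the new code only reads the counter for the RLIMIT_MEMLOCK check before pinning, then adds each chunk's size (csize) as it is successfully mapped, leaving vhost_vdpa_unmap() to drop the count in the error path. As a rough worked example (not from the patch): with RLIMIT_MEMLOCK set to 64 MiB and 4 KiB pages, lock_limit = 64 MiB >> PAGE_SHIFT = 16384 pages, so a request whose npages plus the already-pinned count exceeds 16384 now fails with -ENOMEM before any page is pinned.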
