
Commit 3fdd47c

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Pull vhost fixes from Michael Tsirkin:
 "Some last minute vhost,vdpa fixes. The last two of them haven't been in
  next but they do seem kind of obvious, very small and safe, fix bugs
  reported in the field, and they are both in a new mlx5 vdpa driver, so
  it's not like we can introduce regressions"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
  vdpa/mlx5: Fix dependency on MLX5_CORE
  vdpa/mlx5: should keep avail_index despite device status
  vhost-vdpa: fix page pinning leakage in error path
  vhost-vdpa: fix vhost_vdpa_map() on error condition
  vhost: Don't call log_access_ok() when using IOTLB
  vhost: Use vhost_get_used_size() in vhost_vring_set_addr()
  vhost: Don't call access_ok() when using IOTLB
  vhost vdpa: fix vhost_vdpa_open error handling
2 parents 6288c1d + aff9077 commit 3fdd47c

File tree: 4 files changed, +115 / -68 lines


drivers/vdpa/Kconfig

Lines changed: 3 additions & 4 deletions
@@ -30,17 +30,16 @@ config IFCVF
 	  be called ifcvf.
 
 config MLX5_VDPA
-	bool "MLX5 VDPA support library for ConnectX devices"
-	depends on MLX5_CORE
-	default n
+	bool
 	help
 	  Support library for Mellanox VDPA drivers. Provides code that is
 	  common for all types of VDPA drivers. The following drivers are planned:
 	  net, block.
 
 config MLX5_VDPA_NET
 	tristate "vDPA driver for ConnectX devices"
-	depends on MLX5_VDPA
+	select MLX5_VDPA
+	depends on MLX5_CORE
 	default n
 	help
 	  VDPA network driver for ConnectX6 and newer. Provides offloading

drivers/vdpa/mlx5/net/mlx5_vnet.c

Lines changed: 14 additions & 6 deletions
@@ -1133,15 +1133,17 @@ static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *m
 	if (!mvq->initialized)
 		return;
 
-	if (query_virtqueue(ndev, mvq, &attr)) {
-		mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
-		return;
-	}
 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
 		return;
 
 	if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
 		mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");
+
+	if (query_virtqueue(ndev, mvq, &attr)) {
+		mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
+		return;
+	}
+	mvq->avail_idx = attr.available_index;
 }
 
 static void suspend_vqs(struct mlx5_vdpa_net *ndev)

@@ -1411,8 +1413,14 @@ static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa
 	struct mlx5_virtq_attr attr;
 	int err;
 
-	if (!mvq->initialized)
-		return -EAGAIN;
+	/* If the virtq object was destroyed, use the value saved at
+	 * the last minute of suspend_vq. This caters for userspace
+	 * that cares about emulating the index after vq is stopped.
+	 */
+	if (!mvq->initialized) {
+		state->avail_index = mvq->avail_idx;
+		return 0;
+	}
 
 	err = query_virtqueue(ndev, mvq, &attr);
 	if (err) {

drivers/vhost/vdpa.c

Lines changed: 75 additions & 48 deletions
@@ -565,6 +565,9 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
 					  perm_to_iommu_flags(perm));
 	}
 
+	if (r)
+		vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
+
 	return r;
 }
 

@@ -592,83 +595,106 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
 	struct vhost_dev *dev = &v->vdev;
 	struct vhost_iotlb *iotlb = dev->iotlb;
 	struct page **page_list;
-	unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
+	struct vm_area_struct **vmas;
 	unsigned int gup_flags = FOLL_LONGTERM;
-	unsigned long npages, cur_base, map_pfn, last_pfn = 0;
-	unsigned long locked, lock_limit, pinned, i;
+	unsigned long map_pfn, last_pfn = 0;
+	unsigned long npages, lock_limit;
+	unsigned long i, nmap = 0;
 	u64 iova = msg->iova;
+	long pinned;
 	int ret = 0;
 
 	if (vhost_iotlb_itree_first(iotlb, msg->iova,
 				    msg->iova + msg->size - 1))
 		return -EEXIST;
 
-	page_list = (struct page **) __get_free_page(GFP_KERNEL);
-	if (!page_list)
-		return -ENOMEM;
-
 	if (msg->perm & VHOST_ACCESS_WO)
 		gup_flags |= FOLL_WRITE;
 
 	npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT;
 	if (!npages)
 		return -EINVAL;
 
+	page_list = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
+	vmas = kvmalloc_array(npages, sizeof(struct vm_area_struct *),
+			      GFP_KERNEL);
+	if (!page_list || !vmas) {
+		ret = -ENOMEM;
+		goto free;
+	}
+
 	mmap_read_lock(dev->mm);
 
-	locked = atomic64_add_return(npages, &dev->mm->pinned_vm);
 	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
-	if (locked > lock_limit) {
+	if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
 		ret = -ENOMEM;
-		goto out;
+		goto unlock;
 	}
 
-	cur_base = msg->uaddr & PAGE_MASK;
-	iova &= PAGE_MASK;
+	pinned = pin_user_pages(msg->uaddr & PAGE_MASK, npages, gup_flags,
+				page_list, vmas);
+	if (npages != pinned) {
+		if (pinned < 0) {
+			ret = pinned;
+		} else {
+			unpin_user_pages(page_list, pinned);
+			ret = -ENOMEM;
+		}
+		goto unlock;
+	}
 
-	while (npages) {
-		pinned = min_t(unsigned long, npages, list_size);
-		ret = pin_user_pages(cur_base, pinned,
-				     gup_flags, page_list, NULL);
-		if (ret != pinned)
-			goto out;
-
-		if (!last_pfn)
-			map_pfn = page_to_pfn(page_list[0]);
-
-		for (i = 0; i < ret; i++) {
-			unsigned long this_pfn = page_to_pfn(page_list[i]);
-			u64 csize;
-
-			if (last_pfn && (this_pfn != last_pfn + 1)) {
-				/* Pin a contiguous chunk of memory */
-				csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
-				if (vhost_vdpa_map(v, iova, csize,
-						   map_pfn << PAGE_SHIFT,
-						   msg->perm))
-					goto out;
-				map_pfn = this_pfn;
-				iova += csize;
+	iova &= PAGE_MASK;
+	map_pfn = page_to_pfn(page_list[0]);
+
+	/* One more iteration to avoid extra vdpa_map() call out of loop. */
+	for (i = 0; i <= npages; i++) {
+		unsigned long this_pfn;
+		u64 csize;
+
+		/* The last chunk may have no valid PFN next to it */
+		this_pfn = i < npages ? page_to_pfn(page_list[i]) : -1UL;
+
+		if (last_pfn && (this_pfn == -1UL ||
+				 this_pfn != last_pfn + 1)) {
+			/* Pin a contiguous chunk of memory */
+			csize = last_pfn - map_pfn + 1;
+			ret = vhost_vdpa_map(v, iova, csize << PAGE_SHIFT,
					     map_pfn << PAGE_SHIFT,
+					     msg->perm);
+			if (ret) {
+				/*
+				 * Unpin the rest chunks of memory on the
+				 * flight with no corresponding vdpa_map()
+				 * calls having been made yet. On the other
+				 * hand, vdpa_unmap() in the failure path
+				 * is in charge of accounting the number of
+				 * pinned pages for its own.
+				 * This asymmetrical pattern of accounting
+				 * is for efficiency to pin all pages at
+				 * once, while there is no other callsite
+				 * of vdpa_map() than here above.
+				 */
+				unpin_user_pages(&page_list[nmap],
+						 npages - nmap);
+				goto out;
 			}
-
-			last_pfn = this_pfn;
+			atomic64_add(csize, &dev->mm->pinned_vm);
+			nmap += csize;
+			iova += csize << PAGE_SHIFT;
+			map_pfn = this_pfn;
 		}
-
-		cur_base += ret << PAGE_SHIFT;
-		npages -= ret;
+		last_pfn = this_pfn;
 	}
 
-	/* Pin the rest chunk */
-	ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT,
-			     map_pfn << PAGE_SHIFT, msg->perm);
+	WARN_ON(nmap != npages);
 out:
-	if (ret) {
+	if (ret)
 		vhost_vdpa_unmap(v, msg->iova, msg->size);
-		atomic64_sub(npages, &dev->mm->pinned_vm);
-	}
+unlock:
 	mmap_read_unlock(dev->mm);
-	free_page((unsigned long)page_list);
+free:
+	kvfree(vmas);
+	kvfree(page_list);
 	return ret;
 }
 

@@ -810,6 +836,7 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep)
 
 err_init_iotlb:
 	vhost_dev_cleanup(&v->vdev);
+	kfree(vqs);
 err:
 	atomic_dec(&v->opened);
 	return r;

drivers/vhost/vhost.c

Lines changed: 23 additions & 10 deletions
@@ -1290,6 +1290,11 @@ static bool vq_access_ok(struct vhost_virtqueue *vq, unsigned int num,
 			 vring_used_t __user *used)
 
 {
+	/* If an IOTLB device is present, the vring addresses are
+	 * GIOVAs. Access validation occurs at prefetch time. */
+	if (vq->iotlb)
+		return true;
+
 	return access_ok(desc, vhost_get_desc_size(vq, num)) &&
 	       access_ok(avail, vhost_get_avail_size(vq, num)) &&
 	       access_ok(used, vhost_get_used_size(vq, num));

@@ -1365,15 +1370,28 @@ bool vhost_log_access_ok(struct vhost_dev *dev)
 }
 EXPORT_SYMBOL_GPL(vhost_log_access_ok);
 
+static bool vq_log_used_access_ok(struct vhost_virtqueue *vq,
+				  void __user *log_base,
+				  bool log_used,
+				  u64 log_addr)
+{
+	/* If an IOTLB device is present, log_addr is a GIOVA that
+	 * will never be logged by log_used(). */
+	if (vq->iotlb)
+		return true;
+
+	return !log_used || log_access_ok(log_base, log_addr,
+					  vhost_get_used_size(vq, vq->num));
+}
+
 /* Verify access for write logging. */
 /* Caller should have vq mutex and device mutex */
 static bool vq_log_access_ok(struct vhost_virtqueue *vq,
 			     void __user *log_base)
 {
 	return vq_memory_access_ok(log_base, vq->umem,
 				   vhost_has_feature(vq, VHOST_F_LOG_ALL)) &&
-		(!vq->log_used || log_access_ok(log_base, vq->log_addr,
-					vhost_get_used_size(vq, vq->num)));
+		vq_log_used_access_ok(vq, log_base, vq->log_used, vq->log_addr);
 }
 
 /* Can we start vq? */

@@ -1383,10 +1401,6 @@ bool vhost_vq_access_ok(struct vhost_virtqueue *vq)
 	if (!vq_log_access_ok(vq, vq->log_base))
 		return false;
 
-	/* Access validation occurs at prefetch time with IOTLB */
-	if (vq->iotlb)
-		return true;
-
 	return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used);
 }
 EXPORT_SYMBOL_GPL(vhost_vq_access_ok);

@@ -1516,10 +1530,9 @@ static long vhost_vring_set_addr(struct vhost_dev *d,
 			return -EINVAL;
 
 		/* Also validate log access for used ring if enabled. */
-		if ((a.flags & (0x1 << VHOST_VRING_F_LOG)) &&
-		    !log_access_ok(vq->log_base, a.log_guest_addr,
-				   sizeof *vq->used +
-				   vq->num * sizeof *vq->used->ring))
+		if (!vq_log_used_access_ok(vq, vq->log_base,
+				a.flags & (0x1 << VHOST_VRING_F_LOG),
+				a.log_guest_addr))
 			return -EINVAL;
 	}
 