
Commit 7eac66d

Merge tag 'vfio-v5.9-rc2' of git://github.com/awilliam/linux-vfio
Pull VFIO fixes from Alex Williamson:

 - Fix lockdep issue reported for recursive read-lock (Alex Williamson)

 - Fix missing unwind in type1 replay function (Alex Williamson)

* tag 'vfio-v5.9-rc2' of git://github.com/awilliam/linux-vfio:
  vfio/type1: Add proper error unwind for vfio_iommu_replay()
  vfio-pci: Avoid recursive read-lock usage
2 parents 33d0f96 + aae7a75 commit 7eac66d
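
The vfio-pci change below moves the memory_lock read-lock out of vfio_pci_bar_rw(), where it was held across the whole do_io_rw() call, and into new per-access vfio_pci_iowrite##size()/vfio_pci_ioread##size() wrappers, so the same task no longer takes the read-lock recursively. As a generic illustration of why lockdep objects to recursive read-locks on an rw_semaphore (sketch only, not code from this commit):

	/*
	 * Illustration only -- not part of this commit.  A second
	 * down_read() in the same task can block behind a writer that
	 * queued after the first down_read(), while that writer waits
	 * for the first read-lock to be released: deadlock.
	 */
	static void recursive_read_lock_sketch(struct vfio_pci_device *vdev)
	{
		down_read(&vdev->memory_lock);	/* task A: first read-lock */
		/* task B: down_write(&vdev->memory_lock) blocks and queues */
		down_read(&vdev->memory_lock);	/* task A: may block behind
						 * the queued writer */
		up_read(&vdev->memory_lock);
		up_read(&vdev->memory_lock);
	}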

3 files changed, 164 insertions(+), 29 deletions(-)


drivers/vfio/pci/vfio_pci_private.h

Lines changed: 2 additions & 0 deletions
@@ -33,12 +33,14 @@
 
 struct vfio_pci_ioeventfd {
 	struct list_head	next;
+	struct vfio_pci_device	*vdev;
 	struct virqfd		*virqfd;
 	void __iomem		*addr;
 	uint64_t		data;
 	loff_t			pos;
 	int			bar;
 	int			count;
+	bool			test_mem;
 };
 
 struct vfio_pci_irq_ctx {

drivers/vfio/pci/vfio_pci_rdwr.c

Lines changed: 96 additions & 24 deletions
@@ -37,17 +37,70 @@
 #define vfio_ioread8	ioread8
 #define vfio_iowrite8	iowrite8
 
+#define VFIO_IOWRITE(size) \
+static int vfio_pci_iowrite##size(struct vfio_pci_device *vdev,	\
+			bool test_mem, u##size val, void __iomem *io)	\
+{									\
+	if (test_mem) {							\
+		down_read(&vdev->memory_lock);				\
+		if (!__vfio_pci_memory_enabled(vdev)) {			\
+			up_read(&vdev->memory_lock);			\
+			return -EIO;					\
+		}							\
+	}								\
+									\
+	vfio_iowrite##size(val, io);					\
+									\
+	if (test_mem)							\
+		up_read(&vdev->memory_lock);				\
+									\
+	return 0;							\
+}
+
+VFIO_IOWRITE(8)
+VFIO_IOWRITE(16)
+VFIO_IOWRITE(32)
+#ifdef iowrite64
+VFIO_IOWRITE(64)
+#endif
+
+#define VFIO_IOREAD(size) \
+static int vfio_pci_ioread##size(struct vfio_pci_device *vdev,		\
+			bool test_mem, u##size *val, void __iomem *io)	\
+{									\
+	if (test_mem) {							\
+		down_read(&vdev->memory_lock);				\
+		if (!__vfio_pci_memory_enabled(vdev)) {			\
+			up_read(&vdev->memory_lock);			\
+			return -EIO;					\
+		}							\
+	}								\
+									\
+	*val = vfio_ioread##size(io);					\
+									\
+	if (test_mem)							\
+		up_read(&vdev->memory_lock);				\
+									\
+	return 0;							\
+}
+
+VFIO_IOREAD(8)
+VFIO_IOREAD(16)
+VFIO_IOREAD(32)
+
 /*
  * Read or write from an __iomem region (MMIO or I/O port) with an excluded
  * range which is inaccessible. The excluded range drops writes and fills
  * reads with -1. This is intended for handling MSI-X vector tables and
  * leftover space for ROM BARs.
  */
-static ssize_t do_io_rw(void __iomem *io, char __user *buf,
+static ssize_t do_io_rw(struct vfio_pci_device *vdev, bool test_mem,
+			void __iomem *io, char __user *buf,
 			loff_t off, size_t count, size_t x_start,
 			size_t x_end, bool iswrite)
 {
 	ssize_t done = 0;
+	int ret;
 
 	while (count) {
 		size_t fillable, filled;
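
For reference, VFIO_IOWRITE(32) above expands to roughly the wrapper below (reconstructed from the macro body, not copied from generated source); each access takes and drops memory_lock individually instead of vfio_pci_bar_rw() holding it across do_io_rw():

	static int vfio_pci_iowrite32(struct vfio_pci_device *vdev,
				      bool test_mem, u32 val, void __iomem *io)
	{
		if (test_mem) {
			down_read(&vdev->memory_lock);
			if (!__vfio_pci_memory_enabled(vdev)) {
				up_read(&vdev->memory_lock);
				return -EIO;	/* memory decode disabled */
			}
		}

		vfio_iowrite32(val, io);	/* the raw MMIO/port write */

		if (test_mem)
			up_read(&vdev->memory_lock);

		return 0;
	}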
@@ -66,9 +119,15 @@ static ssize_t do_io_rw(void __iomem *io, char __user *buf,
 			if (copy_from_user(&val, buf, 4))
 				return -EFAULT;
 
-			vfio_iowrite32(val, io + off);
+			ret = vfio_pci_iowrite32(vdev, test_mem,
+						 val, io + off);
+			if (ret)
+				return ret;
 		} else {
-			val = vfio_ioread32(io + off);
+			ret = vfio_pci_ioread32(vdev, test_mem,
+						&val, io + off);
+			if (ret)
+				return ret;
 
 			if (copy_to_user(buf, &val, 4))
 				return -EFAULT;
@@ -82,9 +141,15 @@ static ssize_t do_io_rw(void __iomem *io, char __user *buf,
 			if (copy_from_user(&val, buf, 2))
 				return -EFAULT;
 
-			vfio_iowrite16(val, io + off);
+			ret = vfio_pci_iowrite16(vdev, test_mem,
+						 val, io + off);
+			if (ret)
+				return ret;
 		} else {
-			val = vfio_ioread16(io + off);
+			ret = vfio_pci_ioread16(vdev, test_mem,
+						&val, io + off);
+			if (ret)
+				return ret;
 
 			if (copy_to_user(buf, &val, 2))
 				return -EFAULT;
@@ -98,9 +163,15 @@ static ssize_t do_io_rw(void __iomem *io, char __user *buf,
 			if (copy_from_user(&val, buf, 1))
 				return -EFAULT;
 
-			vfio_iowrite8(val, io + off);
+			ret = vfio_pci_iowrite8(vdev, test_mem,
+						val, io + off);
+			if (ret)
+				return ret;
 		} else {
-			val = vfio_ioread8(io + off);
+			ret = vfio_pci_ioread8(vdev, test_mem,
+					       &val, io + off);
+			if (ret)
+				return ret;
 
 			if (copy_to_user(buf, &val, 1))
 				return -EFAULT;
@@ -178,14 +249,6 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
 
 	count = min(count, (size_t)(end - pos));
 
-	if (res->flags & IORESOURCE_MEM) {
-		down_read(&vdev->memory_lock);
-		if (!__vfio_pci_memory_enabled(vdev)) {
-			up_read(&vdev->memory_lock);
-			return -EIO;
-		}
-	}
-
 	if (bar == PCI_ROM_RESOURCE) {
 		/*
 		 * The ROM can fill less space than the BAR, so we start the
@@ -213,17 +276,15 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
 		x_end = vdev->msix_offset + vdev->msix_size;
 	}
 
-	done = do_io_rw(io, buf, pos, count, x_start, x_end, iswrite);
+	done = do_io_rw(vdev, res->flags & IORESOURCE_MEM, io, buf, pos,
+			count, x_start, x_end, iswrite);
 
 	if (done >= 0)
 		*ppos += done;
 
 	if (bar == PCI_ROM_RESOURCE)
 		pci_unmap_rom(pdev, io);
 out:
-	if (res->flags & IORESOURCE_MEM)
-		up_read(&vdev->memory_lock);
-
 	return done;
 }

@@ -278,7 +339,12 @@ ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf,
 		return ret;
 	}
 
-	done = do_io_rw(iomem, buf, off, count, 0, 0, iswrite);
+	/*
+	 * VGA MMIO is a legacy, non-BAR resource that hopefully allows
+	 * probing, so we don't currently worry about access in relation
+	 * to the memory enable bit in the command register.
+	 */
+	done = do_io_rw(vdev, false, iomem, buf, off, count, 0, 0, iswrite);
 
 	vga_put(vdev->pdev, rsrc);

@@ -296,17 +362,21 @@ static int vfio_pci_ioeventfd_handler(void *opaque, void *unused)
 
 	switch (ioeventfd->count) {
 	case 1:
-		vfio_iowrite8(ioeventfd->data, ioeventfd->addr);
+		vfio_pci_iowrite8(ioeventfd->vdev, ioeventfd->test_mem,
+				  ioeventfd->data, ioeventfd->addr);
 		break;
 	case 2:
-		vfio_iowrite16(ioeventfd->data, ioeventfd->addr);
+		vfio_pci_iowrite16(ioeventfd->vdev, ioeventfd->test_mem,
+				   ioeventfd->data, ioeventfd->addr);
 		break;
 	case 4:
-		vfio_iowrite32(ioeventfd->data, ioeventfd->addr);
+		vfio_pci_iowrite32(ioeventfd->vdev, ioeventfd->test_mem,
+				   ioeventfd->data, ioeventfd->addr);
 		break;
 #ifdef iowrite64
 	case 8:
-		vfio_iowrite64(ioeventfd->data, ioeventfd->addr);
+		vfio_pci_iowrite64(ioeventfd->vdev, ioeventfd->test_mem,
+				   ioeventfd->data, ioeventfd->addr);
 		break;
 #endif
 	}
@@ -378,11 +448,13 @@ long vfio_pci_ioeventfd(struct vfio_pci_device *vdev, loff_t offset,
 		goto out_unlock;
 	}
 
+	ioeventfd->vdev = vdev;
 	ioeventfd->addr = vdev->barmap[bar] + pos;
 	ioeventfd->data = data;
 	ioeventfd->pos = pos;
 	ioeventfd->bar = bar;
 	ioeventfd->count = count;
+	ioeventfd->test_mem = vdev->pdev->resource[bar].flags & IORESOURCE_MEM;
 
 	ret = vfio_virqfd_enable(ioeventfd, vfio_pci_ioeventfd_handler,
 				 NULL, NULL, &ioeventfd->virqfd, fd);

drivers/vfio/vfio_iommu_type1.c

Lines changed: 66 additions & 5 deletions
@@ -1424,13 +1424,16 @@ static int vfio_bus_type(struct device *dev, void *data)
 static int vfio_iommu_replay(struct vfio_iommu *iommu,
 			     struct vfio_domain *domain)
 {
-	struct vfio_domain *d;
+	struct vfio_domain *d = NULL;
 	struct rb_node *n;
 	unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 	int ret;
 
 	/* Arbitrarily pick the first domain in the list for lookups */
-	d = list_first_entry(&iommu->domain_list, struct vfio_domain, next);
+	if (!list_empty(&iommu->domain_list))
+		d = list_first_entry(&iommu->domain_list,
+				     struct vfio_domain, next);
+
 	n = rb_first(&iommu->dma_list);
 
 	for (; n; n = rb_next(n)) {
@@ -1448,6 +1451,11 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
 			phys_addr_t p;
 			dma_addr_t i;
 
+			if (WARN_ON(!d)) { /* mapped w/o a domain?! */
+				ret = -EINVAL;
+				goto unwind;
+			}
+
 			phys = iommu_iova_to_phys(d->domain, iova);
 
 			if (WARN_ON(!phys)) {
@@ -1477,7 +1485,7 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
 			if (npage <= 0) {
 				WARN_ON(!npage);
 				ret = (int)npage;
-				return ret;
+				goto unwind;
 			}
 
 			phys = pfn << PAGE_SHIFT;
@@ -1486,14 +1494,67 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
 
 			ret = iommu_map(domain->domain, iova, phys,
 					size, dma->prot | domain->prot);
-			if (ret)
-				return ret;
+			if (ret) {
+				if (!dma->iommu_mapped)
+					vfio_unpin_pages_remote(dma, iova,
+							phys >> PAGE_SHIFT,
+							size >> PAGE_SHIFT,
+							true);
+				goto unwind;
+			}
 
 			iova += size;
 		}
+	}
+
+	/* All dmas are now mapped, defer to second tree walk for unwind */
+	for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) {
+		struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node);
+
 		dma->iommu_mapped = true;
 	}
+
 	return 0;
+
+unwind:
+	for (; n; n = rb_prev(n)) {
+		struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node);
+		dma_addr_t iova;
+
+		if (dma->iommu_mapped) {
+			iommu_unmap(domain->domain, dma->iova, dma->size);
+			continue;
+		}
+
+		iova = dma->iova;
+		while (iova < dma->iova + dma->size) {
+			phys_addr_t phys, p;
+			size_t size;
+			dma_addr_t i;
+
+			phys = iommu_iova_to_phys(domain->domain, iova);
+			if (!phys) {
+				iova += PAGE_SIZE;
+				continue;
+			}
+
+			size = PAGE_SIZE;
+			p = phys + size;
+			i = iova + size;
+			while (i < dma->iova + dma->size &&
+			       p == iommu_iova_to_phys(domain->domain, i)) {
+				size += PAGE_SIZE;
+				p += PAGE_SIZE;
+				i += PAGE_SIZE;
+			}
+
+			iommu_unmap(domain->domain, iova, size);
+			vfio_unpin_pages_remote(dma, iova, phys >> PAGE_SHIFT,
+						size >> PAGE_SHIFT, true);
+		}
+	}
+
+	return ret;
 }
 
 /*
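
Condensed, the reworked vfio_iommu_replay() above follows the flow sketched below; map_one() and unmap_and_unpin_one() are hypothetical stand-ins for the inline pin/map and unmap/unpin bodies, shown only to make the map-then-mark structure and the backward unwind visible:

	/* Sketch only: map_one()/unmap_and_unpin_one() are hypothetical. */
	static int replay_flow_sketch(struct vfio_iommu *iommu,
				      struct vfio_domain *domain)
	{
		struct rb_node *n;
		int ret;

		for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) {
			ret = map_one(domain, rb_entry(n, struct vfio_dma, node));
			if (ret)
				goto unwind;	/* n points at the failed entry */
		}

		/* Mark entries iommu_mapped only after every map succeeded */
		for (n = rb_first(&iommu->dma_list); n; n = rb_next(n))
			rb_entry(n, struct vfio_dma, node)->iommu_mapped = true;

		return 0;

	unwind:
		/* Walk backwards from the failure point, undoing prior work */
		for (; n; n = rb_prev(n))
			unmap_and_unpin_one(domain,
					    rb_entry(n, struct vfio_dma, node));
		return ret;
	}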
