
Commit aae7a75

vfio/type1: Add proper error unwind for vfio_iommu_replay()
The vfio_iommu_replay() function does not currently unwind on error, yet it does pin pages, perform IOMMU mapping, and modify the vfio_dma structure to indicate IOMMU mapping. The IOMMU mappings are torn down when the domain is destroyed, but the other actions go on to cause trouble later. For example, the iommu->domain_list can be empty if we only have a non-IOMMU backed mdev attached. We don't currently check if the list is empty before getting the first entry in the list, which leads to a bogus domain pointer. If a vfio_dma entry is erroneously marked as iommu_mapped, we'll attempt to use that bogus pointer to retrieve the existing physical page addresses.

This is the scenario that uncovered this issue: attempting to hot-add a vfio-pci device to a container with an existing mdev device and DMA mappings, one of which could not be pinned, causing a failure adding the new group to the existing container and setting the conditions for a subsequent attempt to explode.

To resolve this, we can first check if the domain_list is empty so that we can reject replay of a bogus domain, should we ever encounter this inconsistent state again in the future. The real fix, though, is to add the necessary unwind support, which means cleaning up the current pinning if an IOMMU mapping fails, then walking back through the rb-tree of DMA entries, reading from the IOMMU which ranges are mapped, and unmapping and unpinning those ranges. To be able to do this, we also defer marking the DMA entry as IOMMU mapped until all entries are processed, in order to allow the unwind to know the disposition of each entry.

Fixes: a54eb55 ("vfio iommu type1: Add support for mediated devices")
Reported-by: Zhiyi Guo <[email protected]>
Tested-by: Zhiyi Guo <[email protected]>
Reviewed-by: Cornelia Huck <[email protected]>
Signed-off-by: Alex Williamson <[email protected]>
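For readers less familiar with the pattern, the shape of the fix is: attempt every mapping first, defer the "committed" flag to a second pass, and on any failure walk backwards undoing the partial work. Below is a minimal, purely illustrative userspace sketch of that structure; a plain array stands in for the vfio_dma rb-tree, and map_one(), undo_one(), and replay_all() are hypothetical helpers, not kernel APIs.

/* Illustrative sketch only -- not kernel code. */
#include <stdbool.h>
#include <stdio.h>

struct mapping {
	bool committed;			/* analogous to dma->iommu_mapped */
};

static int map_one(struct mapping *m, int idx)
{
	(void)m;
	return idx == 2 ? -1 : 0;	/* pretend the third entry fails */
}

static void undo_one(struct mapping *m, int idx)
{
	/* A committed entry only needs unmapping; an uncommitted one also
	 * needs its freshly pinned pages released. */
	printf("unwinding entry %d (committed=%d)\n", idx, m->committed);
}

static int replay_all(struct mapping *maps, int n)
{
	int i, ret;

	for (i = 0; i < n; i++) {
		ret = map_one(&maps[i], i);
		if (ret)
			goto unwind;	/* clean up everything done so far */
	}

	/* Second pass: only mark entries once every mapping succeeded. */
	for (i = 0; i < n; i++)
		maps[i].committed = true;

	return 0;

unwind:
	/* Walk backwards from the failing entry, undoing partial work. */
	for (; i >= 0; i--)
		undo_one(&maps[i], i);

	return ret;
}

int main(void)
{
	struct mapping maps[4] = { { false }, { false }, { false }, { false } };

	return replay_all(maps, 4) ? 1 : 0;
}

The same ordering shows up in the patch below: iommu_map() failures jump to the new unwind: label, and dma->iommu_mapped is only set in a second walk over the rb-tree once every entry has mapped.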
1 parent bc93b9a commit aae7a75

1 file changed: +66 −5 lines

drivers/vfio/vfio_iommu_type1.c

Lines changed: 66 additions & 5 deletions
@@ -1424,13 +1424,16 @@ static int vfio_bus_type(struct device *dev, void *data)
 static int vfio_iommu_replay(struct vfio_iommu *iommu,
 			     struct vfio_domain *domain)
 {
-	struct vfio_domain *d;
+	struct vfio_domain *d = NULL;
 	struct rb_node *n;
 	unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 	int ret;
 
 	/* Arbitrarily pick the first domain in the list for lookups */
-	d = list_first_entry(&iommu->domain_list, struct vfio_domain, next);
+	if (!list_empty(&iommu->domain_list))
+		d = list_first_entry(&iommu->domain_list,
+				     struct vfio_domain, next);
+
 	n = rb_first(&iommu->dma_list);
 
 	for (; n; n = rb_next(n)) {
@@ -1448,6 +1451,11 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
 				phys_addr_t p;
 				dma_addr_t i;
 
+				if (WARN_ON(!d)) { /* mapped w/o a domain?! */
+					ret = -EINVAL;
+					goto unwind;
+				}
+
 				phys = iommu_iova_to_phys(d->domain, iova);
 
 				if (WARN_ON(!phys)) {
@@ -1477,7 +1485,7 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
 				if (npage <= 0) {
 					WARN_ON(!npage);
 					ret = (int)npage;
-					return ret;
+					goto unwind;
 				}
 
 				phys = pfn << PAGE_SHIFT;
@@ -1486,14 +1494,67 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
 
 			ret = iommu_map(domain->domain, iova, phys,
 					size, dma->prot | domain->prot);
-			if (ret)
-				return ret;
+			if (ret) {
+				if (!dma->iommu_mapped)
+					vfio_unpin_pages_remote(dma, iova,
+							phys >> PAGE_SHIFT,
+							size >> PAGE_SHIFT,
+							true);
+				goto unwind;
+			}
 
 			iova += size;
 		}
+	}
+
+	/* All dmas are now mapped, defer to second tree walk for unwind */
+	for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) {
+		struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node);
+
 		dma->iommu_mapped = true;
 	}
+
 	return 0;
+
+unwind:
+	for (; n; n = rb_prev(n)) {
+		struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node);
+		dma_addr_t iova;
+
+		if (dma->iommu_mapped) {
+			iommu_unmap(domain->domain, dma->iova, dma->size);
+			continue;
+		}
+
+		iova = dma->iova;
+		while (iova < dma->iova + dma->size) {
+			phys_addr_t phys, p;
+			size_t size;
+			dma_addr_t i;
+
+			phys = iommu_iova_to_phys(domain->domain, iova);
+			if (!phys) {
+				iova += PAGE_SIZE;
+				continue;
+			}
+
+			size = PAGE_SIZE;
+			p = phys + size;
+			i = iova + size;
+			while (i < dma->iova + dma->size &&
+			       p == iommu_iova_to_phys(domain->domain, i)) {
+				size += PAGE_SIZE;
+				p += PAGE_SIZE;
+				i += PAGE_SIZE;
+			}
+
+			iommu_unmap(domain->domain, iova, size);
+			vfio_unpin_pages_remote(dma, iova, phys >> PAGE_SHIFT,
						size >> PAGE_SHIFT, true);
		}
+	}
+
+	return ret;
 }
 
 /*
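One detail of the unwind path worth calling out: for entries that were pinned during this replay but never marked iommu_mapped, the code reads the mapping state back from the IOMMU via iommu_iova_to_phys() and coalesces runs of physically contiguous pages, so each run is unmapped and unpinned with a single call. The following small standalone sketch shows only that coalescing step; the lookup table, lookup_phys(), and PG are toy stand-ins for real IOMMU state, iommu_iova_to_phys(), and PAGE_SIZE, not kernel APIs.

/* Toy sketch of coalescing physically contiguous pages before unmapping. */
#include <stdint.h>
#include <stdio.h>

#define PG 4096ULL
#define NPAGES 8

/* phys_tbl[i] is the physical address backing IOVA i*PG, 0 if unmapped. */
static uint64_t phys_tbl[NPAGES] = {
	0x10000, 0x11000, 0x12000,	/* contiguous run of 3 pages */
	0,				/* hole (never mapped)       */
	0x40000, 0x41000,		/* contiguous run of 2 pages */
	0x90000,			/* single page               */
	0,
};

/* Stand-in for iommu_iova_to_phys(): returns 0 when the IOVA is unmapped. */
static uint64_t lookup_phys(uint64_t iova)
{
	uint64_t idx = iova / PG;

	return idx < NPAGES ? phys_tbl[idx] : 0;
}

int main(void)
{
	uint64_t end = NPAGES * PG;
	uint64_t iova = 0;

	while (iova < end) {
		uint64_t phys = lookup_phys(iova);
		uint64_t size, p, i;

		if (!phys) {			/* hole: skip one page */
			iova += PG;
			continue;
		}

		/* Grow the range while the backing pages stay contiguous. */
		size = PG;
		p = phys + size;
		i = iova + size;
		while (i < end && lookup_phys(i) == p) {
			size += PG;
			p += PG;
			i += PG;
		}

		/* One unmap + unpin call would cover the whole run here. */
		printf("unmap+unpin iova=0x%llx phys=0x%llx size=0x%llx\n",
		       (unsigned long long)iova, (unsigned long long)phys,
		       (unsigned long long)size);
		iova += size;
	}

	return 0;
}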
