Skip to content

Commit 791c2b1

Browse files
rmurphy-arm authored and joergroedel committed
iommu: Optimise PCI SAC address trick
Per the reasoning in commit 4bf7fda ("iommu/dma: Add config for PCI SAC address trick") and its subsequent revert, this mechanism no longer serves its original purpose, but now only works around broken hardware/drivers in a way that is unfortunately too impactful to remove.

This does not, however, prevent us from solving the performance impact which that workaround has on large-scale systems that don't need it. Once the 32-bit IOVA space fills up and a workload starts allocating and freeing on both sides of the boundary, the opportunistic SAC allocation can then end up spending significant time hunting down scattered fragments of free 32-bit space, or just reestablishing max32_alloc_size. This can easily be exacerbated by a change in allocation pattern, such as by changing the network MTU, which can increase pressure on the 32-bit space by leaving a large quantity of cached IOVAs which are now the wrong size to be recycled, but also won't be freed since the non-opportunistic allocations can still be satisfied from the whole 64-bit space without triggering the reclaim path.

However, in the context of a workaround where smaller DMA addresses aren't simply a preference but a necessity, if we get to that point at all then in fact it's already the endgame. The nature of the allocator is currently such that the first IOVA we give to a device after the 32-bit space runs out will be the highest possible address for that device, ever. If that works, then great, we know we can optimise for speed by always allocating from the full range. And if it doesn't, then the worst has already happened and any brokenness is now showing, so there's little point in continuing to try to hide it.

To that end, implement a flag to refine the SAC business into a per-device policy that can automatically get itself out of the way if and when it stops being useful.
CC: Linus Torvalds <[email protected]>
CC: Jakub Kicinski <[email protected]>
Reviewed-by: John Garry <[email protected]>
Signed-off-by: Robin Murphy <[email protected]>
Tested-by: Vasant Hegde <[email protected]>
Tested-by: Jakub Kicinski <[email protected]>
Link: https://lore.kernel.org/r/b8502b115b915d2a3fabde367e099e39106686c8.1681392791.git.robin.murphy@arm.com
Signed-off-by: Joerg Roedel <[email protected]>
1 parent f188056 commit 791c2b1

File tree

4 files changed

+33
-6
lines changed

4 files changed

+33
-6
lines changed

drivers/iommu/dma-iommu.c

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -660,7 +660,7 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
660660
{
661661
struct iommu_dma_cookie *cookie = domain->iova_cookie;
662662
struct iova_domain *iovad = &cookie->iovad;
663-
unsigned long shift, iova_len, iova = 0;
663+
unsigned long shift, iova_len, iova;
664664

665665
if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
666666
cookie->msi_iova += size;
@@ -675,15 +675,29 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
675675
if (domain->geometry.force_aperture)
676676
dma_limit = min(dma_limit, (u64)domain->geometry.aperture_end);
677677

678-
/* Try to get PCI devices a SAC address */
679-
if (dma_limit > DMA_BIT_MASK(32) && !iommu_dma_forcedac && dev_is_pci(dev))
678+
/*
679+
* Try to use all the 32-bit PCI addresses first. The original SAC vs.
680+
* DAC reasoning loses relevance with PCIe, but enough hardware and
681+
* firmware bugs are still lurking out there that it's safest not to
682+
* venture into the 64-bit space until necessary.
683+
*
684+
* If your device goes wrong after seeing the notice then likely either
685+
* its driver is not setting DMA masks accurately, the hardware has
686+
* some inherent bug in handling >32-bit addresses, or not all the
687+
* expected address bits are wired up between the device and the IOMMU.
688+
*/
689+
if (dma_limit > DMA_BIT_MASK(32) && dev->iommu->pci_32bit_workaround) {
680690
iova = alloc_iova_fast(iovad, iova_len,
681691
DMA_BIT_MASK(32) >> shift, false);
692+
if (iova)
693+
goto done;
682694

683-
if (!iova)
684-
iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift,
685-
true);
695+
dev->iommu->pci_32bit_workaround = false;
696+
dev_notice(dev, "Using %d-bit DMA addresses\n", bits_per(dma_limit));
697+
}
686698

699+
iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift, true);
700+
done:
687701
return (dma_addr_t)iova << shift;
688702
}
689703

drivers/iommu/dma-iommu.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ int iommu_dma_init_fq(struct iommu_domain *domain);
1717
void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list);
1818

1919
extern bool iommu_dma_forcedac;
20+
static inline void iommu_dma_set_pci_32bit_workaround(struct device *dev)
21+
{
22+
dev->iommu->pci_32bit_workaround = !iommu_dma_forcedac;
23+
}
2024

2125
#else /* CONFIG_IOMMU_DMA */
2226

@@ -38,5 +42,9 @@ static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_he
3842
{
3943
}
4044

45+
static inline void iommu_dma_set_pci_32bit_workaround(struct device *dev)
46+
{
47+
}
48+
4149
#endif /* CONFIG_IOMMU_DMA */
4250
#endif /* __DMA_IOMMU_H */

drivers/iommu/iommu.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -499,6 +499,9 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
499499
mutex_unlock(&group->mutex);
500500
mutex_unlock(&iommu_probe_device_lock);
501501

502+
if (dev_is_pci(dev))
503+
iommu_dma_set_pci_32bit_workaround(dev);
504+
502505
return 0;
503506

504507
err_remove_gdev:

include/linux/iommu.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,7 @@ struct iommu_fault_param {
409409
* @priv: IOMMU Driver private data
410410
* @max_pasids: number of PASIDs this device can consume
411411
* @attach_deferred: the dma domain attachment is deferred
412+
* @pci_32bit_workaround: Limit DMA allocations to 32-bit IOVAs
412413
*
413414
* TODO: migrate other per device data pointers under iommu_dev_data, e.g.
414415
* struct iommu_group *iommu_group;
@@ -422,6 +423,7 @@ struct dev_iommu {
422423
void *priv;
423424
u32 max_pasids;
424425
u32 attach_deferred:1;
426+
u32 pci_32bit_workaround:1;
425427
};
426428

427429
int iommu_device_register(struct iommu_device *iommu,

0 commit comments

Comments
 (0)