Skip to content

Commit 6ee1b77

Browse files
Jacob Pan authored and Joerg Roedel committed
iommu/vt-d: Add svm/sva invalidate function
When Shared Virtual Address (SVA) is enabled for a guest OS via vIOMMU, we need to provide invalidation support at IOMMU API and driver level. This patch adds Intel VT-d specific function to implement iommu passdown invalidate API for shared virtual address. The use case is for supporting caching structure invalidation of assigned SVM capable devices. Emulated IOMMU exposes queue invalidation capability and passes down all descriptors from the guest to the physical IOMMU. The assumption is that guest to host device ID mapping should be resolved prior to calling IOMMU driver. Based on the device handle, host IOMMU driver can replace certain fields before submit to the invalidation queue. Signed-off-by: Liu Yi L <[email protected]> Signed-off-by: Jacob Pan <[email protected]> Signed-off-by: Lu Baolu <[email protected]> Reviewed-by: Eric Auger <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Joerg Roedel <[email protected]>
1 parent 61a06a1 commit 6ee1b77

File tree

1 file changed

+171
-0
lines changed

1 file changed

+171
-0
lines changed

drivers/iommu/intel-iommu.c

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5242,6 +5242,176 @@ static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
52425242
aux_domain_remove_dev(to_dmar_domain(domain), dev);
52435243
}
52445244

5245+
/*
5246+
* 2D array for converting and sanitizing IOMMU generic TLB granularity to
5247+
* VT-d granularity. Invalidation is typically included in the unmap operation
5248+
* as a result of DMA or VFIO unmap. However, for assigned devices guest
5249+
* owns the first level page tables. Invalidations of translation caches in the
5250+
* guest are trapped and passed down to the host.
5251+
*
5252+
* vIOMMU in the guest will only expose first level page tables, therefore
5253+
* we do not support IOTLB granularity for request without PASID (second level).
5254+
*
5255+
* For example, to find the VT-d granularity encoding for IOTLB
5256+
* type and page selective granularity within PASID:
5257+
* X: indexed by iommu cache type
5258+
* Y: indexed by enum iommu_inv_granularity
5259+
* [IOMMU_CACHE_INV_TYPE_IOTLB][IOMMU_INV_GRANU_ADDR]
5260+
*/
5261+
5262+
const static int
5263+
inv_type_granu_table[IOMMU_CACHE_INV_TYPE_NR][IOMMU_INV_GRANU_NR] = {
5264+
/*
5265+
* PASID based IOTLB invalidation: PASID selective (per PASID),
5266+
* page selective (address granularity)
5267+
*/
5268+
{-EINVAL, QI_GRAN_NONG_PASID, QI_GRAN_PSI_PASID},
5269+
/* PASID based dev TLBs */
5270+
{-EINVAL, -EINVAL, QI_DEV_IOTLB_GRAN_PASID_SEL},
5271+
/* PASID cache */
5272+
{-EINVAL, -EINVAL, -EINVAL}
5273+
};
5274+
5275+
static inline int to_vtd_granularity(int type, int granu)
5276+
{
5277+
return inv_type_granu_table[type][granu];
5278+
}
5279+
5280+
/*
 * to_vtd_size - convert an invalidation range to the VT-d size encoding.
 *
 * @granu_size:  granule size in bytes, as passed by the IOMMU cache
 *               invalidate API
 * @nr_granules: number of contiguous granules of that size
 *
 * VT-d encodes the range as 2^order of 4K pages: 0 for 4K, 9 for 2MB, etc.
 */
static inline u64 to_vtd_size(u64 granu_size, u64 nr_granules)
{
	u64 total_pages;

	/* Total bytes covered, expressed in 4K pages. */
	total_pages = (granu_size * nr_granules) >> VTD_PAGE_SHIFT;

	return order_base_2(total_pages);
}
5290+
5291+
#ifdef CONFIG_INTEL_IOMMU_SVM
5292+
static int
5293+
intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
5294+
struct iommu_cache_invalidate_info *inv_info)
5295+
{
5296+
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5297+
struct device_domain_info *info;
5298+
struct intel_iommu *iommu;
5299+
unsigned long flags;
5300+
int cache_type;
5301+
u8 bus, devfn;
5302+
u16 did, sid;
5303+
int ret = 0;
5304+
u64 size = 0;
5305+
5306+
if (!inv_info || !dmar_domain ||
5307+
inv_info->version != IOMMU_CACHE_INVALIDATE_INFO_VERSION_1)
5308+
return -EINVAL;
5309+
5310+
if (!dev || !dev_is_pci(dev))
5311+
return -ENODEV;
5312+
5313+
iommu = device_to_iommu(dev, &bus, &devfn);
5314+
if (!iommu)
5315+
return -ENODEV;
5316+
5317+
if (!(dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE))
5318+
return -EINVAL;
5319+
5320+
spin_lock_irqsave(&device_domain_lock, flags);
5321+
spin_lock(&iommu->lock);
5322+
info = dev->archdata.iommu;
5323+
if (!info) {
5324+
ret = -EINVAL;
5325+
goto out_unlock;
5326+
}
5327+
did = dmar_domain->iommu_did[iommu->seq_id];
5328+
sid = PCI_DEVID(bus, devfn);
5329+
5330+
/* Size is only valid in address selective invalidation */
5331+
if (inv_info->granularity != IOMMU_INV_GRANU_PASID)
5332+
size = to_vtd_size(inv_info->addr_info.granule_size,
5333+
inv_info->addr_info.nb_granules);
5334+
5335+
for_each_set_bit(cache_type,
5336+
(unsigned long *)&inv_info->cache,
5337+
IOMMU_CACHE_INV_TYPE_NR) {
5338+
int granu = 0;
5339+
u64 pasid = 0;
5340+
5341+
granu = to_vtd_granularity(cache_type, inv_info->granularity);
5342+
if (granu == -EINVAL) {
5343+
pr_err_ratelimited("Invalid cache type and granu combination %d/%d\n",
5344+
cache_type, inv_info->granularity);
5345+
break;
5346+
}
5347+
5348+
/*
5349+
* PASID is stored in different locations based on the
5350+
* granularity.
5351+
*/
5352+
if (inv_info->granularity == IOMMU_INV_GRANU_PASID &&
5353+
(inv_info->pasid_info.flags & IOMMU_INV_PASID_FLAGS_PASID))
5354+
pasid = inv_info->pasid_info.pasid;
5355+
else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
5356+
(inv_info->addr_info.flags & IOMMU_INV_ADDR_FLAGS_PASID))
5357+
pasid = inv_info->addr_info.pasid;
5358+
5359+
switch (BIT(cache_type)) {
5360+
case IOMMU_CACHE_INV_TYPE_IOTLB:
5361+
if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
5362+
size &&
5363+
(inv_info->addr_info.addr & ((BIT(VTD_PAGE_SHIFT + size)) - 1))) {
5364+
pr_err_ratelimited("Address out of range, 0x%llx, size order %llu\n",
5365+
inv_info->addr_info.addr, size);
5366+
ret = -ERANGE;
5367+
goto out_unlock;
5368+
}
5369+
5370+
/*
5371+
* If granu is PASID-selective, address is ignored.
5372+
* We use npages = -1 to indicate that.
5373+
*/
5374+
qi_flush_piotlb(iommu, did, pasid,
5375+
mm_to_dma_pfn(inv_info->addr_info.addr),
5376+
(granu == QI_GRAN_NONG_PASID) ? -1 : 1 << size,
5377+
inv_info->addr_info.flags & IOMMU_INV_ADDR_FLAGS_LEAF);
5378+
5379+
/*
5380+
* Always flush device IOTLB if ATS is enabled. vIOMMU
5381+
* in the guest may assume IOTLB flush is inclusive,
5382+
* which is more efficient.
5383+
*/
5384+
if (info->ats_enabled)
5385+
qi_flush_dev_iotlb_pasid(iommu, sid,
5386+
info->pfsid, pasid,
5387+
info->ats_qdep,
5388+
inv_info->addr_info.addr,
5389+
size, granu);
5390+
break;
5391+
case IOMMU_CACHE_INV_TYPE_DEV_IOTLB:
5392+
if (info->ats_enabled)
5393+
qi_flush_dev_iotlb_pasid(iommu, sid,
5394+
info->pfsid, pasid,
5395+
info->ats_qdep,
5396+
inv_info->addr_info.addr,
5397+
size, granu);
5398+
else
5399+
pr_warn_ratelimited("Passdown device IOTLB flush w/o ATS!\n");
5400+
break;
5401+
default:
5402+
dev_err_ratelimited(dev, "Unsupported IOMMU invalidation type %d\n",
5403+
cache_type);
5404+
ret = -EINVAL;
5405+
}
5406+
}
5407+
out_unlock:
5408+
spin_unlock(&iommu->lock);
5409+
spin_unlock_irqrestore(&device_domain_lock, flags);
5410+
5411+
return ret;
5412+
}
5413+
#endif
5414+
52455415
static int intel_iommu_map(struct iommu_domain *domain,
52465416
unsigned long iova, phys_addr_t hpa,
52475417
size_t size, int iommu_prot, gfp_t gfp)
@@ -5781,6 +5951,7 @@ const struct iommu_ops intel_iommu_ops = {
57815951
.def_domain_type = device_def_domain_type,
57825952
.pgsize_bitmap = INTEL_IOMMU_PGSIZES,
57835953
#ifdef CONFIG_INTEL_IOMMU_SVM
5954+
.cache_invalidate = intel_iommu_sva_invalidate,
57845955
.sva_bind_gpasid = intel_svm_bind_gpasid,
57855956
.sva_unbind_gpasid = intel_svm_unbind_gpasid,
57865957
#endif

0 commit comments

Comments
 (0)