Skip to content

Commit 56722a4

Browse files
Jacob Pan authored and joergroedel committed
iommu/vt-d: Add bind guest PASID support
When supporting guest SVA with emulated IOMMU, the guest PASID
table is shadowed in VMM. Updates to guest vIOMMU PASID table
will result in PASID cache flush which will be passed down to
the host as bind guest PASID calls.

For the SL page tables, it will be harvested from device's
default domain (request w/o PASID), or aux domain in case of
mediated device.

    .-------------.  .---------------------------.
    |   vIOMMU    |  | Guest process CR3, FL only|
    |             |  '---------------------------'
    .----------------/
    | PASID Entry |--- PASID cache flush -
    '-------------'                       |
    |             |                       V
    |             |                CR3 in GPA
    '-------------'
Guest
------| Shadow |--------------------------|--------
      v        v                          v
Host
    .-------------.  .----------------------.
    |   pIOMMU    |  | Bind FL for GVA-GPA  |
    |             |  '----------------------'
    .----------------/  |
    | PASID Entry |     V (Nested xlate)
    '----------------\.------------------------------.
    |             |   |SL for GPA-HPA, default domain|
    |             |   '------------------------------'
    '-------------'
Where:
 - FL = First level/stage one page tables
 - SL = Second level/stage two page tables

Signed-off-by: Jacob Pan <[email protected]>
Signed-off-by: Liu Yi L <[email protected]>
Signed-off-by: Lu Baolu <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Joerg Roedel <[email protected]>
1 parent b0d1f87 commit 56722a4

File tree

4 files changed

+221
-1
lines changed

4 files changed

+221
-1
lines changed

drivers/iommu/intel-iommu.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5780,6 +5780,10 @@ const struct iommu_ops intel_iommu_ops = {
57805780
.is_attach_deferred = intel_iommu_is_attach_deferred,
57815781
.def_domain_type = device_def_domain_type,
57825782
.pgsize_bitmap = INTEL_IOMMU_PGSIZES,
5783+
#ifdef CONFIG_INTEL_IOMMU_SVM
5784+
.sva_bind_gpasid = intel_svm_bind_gpasid,
5785+
.sva_unbind_gpasid = intel_svm_unbind_gpasid,
5786+
#endif
57835787
};
57845788

57855789
static void quirk_iommu_igfx(struct pci_dev *dev)

drivers/iommu/intel-svm.c

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,206 @@ static LIST_HEAD(global_svm_list);
226226
list_for_each_entry((sdev), &(svm)->devs, list) \
227227
if ((d) != (sdev)->dev) {} else
228228

229+
int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
230+
struct iommu_gpasid_bind_data *data)
231+
{
232+
struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
233+
struct dmar_domain *dmar_domain;
234+
struct intel_svm_dev *sdev;
235+
struct intel_svm *svm;
236+
int ret = 0;
237+
238+
if (WARN_ON(!iommu) || !data)
239+
return -EINVAL;
240+
241+
if (data->version != IOMMU_GPASID_BIND_VERSION_1 ||
242+
data->format != IOMMU_PASID_FORMAT_INTEL_VTD)
243+
return -EINVAL;
244+
245+
if (!dev_is_pci(dev))
246+
return -ENOTSUPP;
247+
248+
/* VT-d supports devices with full 20 bit PASIDs only */
249+
if (pci_max_pasids(to_pci_dev(dev)) != PASID_MAX)
250+
return -EINVAL;
251+
252+
/*
253+
* We only check host PASID range, we have no knowledge to check
254+
* guest PASID range.
255+
*/
256+
if (data->hpasid <= 0 || data->hpasid >= PASID_MAX)
257+
return -EINVAL;
258+
259+
dmar_domain = to_dmar_domain(domain);
260+
261+
mutex_lock(&pasid_mutex);
262+
svm = ioasid_find(NULL, data->hpasid, NULL);
263+
if (IS_ERR(svm)) {
264+
ret = PTR_ERR(svm);
265+
goto out;
266+
}
267+
268+
if (svm) {
269+
/*
270+
* If we found svm for the PASID, there must be at
271+
* least one device bond, otherwise svm should be freed.
272+
*/
273+
if (WARN_ON(list_empty(&svm->devs))) {
274+
ret = -EINVAL;
275+
goto out;
276+
}
277+
278+
for_each_svm_dev(sdev, svm, dev) {
279+
/*
280+
* For devices with aux domains, we should allow
281+
* multiple bind calls with the same PASID and pdev.
282+
*/
283+
if (iommu_dev_feature_enabled(dev,
284+
IOMMU_DEV_FEAT_AUX)) {
285+
sdev->users++;
286+
} else {
287+
dev_warn_ratelimited(dev,
288+
"Already bound with PASID %u\n",
289+
svm->pasid);
290+
ret = -EBUSY;
291+
}
292+
goto out;
293+
}
294+
} else {
295+
/* We come here when PASID has never been bond to a device. */
296+
svm = kzalloc(sizeof(*svm), GFP_KERNEL);
297+
if (!svm) {
298+
ret = -ENOMEM;
299+
goto out;
300+
}
301+
/* REVISIT: upper layer/VFIO can track host process that bind
302+
* the PASID. ioasid_set = mm might be sufficient for vfio to
303+
* check pasid VMM ownership. We can drop the following line
304+
* once VFIO and IOASID set check is in place.
305+
*/
306+
svm->mm = get_task_mm(current);
307+
svm->pasid = data->hpasid;
308+
if (data->flags & IOMMU_SVA_GPASID_VAL) {
309+
svm->gpasid = data->gpasid;
310+
svm->flags |= SVM_FLAG_GUEST_PASID;
311+
}
312+
ioasid_set_data(data->hpasid, svm);
313+
INIT_LIST_HEAD_RCU(&svm->devs);
314+
mmput(svm->mm);
315+
}
316+
sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
317+
if (!sdev) {
318+
ret = -ENOMEM;
319+
goto out;
320+
}
321+
sdev->dev = dev;
322+
323+
/* Only count users if device has aux domains */
324+
if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX))
325+
sdev->users = 1;
326+
327+
/* Set up device context entry for PASID if not enabled already */
328+
ret = intel_iommu_enable_pasid(iommu, sdev->dev);
329+
if (ret) {
330+
dev_err_ratelimited(dev, "Failed to enable PASID capability\n");
331+
kfree(sdev);
332+
goto out;
333+
}
334+
335+
/*
336+
* PASID table is per device for better security. Therefore, for
337+
* each bind of a new device even with an existing PASID, we need to
338+
* call the nested mode setup function here.
339+
*/
340+
spin_lock(&iommu->lock);
341+
ret = intel_pasid_setup_nested(iommu, dev, (pgd_t *)data->gpgd,
342+
data->hpasid, &data->vtd, dmar_domain,
343+
data->addr_width);
344+
spin_unlock(&iommu->lock);
345+
if (ret) {
346+
dev_err_ratelimited(dev, "Failed to set up PASID %llu in nested mode, Err %d\n",
347+
data->hpasid, ret);
348+
/*
349+
* PASID entry should be in cleared state if nested mode
350+
* set up failed. So we only need to clear IOASID tracking
351+
* data such that free call will succeed.
352+
*/
353+
kfree(sdev);
354+
goto out;
355+
}
356+
357+
svm->flags |= SVM_FLAG_GUEST_MODE;
358+
359+
init_rcu_head(&sdev->rcu);
360+
list_add_rcu(&sdev->list, &svm->devs);
361+
out:
362+
if (!IS_ERR_OR_NULL(svm) && list_empty(&svm->devs)) {
363+
ioasid_set_data(data->hpasid, NULL);
364+
kfree(svm);
365+
}
366+
367+
mutex_unlock(&pasid_mutex);
368+
return ret;
369+
}
370+
371+
int intel_svm_unbind_gpasid(struct device *dev, int pasid)
372+
{
373+
struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
374+
struct intel_svm_dev *sdev;
375+
struct intel_svm *svm;
376+
int ret = -EINVAL;
377+
378+
if (WARN_ON(!iommu))
379+
return -EINVAL;
380+
381+
mutex_lock(&pasid_mutex);
382+
svm = ioasid_find(NULL, pasid, NULL);
383+
if (!svm) {
384+
ret = -EINVAL;
385+
goto out;
386+
}
387+
388+
if (IS_ERR(svm)) {
389+
ret = PTR_ERR(svm);
390+
goto out;
391+
}
392+
393+
for_each_svm_dev(sdev, svm, dev) {
394+
ret = 0;
395+
if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX))
396+
sdev->users--;
397+
if (!sdev->users) {
398+
list_del_rcu(&sdev->list);
399+
intel_pasid_tear_down_entry(iommu, dev, svm->pasid);
400+
intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
401+
/* TODO: Drain in flight PRQ for the PASID since it
402+
* may get reused soon, we don't want to
403+
* confuse with its previous life.
404+
* intel_svm_drain_prq(dev, pasid);
405+
*/
406+
kfree_rcu(sdev, rcu);
407+
408+
if (list_empty(&svm->devs)) {
409+
/*
410+
* We do not free the IOASID here in that
411+
* IOMMU driver did not allocate it.
412+
* Unlike native SVM, IOASID for guest use was
413+
* allocated prior to the bind call.
414+
* In any case, if the free call comes before
415+
* the unbind, IOMMU driver will get notified
416+
* and perform cleanup.
417+
*/
418+
ioasid_set_data(pasid, NULL);
419+
kfree(svm);
420+
}
421+
}
422+
break;
423+
}
424+
out:
425+
mutex_unlock(&pasid_mutex);
426+
return ret;
427+
}
428+
229429
int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops)
230430
{
231431
struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);

include/linux/intel-iommu.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -698,7 +698,9 @@ struct dmar_domain *find_domain(struct device *dev);
698698
extern void intel_svm_check(struct intel_iommu *iommu);
699699
extern int intel_svm_enable_prq(struct intel_iommu *iommu);
700700
extern int intel_svm_finish_prq(struct intel_iommu *iommu);
701-
701+
int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
702+
struct iommu_gpasid_bind_data *data);
703+
int intel_svm_unbind_gpasid(struct device *dev, int pasid);
702704
struct svm_dev_ops;
703705

704706
struct intel_svm_dev {
@@ -715,9 +717,11 @@ struct intel_svm_dev {
715717
/*
 * struct intel_svm - per-PASID shared virtual addressing state.
 *
 * An instance is stored as the IOASID private data for its host PASID and is
 * looked up with ioasid_find() by the guest bind/unbind paths.
 *
 *   mm     - in the guest bind path, set from get_task_mm(current); the
 *            reference is dropped again immediately in that path.
 *   flags  - SVM_FLAG_* bits; guest binds set SVM_FLAG_GUEST_MODE and,
 *            when a guest PASID was supplied, SVM_FLAG_GUEST_PASID.
 *   pasid  - host PASID (the guest bind path copies it from the request's
 *            hpasid).
 *   gpasid - guest PASID, valid when SVM_FLAG_GUEST_PASID is set.
 *   devs   - RCU-managed list of struct intel_svm_dev, one per bound device,
 *            linked through intel_svm_dev::list.
 *   list   - NOTE(review): presumably the linkage into global_svm_list;
 *            confirm against intel-svm.c.
 *
 * notifier and iommu are not touched by the guest bind/unbind code visible
 * here; see the native bind path for their use.
 */
struct intel_svm {
716718
struct mmu_notifier notifier;
717719
struct mm_struct *mm;
720+
718721
struct intel_iommu *iommu;
719722
int flags;
720723
int pasid;
724+
int gpasid; /* In case that guest PASID is different from host PASID */
721725
struct list_head devs;
722726
struct list_head list;
723727
};

include/linux/intel-svm.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,18 @@ struct svm_dev_ops {
4444
* do such IOTLB flushes automatically.
4545
*/
4646
#define SVM_FLAG_SUPERVISOR_MODE (1<<1)
47+
/*
48+
* The SVM_FLAG_GUEST_MODE flag is used when a PASID bind is for guest
49+
* processes. Compared to the host bind, the primary differences are:
50+
* 1. mm life cycle management
51+
* 2. fault reporting
52+
*/
53+
#define SVM_FLAG_GUEST_MODE (1<<2)
54+
/*
55+
* The SVM_FLAG_GUEST_PASID flag is used when a guest has its own PASID space,
56+
* which requires guest and host PASID translation in both directions.
57+
*/
58+
#define SVM_FLAG_GUEST_PASID (1<<3)
4759

4860
#ifdef CONFIG_INTEL_IOMMU_SVM
4961

0 commit comments

Comments
 (0)