Skip to content

Commit eb8d93e

Browse files
Lu Baolu authored and Joerg Roedel committed
iommu/vt-d: Report page request faults for guest SVA
A pasid might be bound to a page table from a VM guest via the iommu ops.sva_bind_gpasid. In this case, when a DMA page fault is detected on the physical IOMMU, we need to inject the page fault request into the guest. After the guest completes handling the page fault, a page response needs to be sent back via the iommu ops.page_response(). This adds support to report a page request fault. Any external module which is interested in handling this fault should register a notifier with iommu_register_device_fault_handler(). Co-developed-by: Jacob Pan <[email protected]> Co-developed-by: Liu Yi L <[email protected]> Signed-off-by: Jacob Pan <[email protected]> Signed-off-by: Liu Yi L <[email protected]> Signed-off-by: Lu Baolu <[email protected]> Reviewed-by: Kevin Tian <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Joerg Roedel <[email protected]>
1 parent 19abcf7 commit eb8d93e

File tree

1 file changed

+85
-18
lines changed

1 file changed

+85
-18
lines changed

drivers/iommu/intel/svm.c

Lines changed: 85 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -811,8 +811,63 @@ static void intel_svm_drain_prq(struct device *dev, int pasid)
811811
}
812812
}
813813

814+
static int prq_to_iommu_prot(struct page_req_dsc *req)
815+
{
816+
int prot = 0;
817+
818+
if (req->rd_req)
819+
prot |= IOMMU_FAULT_PERM_READ;
820+
if (req->wr_req)
821+
prot |= IOMMU_FAULT_PERM_WRITE;
822+
if (req->exe_req)
823+
prot |= IOMMU_FAULT_PERM_EXEC;
824+
if (req->pm_req)
825+
prot |= IOMMU_FAULT_PERM_PRIV;
826+
827+
return prot;
828+
}
829+
830+
static int
831+
intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc)
832+
{
833+
struct iommu_fault_event event;
834+
835+
if (!dev || !dev_is_pci(dev))
836+
return -ENODEV;
837+
838+
/* Fill in event data for device specific processing */
839+
memset(&event, 0, sizeof(struct iommu_fault_event));
840+
event.fault.type = IOMMU_FAULT_PAGE_REQ;
841+
event.fault.prm.addr = desc->addr;
842+
event.fault.prm.pasid = desc->pasid;
843+
event.fault.prm.grpid = desc->prg_index;
844+
event.fault.prm.perm = prq_to_iommu_prot(desc);
845+
846+
if (desc->lpig)
847+
event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
848+
if (desc->pasid_present) {
849+
event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
850+
event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
851+
}
852+
if (desc->priv_data_present) {
853+
/*
854+
* Set last page in group bit if private data is present,
855+
* page response is required as it does for LPIG.
856+
* iommu_report_device_fault() doesn't understand this vendor
857+
* specific requirement thus we set last_page as a workaround.
858+
*/
859+
event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
860+
event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
861+
memcpy(event.fault.prm.private_data, desc->priv_data,
862+
sizeof(desc->priv_data));
863+
}
864+
865+
return iommu_report_device_fault(dev, &event);
866+
}
867+
814868
static irqreturn_t prq_event_thread(int irq, void *d)
815869
{
870+
struct intel_svm_dev *sdev = NULL;
816871
struct intel_iommu *iommu = d;
817872
struct intel_svm *svm = NULL;
818873
int head, tail, handled = 0;
@@ -824,7 +879,6 @@ static irqreturn_t prq_event_thread(int irq, void *d)
824879
tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
825880
head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
826881
while (head != tail) {
827-
struct intel_svm_dev *sdev;
828882
struct vm_area_struct *vma;
829883
struct page_req_dsc *req;
830884
struct qi_desc resp;
@@ -860,6 +914,20 @@ static irqreturn_t prq_event_thread(int irq, void *d)
860914
}
861915
}
862916

917+
if (!sdev || sdev->sid != req->rid) {
918+
struct intel_svm_dev *t;
919+
920+
sdev = NULL;
921+
rcu_read_lock();
922+
list_for_each_entry_rcu(t, &svm->devs, list) {
923+
if (t->sid == req->rid) {
924+
sdev = t;
925+
break;
926+
}
927+
}
928+
rcu_read_unlock();
929+
}
930+
863931
result = QI_RESP_INVALID;
864932
/* Since we're using init_mm.pgd directly, we should never take
865933
* any faults on kernel addresses. */
@@ -870,6 +938,17 @@ static irqreturn_t prq_event_thread(int irq, void *d)
870938
if (!is_canonical_address(address))
871939
goto bad_req;
872940

941+
/*
942+
* If prq is to be handled outside iommu driver via receiver of
943+
* the fault notifiers, we skip the page response here.
944+
*/
945+
if (svm->flags & SVM_FLAG_GUEST_MODE) {
946+
if (sdev && !intel_svm_prq_report(sdev->dev, req))
947+
goto prq_advance;
948+
else
949+
goto bad_req;
950+
}
951+
873952
/* If the mm is already defunct, don't handle faults. */
874953
if (!mmget_not_zero(svm->mm))
875954
goto bad_req;
@@ -888,24 +967,11 @@ static irqreturn_t prq_event_thread(int irq, void *d)
888967
goto invalid;
889968

890969
result = QI_RESP_SUCCESS;
891-
invalid:
970+
invalid:
892971
mmap_read_unlock(svm->mm);
893972
mmput(svm->mm);
894-
bad_req:
895-
/* Accounting for major/minor faults? */
896-
rcu_read_lock();
897-
list_for_each_entry_rcu(sdev, &svm->devs, list) {
898-
if (sdev->sid == req->rid)
899-
break;
900-
}
901-
/* Other devices can go away, but the drivers are not permitted
902-
* to unbind while any page faults might be in flight. So it's
903-
* OK to drop the 'lock' here now we have it. */
904-
rcu_read_unlock();
905-
906-
if (WARN_ON(&sdev->list == &svm->devs))
907-
sdev = NULL;
908-
973+
bad_req:
974+
WARN_ON(!sdev);
909975
if (sdev && sdev->ops && sdev->ops->fault_cb) {
910976
int rwxp = (req->rd_req << 3) | (req->wr_req << 2) |
911977
(req->exe_req << 1) | (req->pm_req);
@@ -916,7 +982,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
916982
and these can be NULL. Do not use them below this point! */
917983
sdev = NULL;
918984
svm = NULL;
919-
no_pasid:
985+
no_pasid:
920986
if (req->lpig || req->priv_data_present) {
921987
/*
922988
* Per VT-d spec. v3.0 ch7.7, system software must
@@ -941,6 +1007,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
9411007
resp.qw3 = 0;
9421008
qi_submit_sync(iommu, &resp, 1, 0);
9431009
}
1010+
prq_advance:
9441011
head = (head + sizeof(*req)) & PRQ_RING_MASK;
9451012
}
9461013

0 commit comments

Comments
 (0)