Skip to content

Commit 35bf49e

Browse files
LuBaolu authored and joergroedel committed
iommu/vt-d: Fix lockdep splat due to klist iteration in atomic context
With CONFIG_INTEL_IOMMU_DEBUGFS enabled, the lockdep splat below is seen when an I/O fault occurs on a machine with an Intel IOMMU in it.

DMAR: DRHD: handling fault status reg 3
DMAR: [DMA Write NO_PASID] Request device [00:1a.0] fault addr 0x0 [fault reason 0x05] PTE Write access is not set
DMAR: Dump dmar0 table entries for IOVA 0x0
DMAR: root entry: 0x0000000127f42001
DMAR: context entry: hi 0x0000000000001502, low 0x000000012d8ab001
================================
WARNING: inconsistent lock state
5.20.0-0.rc0.20220812git7ebfc85e2cd7.10.fc38.x86_64 #1 Not tainted
--------------------------------
inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage.
rngd/1006 [HC1[1]:SC0[0]:HE0:SE1] takes:
ff177021416f2d78 (&k->k_lock){?.+.}-{2:2}, at: klist_next+0x1b/0x160
{HARDIRQ-ON-W} state was registered at:
lock_acquire+0xce/0x2d0
_raw_spin_lock+0x33/0x80
klist_add_tail+0x46/0x80
bus_add_device+0xee/0x150
device_add+0x39d/0x9a0
add_memory_block+0x108/0x1d0
memory_dev_init+0xe1/0x117
driver_init+0x43/0x4d
kernel_init_freeable+0x1c2/0x2cc
kernel_init+0x16/0x140
ret_from_fork+0x1f/0x30
irq event stamp: 7812
hardirqs last enabled at (7811): [<ffffffff85000e86>] asm_sysvec_apic_timer_interrupt+0x16/0x20
hardirqs last disabled at (7812): [<ffffffff84f16894>] irqentry_enter+0x54/0x60
softirqs last enabled at (7794): [<ffffffff840ff669>] __irq_exit_rcu+0xf9/0x170
softirqs last disabled at (7787): [<ffffffff840ff669>] __irq_exit_rcu+0xf9/0x170

The splat occurs because the klist iterator functions use spin_*lock_irq*() while the klist insertion functions use spin_*lock(), and the Intel DMAR IOMMU driver iterates over klists from atomic (hardirq) context: pci_get_domain_bus_and_slot() calls into bus_find_device(), which iterates over klists.

As there is currently no plan to make klist safe to use in atomic context, this fixes the lockdep splat by avoiding the call to pci_get_domain_bus_and_slot() in hardirq context.
Fixes: 8ac0b64 ("iommu/vt-d: Use pci_get_domain_bus_and_slot() in pgtable_walk()")
Reported-by: Lennert Buytenhek <[email protected]>
Link: https://lore.kernel.org/linux-iommu/Yvo2dfpEh%[email protected]/
Link: https://lore.kernel.org/linux-iommu/[email protected]/
Signed-off-by: Lu Baolu <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Joerg Roedel <[email protected]>
1 parent a349ffc commit 35bf49e

File tree

1 file changed

+19
-28
lines changed

1 file changed

+19
-28
lines changed

drivers/iommu/intel/iommu.c

Lines changed: 19 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -813,32 +813,11 @@ static void free_context_table(struct intel_iommu *iommu)
813813
}
814814

815815
#ifdef CONFIG_DMAR_DEBUG
816-
static void pgtable_walk(struct intel_iommu *iommu, unsigned long pfn, u8 bus, u8 devfn)
816+
static void pgtable_walk(struct intel_iommu *iommu, unsigned long pfn,
817+
u8 bus, u8 devfn, struct dma_pte *parent, int level)
817818
{
818-
struct device_domain_info *info;
819-
struct dma_pte *parent, *pte;
820-
struct dmar_domain *domain;
821-
struct pci_dev *pdev;
822-
int offset, level;
823-
824-
pdev = pci_get_domain_bus_and_slot(iommu->segment, bus, devfn);
825-
if (!pdev)
826-
return;
827-
828-
info = dev_iommu_priv_get(&pdev->dev);
829-
if (!info || !info->domain) {
830-
pr_info("device [%02x:%02x.%d] not probed\n",
831-
bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
832-
return;
833-
}
834-
835-
domain = info->domain;
836-
level = agaw_to_level(domain->agaw);
837-
parent = domain->pgd;
838-
if (!parent) {
839-
pr_info("no page table setup\n");
840-
return;
841-
}
819+
struct dma_pte *pte;
820+
int offset;
842821

843822
while (1) {
844823
offset = pfn_level_offset(pfn, level);
@@ -865,9 +844,10 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
865844
struct pasid_entry *entries, *pte;
866845
struct context_entry *ctx_entry;
867846
struct root_entry *rt_entry;
847+
int i, dir_index, index, level;
868848
u8 devfn = source_id & 0xff;
869849
u8 bus = source_id >> 8;
870-
int i, dir_index, index;
850+
struct dma_pte *pgtable;
871851

872852
pr_info("Dump %s table entries for IOVA 0x%llx\n", iommu->name, addr);
873853

@@ -895,8 +875,11 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
895875
ctx_entry->hi, ctx_entry->lo);
896876

897877
/* legacy mode does not require PASID entries */
898-
if (!sm_supported(iommu))
878+
if (!sm_supported(iommu)) {
879+
level = agaw_to_level(ctx_entry->hi & 7);
880+
pgtable = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
899881
goto pgtable_walk;
882+
}
900883

901884
/* get the pointer to pasid directory entry */
902885
dir = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
@@ -923,8 +906,16 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
923906
for (i = 0; i < ARRAY_SIZE(pte->val); i++)
924907
pr_info("pasid table entry[%d]: 0x%016llx\n", i, pte->val[i]);
925908

909+
if (pasid_pte_get_pgtt(pte) == PASID_ENTRY_PGTT_FL_ONLY) {
910+
level = pte->val[2] & BIT_ULL(2) ? 5 : 4;
911+
pgtable = phys_to_virt(pte->val[2] & VTD_PAGE_MASK);
912+
} else {
913+
level = agaw_to_level((pte->val[0] >> 2) & 0x7);
914+
pgtable = phys_to_virt(pte->val[0] & VTD_PAGE_MASK);
915+
}
916+
926917
pgtable_walk:
927-
pgtable_walk(iommu, addr >> VTD_PAGE_SHIFT, bus, devfn);
918+
pgtable_walk(iommu, addr >> VTD_PAGE_SHIFT, bus, devfn, pgtable, level);
928919
}
929920
#endif
930921

0 commit comments

Comments
 (0)