Skip to content

Commit 38e5f33

Browse files
ashkalra authored and joergroedel committed
iommu/amd: Reuse device table for kdump
After a panic if SNP is enabled in the previous kernel then the kdump kernel boots with IOMMU SNP enforcement still enabled. IOMMU device table register is locked and exclusive to the previous kernel. Attempts to copy old device table from the previous kernel fails in kdump kernel as hardware ignores writes to the locked device table base address register as per AMD IOMMU spec Section 2.12.2.1. This causes the IOMMU driver (OS) and the hardware to reference different memory locations. As a result, the IOMMU hardware cannot process the command which results in repeated "Completion-Wait loop timed out" errors and a second kernel panic: "Kernel panic - not syncing: timer doesn't work through Interrupt-remapped IO-APIC". Reuse device table instead of copying device table in case of kdump boot and remove all copying device table code. Reviewed-by: Vasant Hegde <[email protected]> Tested-by: Sairaj Kodilkar <[email protected]> Signed-off-by: Ashish Kalra <[email protected]> Link: https://lore.kernel.org/r/3a31036fb2f7323e6b1a1a1921ac777e9f7bdddc.1756157913.git.ashish.kalra@amd.com Signed-off-by: Joerg Roedel <[email protected]>
1 parent f32fe7c commit 38e5f33

File tree

1 file changed

+34
-70
lines changed

1 file changed

+34
-70
lines changed

drivers/iommu/amd/init.c

Lines changed: 34 additions & 70 deletions
Original file line number | Diff line number | Diff line change
@@ -406,6 +406,9 @@ static void iommu_set_device_table(struct amd_iommu *iommu)
406406

407407
BUG_ON(iommu->mmio_base == NULL);
408408

409+
if (is_kdump_kernel())
410+
return;
411+
409412
entry = iommu_virt_to_phys(dev_table);
410413
entry |= (dev_table_size >> 12) - 1;
411414
memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET,
@@ -646,7 +649,10 @@ static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg)
646649

647650
static inline void free_dev_table(struct amd_iommu_pci_seg *pci_seg)
648651
{
649-
iommu_free_pages(pci_seg->dev_table);
652+
if (is_kdump_kernel())
653+
memunmap((void *)pci_seg->dev_table);
654+
else
655+
iommu_free_pages(pci_seg->dev_table);
650656
pci_seg->dev_table = NULL;
651657
}
652658

@@ -1117,15 +1123,12 @@ static void set_dte_bit(struct dev_table_entry *dte, u8 bit)
11171123
dte->data[i] |= (1UL << _bit);
11181124
}
11191125

1120-
static bool __copy_device_table(struct amd_iommu *iommu)
1126+
static bool __reuse_device_table(struct amd_iommu *iommu)
11211127
{
1122-
u64 int_ctl, int_tab_len, entry = 0;
11231128
struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
1124-
struct dev_table_entry *old_devtb = NULL;
1125-
u32 lo, hi, devid, old_devtb_size;
1129+
u32 lo, hi, old_devtb_size;
11261130
phys_addr_t old_devtb_phys;
1127-
u16 dom_id, dte_v, irq_v;
1128-
u64 tmp;
1131+
u64 entry;
11291132

11301133
/* Each IOMMU use separate device table with the same size */
11311134
lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
@@ -1150,84 +1153,38 @@ static bool __copy_device_table(struct amd_iommu *iommu)
11501153
pr_err("The address of old device table is above 4G, not trustworthy!\n");
11511154
return false;
11521155
}
1153-
old_devtb = (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) && is_kdump_kernel())
1154-
? (__force void *)ioremap_encrypted(old_devtb_phys,
1155-
pci_seg->dev_table_size)
1156-
: memremap(old_devtb_phys, pci_seg->dev_table_size, MEMREMAP_WB);
1157-
1158-
if (!old_devtb)
1159-
return false;
11601156

1161-
pci_seg->old_dev_tbl_cpy = iommu_alloc_pages_sz(
1162-
GFP_KERNEL | GFP_DMA32, pci_seg->dev_table_size);
1157+
/*
1158+
* Re-use the previous kernel's device table for kdump.
1159+
*/
1160+
pci_seg->old_dev_tbl_cpy = iommu_memremap(old_devtb_phys, pci_seg->dev_table_size);
11631161
if (pci_seg->old_dev_tbl_cpy == NULL) {
1164-
pr_err("Failed to allocate memory for copying old device table!\n");
1165-
memunmap(old_devtb);
1162+
pr_err("Failed to remap memory for reusing old device table!\n");
11661163
return false;
11671164
}
11681165

1169-
for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
1170-
pci_seg->old_dev_tbl_cpy[devid] = old_devtb[devid];
1171-
dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK;
1172-
dte_v = old_devtb[devid].data[0] & DTE_FLAG_V;
1173-
1174-
if (dte_v && dom_id) {
1175-
pci_seg->old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0];
1176-
pci_seg->old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1];
1177-
/* Reserve the Domain IDs used by previous kernel */
1178-
if (ida_alloc_range(&pdom_ids, dom_id, dom_id, GFP_ATOMIC) != dom_id) {
1179-
pr_err("Failed to reserve domain ID 0x%x\n", dom_id);
1180-
memunmap(old_devtb);
1181-
return false;
1182-
}
1183-
/* If gcr3 table existed, mask it out */
1184-
if (old_devtb[devid].data[0] & DTE_FLAG_GV) {
1185-
tmp = (DTE_GCR3_30_15 | DTE_GCR3_51_31);
1186-
pci_seg->old_dev_tbl_cpy[devid].data[1] &= ~tmp;
1187-
tmp = (DTE_GCR3_14_12 | DTE_FLAG_GV);
1188-
pci_seg->old_dev_tbl_cpy[devid].data[0] &= ~tmp;
1189-
}
1190-
}
1191-
1192-
irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE;
1193-
int_ctl = old_devtb[devid].data[2] & DTE_IRQ_REMAP_INTCTL_MASK;
1194-
int_tab_len = old_devtb[devid].data[2] & DTE_INTTABLEN_MASK;
1195-
if (irq_v && (int_ctl || int_tab_len)) {
1196-
if ((int_ctl != DTE_IRQ_REMAP_INTCTL) ||
1197-
(int_tab_len != DTE_INTTABLEN_512 &&
1198-
int_tab_len != DTE_INTTABLEN_2K)) {
1199-
pr_err("Wrong old irq remapping flag: %#x\n", devid);
1200-
memunmap(old_devtb);
1201-
return false;
1202-
}
1203-
1204-
pci_seg->old_dev_tbl_cpy[devid].data[2] = old_devtb[devid].data[2];
1205-
}
1206-
}
1207-
memunmap(old_devtb);
1208-
12091166
return true;
12101167
}
12111168

1212-
static bool copy_device_table(void)
1169+
static bool reuse_device_table(void)
12131170
{
12141171
struct amd_iommu *iommu;
12151172
struct amd_iommu_pci_seg *pci_seg;
12161173

12171174
if (!amd_iommu_pre_enabled)
12181175
return false;
12191176

1220-
pr_warn("Translation is already enabled - trying to copy translation structures\n");
1177+
pr_warn("Translation is already enabled - trying to reuse translation structures\n");
12211178

12221179
/*
12231180
* All IOMMUs within PCI segment shares common device table.
1224-
* Hence copy device table only once per PCI segment.
1181+
* Hence reuse device table only once per PCI segment.
12251182
*/
12261183
for_each_pci_segment(pci_seg) {
12271184
for_each_iommu(iommu) {
12281185
if (pci_seg->id != iommu->pci_seg->id)
12291186
continue;
1230-
if (!__copy_device_table(iommu))
1187+
if (!__reuse_device_table(iommu))
12311188
return false;
12321189
break;
12331190
}
@@ -2906,27 +2863,34 @@ static void early_enable_iommu(struct amd_iommu *iommu)
29062863
* This function finally enables all IOMMUs found in the system after
29072864
* they have been initialized.
29082865
*
2909-
* Or if in kdump kernel and IOMMUs are all pre-enabled, try to copy
2910-
* the old content of device table entries. Not this case or copy failed,
2866+
* Or if in kdump kernel and IOMMUs are all pre-enabled, try to reuse
2867+
* the old content of device table entries. Not this case or reuse failed,
29112868
* just continue as normal kernel does.
29122869
*/
29132870
static void early_enable_iommus(void)
29142871
{
29152872
struct amd_iommu *iommu;
29162873
struct amd_iommu_pci_seg *pci_seg;
29172874

2918-
if (!copy_device_table()) {
2875+
if (!reuse_device_table()) {
29192876
/*
2920-
* If come here because of failure in copying device table from old
2877+
* If come here because of failure in reusing device table from old
29212878
* kernel with all IOMMUs enabled, print error message and try to
29222879
* free allocated old_dev_tbl_cpy.
29232880
*/
2924-
if (amd_iommu_pre_enabled)
2925-
pr_err("Failed to copy DEV table from previous kernel.\n");
2881+
if (amd_iommu_pre_enabled) {
2882+
pr_err("Failed to reuse DEV table from previous kernel.\n");
2883+
/*
2884+
* Bail out early if unable to remap/reuse DEV table from
2885+
* previous kernel if SNP enabled as IOMMU commands will
2886+
* time out without DEV table and cause kdump boot panic.
2887+
*/
2888+
BUG_ON(check_feature(FEATURE_SNP));
2889+
}
29262890

29272891
for_each_pci_segment(pci_seg) {
29282892
if (pci_seg->old_dev_tbl_cpy != NULL) {
2929-
iommu_free_pages(pci_seg->old_dev_tbl_cpy);
2893+
memunmap((void *)pci_seg->old_dev_tbl_cpy);
29302894
pci_seg->old_dev_tbl_cpy = NULL;
29312895
}
29322896
}
@@ -2936,7 +2900,7 @@ static void early_enable_iommus(void)
29362900
early_enable_iommu(iommu);
29372901
}
29382902
} else {
2939-
pr_info("Copied DEV table from previous kernel.\n");
2903+
pr_info("Reused DEV table from previous kernel.\n");
29402904

29412905
for_each_pci_segment(pci_seg) {
29422906
iommu_free_pages(pci_seg->dev_table);

0 commit comments

Comments
 (0)