Skip to content

Commit e14232a

Browse files
orange-kaoaegl
authored and committed
EDAC/igen6: Add polling support
Some PCs with Intel N100 (with PCI device 8086:461c, DID_ADL_N_SKU4) experienced issues with error interrupts not working, even with the following configuration in the BIOS:

    In-Band ECC Support: Enabled
    In-Band ECC Operation Mode: 2 (make all requests protected and ignore range checks)
    IBECC Error Injection Control: Inject Correctable Error on insertion counter
    Error Injection Insertion Count: 251658240 (0xf000000)

Add polling mode support for these machines to ensure that memory error events are handled.

Signed-off-by: Orange Kao <[email protected]>
Signed-off-by: Tony Luck <[email protected]>
Reviewed-by: Qiuxu Zhuo <[email protected]>
Link: https://lore.kernel.org/all/[email protected]
1 parent 1d512b1 commit e14232a

File tree

1 file changed

+28
-2
lines changed

1 file changed

+28
-2
lines changed

drivers/edac/igen6_edac.c

Lines changed: 28 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1170,6 +1170,20 @@ static int igen6_pci_setup(struct pci_dev *pdev, u64 *mchbar)
11701170
return -ENODEV;
11711171
}
11721172

1173+
static void igen6_check(struct mem_ctl_info *mci)
1174+
{
1175+
struct igen6_imc *imc = mci->pvt_info;
1176+
u64 ecclog;
1177+
1178+
/* errsts_clear() isn't NMI-safe. Delay it in the IRQ context */
1179+
ecclog = ecclog_read_and_clear(imc);
1180+
if (!ecclog)
1181+
return;
1182+
1183+
if (!ecclog_gen_pool_add(imc->mc, ecclog))
1184+
irq_work_queue(&ecclog_irq_work);
1185+
}
1186+
11731187
static int igen6_register_mci(int mc, u64 mchbar, struct pci_dev *pdev)
11741188
{
11751189
struct edac_mc_layer layers[2];
@@ -1211,6 +1225,8 @@ static int igen6_register_mci(int mc, u64 mchbar, struct pci_dev *pdev)
12111225
mci->edac_cap = EDAC_FLAG_SECDED;
12121226
mci->mod_name = EDAC_MOD_STR;
12131227
mci->dev_name = pci_name(pdev);
1228+
if (edac_op_state == EDAC_OPSTATE_POLL)
1229+
mci->edac_check = igen6_check;
12141230
mci->pvt_info = &igen6_pvt->imc[mc];
12151231

12161232
imc = mci->pvt_info;
@@ -1350,8 +1366,18 @@ static void unregister_err_handler(void)
13501366
unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME);
13511367
}
13521368

1353-
static void opstate_set(struct res_config *cfg)
1369+
static void opstate_set(struct res_config *cfg, const struct pci_device_id *ent)
13541370
{
1371+
/*
1372+
* Quirk: Certain SoCs' error reporting interrupts don't work.
1373+
* Force polling mode for them to ensure that memory error
1374+
* events can be handled.
1375+
*/
1376+
if (ent->device == DID_ADL_N_SKU4) {
1377+
edac_op_state = EDAC_OPSTATE_POLL;
1378+
return;
1379+
}
1380+
13551381
/* Set the mode according to the configuration data. */
13561382
if (cfg->machine_check)
13571383
edac_op_state = EDAC_OPSTATE_INT;
@@ -1376,7 +1402,7 @@ static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
13761402
if (rc)
13771403
goto fail;
13781404

1379-
opstate_set(res_cfg);
1405+
opstate_set(res_cfg, ent);
13801406

13811407
for (i = 0; i < res_cfg->num_imc; i++) {
13821408
rc = igen6_register_mci(i, mchbar, pdev);

0 commit comments

Comments
 (0)