Skip to content

Commit 7adcadb

Browse files
committed
Merge tag 'edac_updates_for_6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras
Pull EDAC updates from Borislav Petkov:

 - Make ghes_edac a simple module like the rest of the EDAC drivers and
   drop the forced built-in only configuration by disentangling it from
   GHES (Jia He)

 - The usual small cleanups and improvements all over EDAC land

* tag 'edac_updates_for_6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras:
  EDAC/i10nm: fix refcount leak in pci_get_dev_wrapper()
  EDAC/i5400: Fix typo in comment: vaious -> various
  EDAC/mc_sysfs: Increase legacy channel support to 12
  MAINTAINERS: Make Mauro EDAC reviewer
  MAINTAINERS: Make Manivannan Sadhasivam the maintainer of qcom_edac
  EDAC/igen6: Return the correct error type when not the MC owner
  apei/ghes: Use xchg_release() for updating new cache slot instead of cmpxchg()
  EDAC: Check for GHES preference in the chipset-specific EDAC drivers
  EDAC/ghes: Make ghes_edac a proper module
  EDAC/ghes: Prepare to make ghes_edac a proper module
  EDAC/ghes: Add a notifier for reporting memory errors
  efi/cper: Export several helpers for ghes_edac to use
  EDAC/i5000: Mark as BROKEN
2 parents 40deb5e + 3919430 commit 7adcadb

19 files changed

+194
-72
lines changed

MAINTAINERS

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7386,9 +7386,9 @@ F: drivers/edac/thunderx_edac*
73867386

73877387
EDAC-CORE
73887388
M: Borislav Petkov <[email protected]>
7389-
M: Mauro Carvalho Chehab <[email protected]>
73907389
M: Tony Luck <[email protected]>
73917390
R: James Morse <[email protected]>
7391+
R: Mauro Carvalho Chehab <[email protected]>
73927392
R: Robert Richter <[email protected]>
73937393
73947394
S: Supported
@@ -7505,8 +7505,7 @@ S: Maintained
75057505
F: drivers/edac/pnd2_edac.[ch]
75067506

75077507
EDAC-QCOM
7508-
M: Channagoud Kadabi <[email protected]>
7509-
M: Venkata Narendra Kumar Gutta <[email protected]>
7508+
M: Manivannan Sadhasivam <[email protected]>
75107509
75117510
75127511
S: Maintained

drivers/acpi/apei/ghes.c

Lines changed: 63 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,8 @@
9494
#define FIX_APEI_GHES_SDEI_CRITICAL __end_of_fixed_addresses
9595
#endif
9696

97+
static ATOMIC_NOTIFIER_HEAD(ghes_report_chain);
98+
9799
static inline bool is_hest_type_generic_v2(struct ghes *ghes)
98100
{
99101
return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2;
@@ -107,6 +109,13 @@ static inline bool is_hest_type_generic_v2(struct ghes *ghes)
107109
bool ghes_disable;
108110
module_param_named(disable, ghes_disable, bool, 0);
109111

112+
/*
113+
* "ghes.edac_force_enable" forcibly enables ghes_edac and skips the platform
114+
* check.
115+
*/
116+
static bool ghes_edac_force_enable;
117+
module_param_named(edac_force_enable, ghes_edac_force_enable, bool, 0);
118+
110119
/*
111120
* All error sources notified with HED (Hardware Error Device) share a
112121
* single notifier callback, so they need to be linked and checked one
@@ -118,6 +127,13 @@ module_param_named(disable, ghes_disable, bool, 0);
118127
static LIST_HEAD(ghes_hed);
119128
static DEFINE_MUTEX(ghes_list_mutex);
120129

130+
/*
131+
* A list of GHES devices which are given to the corresponding EDAC driver
132+
* ghes_edac for further use.
133+
*/
134+
static LIST_HEAD(ghes_devs);
135+
static DEFINE_MUTEX(ghes_devs_mutex);
136+
121137
/*
122138
* Because the memory area used to transfer hardware error information
123139
* from BIOS to Linux can be determined only in NMI, IRQ or timer
@@ -645,7 +661,7 @@ static bool ghes_do_proc(struct ghes *ghes,
645661
if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
646662
struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
647663

648-
ghes_edac_report_mem_error(sev, mem_err);
664+
atomic_notifier_call_chain(&ghes_report_chain, sev, mem_err);
649665

650666
arch_apei_report_mem_error(sev, mem_err);
651667
queued = ghes_handle_memory_failure(gdata, sev);
@@ -1382,7 +1398,11 @@ static int ghes_probe(struct platform_device *ghes_dev)
13821398

13831399
platform_set_drvdata(ghes_dev, ghes);
13841400

1385-
ghes_edac_register(ghes, &ghes_dev->dev);
1401+
ghes->dev = &ghes_dev->dev;
1402+
1403+
mutex_lock(&ghes_devs_mutex);
1404+
list_add_tail(&ghes->elist, &ghes_devs);
1405+
mutex_unlock(&ghes_devs_mutex);
13861406

13871407
/* Handle any pending errors right away */
13881408
spin_lock_irqsave(&ghes_notify_lock_irq, flags);
@@ -1446,7 +1466,9 @@ static int ghes_remove(struct platform_device *ghes_dev)
14461466

14471467
ghes_fini(ghes);
14481468

1449-
ghes_edac_unregister(ghes);
1469+
mutex_lock(&ghes_devs_mutex);
1470+
list_del(&ghes->elist);
1471+
mutex_unlock(&ghes_devs_mutex);
14501472

14511473
kfree(ghes);
14521474

@@ -1501,3 +1523,41 @@ void __init acpi_ghes_init(void)
15011523
else
15021524
pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
15031525
}
1526+
1527+
/*
1528+
* Known x86 systems that prefer GHES error reporting:
1529+
*/
1530+
static struct acpi_platform_list plat_list[] = {
1531+
{"HPE ", "Server ", 0, ACPI_SIG_FADT, all_versions},
1532+
{ } /* End */
1533+
};
1534+
1535+
struct list_head *ghes_get_devices(void)
1536+
{
1537+
int idx = -1;
1538+
1539+
if (IS_ENABLED(CONFIG_X86)) {
1540+
idx = acpi_match_platform_list(plat_list);
1541+
if (idx < 0) {
1542+
if (!ghes_edac_force_enable)
1543+
return NULL;
1544+
1545+
pr_warn_once("Force-loading ghes_edac on an unsupported platform. You're on your own!\n");
1546+
}
1547+
}
1548+
1549+
return &ghes_devs;
1550+
}
1551+
EXPORT_SYMBOL_GPL(ghes_get_devices);
1552+
1553+
void ghes_register_report_chain(struct notifier_block *nb)
1554+
{
1555+
atomic_notifier_chain_register(&ghes_report_chain, nb);
1556+
}
1557+
EXPORT_SYMBOL_GPL(ghes_register_report_chain);
1558+
1559+
void ghes_unregister_report_chain(struct notifier_block *nb)
1560+
{
1561+
atomic_notifier_chain_unregister(&ghes_report_chain, nb);
1562+
}
1563+
EXPORT_SYMBOL_GPL(ghes_unregister_report_chain);

drivers/edac/Kconfig

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,8 @@ config EDAC_DECODE_MCE
5353
has been initialized.
5454

5555
config EDAC_GHES
56-
bool "Output ACPI APEI/GHES BIOS detected errors via EDAC"
57-
depends on ACPI_APEI_GHES && (EDAC=y)
56+
tristate "Output ACPI APEI/GHES BIOS detected errors via EDAC"
57+
depends on ACPI_APEI_GHES
5858
select UEFI_CPER
5959
help
6060
Not all machines support hardware-driven error report. Some of those
@@ -211,6 +211,7 @@ config EDAC_R82600
211211
config EDAC_I5000
212212
tristate "Intel Greencreek/Blackford chipset"
213213
depends on X86 && PCI
214+
depends on BROKEN
214215
help
215216
Support for error detection and correction the Intel
216217
Greekcreek/Blackford chipsets.

drivers/edac/amd64_edac.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4329,6 +4329,9 @@ static int __init amd64_edac_init(void)
43294329
int err = -ENODEV;
43304330
int i;
43314331

4332+
if (ghes_get_devices())
4333+
return -EBUSY;
4334+
43324335
owner = edac_get_owner();
43334336
if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
43344337
return -EBUSY;

drivers/edac/armada_xp_edac.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -599,6 +599,9 @@ static int __init armada_xp_edac_init(void)
599599
{
600600
int res;
601601

602+
if (ghes_get_devices())
603+
return -EBUSY;
604+
602605
/* only polling is supported */
603606
edac_op_state = EDAC_OPSTATE_POLL;
604607

drivers/edac/edac_mc_sysfs.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,14 @@ DEVICE_CHANNEL(ch6_dimm_label, S_IRUGO | S_IWUSR,
298298
channel_dimm_label_show, channel_dimm_label_store, 6);
299299
DEVICE_CHANNEL(ch7_dimm_label, S_IRUGO | S_IWUSR,
300300
channel_dimm_label_show, channel_dimm_label_store, 7);
301+
DEVICE_CHANNEL(ch8_dimm_label, S_IRUGO | S_IWUSR,
302+
channel_dimm_label_show, channel_dimm_label_store, 8);
303+
DEVICE_CHANNEL(ch9_dimm_label, S_IRUGO | S_IWUSR,
304+
channel_dimm_label_show, channel_dimm_label_store, 9);
305+
DEVICE_CHANNEL(ch10_dimm_label, S_IRUGO | S_IWUSR,
306+
channel_dimm_label_show, channel_dimm_label_store, 10);
307+
DEVICE_CHANNEL(ch11_dimm_label, S_IRUGO | S_IWUSR,
308+
channel_dimm_label_show, channel_dimm_label_store, 11);
301309

302310
/* Total possible dynamic DIMM Label attribute file table */
303311
static struct attribute *dynamic_csrow_dimm_attr[] = {
@@ -309,6 +317,10 @@ static struct attribute *dynamic_csrow_dimm_attr[] = {
309317
&dev_attr_legacy_ch5_dimm_label.attr.attr,
310318
&dev_attr_legacy_ch6_dimm_label.attr.attr,
311319
&dev_attr_legacy_ch7_dimm_label.attr.attr,
320+
&dev_attr_legacy_ch8_dimm_label.attr.attr,
321+
&dev_attr_legacy_ch9_dimm_label.attr.attr,
322+
&dev_attr_legacy_ch10_dimm_label.attr.attr,
323+
&dev_attr_legacy_ch11_dimm_label.attr.attr,
312324
NULL
313325
};
314326

@@ -329,6 +341,14 @@ DEVICE_CHANNEL(ch6_ce_count, S_IRUGO,
329341
channel_ce_count_show, NULL, 6);
330342
DEVICE_CHANNEL(ch7_ce_count, S_IRUGO,
331343
channel_ce_count_show, NULL, 7);
344+
DEVICE_CHANNEL(ch8_ce_count, S_IRUGO,
345+
channel_ce_count_show, NULL, 8);
346+
DEVICE_CHANNEL(ch9_ce_count, S_IRUGO,
347+
channel_ce_count_show, NULL, 9);
348+
DEVICE_CHANNEL(ch10_ce_count, S_IRUGO,
349+
channel_ce_count_show, NULL, 10);
350+
DEVICE_CHANNEL(ch11_ce_count, S_IRUGO,
351+
channel_ce_count_show, NULL, 11);
332352

333353
/* Total possible dynamic ce_count attribute file table */
334354
static struct attribute *dynamic_csrow_ce_count_attr[] = {
@@ -340,6 +360,10 @@ static struct attribute *dynamic_csrow_ce_count_attr[] = {
340360
&dev_attr_legacy_ch5_ce_count.attr.attr,
341361
&dev_attr_legacy_ch6_ce_count.attr.attr,
342362
&dev_attr_legacy_ch7_ce_count.attr.attr,
363+
&dev_attr_legacy_ch8_ce_count.attr.attr,
364+
&dev_attr_legacy_ch9_ce_count.attr.attr,
365+
&dev_attr_legacy_ch10_ce_count.attr.attr,
366+
&dev_attr_legacy_ch11_ce_count.attr.attr,
343367
NULL
344368
};
345369

drivers/edac/edac_module.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#ifndef __EDAC_MODULE_H__
1212
#define __EDAC_MODULE_H__
1313

14+
#include <acpi/ghes.h>
1415
#include "edac_mc.h"
1516
#include "edac_pci.h"
1617
#include "edac_device.h"

0 commit comments

Comments
 (0)