Skip to content

Commit 8438b84

Browse files
schnhrr authored and suryasaimadhu committed
x86/mce: Take action on UCNA/Deferred errors again
Commit fa92c58 ("x86, mce: Support memory error recovery for both UCNA and Deferred error in machine_check_poll") added handling of UCNA and Deferred errors by adding them to the ring for SRAO errors.

Later, commit fd4cf79 ("x86/mce: Remove the MCE ring for Action Optional errors") switched storage from the SRAO ring to the unified pool that is still in use today. In order to only act on the intended errors, a filter for MCE_AO_SEVERITY is used -- effectively removing handling of UCNA/Deferred errors again.

Extend the severity filter to include UCNA/Deferred errors again. Also, generalize the naming of the notifier from SRAO to UC to capture the extended scope.

Note that this change may cause a message like the following to appear, as the same address may be reported as SRAO and as UCNA:

    Memory failure: 0x5fe3284: already hardware poisoned

Technically, this is a return to previous behavior.

Signed-off-by: Jan H. Schönherr <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Acked-by: Tony Luck <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
1 parent 81736ab commit 8438b84

File tree

2 files changed

+17
-16
lines changed

2 files changed

+17
-16
lines changed

arch/x86/include/asm/mce.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -144,7 +144,7 @@ struct mce_log_buffer {
144144

145145
enum mce_notifier_prios {
146146
MCE_PRIO_FIRST = INT_MAX,
147-
MCE_PRIO_SRAO = INT_MAX - 1,
147+
MCE_PRIO_UC = INT_MAX - 1,
148148
MCE_PRIO_EXTLOG = INT_MAX - 2,
149149
MCE_PRIO_NFIT = INT_MAX - 3,
150150
MCE_PRIO_EDAC = INT_MAX - 4,

arch/x86/kernel/cpu/mce/core.c

Lines changed: 16 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -156,10 +156,8 @@ void mce_log(struct mce *m)
156156
}
157157
EXPORT_SYMBOL_GPL(mce_log);
158158

159-
static struct notifier_block mce_srao_nb;
160-
161159
/*
162-
* We run the default notifier if we have only the SRAO, the first and the
160+
* We run the default notifier if we have only the UC, the first and the
163161
* default notifier registered. I.e., the mandatory NUM_DEFAULT_NOTIFIERS
164162
* notifiers registered on the chain.
165163
*/
@@ -585,26 +583,29 @@ static struct notifier_block first_nb = {
585583
.priority = MCE_PRIO_FIRST,
586584
};
587585

588-
static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
589-
void *data)
586+
static int uc_decode_notifier(struct notifier_block *nb, unsigned long val,
587+
void *data)
590588
{
591589
struct mce *mce = (struct mce *)data;
592590
unsigned long pfn;
593591

594-
if (!mce)
592+
if (!mce || !mce_usable_address(mce))
595593
return NOTIFY_DONE;
596594

597-
if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) {
598-
pfn = mce->addr >> PAGE_SHIFT;
599-
if (!memory_failure(pfn, 0))
600-
set_mce_nospec(pfn);
601-
}
595+
if (mce->severity != MCE_AO_SEVERITY &&
596+
mce->severity != MCE_DEFERRED_SEVERITY)
597+
return NOTIFY_DONE;
598+
599+
pfn = mce->addr >> PAGE_SHIFT;
600+
if (!memory_failure(pfn, 0))
601+
set_mce_nospec(pfn);
602602

603603
return NOTIFY_OK;
604604
}
605-
static struct notifier_block mce_srao_nb = {
606-
.notifier_call = srao_decode_notifier,
607-
.priority = MCE_PRIO_SRAO,
605+
606+
static struct notifier_block mce_uc_nb = {
607+
.notifier_call = uc_decode_notifier,
608+
.priority = MCE_PRIO_UC,
608609
};
609610

610611
static int mce_default_notifier(struct notifier_block *nb, unsigned long val,
@@ -2032,7 +2033,7 @@ int __init mcheck_init(void)
20322033
{
20332034
mcheck_intel_therm_init();
20342035
mce_register_decode_chain(&first_nb);
2035-
mce_register_decode_chain(&mce_srao_nb);
2036+
mce_register_decode_chain(&mce_uc_nb);
20362037
mce_register_decode_chain(&mce_default_nb);
20372038
mcheck_vendor_init_severity();
20382039

0 commit comments

Comments
 (0)