Skip to content

Commit 23ba710

Browse files
aegl authored and suryasaimadhu committed
x86/mce: Fix all mce notifiers to update the mce->kflags bitmask
If the handler took any action to log or deal with the error, set a bit in mce->kflags so that the default handler on the end of the machine check chain can see what has been done.

Get rid of NOTIFY_STOP returns. Make the EDAC and dev-mcelog handlers skip over errors already processed by CEC.

Signed-off-by: Tony Luck <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Tested-by: Tony Luck <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
1 parent 1de08dc commit 23ba710

File tree

10 files changed

+37
-12
lines changed

10 files changed

+37
-12
lines changed

arch/x86/kernel/cpu/mce/core.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -581,8 +581,10 @@ static int uc_decode_notifier(struct notifier_block *nb, unsigned long val,
581581
return NOTIFY_DONE;
582582

583583
pfn = mce->addr >> PAGE_SHIFT;
584-
if (!memory_failure(pfn, 0))
584+
if (!memory_failure(pfn, 0)) {
585585
set_mce_nospec(pfn);
586+
mce->kflags |= MCE_HANDLED_UC;
587+
}
586588

587589
return NOTIFY_OK;
588590
}

arch/x86/kernel/cpu/mce/dev-mcelog.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
3939
struct mce *mce = (struct mce *)data;
4040
unsigned int entry;
4141

42+
if (mce->kflags & MCE_HANDLED_CEC)
43+
return NOTIFY_DONE;
44+
4245
mutex_lock(&mce_chrdev_read_mutex);
4346

4447
entry = mcelog->next;
@@ -56,13 +59,15 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
5659

5760
memcpy(mcelog->entry + entry, mce, sizeof(struct mce));
5861
mcelog->entry[entry].finished = 1;
62+
mcelog->entry[entry].kflags = 0;
5963

6064
/* wake processes polling /dev/mcelog */
6165
wake_up_interruptible(&mce_chrdev_wait);
6266

6367
unlock:
6468
mutex_unlock(&mce_chrdev_read_mutex);
6569

70+
mce->kflags |= MCE_HANDLED_MCELOG;
6671
return NOTIFY_OK;
6772
}
6873

drivers/acpi/acpi_extlog.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
146146
static u32 err_seq;
147147

148148
estatus = extlog_elog_entry_check(cpu, bank);
149-
if (estatus == NULL)
149+
if (estatus == NULL || (mce->kflags & MCE_HANDLED_CEC))
150150
return NOTIFY_DONE;
151151

152152
memcpy(elog_buf, (void *)estatus, ELOG_ENTRY_LEN);
@@ -176,7 +176,8 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
176176
}
177177

178178
out:
179-
return NOTIFY_STOP;
179+
mce->kflags |= MCE_HANDLED_EXTLOG;
180+
return NOTIFY_OK;
180181
}
181182

182183
static bool __init extlog_get_l1addr(void)

drivers/acpi/nfit/mce.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
7676
*/
7777
acpi_nfit_ars_rescan(acpi_desc, 0);
7878
}
79+
mce->kflags |= MCE_HANDLED_NFIT;
7980
break;
8081
}
8182

drivers/edac/i7core_edac.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1815,7 +1815,7 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
18151815
struct mem_ctl_info *mci;
18161816

18171817
i7_dev = get_i7core_dev(mce->socketid);
1818-
if (!i7_dev)
1818+
if (!i7_dev || (mce->kflags & MCE_HANDLED_CEC))
18191819
return NOTIFY_DONE;
18201820

18211821
mci = i7_dev->mci;
@@ -1834,7 +1834,8 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
18341834
i7core_check_error(mci, mce);
18351835

18361836
/* Advise mcelog that the errors were handled */
1837-
return NOTIFY_STOP;
1837+
mce->kflags |= MCE_HANDLED_EDAC;
1838+
return NOTIFY_OK;
18381839
}
18391840

18401841
static struct notifier_block i7_mce_dec = {

drivers/edac/mce_amd.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1046,6 +1046,9 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
10461046
unsigned int fam = x86_family(m->cpuid);
10471047
int ecc;
10481048

1049+
if (m->kflags & MCE_HANDLED_CEC)
1050+
return NOTIFY_DONE;
1051+
10491052
pr_emerg(HW_ERR "%s\n", decode_error_status(m));
10501053

10511054
pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s",
@@ -1146,7 +1149,8 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
11461149
err_code:
11471150
amd_decode_err_code(m->status & 0xffff);
11481151

1149-
return NOTIFY_STOP;
1152+
m->kflags |= MCE_HANDLED_EDAC;
1153+
return NOTIFY_OK;
11501154
}
11511155

11521156
static struct notifier_block amd_mce_dec_nb = {

drivers/edac/pnd2_edac.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1400,7 +1400,7 @@ static int pnd2_mce_check_error(struct notifier_block *nb, unsigned long val, vo
14001400
return NOTIFY_DONE;
14011401

14021402
mci = pnd2_mci;
1403-
if (!mci)
1403+
if (!mci || (mce->kflags & MCE_HANDLED_CEC))
14041404
return NOTIFY_DONE;
14051405

14061406
/*
@@ -1429,7 +1429,8 @@ static int pnd2_mce_check_error(struct notifier_block *nb, unsigned long val, vo
14291429
pnd2_mce_output_error(mci, mce, &daddr);
14301430

14311431
/* Advice mcelog that the error were handled */
1432-
return NOTIFY_STOP;
1432+
mce->kflags |= MCE_HANDLED_EDAC;
1433+
return NOTIFY_OK;
14331434
}
14341435

14351436
static struct notifier_block pnd2_mce_dec = {

drivers/edac/sb_edac.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3136,6 +3136,8 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
31363136

31373137
if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
31383138
return NOTIFY_DONE;
3139+
if (mce->kflags & MCE_HANDLED_CEC)
3140+
return NOTIFY_DONE;
31393141

31403142
/*
31413143
* Just let mcelog handle it if the error is
@@ -3183,7 +3185,8 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
31833185
sbridge_mce_output_error(mci, mce);
31843186

31853187
/* Advice mcelog that the error were handled */
3186-
return NOTIFY_STOP;
3188+
mce->kflags |= MCE_HANDLED_EDAC;
3189+
return NOTIFY_OK;
31873190
}
31883191

31893192
static struct notifier_block sbridge_mce_dec = {

drivers/edac/skx_common.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,9 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
577577
if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
578578
return NOTIFY_DONE;
579579

580+
if (mce->kflags & MCE_HANDLED_CEC)
581+
return NOTIFY_DONE;
582+
580583
/* ignore unless this is memory related with an address */
581584
if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV))
582585
return NOTIFY_DONE;
@@ -616,6 +619,7 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
616619

617620
skx_mce_output_error(mci, mce, &res);
618621

622+
mce->kflags |= MCE_HANDLED_EDAC;
619623
return NOTIFY_DONE;
620624
}
621625

drivers/ras/cec.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -538,9 +538,12 @@ static int cec_notifier(struct notifier_block *nb, unsigned long val,
538538
/* We eat only correctable DRAM errors with usable addresses. */
539539
if (mce_is_memory_error(m) &&
540540
mce_is_correctable(m) &&
541-
mce_usable_address(m))
542-
if (!cec_add_elem(m->addr >> PAGE_SHIFT))
543-
return NOTIFY_STOP;
541+
mce_usable_address(m)) {
542+
if (!cec_add_elem(m->addr >> PAGE_SHIFT)) {
543+
m->kflags |= MCE_HANDLED_CEC;
544+
return NOTIFY_OK;
545+
}
546+
}
544547

545548
return NOTIFY_DONE;
546549
}

0 commit comments

Comments
 (0)