Skip to content

Commit 9196a0b

Browse files
committed
Merge tag 'ras_core_for_v6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 RAS updates from Borislav Petkov: - Fix confusing output from /sys/kernel/debug/ras/daemon_active - Add another MCE severity error case to the Intel error severity table to promote UC and AR errors to panic severity and remove the corresponding code condition doing that. - Make sure the thresholding and deferred error interrupts on AMD SMCA systems clear all the registers reporting an error so that multiple errors are not logged for the same event * tag 'ras_core_for_v6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: RAS: Fix return value from show_trace() x86/mce: Use severity table to handle uncorrected errors in kernel x86/MCE/AMD: Clear DFR errors found in THR handler
2 parents 7adcadb + 50865c1 commit 9196a0b

File tree

3 files changed

+26
-17
lines changed

3 files changed

+26
-17
lines changed

arch/x86/kernel/cpu/mce/amd.c

Lines changed: 20 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -788,6 +788,24 @@ _log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc)
788788
return status & MCI_STATUS_DEFERRED;
789789
}
790790

791+
static bool _log_error_deferred(unsigned int bank, u32 misc)
792+
{
793+
if (!_log_error_bank(bank, mca_msr_reg(bank, MCA_STATUS),
794+
mca_msr_reg(bank, MCA_ADDR), misc))
795+
return false;
796+
797+
/*
798+
* Non-SMCA systems don't have MCA_DESTAT/MCA_DEADDR registers.
799+
* Return true here to avoid accessing these registers.
800+
*/
801+
if (!mce_flags.smca)
802+
return true;
803+
804+
/* Clear MCA_DESTAT if the deferred error was logged from MCA_STATUS. */
805+
wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0);
806+
return true;
807+
}
808+
791809
/*
792810
* We have three scenarios for checking for Deferred errors:
793811
*
@@ -799,19 +817,8 @@ _log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc)
799817
*/
800818
static void log_error_deferred(unsigned int bank)
801819
{
802-
bool defrd;
803-
804-
defrd = _log_error_bank(bank, mca_msr_reg(bank, MCA_STATUS),
805-
mca_msr_reg(bank, MCA_ADDR), 0);
806-
807-
if (!mce_flags.smca)
808-
return;
809-
810-
/* Clear MCA_DESTAT if we logged the deferred error from MCA_STATUS. */
811-
if (defrd) {
812-
wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0);
820+
if (_log_error_deferred(bank, 0))
813821
return;
814-
}
815822

816823
/*
817824
* Only deferred errors are logged in MCA_DE{STAT,ADDR} so just check
@@ -832,7 +839,7 @@ static void amd_deferred_error_interrupt(void)
832839

833840
static void log_error_thresholding(unsigned int bank, u64 misc)
834841
{
835-
_log_error_bank(bank, mca_msr_reg(bank, MCA_STATUS), mca_msr_reg(bank, MCA_ADDR), misc);
842+
_log_error_deferred(bank, misc);
836843
}
837844

838845
static void log_and_reset_block(struct threshold_block *block)

arch/x86/kernel/cpu/mce/severity.c

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -202,6 +202,11 @@ static struct severity {
202202
PANIC, "Overflowed uncorrected",
203203
BITSET(MCI_STATUS_OVER|MCI_STATUS_UC)
204204
),
205+
MCESEV(
206+
PANIC, "Uncorrected in kernel",
207+
BITSET(MCI_STATUS_UC),
208+
KERNEL
209+
),
205210
MCESEV(
206211
UC, "Uncorrected",
207212
BITSET(MCI_STATUS_UC)
@@ -391,9 +396,6 @@ static noinstr int mce_severity_intel(struct mce *m, struct pt_regs *regs, char
391396
*msg = s->msg;
392397
s->covered = 1;
393398

394-
if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL)
395-
return MCE_PANIC_SEVERITY;
396-
397399
return s->sev;
398400
}
399401
}

drivers/ras/debugfs.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ EXPORT_SYMBOL_GPL(ras_userspace_consumers);
1515

1616
static int trace_show(struct seq_file *m, void *v)
1717
{
18-
return atomic_read(&trace_count);
18+
return 0;
1919
}
2020

2121
static int trace_open(struct inode *inode, struct file *file)

0 commit comments

Comments
 (0)