Skip to content

Commit bdde314

Browse files
committed
Merge tag 'ras_urgent_for_v6.16_rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RAS fixes from Borislav Petkov:

 - Do not remove the MCE sysfs hierarchy if thresholding sysfs nodes
   init fails due to new/unknown banks present, which in itself is not
   fatal anyway; add default names for new banks

 - Make sure MCE polling settings are honored after CMCI storms

 - Make sure MCE threshold limit is reset after the thresholding
   interrupt has been serviced

 - Clean up properly and disable CMCI banks on shutdown so that a
   second/kexec-ed kernel can rediscover those banks again

* tag 'ras_urgent_for_v6.16_rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mce: Make sure CMCI banks are cleared during shutdown on Intel
  x86/mce/amd: Fix threshold limit reset
  x86/mce/amd: Add default names for MCA banks and blocks
  x86/mce: Ensure user polling settings are honored when restarting timer
  x86/mce: Don't remove sysfs if thresholding sysfs init fails
2 parents 45a3f12 + 30ad231 commit bdde314

File tree

3 files changed

+29
-24
lines changed

3 files changed

+29
-24
lines changed

arch/x86/kernel/cpu/mce/amd.c

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -350,7 +350,6 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
350350

351351
struct thresh_restart {
352352
struct threshold_block *b;
353-
int reset;
354353
int set_lvt_off;
355354
int lvt_off;
356355
u16 old_limit;
@@ -432,13 +431,13 @@ static void threshold_restart_bank(void *_tr)
432431

433432
rdmsr(tr->b->address, lo, hi);
434433

435-
if (tr->b->threshold_limit < (hi & THRESHOLD_MAX))
436-
tr->reset = 1; /* limit cannot be lower than err count */
437-
438-
if (tr->reset) { /* reset err count and overflow bit */
439-
hi =
440-
(hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
441-
(THRESHOLD_MAX - tr->b->threshold_limit);
434+
/*
435+
* Reset error count and overflow bit.
436+
* This is done during init or after handling an interrupt.
437+
*/
438+
if (hi & MASK_OVERFLOW_HI || tr->set_lvt_off) {
439+
hi &= ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI);
440+
hi |= THRESHOLD_MAX - tr->b->threshold_limit;
442441
} else if (tr->old_limit) { /* change limit w/o reset */
443442
int new_count = (hi & THRESHOLD_MAX) +
444443
(tr->old_limit - tr->b->threshold_limit);
@@ -1113,13 +1112,20 @@ static const char *get_name(unsigned int cpu, unsigned int bank, struct threshol
11131112
}
11141113

11151114
bank_type = smca_get_bank_type(cpu, bank);
1116-
if (bank_type >= N_SMCA_BANK_TYPES)
1117-
return NULL;
11181115

11191116
if (b && (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2)) {
11201117
if (b->block < ARRAY_SIZE(smca_umc_block_names))
11211118
return smca_umc_block_names[b->block];
1122-
return NULL;
1119+
}
1120+
1121+
if (b && b->block) {
1122+
snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_block_%u", b->block);
1123+
return buf_mcatype;
1124+
}
1125+
1126+
if (bank_type >= N_SMCA_BANK_TYPES) {
1127+
snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_bank_%u", bank);
1128+
return buf_mcatype;
11231129
}
11241130

11251131
if (per_cpu(smca_bank_counts, cpu)[bank_type] == 1)

arch/x86/kernel/cpu/mce/core.c

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1740,6 +1740,11 @@ static void mc_poll_banks_default(void)
17401740

17411741
void (*mc_poll_banks)(void) = mc_poll_banks_default;
17421742

1743+
static bool should_enable_timer(unsigned long iv)
1744+
{
1745+
return !mca_cfg.ignore_ce && iv;
1746+
}
1747+
17431748
static void mce_timer_fn(struct timer_list *t)
17441749
{
17451750
struct timer_list *cpu_t = this_cpu_ptr(&mce_timer);
@@ -1763,7 +1768,7 @@ static void mce_timer_fn(struct timer_list *t)
17631768

17641769
if (mce_get_storm_mode()) {
17651770
__start_timer(t, HZ);
1766-
} else {
1771+
} else if (should_enable_timer(iv)) {
17671772
__this_cpu_write(mce_next_interval, iv);
17681773
__start_timer(t, iv);
17691774
}
@@ -2156,11 +2161,10 @@ static void mce_start_timer(struct timer_list *t)
21562161
{
21572162
unsigned long iv = check_interval * HZ;
21582163

2159-
if (mca_cfg.ignore_ce || !iv)
2160-
return;
2161-
2162-
this_cpu_write(mce_next_interval, iv);
2163-
__start_timer(t, iv);
2164+
if (should_enable_timer(iv)) {
2165+
this_cpu_write(mce_next_interval, iv);
2166+
__start_timer(t, iv);
2167+
}
21642168
}
21652169

21662170
static void __mcheck_cpu_setup_timer(void)
@@ -2801,15 +2805,9 @@ static int mce_cpu_dead(unsigned int cpu)
28012805
static int mce_cpu_online(unsigned int cpu)
28022806
{
28032807
struct timer_list *t = this_cpu_ptr(&mce_timer);
2804-
int ret;
28052808

28062809
mce_device_create(cpu);
2807-
2808-
ret = mce_threshold_create_device(cpu);
2809-
if (ret) {
2810-
mce_device_remove(cpu);
2811-
return ret;
2812-
}
2810+
mce_threshold_create_device(cpu);
28132811
mce_reenable_cpu();
28142812
mce_start_timer(t);
28152813
return 0;

arch/x86/kernel/cpu/mce/intel.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -478,6 +478,7 @@ void mce_intel_feature_init(struct cpuinfo_x86 *c)
478478
void mce_intel_feature_clear(struct cpuinfo_x86 *c)
479479
{
480480
intel_clear_lmce();
481+
cmci_clear();
481482
}
482483

483484
bool intel_filter_mce(struct mce *m)

0 commit comments

Comments
 (0)