Skip to content

Commit ff7b862

Browse files
committed
Merge tag 'ras_updates_for_5.7' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RAS updates from Borislav Petkov: - Do not report spurious MCEs on some Intel platforms caused by errata, by Prarit Bhargava. - Change dev-mcelog's hardcoded limit of 32 error records to a dynamic one, controlled by the number of logical CPUs, by Tony Luck. - Add support for the Protected Processor Inventory Number (PPIN) on AMD, by Wei Huang. * tag 'ras_updates_for_5.7' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/mce/amd: Add PPIN support for AMD MCE x86/mce/dev-mcelog: Dynamically allocate space for machine check records x86/mce: Do not log spurious corrected mce errors
2 parents aaf985e + 077168e commit ff7b862

File tree

7 files changed

+84
-23
lines changed

7 files changed

+84
-23
lines changed

arch/x86/include/asm/cpufeatures.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,7 @@
299299
#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */
300300
#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */
301301
#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* "" Single Thread Indirect Branch Predictors always-on preferred */
302+
#define X86_FEATURE_AMD_PPIN (13*32+23) /* Protected Processor Inventory Number */
302303
#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */
303304
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
304305
#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */

arch/x86/include/asm/mce.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@
102102

103103
#define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */
104104

105-
#define MCE_LOG_LEN 32
105+
#define MCE_LOG_MIN_LEN 32U
106106
#define MCE_LOG_SIGNATURE "MACHINECHECK"
107107

108108
/* AMD Scalable MCA */
@@ -135,11 +135,11 @@
135135
*/
136136
struct mce_log_buffer {
137137
char signature[12]; /* "MACHINECHECK" */
138-
unsigned len; /* = MCE_LOG_LEN */
138+
unsigned len; /* = number of elements in .entry[] */
139139
unsigned next; /* index of the next unused slot in .entry[] */
140140
unsigned flags; /* bit MCE_OVERFLOW is set when a record had to be dropped */
141141
unsigned recordlen; /* length of struct mce */
142-
struct mce entry[MCE_LOG_LEN];
142+
struct mce entry[];
143143
};
144144

145145
enum mce_notifier_prios {

arch/x86/kernel/cpu/amd.c

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,35 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c)
394394
per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
395395
}
396396

397+
/*
 * Validate the CPUID-advertised AMD PPIN feature against the PPIN_CTL MSR.
 *
 * @c: CPU whose capability bits are inspected and possibly updated.
 *
 * X86_FEATURE_AMD_PPIN is left set only if PPIN_CTL can be read and its
 * enable bit (value 2) is, or can be made, set. In every other case the
 * feature bit is cleared on @c. Nothing is returned.
 */
static void amd_detect_ppin(struct cpuinfo_x86 *c)
398+
{
399+
unsigned long long val;
400+
401+
if (!cpu_has(c, X86_FEATURE_AMD_PPIN))
402+
return;
403+
404+
/* When PPIN is defined in CPUID, still need to check PPIN_CTL MSR */
/* A non-zero return from rdmsrl_safe() is treated as "MSR not usable". */
405+
if (rdmsrl_safe(MSR_AMD_PPIN_CTL, &val))
406+
goto clear_ppin;
407+
408+
/*
 * PPIN is locked in disabled mode, clear feature bit.
 * Low two bits == 01b: enable bit (2) clear while bit 0 is set;
 * bit 0 is presumably the lock bit -- confirm against the AMD PPR.
 */
409+
if ((val & 3UL) == 1UL)
410+
goto clear_ppin;
411+
412+
/* If PPIN is disabled, try to enable it */
413+
if (!(val & 2UL)) {
414+
/*
 * Return values are deliberately not checked here: success is judged
 * solely by re-testing the enable bit in val below.
 */
wrmsrl_safe(MSR_AMD_PPIN_CTL, val | 2UL);
415+
rdmsrl_safe(MSR_AMD_PPIN_CTL, &val);
416+
}
417+
418+
/* If PPIN_EN bit is 1, return from here; otherwise fall through */
419+
if (val & 2UL)
420+
return;
421+
422+
clear_ppin:
423+
clear_cpu_cap(c, X86_FEATURE_AMD_PPIN);
424+
}
425+
397426
u16 amd_get_nb_id(int cpu)
398427
{
399428
return per_cpu(cpu_llc_id, cpu);
@@ -941,6 +970,7 @@ static void init_amd(struct cpuinfo_x86 *c)
941970
amd_detect_cmp(c);
942971
amd_get_topology(c);
943972
srat_detect_node(c);
973+
amd_detect_ppin(c);
944974

945975
init_amd_cacheinfo(c);
946976

arch/x86/kernel/cpu/mce/core.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,8 @@ void mce_setup(struct mce *m)
142142

143143
if (this_cpu_has(X86_FEATURE_INTEL_PPIN))
144144
rdmsrl(MSR_PPIN, m->ppin);
145+
else if (this_cpu_has(X86_FEATURE_AMD_PPIN))
146+
rdmsrl(MSR_AMD_PPIN, m->ppin);
145147

146148
m->microcode = boot_cpu_data.microcode;
147149
}
@@ -1877,6 +1879,8 @@ bool filter_mce(struct mce *m)
18771879
{
18781880
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
18791881
return amd_filter_mce(m);
1882+
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
1883+
return intel_filter_mce(m);
18801884

18811885
return false;
18821886
}

arch/x86/kernel/cpu/mce/dev-mcelog.c

Lines changed: 27 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,7 @@ static char *mce_helper_argv[2] = { mce_helper, NULL };
2929
* separate MCEs from kernel messages to avoid bogus bug reports.
3030
*/
3131

32-
static struct mce_log_buffer mcelog = {
33-
.signature = MCE_LOG_SIGNATURE,
34-
.len = MCE_LOG_LEN,
35-
.recordlen = sizeof(struct mce),
36-
};
32+
static struct mce_log_buffer *mcelog;
3733

3834
static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
3935

@@ -45,21 +41,21 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
4541

4642
mutex_lock(&mce_chrdev_read_mutex);
4743

48-
entry = mcelog.next;
44+
entry = mcelog->next;
4945

5046
/*
5147
* When the buffer fills up discard new entries. Assume that the
5248
* earlier errors are the more interesting ones:
5349
*/
54-
if (entry >= MCE_LOG_LEN) {
55-
set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags);
50+
if (entry >= mcelog->len) {
51+
set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog->flags);
5652
goto unlock;
5753
}
5854

59-
mcelog.next = entry + 1;
55+
mcelog->next = entry + 1;
6056

61-
memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
62-
mcelog.entry[entry].finished = 1;
57+
memcpy(mcelog->entry + entry, mce, sizeof(struct mce));
58+
mcelog->entry[entry].finished = 1;
6359

6460
/* wake processes polling /dev/mcelog */
6561
wake_up_interruptible(&mce_chrdev_wait);
@@ -214,21 +210,21 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
214210

215211
/* Only supports full reads right now */
216212
err = -EINVAL;
217-
if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce))
213+
if (*off != 0 || usize < mcelog->len * sizeof(struct mce))
218214
goto out;
219215

220-
next = mcelog.next;
216+
next = mcelog->next;
221217
err = 0;
222218

223219
for (i = 0; i < next; i++) {
224-
struct mce *m = &mcelog.entry[i];
220+
struct mce *m = &mcelog->entry[i];
225221

226222
err |= copy_to_user(buf, m, sizeof(*m));
227223
buf += sizeof(*m);
228224
}
229225

230-
memset(mcelog.entry, 0, next * sizeof(struct mce));
231-
mcelog.next = 0;
226+
memset(mcelog->entry, 0, next * sizeof(struct mce));
227+
mcelog->next = 0;
232228

233229
if (err)
234230
err = -EFAULT;
@@ -242,7 +238,7 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
242238
static __poll_t mce_chrdev_poll(struct file *file, poll_table *wait)
243239
{
244240
poll_wait(file, &mce_chrdev_wait, wait);
245-
if (READ_ONCE(mcelog.next))
241+
if (READ_ONCE(mcelog->next))
246242
return EPOLLIN | EPOLLRDNORM;
247243
if (!mce_apei_read_done && apei_check_mce())
248244
return EPOLLIN | EPOLLRDNORM;
@@ -261,13 +257,13 @@ static long mce_chrdev_ioctl(struct file *f, unsigned int cmd,
261257
case MCE_GET_RECORD_LEN:
262258
return put_user(sizeof(struct mce), p);
263259
case MCE_GET_LOG_LEN:
264-
return put_user(MCE_LOG_LEN, p);
260+
return put_user(mcelog->len, p);
265261
case MCE_GETCLEAR_FLAGS: {
266262
unsigned flags;
267263

268264
do {
269-
flags = mcelog.flags;
270-
} while (cmpxchg(&mcelog.flags, flags, 0) != flags);
265+
flags = mcelog->flags;
266+
} while (cmpxchg(&mcelog->flags, flags, 0) != flags);
271267

272268
return put_user(flags, p);
273269
}
@@ -339,8 +335,18 @@ static struct miscdevice mce_chrdev_device = {
339335

340336
static __init int dev_mcelog_init_device(void)
341337
{
338+
int mce_log_len;
342339
int err;
343340

341+
mce_log_len = max(MCE_LOG_MIN_LEN, num_online_cpus());
342+
mcelog = kzalloc(sizeof(*mcelog) + mce_log_len * sizeof(struct mce), GFP_KERNEL);
343+
if (!mcelog)
344+
return -ENOMEM;
345+
346+
strncpy(mcelog->signature, MCE_LOG_SIGNATURE, sizeof(mcelog->signature));
347+
mcelog->len = mce_log_len;
348+
mcelog->recordlen = sizeof(struct mce);
349+
344350
/* register character device /dev/mcelog */
345351
err = misc_register(&mce_chrdev_device);
346352
if (err) {
@@ -350,6 +356,7 @@ static __init int dev_mcelog_init_device(void)
350356
else
351357
pr_err("Unable to init device /dev/mcelog (rc: %d)\n", err);
352358

359+
kfree(mcelog);
353360
return err;
354361
}
355362

arch/x86/kernel/cpu/mce/intel.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -521,3 +521,20 @@ void mce_intel_feature_clear(struct cpuinfo_x86 *c)
521521
{
522522
intel_clear_lmce();
523523
}
524+
525+
/*
 * Decide whether an MCE record matches a known-spurious erratum signature.
 *
 * @m: the machine check record to examine (bank and status are read).
 *
 * Returns true only when all of the following hold, false otherwise:
 *  - the boot CPU (boot_cpu_data, not necessarily the reporting CPU) is
 *    family 6, model Haswell, Haswell-L, Haswell-G or Broadwell;
 *  - the record comes from bank 0;
 *  - status, masked to bits 63, 61 and the low 32 bits, equals
 *    0x80000000000f0005 (bit 63 set, bit 61 clear, low word 0x000f0005).
 *    Bits 63/61 are presumably VAL/UC, i.e. a valid corrected error --
 *    confirm against the Intel SDM.
 */
bool intel_filter_mce(struct mce *m)
526+
{
527+
struct cpuinfo_x86 *c = &boot_cpu_data;
528+
529+
/* MCE errata HSD131, HSM142, HSW131 and BDM48 */
530+
if ((c->x86 == 6) &&
531+
((c->x86_model == INTEL_FAM6_HASWELL) ||
532+
(c->x86_model == INTEL_FAM6_HASWELL_L) ||
533+
(c->x86_model == INTEL_FAM6_BROADWELL) ||
534+
(c->x86_model == INTEL_FAM6_HASWELL_G)) &&
535+
(m->bank == 0) &&
536+
((m->status & 0xa0000000ffffffff) == 0x80000000000f0005))
537+
return true;
538+
539+
return false;
540+
}

arch/x86/kernel/cpu/mce/internal.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ void cmci_disable_bank(int bank);
4848
void intel_init_cmci(void);
4949
void intel_init_lmce(void);
5050
void intel_clear_lmce(void);
51+
bool intel_filter_mce(struct mce *m);
5152
#else
5253
# define cmci_intel_adjust_timer mce_adjust_timer_default
5354
static inline bool mce_intel_cmci_poll(void) { return false; }
@@ -56,6 +57,7 @@ static inline void cmci_disable_bank(int bank) { }
5657
static inline void intel_init_cmci(void) { }
5758
static inline void intel_init_lmce(void) { }
5859
static inline void intel_clear_lmce(void) { }
60+
/* Stub for the #else branch: no Intel-specific filtering, so no record is ever filtered. */
static inline bool intel_filter_mce(struct mce *m) { return false; }
5961
#endif
6062

6163
void mce_timer_kick(unsigned long interval);

0 commit comments

Comments
 (0)