Skip to content

Commit c1f2ffe

Browse files
committed
Merge tag 'ras_core_for_v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RAS updates from Borislav Petkov: - Log and handle two new AMD-specific MCA registers: SYND1 and SYND2 and report the Field Replaceable Unit text info reported through them - Add support for handling variable-sized SMCA BERT records - Add the capability for reporting vendor-specific RAS error info without adding vendor-specific fields to struct mce - Cleanups * tag 'ras_core_for_v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: EDAC/mce_amd: Add support for FRU text in MCA x86/mce/apei: Handle variable SMCA BERT record size x86/MCE/AMD: Add support for new MCA_SYND{1,2} registers tracing: Add __print_dynamic_array() helper x86/mce: Add wrapper for struct mce to export vendor specific info x86/mce/intel: Use MCG_BANKCNT_MASK instead of 0xff x86/mce/mcelog: Use xchg() to get and clear the flags
2 parents 77286b8 + 612c2ad commit c1f2ffe

File tree

15 files changed

+329
-191
lines changed

15 files changed

+329
-191
lines changed

arch/x86/include/asm/mce.h

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
* - TCC bit is present in MCx_STATUS.
6262
*/
6363
#define MCI_CONFIG_MCAX 0x1
64+
#define MCI_CONFIG_FRUTEXT BIT_ULL(9)
6465
#define MCI_IPID_MCATYPE 0xFFFF0000
6566
#define MCI_IPID_HWID 0xFFF
6667

@@ -122,6 +123,9 @@
122123
#define MSR_AMD64_SMCA_MC0_DESTAT 0xc0002008
123124
#define MSR_AMD64_SMCA_MC0_DEADDR 0xc0002009
124125
#define MSR_AMD64_SMCA_MC0_MISC1 0xc000200a
126+
/* Registers MISC2 to MISC4 are at offsets B to D. */
127+
#define MSR_AMD64_SMCA_MC0_SYND1 0xc000200e
128+
#define MSR_AMD64_SMCA_MC0_SYND2 0xc000200f
125129
#define MSR_AMD64_SMCA_MCx_CTL(x) (MSR_AMD64_SMCA_MC0_CTL + 0x10*(x))
126130
#define MSR_AMD64_SMCA_MCx_STATUS(x) (MSR_AMD64_SMCA_MC0_STATUS + 0x10*(x))
127131
#define MSR_AMD64_SMCA_MCx_ADDR(x) (MSR_AMD64_SMCA_MC0_ADDR + 0x10*(x))
@@ -132,6 +136,8 @@
132136
#define MSR_AMD64_SMCA_MCx_DESTAT(x) (MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x))
133137
#define MSR_AMD64_SMCA_MCx_DEADDR(x) (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x))
134138
#define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x)))
139+
#define MSR_AMD64_SMCA_MCx_SYND1(x) (MSR_AMD64_SMCA_MC0_SYND1 + 0x10*(x))
140+
#define MSR_AMD64_SMCA_MCx_SYND2(x) (MSR_AMD64_SMCA_MC0_SYND2 + 0x10*(x))
135141

136142
#define XEC(x, mask) (((x) >> 16) & mask)
137143

@@ -187,6 +193,32 @@ enum mce_notifier_prios {
187193
MCE_PRIO_HIGHEST = MCE_PRIO_CEC
188194
};
189195

196+
/**
197+
* struct mce_hw_err - Hardware Error Record.
198+
* @m: Machine Check record.
199+
* @vendor: Vendor-specific error information.
200+
*
201+
* Vendor-specific fields should not be added to struct mce. Instead, vendors
202+
* should export their vendor-specific data through their structure in the
203+
* vendor union below.
204+
*
205+
* AMD's vendor data is parsed by error decoding tools for supplemental error
206+
* information. Thus, current offsets of existing fields must be maintained.
207+
* Only add new fields at the end of AMD's vendor structure.
208+
*/
209+
struct mce_hw_err {
210+
struct mce m;
211+
212+
union vendor_info {
213+
struct {
214+
u64 synd1; /* MCA_SYND1 MSR */
215+
u64 synd2; /* MCA_SYND2 MSR */
216+
} amd;
217+
} vendor;
218+
};
219+
220+
#define to_mce_hw_err(mce) container_of(mce, struct mce_hw_err, m)
221+
190222
struct notifier_block;
191223
extern void mce_register_decode_chain(struct notifier_block *nb);
192224
extern void mce_unregister_decode_chain(struct notifier_block *nb);
@@ -221,8 +253,8 @@ static inline int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
221253
u64 lapic_id) { return -EINVAL; }
222254
#endif
223255

224-
void mce_prep_record(struct mce *m);
225-
void mce_log(struct mce *m);
256+
void mce_prep_record(struct mce_hw_err *err);
257+
void mce_log(struct mce_hw_err *err);
226258
DECLARE_PER_CPU(struct device *, mce_device);
227259

228260
/* Maximum number of MCA banks per CPU. */

arch/x86/include/uapi/asm/mce.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
/*
99
* Fields are zero when not available. Also, this struct is shared with
1010
* userspace mcelog and thus must keep existing fields at current offsets.
11-
* Only add new fields to the end of the structure
11+
* Only add new, shared fields to the end of the structure.
12+
* Do not add vendor-specific fields.
1213
*/
1314
struct mce {
1415
__u64 status; /* Bank's MCi_STATUS MSR */

arch/x86/kernel/cpu/mce/amd.c

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -778,29 +778,33 @@ bool amd_mce_usable_address(struct mce *m)
778778

779779
static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
780780
{
781-
struct mce m;
781+
struct mce_hw_err err;
782+
struct mce *m = &err.m;
782783

783-
mce_prep_record(&m);
784+
mce_prep_record(&err);
784785

785-
m.status = status;
786-
m.misc = misc;
787-
m.bank = bank;
788-
m.tsc = rdtsc();
786+
m->status = status;
787+
m->misc = misc;
788+
m->bank = bank;
789+
m->tsc = rdtsc();
789790

790-
if (m.status & MCI_STATUS_ADDRV) {
791-
m.addr = addr;
791+
if (m->status & MCI_STATUS_ADDRV) {
792+
m->addr = addr;
792793

793-
smca_extract_err_addr(&m);
794+
smca_extract_err_addr(m);
794795
}
795796

796797
if (mce_flags.smca) {
797-
rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m.ipid);
798+
rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m->ipid);
798799

799-
if (m.status & MCI_STATUS_SYNDV)
800-
rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd);
800+
if (m->status & MCI_STATUS_SYNDV) {
801+
rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m->synd);
802+
rdmsrl(MSR_AMD64_SMCA_MCx_SYND1(bank), err.vendor.amd.synd1);
803+
rdmsrl(MSR_AMD64_SMCA_MCx_SYND2(bank), err.vendor.amd.synd2);
804+
}
801805
}
802806

803-
mce_log(&m);
807+
mce_log(&err);
804808
}
805809

806810
DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error)

arch/x86/kernel/cpu/mce/apei.c

Lines changed: 78 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@
2828

2929
void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
3030
{
31-
struct mce m;
31+
struct mce_hw_err err;
32+
struct mce *m;
3233
int lsb;
3334

3435
if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
@@ -44,31 +45,33 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
4445
else
4546
lsb = PAGE_SHIFT;
4647

47-
mce_prep_record(&m);
48-
m.bank = -1;
48+
mce_prep_record(&err);
49+
m = &err.m;
50+
m->bank = -1;
4951
/* Fake a memory read error with unknown channel */
50-
m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | MCI_STATUS_MISCV | 0x9f;
51-
m.misc = (MCI_MISC_ADDR_PHYS << 6) | lsb;
52+
m->status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | MCI_STATUS_MISCV | 0x9f;
53+
m->misc = (MCI_MISC_ADDR_PHYS << 6) | lsb;
5254

5355
if (severity >= GHES_SEV_RECOVERABLE)
54-
m.status |= MCI_STATUS_UC;
56+
m->status |= MCI_STATUS_UC;
5557

5658
if (severity >= GHES_SEV_PANIC) {
57-
m.status |= MCI_STATUS_PCC;
58-
m.tsc = rdtsc();
59+
m->status |= MCI_STATUS_PCC;
60+
m->tsc = rdtsc();
5961
}
6062

61-
m.addr = mem_err->physical_addr;
62-
mce_log(&m);
63+
m->addr = mem_err->physical_addr;
64+
mce_log(&err);
6365
}
6466
EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
6567

6668
int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
6769
{
6870
const u64 *i_mce = ((const u64 *) (ctx_info + 1));
71+
unsigned int cpu, num_regs;
6972
bool apicid_found = false;
70-
unsigned int cpu;
71-
struct mce m;
73+
struct mce_hw_err err;
74+
struct mce *m;
7275

7376
if (!boot_cpu_has(X86_FEATURE_SMCA))
7477
return -EINVAL;
@@ -86,16 +89,12 @@ int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
8689
return -EINVAL;
8790

8891
/*
89-
* The register array size must be large enough to include all the
90-
* SMCA registers which need to be extracted.
91-
*
9292
* The number of registers in the register array is determined by
9393
* Register Array Size/8 as defined in UEFI spec v2.8, sec N.2.4.2.2.
94-
* The register layout is fixed and currently the raw data in the
95-
* register array includes 6 SMCA registers which the kernel can
96-
* extract.
94+
* Sanity-check registers array size.
9795
*/
98-
if (ctx_info->reg_arr_size < 48)
96+
num_regs = ctx_info->reg_arr_size >> 3;
97+
if (!num_regs)
9998
return -EINVAL;
10099

101100
for_each_possible_cpu(cpu) {
@@ -108,18 +107,68 @@ int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
108107
if (!apicid_found)
109108
return -EINVAL;
110109

111-
mce_prep_record_common(&m);
112-
mce_prep_record_per_cpu(cpu, &m);
110+
m = &err.m;
111+
memset(&err, 0, sizeof(struct mce_hw_err));
112+
mce_prep_record_common(m);
113+
mce_prep_record_per_cpu(cpu, m);
114+
115+
m->bank = (ctx_info->msr_addr >> 4) & 0xFF;
113116

114-
m.bank = (ctx_info->msr_addr >> 4) & 0xFF;
115-
m.status = *i_mce;
116-
m.addr = *(i_mce + 1);
117-
m.misc = *(i_mce + 2);
118-
/* Skipping MCA_CONFIG */
119-
m.ipid = *(i_mce + 4);
120-
m.synd = *(i_mce + 5);
117+
/*
118+
* The SMCA register layout is fixed and includes 16 registers.
119+
* The end of the array may be variable, but the beginning is known.
120+
* Cap the number of registers to expected max (15).
121+
*/
122+
if (num_regs > 15)
123+
num_regs = 15;
124+
125+
switch (num_regs) {
126+
/* MCA_SYND2 */
127+
case 15:
128+
err.vendor.amd.synd2 = *(i_mce + 14);
129+
fallthrough;
130+
/* MCA_SYND1 */
131+
case 14:
132+
err.vendor.amd.synd1 = *(i_mce + 13);
133+
fallthrough;
134+
/* MCA_MISC4 */
135+
case 13:
136+
/* MCA_MISC3 */
137+
case 12:
138+
/* MCA_MISC2 */
139+
case 11:
140+
/* MCA_MISC1 */
141+
case 10:
142+
/* MCA_DEADDR */
143+
case 9:
144+
/* MCA_DESTAT */
145+
case 8:
146+
/* reserved */
147+
case 7:
148+
/* MCA_SYND */
149+
case 6:
150+
m->synd = *(i_mce + 5);
151+
fallthrough;
152+
/* MCA_IPID */
153+
case 5:
154+
m->ipid = *(i_mce + 4);
155+
fallthrough;
156+
/* MCA_CONFIG */
157+
case 4:
158+
/* MCA_MISC0 */
159+
case 3:
160+
m->misc = *(i_mce + 2);
161+
fallthrough;
162+
/* MCA_ADDR */
163+
case 2:
164+
m->addr = *(i_mce + 1);
165+
fallthrough;
166+
/* MCA_STATUS */
167+
case 1:
168+
m->status = *i_mce;
169+
}
121170

122-
mce_log(&m);
171+
mce_log(&err);
123172

124173
return 0;
125174
}

0 commit comments

Comments
 (0)