Skip to content

Commit 30f5a75

Browse files
committed
Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RAS updates from Borislav Petkov:

 - Misc fixes to the MCE code all over the place, by Jan H. Schönherr.
 - Initial support for AMD F19h and other cleanups to amd64_edac, by Yazen Ghannam.
 - Other small cleanups.

* 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  EDAC/mce_amd: Make fam_ops static global
  EDAC/amd64: Drop some family checks for newer systems
  EDAC/amd64: Add family ops for Family 19h Models 00h-0Fh
  x86/amd_nb: Add Family 19h PCI IDs
  EDAC/mce_amd: Always load on SMCA systems
  x86/MCE/AMD, EDAC/mce_amd: Add new Load Store unit McaType
  x86/mce: Fix use of uninitialized MCE message string
  x86/mce: Fix mce=nobootlog
  x86/mce: Take action on UCNA/Deferred errors again
  x86/mce: Remove mce_inject_log() in favor of mce_log()
  x86/mce: Pass MCE message to mce_panic() on failed kernel recovery
  x86/mce/therm_throt: Mark throttle_active_work() as __maybe_unused
2 parents b62061b + 86e9f9d commit 30f5a75

File tree

11 files changed

+141
-114
lines changed

11 files changed

+141
-114
lines changed

arch/x86/include/asm/mce.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -144,7 +144,7 @@ struct mce_log_buffer {
144144

145145
enum mce_notifier_prios {
146146
MCE_PRIO_FIRST = INT_MAX,
147-
MCE_PRIO_SRAO = INT_MAX - 1,
147+
MCE_PRIO_UC = INT_MAX - 1,
148148
MCE_PRIO_EXTLOG = INT_MAX - 2,
149149
MCE_PRIO_NFIT = INT_MAX - 3,
150150
MCE_PRIO_EDAC = INT_MAX - 4,
@@ -290,6 +290,7 @@ extern void apei_mce_report_mem_error(int corrected,
290290
/* These may be used by multiple smca_hwid_mcatypes */
291291
enum smca_bank_types {
292292
SMCA_LS = 0, /* Load Store */
293+
SMCA_LS_V2, /* Load Store */
293294
SMCA_IF, /* Instruction Fetch */
294295
SMCA_L2_CACHE, /* L2 Cache */
295296
SMCA_DE, /* Decoder Unit */

arch/x86/kernel/amd_nb.c

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec
2323
#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F4 0x1494
2424
#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444
25+
#define PCI_DEVICE_ID_AMD_19H_DF_F4 0x1654
2526

2627
/* Protect the PCI config register pairs used for SMN and DF indirect access. */
2728
static DEFINE_MUTEX(smn_mutex);
@@ -52,6 +53,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
5253
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F3) },
5354
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) },
5455
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F3) },
56+
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F3) },
5557
{}
5658
};
5759
EXPORT_SYMBOL_GPL(amd_nb_misc_ids);
@@ -66,6 +68,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
6668
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F4) },
6769
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F4) },
6870
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F4) },
71+
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F4) },
6972
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) },
7073
{}
7174
};

arch/x86/kernel/cpu/mce/amd.c

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -78,6 +78,7 @@ struct smca_bank_name {
7878

7979
static struct smca_bank_name smca_names[] = {
8080
[SMCA_LS] = { "load_store", "Load Store Unit" },
81+
[SMCA_LS_V2] = { "load_store", "Load Store Unit" },
8182
[SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" },
8283
[SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" },
8384
[SMCA_DE] = { "decode_unit", "Decode Unit" },
@@ -138,6 +139,7 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
138139

139140
/* ZN Core (HWID=0xB0) MCA types */
140141
{ SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFFF },
142+
{ SMCA_LS_V2, HWID_MCATYPE(0xB0, 0x10), 0xFFFFFF },
141143
{ SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
142144
{ SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF },
143145
{ SMCA_DE, HWID_MCATYPE(0xB0, 0x3), 0x1FF },

arch/x86/kernel/cpu/mce/core.c

Lines changed: 29 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -53,8 +53,6 @@
5353

5454
#include "internal.h"
5555

56-
static DEFINE_MUTEX(mce_log_mutex);
57-
5856
/* sysfs synchronization */
5957
static DEFINE_MUTEX(mce_sysfs_mutex);
6058

@@ -156,19 +154,10 @@ void mce_log(struct mce *m)
156154
if (!mce_gen_pool_add(m))
157155
irq_work_queue(&mce_irq_work);
158156
}
159-
160-
void mce_inject_log(struct mce *m)
161-
{
162-
mutex_lock(&mce_log_mutex);
163-
mce_log(m);
164-
mutex_unlock(&mce_log_mutex);
165-
}
166-
EXPORT_SYMBOL_GPL(mce_inject_log);
167-
168-
static struct notifier_block mce_srao_nb;
157+
EXPORT_SYMBOL_GPL(mce_log);
169158

170159
/*
171-
* We run the default notifier if we have only the SRAO, the first and the
160+
* We run the default notifier if we have only the UC, the first and the
172161
* default notifier registered. I.e., the mandatory NUM_DEFAULT_NOTIFIERS
173162
* notifiers registered on the chain.
174163
*/
@@ -594,26 +583,29 @@ static struct notifier_block first_nb = {
594583
.priority = MCE_PRIO_FIRST,
595584
};
596585

597-
static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
598-
void *data)
586+
static int uc_decode_notifier(struct notifier_block *nb, unsigned long val,
587+
void *data)
599588
{
600589
struct mce *mce = (struct mce *)data;
601590
unsigned long pfn;
602591

603-
if (!mce)
592+
if (!mce || !mce_usable_address(mce))
604593
return NOTIFY_DONE;
605594

606-
if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) {
607-
pfn = mce->addr >> PAGE_SHIFT;
608-
if (!memory_failure(pfn, 0))
609-
set_mce_nospec(pfn);
610-
}
595+
if (mce->severity != MCE_AO_SEVERITY &&
596+
mce->severity != MCE_DEFERRED_SEVERITY)
597+
return NOTIFY_DONE;
598+
599+
pfn = mce->addr >> PAGE_SHIFT;
600+
if (!memory_failure(pfn, 0))
601+
set_mce_nospec(pfn);
611602

612603
return NOTIFY_OK;
613604
}
614-
static struct notifier_block mce_srao_nb = {
615-
.notifier_call = srao_decode_notifier,
616-
.priority = MCE_PRIO_SRAO,
605+
606+
static struct notifier_block mce_uc_nb = {
607+
.notifier_call = uc_decode_notifier,
608+
.priority = MCE_PRIO_UC,
617609
};
618610

619611
static int mce_default_notifier(struct notifier_block *nb, unsigned long val,
@@ -763,26 +755,22 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
763755
log_it:
764756
error_seen = true;
765757

766-
mce_read_aux(&m, i);
758+
if (flags & MCP_DONTLOG)
759+
goto clear_it;
767760

761+
mce_read_aux(&m, i);
768762
m.severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);
769-
770763
/*
771764
* Don't get the IP here because it's unlikely to
772765
* have anything to do with the actual error location.
773766
*/
774-
if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce)
775-
mce_log(&m);
776-
else if (mce_usable_address(&m)) {
777-
/*
778-
* Although we skipped logging this, we still want
779-
* to take action. Add to the pool so the registered
780-
* notifiers will see it.
781-
*/
782-
if (!mce_gen_pool_add(&m))
783-
mce_schedule_work();
784-
}
785767

768+
if (mca_cfg.dont_log_ce && !mce_usable_address(&m))
769+
goto clear_it;
770+
771+
mce_log(&m);
772+
773+
clear_it:
786774
/*
787775
* Clear state for this bank.
788776
*/
@@ -807,7 +795,7 @@ EXPORT_SYMBOL_GPL(machine_check_poll);
807795
static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
808796
struct pt_regs *regs)
809797
{
810-
char *tmp;
798+
char *tmp = *msg;
811799
int i;
812800

813801
for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
@@ -1232,8 +1220,8 @@ void do_machine_check(struct pt_regs *regs, long error_code)
12321220
DECLARE_BITMAP(toclear, MAX_NR_BANKS);
12331221
struct mca_config *cfg = &mca_cfg;
12341222
int cpu = smp_processor_id();
1235-
char *msg = "Unknown";
12361223
struct mce m, *final;
1224+
char *msg = NULL;
12371225
int worst = 0;
12381226

12391227
/*
@@ -1365,7 +1353,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
13651353
ist_end_non_atomic();
13661354
} else {
13671355
if (!fixup_exception(regs, X86_TRAP_MC, error_code, 0))
1368-
mce_panic("Failed kernel mode recovery", &m, NULL);
1356+
mce_panic("Failed kernel mode recovery", &m, msg);
13691357
}
13701358

13711359
out_ist:
@@ -2041,7 +2029,7 @@ int __init mcheck_init(void)
20412029
{
20422030
mcheck_intel_therm_init();
20432031
mce_register_decode_chain(&first_nb);
2044-
mce_register_decode_chain(&mce_srao_nb);
2032+
mce_register_decode_chain(&mce_uc_nb);
20452033
mce_register_decode_chain(&mce_default_nb);
20462034
mcheck_vendor_init_severity();
20472035

arch/x86/kernel/cpu/mce/inject.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -494,7 +494,7 @@ static void do_inject(void)
494494
i_mce.status |= MCI_STATUS_SYNDV;
495495

496496
if (inj_type == SW_INJ) {
497-
mce_inject_log(&i_mce);
497+
mce_log(&i_mce);
498498
return;
499499
}
500500

arch/x86/kernel/cpu/mce/internal.h

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -84,8 +84,6 @@ static inline int apei_clear_mce(u64 record_id)
8484
}
8585
#endif
8686

87-
void mce_inject_log(struct mce *m);
88-
8987
/*
9088
* We consider records to be equivalent if bank+status+addr+misc all match.
9189
* This is only used when the system is going down because of a fatal error

arch/x86/kernel/cpu/mce/therm_throt.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -235,7 +235,7 @@ static void get_therm_status(int level, bool *proc_hot, u8 *temp)
235235
*temp = (msr_val >> 16) & 0x7F;
236236
}
237237

238-
static void throttle_active_work(struct work_struct *work)
238+
static void __maybe_unused throttle_active_work(struct work_struct *work)
239239
{
240240
struct _thermal_state *state = container_of(to_delayed_work(work),
241241
struct _thermal_state, therm_work);

drivers/edac/amd64_edac.c

Lines changed: 36 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -214,7 +214,7 @@ static int __set_scrub_rate(struct amd64_pvt *pvt, u32 new_bw, u32 min_rate)
214214

215215
scrubval = scrubrates[i].scrubval;
216216

217-
if (pvt->fam == 0x17 || pvt->fam == 0x18) {
217+
if (pvt->umc) {
218218
__f17h_set_scrubval(pvt, scrubval);
219219
} else if (pvt->fam == 0x15 && pvt->model == 0x60) {
220220
f15h_select_dct(pvt, 0);
@@ -256,18 +256,7 @@ static int get_scrub_rate(struct mem_ctl_info *mci)
256256
int i, retval = -EINVAL;
257257
u32 scrubval = 0;
258258

259-
switch (pvt->fam) {
260-
case 0x15:
261-
/* Erratum #505 */
262-
if (pvt->model < 0x10)
263-
f15h_select_dct(pvt, 0);
264-
265-
if (pvt->model == 0x60)
266-
amd64_read_pci_cfg(pvt->F2, F15H_M60H_SCRCTRL, &scrubval);
267-
break;
268-
269-
case 0x17:
270-
case 0x18:
259+
if (pvt->umc) {
271260
amd64_read_pci_cfg(pvt->F6, F17H_SCR_BASE_ADDR, &scrubval);
272261
if (scrubval & BIT(0)) {
273262
amd64_read_pci_cfg(pvt->F6, F17H_SCR_LIMIT_ADDR, &scrubval);
@@ -276,11 +265,15 @@ static int get_scrub_rate(struct mem_ctl_info *mci)
276265
} else {
277266
scrubval = 0;
278267
}
279-
break;
268+
} else if (pvt->fam == 0x15) {
269+
/* Erratum #505 */
270+
if (pvt->model < 0x10)
271+
f15h_select_dct(pvt, 0);
280272

281-
default:
273+
if (pvt->model == 0x60)
274+
amd64_read_pci_cfg(pvt->F2, F15H_M60H_SCRCTRL, &scrubval);
275+
} else {
282276
amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval);
283-
break;
284277
}
285278

286279
scrubval = scrubval & 0x001F;
@@ -1055,6 +1048,16 @@ static void determine_memory_type(struct amd64_pvt *pvt)
10551048
{
10561049
u32 dram_ctrl, dcsm;
10571050

1051+
if (pvt->umc) {
1052+
if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5))
1053+
pvt->dram_type = MEM_LRDDR4;
1054+
else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4))
1055+
pvt->dram_type = MEM_RDDR4;
1056+
else
1057+
pvt->dram_type = MEM_DDR4;
1058+
return;
1059+
}
1060+
10581061
switch (pvt->fam) {
10591062
case 0xf:
10601063
if (pvt->ext_model >= K8_REV_F)
@@ -1100,16 +1103,6 @@ static void determine_memory_type(struct amd64_pvt *pvt)
11001103
case 0x16:
11011104
goto ddr3;
11021105

1103-
case 0x17:
1104-
case 0x18:
1105-
if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5))
1106-
pvt->dram_type = MEM_LRDDR4;
1107-
else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4))
1108-
pvt->dram_type = MEM_RDDR4;
1109-
else
1110-
pvt->dram_type = MEM_DDR4;
1111-
return;
1112-
11131106
default:
11141107
WARN(1, KERN_ERR "%s: Family??? 0x%x\n", __func__, pvt->fam);
11151108
pvt->dram_type = MEM_EMPTY;
@@ -2336,6 +2329,16 @@ static struct amd64_family_type family_types[] = {
23362329
.dbam_to_cs = f17_addr_mask_to_cs_size,
23372330
}
23382331
},
2332+
[F19_CPUS] = {
2333+
.ctl_name = "F19h",
2334+
.f0_id = PCI_DEVICE_ID_AMD_19H_DF_F0,
2335+
.f6_id = PCI_DEVICE_ID_AMD_19H_DF_F6,
2336+
.max_mcs = 8,
2337+
.ops = {
2338+
.early_channel_count = f17_early_channel_count,
2339+
.dbam_to_cs = f17_addr_mask_to_cs_size,
2340+
}
2341+
},
23392342
};
23402343

23412344
/*
@@ -3368,6 +3371,12 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
33683371
family_types[F17_CPUS].ctl_name = "F18h";
33693372
break;
33703373

3374+
case 0x19:
3375+
fam_type = &family_types[F19_CPUS];
3376+
pvt->ops = &family_types[F19_CPUS].ops;
3377+
family_types[F19_CPUS].ctl_name = "F19h";
3378+
break;
3379+
33713380
default:
33723381
amd64_err("Unsupported family!\n");
33733382
return NULL;
@@ -3623,6 +3632,7 @@ static const struct x86_cpu_id amd64_cpuids[] = {
36233632
{ X86_VENDOR_AMD, 0x16, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
36243633
{ X86_VENDOR_AMD, 0x17, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
36253634
{ X86_VENDOR_HYGON, 0x18, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
3635+
{ X86_VENDOR_AMD, 0x19, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
36263636
{ }
36273637
};
36283638
MODULE_DEVICE_TABLE(x86cpu, amd64_cpuids);

drivers/edac/amd64_edac.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,8 @@
122122
#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F6 0x1496
123123
#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F0 0x1440
124124
#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F6 0x1446
125+
#define PCI_DEVICE_ID_AMD_19H_DF_F0 0x1650
126+
#define PCI_DEVICE_ID_AMD_19H_DF_F6 0x1656
125127

126128
/*
127129
* Function 1 - Address Map
@@ -292,6 +294,7 @@ enum amd_families {
292294
F17_M10H_CPUS,
293295
F17_M30H_CPUS,
294296
F17_M70H_CPUS,
297+
F19_CPUS,
295298
NUM_FAMILIES,
296299
};
297300

0 commit comments

Comments
 (0)