Skip to content

Commit d35fb31

Browse files
yghannam authored and bp3tk0v committed
x86/mce/amd: Remove shared threshold bank plumbing
Legacy AMD systems include an integrated Northbridge that is represented by MCA bank 4. This is the only non-core MCA bank in legacy systems. The Northbridge is physically shared by all the CPUs within an AMD "Node". However, in practice the "shared" MCA bank can only be managed by a single CPU within that AMD Node. This is known as the "Node Base Core" (NBC). For example, only the NBC will be able to read the MCA bank 4 registers; they will be Read-as-Zero for other CPUs. Also, the MCA Thresholding interrupt will only signal the NBC; the other CPUs will not receive it. This is enforced by hardware, and it should not be managed by software. The current AMD Thresholding code attempts to deal with the "shared" MCA bank by micromanaging the bank's sysfs kobjects. However, this does not follow the intended kobject use cases. It is also fragile, and it has caused bugs in the past. Modern AMD systems do not need this shared MCA bank support, and it should not be needed on legacy systems either. Remove the shared threshold bank code. Also, move the threshold struct definitions to mce/amd.c, since they are no longer needed in amd_nb.c. Signed-off-by: Yazen Ghannam <[email protected]> Signed-off-by: Borislav Petkov (AMD) <[email protected]> Reviewed-by: Borislav Petkov (AMD) <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent 053d180 commit d35fb31

File tree

3 files changed

+27
-133
lines changed

3 files changed

+27
-133
lines changed

arch/x86/Kconfig

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1189,7 +1189,7 @@ config X86_MCE_INTEL
11891189
config X86_MCE_AMD
11901190
def_bool y
11911191
prompt "AMD MCE features"
1192-
depends on X86_MCE && X86_LOCAL_APIC && AMD_NB
1192+
depends on X86_MCE && X86_LOCAL_APIC
11931193
help
11941194
Additional support for AMD specific MCE features such as
11951195
the DRAM Error Threshold.

arch/x86/include/asm/amd_nb.h

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
#include <linux/ioport.h>
66
#include <linux/pci.h>
7-
#include <linux/refcount.h>
87

98
struct amd_nb_bus_dev_range {
109
u8 bus;
@@ -29,41 +28,11 @@ struct amd_l3_cache {
2928
u8 subcaches[4];
3029
};
3130

32-
struct threshold_block {
33-
unsigned int block; /* Number within bank */
34-
unsigned int bank; /* MCA bank the block belongs to */
35-
unsigned int cpu; /* CPU which controls MCA bank */
36-
u32 address; /* MSR address for the block */
37-
u16 interrupt_enable; /* Enable/Disable APIC interrupt */
38-
bool interrupt_capable; /* Bank can generate an interrupt. */
39-
40-
u16 threshold_limit; /*
41-
* Value upon which threshold
42-
* interrupt is generated.
43-
*/
44-
45-
struct kobject kobj; /* sysfs object */
46-
struct list_head miscj; /*
47-
* List of threshold blocks
48-
* within a bank.
49-
*/
50-
};
51-
52-
struct threshold_bank {
53-
struct kobject *kobj;
54-
struct threshold_block *blocks;
55-
56-
/* initialized to the number of CPUs on the node sharing this bank */
57-
refcount_t cpus;
58-
unsigned int shared;
59-
};
60-
6131
struct amd_northbridge {
6232
struct pci_dev *root;
6333
struct pci_dev *misc;
6434
struct pci_dev *link;
6535
struct amd_l3_cache l3_cache;
66-
struct threshold_bank *bank4;
6736
};
6837

6938
struct amd_northbridge_info {

arch/x86/kernel/cpu/mce/amd.c

Lines changed: 26 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@
44
*
55
* Written by Jacob Shin - AMD, Inc.
66
* Maintained by: Borislav Petkov <[email protected]>
7-
*
8-
* All MC4_MISCi registers are shared between cores on a node.
97
*/
108
#include <linux/interrupt.h>
119
#include <linux/notifier.h>
@@ -20,7 +18,6 @@
2018
#include <linux/smp.h>
2119
#include <linux/string.h>
2220

23-
#include <asm/amd_nb.h>
2421
#include <asm/traps.h>
2522
#include <asm/apic.h>
2623
#include <asm/mce.h>
@@ -221,6 +218,32 @@ static const struct smca_hwid smca_hwid_mcatypes[] = {
221218
#define MAX_MCATYPE_NAME_LEN 30
222219
static char buf_mcatype[MAX_MCATYPE_NAME_LEN];
223220

221+
struct threshold_block {
222+
/* This block's number within its bank. */
223+
unsigned int block;
224+
/* MCA bank number that contains this block. */
225+
unsigned int bank;
226+
/* CPU which controls this block's MCA bank. */
227+
unsigned int cpu;
228+
/* MCA_MISC MSR address for this block. */
229+
u32 address;
230+
/* Enable/Disable APIC interrupt. */
231+
bool interrupt_enable;
232+
/* Bank can generate an interrupt. */
233+
bool interrupt_capable;
234+
/* Value upon which threshold interrupt is generated. */
235+
u16 threshold_limit;
236+
/* sysfs object */
237+
struct kobject kobj;
238+
/* List of threshold blocks within this block's MCA bank. */
239+
struct list_head miscj;
240+
};
241+
242+
struct threshold_bank {
243+
struct kobject *kobj;
244+
struct threshold_block *blocks;
245+
};
246+
224247
static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
225248

226249
/*
@@ -333,19 +356,6 @@ struct thresh_restart {
333356
u16 old_limit;
334357
};
335358

336-
static inline bool is_shared_bank(int bank)
337-
{
338-
/*
339-
* Scalable MCA provides for only one core to have access to the MSRs of
340-
* a shared bank.
341-
*/
342-
if (mce_flags.smca)
343-
return false;
344-
345-
/* Bank 4 is for northbridge reporting and is thus shared */
346-
return (bank == 4);
347-
}
348-
349359
static const char *bank4_names(const struct threshold_block *b)
350360
{
351361
switch (b->address) {
@@ -1198,62 +1208,17 @@ static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb
11981208
return err;
11991209
}
12001210

1201-
static int __threshold_add_blocks(struct threshold_bank *b)
1202-
{
1203-
struct list_head *head = &b->blocks->miscj;
1204-
struct threshold_block *pos = NULL;
1205-
struct threshold_block *tmp = NULL;
1206-
int err = 0;
1207-
1208-
err = kobject_add(&b->blocks->kobj, b->kobj, b->blocks->kobj.name);
1209-
if (err)
1210-
return err;
1211-
1212-
list_for_each_entry_safe(pos, tmp, head, miscj) {
1213-
1214-
err = kobject_add(&pos->kobj, b->kobj, pos->kobj.name);
1215-
if (err) {
1216-
list_for_each_entry_safe_reverse(pos, tmp, head, miscj)
1217-
kobject_del(&pos->kobj);
1218-
1219-
return err;
1220-
}
1221-
}
1222-
return err;
1223-
}
1224-
12251211
static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu,
12261212
unsigned int bank)
12271213
{
12281214
struct device *dev = this_cpu_read(mce_device);
1229-
struct amd_northbridge *nb = NULL;
12301215
struct threshold_bank *b = NULL;
12311216
const char *name = get_name(cpu, bank, NULL);
12321217
int err = 0;
12331218

12341219
if (!dev)
12351220
return -ENODEV;
12361221

1237-
if (is_shared_bank(bank)) {
1238-
nb = node_to_amd_nb(topology_amd_node_id(cpu));
1239-
1240-
/* threshold descriptor already initialized on this node? */
1241-
if (nb && nb->bank4) {
1242-
/* yes, use it */
1243-
b = nb->bank4;
1244-
err = kobject_add(b->kobj, &dev->kobj, name);
1245-
if (err)
1246-
goto out;
1247-
1248-
bp[bank] = b;
1249-
refcount_inc(&b->cpus);
1250-
1251-
err = __threshold_add_blocks(b);
1252-
1253-
goto out;
1254-
}
1255-
}
1256-
12571222
b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
12581223
if (!b) {
12591224
err = -ENOMEM;
@@ -1267,17 +1232,6 @@ static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu,
12671232
goto out_free;
12681233
}
12691234

1270-
if (is_shared_bank(bank)) {
1271-
b->shared = 1;
1272-
refcount_set(&b->cpus, 1);
1273-
1274-
/* nb is already initialized, see above */
1275-
if (nb) {
1276-
WARN_ON(nb->bank4);
1277-
nb->bank4 = b;
1278-
}
1279-
}
1280-
12811235
err = allocate_threshold_blocks(cpu, b, bank, 0, mca_msr_reg(bank, MCA_MISC));
12821236
if (err)
12831237
goto out_kobj;
@@ -1310,40 +1264,11 @@ static void deallocate_threshold_blocks(struct threshold_bank *bank)
13101264
kobject_put(&bank->blocks->kobj);
13111265
}
13121266

1313-
static void __threshold_remove_blocks(struct threshold_bank *b)
1314-
{
1315-
struct threshold_block *pos = NULL;
1316-
struct threshold_block *tmp = NULL;
1317-
1318-
kobject_put(b->kobj);
1319-
1320-
list_for_each_entry_safe(pos, tmp, &b->blocks->miscj, miscj)
1321-
kobject_put(b->kobj);
1322-
}
1323-
13241267
static void threshold_remove_bank(struct threshold_bank *bank)
13251268
{
1326-
struct amd_northbridge *nb;
1327-
13281269
if (!bank->blocks)
13291270
goto out_free;
13301271

1331-
if (!bank->shared)
1332-
goto out_dealloc;
1333-
1334-
if (!refcount_dec_and_test(&bank->cpus)) {
1335-
__threshold_remove_blocks(bank);
1336-
return;
1337-
} else {
1338-
/*
1339-
* The last CPU on this node using the shared bank is going
1340-
* away, remove that bank now.
1341-
*/
1342-
nb = node_to_amd_nb(topology_amd_node_id(smp_processor_id()));
1343-
nb->bank4 = NULL;
1344-
}
1345-
1346-
out_dealloc:
13471272
deallocate_threshold_blocks(bank);
13481273

13491274
out_free:

0 commit comments

Comments
 (0)