Skip to content

Commit 2f4348e

Browse files
qzhuo2 authored and aegl committed
EDAC/skx_common: Add new ADXL components for 2-level memory
Some Intel servers may configure memory in 2 levels, using fast "near" memory (e.g. DDR) as a cache for larger, slower "far" memory (e.g. 3D X-point). In these configurations the BIOS ADXL address translation for an address in a 2-level memory range will provide details of both the "near" and "far" components. The currently exported ADXL components cover only 1-level memory systems, or the 2nd-level memory of a 2-level memory system. So add new ADXL components for the 1st-level memory of a 2-level memory system, to fully support 2-level memory systems and the detection of the memory error source (1st-level memory or 2nd-level memory). Signed-off-by: Qiuxu Zhuo <[email protected]> Signed-off-by: Tony Luck <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent e8049c4 commit 2f4348e

File tree

2 files changed

+67
-11
lines changed

2 files changed

+67
-11
lines changed

drivers/edac/skx_common.c

Lines changed: 56 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -23,23 +23,28 @@
2323
#include "skx_common.h"
2424

2525
static const char * const component_names[] = {
26-
[INDEX_SOCKET] = "ProcessorSocketId",
27-
[INDEX_MEMCTRL] = "MemoryControllerId",
28-
[INDEX_CHANNEL] = "ChannelId",
29-
[INDEX_DIMM] = "DimmSlotId",
26+
[INDEX_SOCKET] = "ProcessorSocketId",
27+
[INDEX_MEMCTRL] = "MemoryControllerId",
28+
[INDEX_CHANNEL] = "ChannelId",
29+
[INDEX_DIMM] = "DimmSlotId",
30+
[INDEX_NM_MEMCTRL] = "NmMemoryControllerId",
31+
[INDEX_NM_CHANNEL] = "NmChannelId",
32+
[INDEX_NM_DIMM] = "NmDimmSlotId",
3033
};
3134

3235
static int component_indices[ARRAY_SIZE(component_names)];
3336
static int adxl_component_count;
3437
static const char * const *adxl_component_names;
3538
static u64 *adxl_values;
3639
static char *adxl_msg;
40+
static unsigned long adxl_nm_bitmap;
3741

3842
static char skx_msg[MSG_SIZE];
3943
static skx_decode_f skx_decode;
4044
static skx_show_retry_log_f skx_show_retry_rd_err_log;
4145
static u64 skx_tolm, skx_tohm;
4246
static LIST_HEAD(dev_edac_list);
47+
static bool skx_mem_cfg_2lm;
4348

4449
int __init skx_adxl_get(void)
4550
{
@@ -56,14 +61,25 @@ int __init skx_adxl_get(void)
5661
for (j = 0; names[j]; j++) {
5762
if (!strcmp(component_names[i], names[j])) {
5863
component_indices[i] = j;
64+
65+
if (i >= INDEX_NM_FIRST)
66+
adxl_nm_bitmap |= 1 << i;
67+
5968
break;
6069
}
6170
}
6271

63-
if (!names[j])
72+
if (!names[j] && i < INDEX_NM_FIRST)
6473
goto err;
6574
}
6675

76+
if (skx_mem_cfg_2lm) {
77+
if (!adxl_nm_bitmap)
78+
skx_printk(KERN_NOTICE, "Not enough ADXL components for 2-level memory.\n");
79+
else
80+
edac_dbg(2, "adxl_nm_bitmap: 0x%lx\n", adxl_nm_bitmap);
81+
}
82+
6783
adxl_component_names = names;
6884
while (*names++)
6985
adxl_component_count++;
@@ -99,7 +115,7 @@ void __exit skx_adxl_put(void)
99115
kfree(adxl_msg);
100116
}
101117

102-
static bool skx_adxl_decode(struct decoded_addr *res)
118+
static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_mem)
103119
{
104120
struct skx_dev *d;
105121
int i, len = 0;
@@ -116,11 +132,20 @@ static bool skx_adxl_decode(struct decoded_addr *res)
116132
}
117133

118134
res->socket = (int)adxl_values[component_indices[INDEX_SOCKET]];
119-
res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]];
120-
res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]];
121-
res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]];
135+
if (error_in_1st_level_mem) {
136+
res->imc = (adxl_nm_bitmap & BIT_NM_MEMCTRL) ?
137+
(int)adxl_values[component_indices[INDEX_NM_MEMCTRL]] : -1;
138+
res->channel = (adxl_nm_bitmap & BIT_NM_CHANNEL) ?
139+
(int)adxl_values[component_indices[INDEX_NM_CHANNEL]] : -1;
140+
res->dimm = (adxl_nm_bitmap & BIT_NM_DIMM) ?
141+
(int)adxl_values[component_indices[INDEX_NM_DIMM]] : -1;
142+
} else {
143+
res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]];
144+
res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]];
145+
res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]];
146+
}
122147

123-
if (res->imc > NUM_IMC - 1) {
148+
if (res->imc > NUM_IMC - 1 || res->imc < 0) {
124149
skx_printk(KERN_ERR, "Bad imc %d\n", res->imc);
125150
return false;
126151
}
@@ -151,6 +176,11 @@ static bool skx_adxl_decode(struct decoded_addr *res)
151176
return true;
152177
}
153178

179+
/*
 * Record whether the platform memory is configured as 2-level memory (2LM).
 *
 * The value is stored in the file-scope flag skx_mem_cfg_2lm, which
 * skx_adxl_get() consults when reporting missing near-memory ADXL
 * components and skx_error_in_1st_level_mem() consults when classifying
 * a machine check.
 *
 * @mem_cfg_2lm: true if the system uses 2-level memory, false otherwise.
 */
void skx_set_mem_cfg(bool mem_cfg_2lm)
180+
{
181+
skx_mem_cfg_2lm = mem_cfg_2lm;
182+
}
183+
154184
void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log)
155185
{
156186
skx_decode = decode;
@@ -578,6 +608,21 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
578608
optype, skx_msg);
579609
}
580610

611+
/*
 * Decide whether a machine check should be attributed to the 1st-level
 * ("near") memory of a 2-level memory configuration.
 *
 * @m: machine check record; only m->status is examined.
 *
 * Returns false unconditionally when skx_mem_cfg_2lm is not set. Otherwise
 * returns true only if bits 0-15 of m->status, masked with 0xef80, equal
 * 0x280.
 */
static bool skx_error_in_1st_level_mem(const struct mce *m)
612+
{
613+
u32 errcode;
614+
615+
if (!skx_mem_cfg_2lm)
616+
return false;
617+
618+
/* Extract the low 16 bits of the MCE status register. */
errcode = GET_BITFIELD(m->status, 0, 15);
619+
620+
/*
 * NOTE(review): 0xef80/0x280 presumably select the MCA error-code
 * encoding that hardware reports for near-memory (cache-like) errors;
 * the meaning of the individual bits is not shown here -- confirm
 * against the Intel SDM machine-check error code tables.
 */
if ((errcode & 0xef80) != 0x280)
621+
return false;
622+
623+
return true;
624+
}
625+
581626
int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
582627
void *data)
583628
{
@@ -597,7 +642,7 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
597642
res.addr = mce->addr;
598643

599644
if (adxl_component_count) {
600-
if (!skx_adxl_decode(&res))
645+
if (!skx_adxl_decode(&res, skx_error_in_1st_level_mem(mce)))
601646
return NOTIFY_DONE;
602647
} else if (!skx_decode || !skx_decode(&res)) {
603648
return NOTIFY_DONE;

drivers/edac/skx_common.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
#ifndef _SKX_COMM_EDAC_H
1010
#define _SKX_COMM_EDAC_H
1111

12+
#include <linux/bits.h>
13+
1214
#define MSG_SIZE 1024
1315

1416
/*
@@ -92,9 +94,17 @@ enum {
9294
INDEX_MEMCTRL,
9395
INDEX_CHANNEL,
9496
INDEX_DIMM,
97+
INDEX_NM_FIRST,
98+
INDEX_NM_MEMCTRL = INDEX_NM_FIRST,
99+
INDEX_NM_CHANNEL,
100+
INDEX_NM_DIMM,
95101
INDEX_MAX
96102
};
97103

104+
#define BIT_NM_MEMCTRL BIT_ULL(INDEX_NM_MEMCTRL)
105+
#define BIT_NM_CHANNEL BIT_ULL(INDEX_NM_CHANNEL)
106+
#define BIT_NM_DIMM BIT_ULL(INDEX_NM_DIMM)
107+
98108
struct decoded_addr {
99109
struct skx_dev *dev;
100110
u64 addr;
@@ -133,6 +143,7 @@ typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int le
133143
int __init skx_adxl_get(void);
134144
void __exit skx_adxl_put(void);
135145
void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log);
146+
void skx_set_mem_cfg(bool mem_cfg_2lm);
136147

137148
int skx_get_src_id(struct skx_dev *d, int off, u8 *id);
138149
int skx_get_node_id(struct skx_dev *d, u8 *id);

0 commit comments

Comments
 (0)