Skip to content

Commit a366670

Browse files
qzhuo2aegl
authored and committed
EDAC/{skx_common,i10nm}: Fix incorrect far-memory error source indicator
The Granite Rapids CPUs with Flat2LM memory configurations may mistakenly report near-memory errors as far-memory errors, resulting in invalid decoded ADXL results:

    EDAC skx: Bad imc -1

Fix this incorrect far-memory error source indicator by prefetching the decoded far-memory controller ID and adjusting the error source indicator to near-memory if the far-memory controller ID is invalid.

Fixes: ba987ea ("EDAC/i10nm: Add Intel Granite Rapids server support")
Signed-off-by: Qiuxu Zhuo <[email protected]>
Signed-off-by: Tony Luck <[email protected]>
Tested-by: Diego Garcia Rodriguez <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
1 parent 2397f79 commit a366670

File tree

3 files changed

+25
-0
lines changed

3 files changed

+25
-0
lines changed

drivers/edac/i10nm_base.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1036,6 +1036,7 @@ static int __init i10nm_init(void)
10361036
return -ENODEV;
10371037

10381038
cfg = (struct res_config *)id->driver_data;
1039+
skx_set_res_cfg(cfg);
10391040
res_cfg = cfg;
10401041

10411042
rc = skx_get_hi_lo(0x09a2, off, &tolm, &tohm);

drivers/edac/skx_common.c

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ static skx_show_retry_log_f skx_show_retry_rd_err_log;
4747
static u64 skx_tolm, skx_tohm;
4848
static LIST_HEAD(dev_edac_list);
4949
static bool skx_mem_cfg_2lm;
50+
static struct res_config *skx_res_cfg;
5051

5152
int skx_adxl_get(void)
5253
{
@@ -135,6 +136,22 @@ static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src)
135136
return false;
136137
}
137138

139+
/*
140+
* GNR with a Flat2LM memory configuration may mistakenly classify
141+
* a near-memory error(DDR5) as a far-memory error(CXL), resulting
142+
* in the incorrect selection of decoded ADXL components.
143+
* To address this, prefetch the decoded far-memory controller ID
144+
* and adjust the error source to near-memory if the far-memory
145+
* controller ID is invalid.
146+
*/
147+
if (skx_res_cfg && skx_res_cfg->type == GNR && err_src == ERR_SRC_2LM_FM) {
148+
res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]];
149+
if (res->imc == -1) {
150+
err_src = ERR_SRC_2LM_NM;
151+
edac_dbg(0, "Adjust the error source to near-memory.\n");
152+
}
153+
}
154+
138155
res->socket = (int)adxl_values[component_indices[INDEX_SOCKET]];
139156
if (err_src == ERR_SRC_2LM_NM) {
140157
res->imc = (adxl_nm_bitmap & BIT_NM_MEMCTRL) ?
@@ -191,6 +208,12 @@ void skx_set_mem_cfg(bool mem_cfg_2lm)
191208
}
192209
EXPORT_SYMBOL_GPL(skx_set_mem_cfg);
193210

211+
/*
 * Record the platform resource configuration in the file-scope
 * skx_res_cfg pointer. skx_adxl_decode() reads it (after a NULL check)
 * to test whether the platform type is GNR before applying the
 * far-memory error source adjustment. In this commit the only visible
 * caller is i10nm_init(), which passes the matched driver_data.
 */
void skx_set_res_cfg(struct res_config *cfg)
212+
{
213+
skx_res_cfg = cfg;
214+
}
215+
EXPORT_SYMBOL_GPL(skx_set_res_cfg);
216+
194217
void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log)
195218
{
196219
driver_decode = decode;

drivers/edac/skx_common.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,7 @@ int skx_adxl_get(void);
241241
void skx_adxl_put(void);
242242
void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log);
243243
void skx_set_mem_cfg(bool mem_cfg_2lm);
244+
void skx_set_res_cfg(struct res_config *cfg);
244245

245246
int skx_get_src_id(struct skx_dev *d, int off, u8 *id);
246247
int skx_get_node_id(struct skx_dev *d, u8 *id);

0 commit comments

Comments
 (0)