Skip to content

Commit ba987ea

Browse files
qzhuo2aegl
authored and committed
EDAC/i10nm: Add Intel Granite Rapids server support
The Granite Rapids CPU model uses similar memory controller registers as Sapphire Rapids server but with some different configurations:

- Various memory controller numbers for different Granite Rapids CPUs. So detect the number of present memory controllers at run time.
- Different MMIO offsets of memory controllers.
- Different triples of bus/dev/fun of some PCI devices used in i10nm_edac.

Add above configurations and Granite Rapids CPU model ID for EDAC support.

[Tony: Fixed 2 typos s/strcture/structure/]

Signed-off-by: Qiuxu Zhuo <[email protected]>
Signed-off-by: Tony Luck <[email protected]>
Link: https://lore.kernel.org/all/[email protected]
1 parent dd7814b commit ba987ea

File tree

2 files changed

+217
-25
lines changed

2 files changed

+217
-25
lines changed

drivers/edac/i10nm_base.c

Lines changed: 214 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
#include "edac_module.h"
1414
#include "skx_common.h"
1515

16-
#define I10NM_REVISION "v0.0.5"
16+
#define I10NM_REVISION "v0.0.6"
1717
#define EDAC_MOD_STR "i10nm_edac"
1818

1919
/* Debug macros */
@@ -22,25 +22,34 @@
2222

2323
#define I10NM_GET_SCK_BAR(d, reg) \
2424
pci_read_config_dword((d)->uracu, 0xd0, &(reg))
25-
#define I10NM_GET_IMC_BAR(d, i, reg) \
26-
pci_read_config_dword((d)->uracu, 0xd8 + (i) * 4, &(reg))
25+
#define I10NM_GET_IMC_BAR(d, i, reg) \
26+
pci_read_config_dword((d)->uracu, \
27+
(res_cfg->type == GNR ? 0xd4 : 0xd8) + (i) * 4, &(reg))
2728
#define I10NM_GET_SAD(d, offset, i, reg)\
28-
pci_read_config_dword((d)->sad_all, (offset) + (i) * 8, &(reg))
29+
pci_read_config_dword((d)->sad_all, (offset) + (i) * \
30+
(res_cfg->type == GNR ? 12 : 8), &(reg))
2931
#define I10NM_GET_HBM_IMC_BAR(d, reg) \
3032
pci_read_config_dword((d)->uracu, 0xd4, &(reg))
3133
#define I10NM_GET_CAPID3_CFG(d, reg) \
32-
pci_read_config_dword((d)->pcu_cr3, 0x90, &(reg))
34+
pci_read_config_dword((d)->pcu_cr3, \
35+
res_cfg->type == GNR ? 0x290 : 0x90, &(reg))
36+
#define I10NM_GET_CAPID5_CFG(d, reg) \
37+
pci_read_config_dword((d)->pcu_cr3, \
38+
res_cfg->type == GNR ? 0x298 : 0x98, &(reg))
3339
#define I10NM_GET_DIMMMTR(m, i, j) \
34-
readl((m)->mbase + ((m)->hbm_mc ? 0x80c : 0x2080c) + \
40+
readl((m)->mbase + ((m)->hbm_mc ? 0x80c : \
41+
(res_cfg->type == GNR ? 0xc0c : 0x2080c)) + \
3542
(i) * (m)->chan_mmio_sz + (j) * 4)
3643
#define I10NM_GET_MCDDRTCFG(m, i) \
3744
readl((m)->mbase + ((m)->hbm_mc ? 0x970 : 0x20970) + \
3845
(i) * (m)->chan_mmio_sz)
3946
#define I10NM_GET_MCMTR(m, i) \
40-
readl((m)->mbase + ((m)->hbm_mc ? 0xef8 : 0x20ef8) + \
47+
readl((m)->mbase + ((m)->hbm_mc ? 0xef8 : \
48+
(res_cfg->type == GNR ? 0xaf8 : 0x20ef8)) + \
4149
(i) * (m)->chan_mmio_sz)
4250
#define I10NM_GET_AMAP(m, i) \
43-
readl((m)->mbase + ((m)->hbm_mc ? 0x814 : 0x20814) + \
51+
readl((m)->mbase + ((m)->hbm_mc ? 0x814 : \
52+
(res_cfg->type == GNR ? 0xc14 : 0x20814)) + \
4453
(i) * (m)->chan_mmio_sz)
4554
#define I10NM_GET_REG32(m, i, offset) \
4655
readl((m)->mbase + (i) * (m)->chan_mmio_sz + (offset))
@@ -56,7 +65,10 @@
5665
#define I10NM_GET_HBM_IMC_MMIO_OFFSET(reg) \
5766
((GET_BITFIELD(reg, 0, 10) << 12) + 0x140000)
5867

68+
#define I10NM_GNR_IMC_MMIO_OFFSET 0x24c000
69+
#define I10NM_GNR_IMC_MMIO_SIZE 0x4000
5970
#define I10NM_HBM_IMC_MMIO_SIZE 0x9000
71+
#define I10NM_DDR_IMC_CH_CNT(reg) GET_BITFIELD(reg, 21, 24)
6072
#define I10NM_IS_HBM_PRESENT(reg) GET_BITFIELD(reg, 27, 30)
6173
#define I10NM_IS_HBM_IMC(reg) GET_BITFIELD(reg, 29, 29)
6274

@@ -323,6 +335,79 @@ static struct pci_dev *pci_get_dev_wrapper(int dom, unsigned int bus,
323335
return pdev;
324336
}
325337

338+
/**
339+
* i10nm_get_imc_num() - Get the number of present DDR memory controllers.
340+
*
341+
* @cfg : The pointer to the structure of EDAC resource configurations.
342+
*
343+
* For Granite Rapids CPUs, the number of present DDR memory controllers read
344+
* at runtime overwrites the value statically configured in @cfg->ddr_imc_num.
345+
* For other CPUs, the number of present DDR memory controllers is statically
346+
* configured in @cfg->ddr_imc_num.
347+
*
348+
* RETURNS : 0 on success, < 0 on failure.
349+
*/
350+
static int i10nm_get_imc_num(struct res_config *cfg)
351+
{
352+
int n, imc_num, chan_num = 0;
353+
struct skx_dev *d;
354+
u32 reg;
355+
356+
list_for_each_entry(d, i10nm_edac_list, list) {
357+
d->pcu_cr3 = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->pcu_cr3_bdf.bus],
358+
res_cfg->pcu_cr3_bdf.dev,
359+
res_cfg->pcu_cr3_bdf.fun);
360+
if (!d->pcu_cr3)
361+
continue;
362+
363+
if (I10NM_GET_CAPID5_CFG(d, reg))
364+
continue;
365+
366+
n = I10NM_DDR_IMC_CH_CNT(reg);
367+
368+
if (!chan_num) {
369+
chan_num = n;
370+
edac_dbg(2, "Get DDR CH number: %d\n", chan_num);
371+
} else if (chan_num != n) {
372+
i10nm_printk(KERN_NOTICE, "Get DDR CH numbers: %d, %d\n", chan_num, n);
373+
}
374+
}
375+
376+
switch (cfg->type) {
377+
case GNR:
378+
/*
379+
* One channel per DDR memory controller for Granite Rapids CPUs.
380+
*/
381+
imc_num = chan_num;
382+
383+
if (!imc_num) {
384+
i10nm_printk(KERN_ERR, "Invalid DDR MC number\n");
385+
return -ENODEV;
386+
}
387+
388+
if (imc_num > I10NM_NUM_DDR_IMC) {
389+
i10nm_printk(KERN_ERR, "Need to make I10NM_NUM_DDR_IMC >= %d\n", imc_num);
390+
return -EINVAL;
391+
}
392+
393+
if (cfg->ddr_imc_num != imc_num) {
394+
/*
395+
* Store the number of present DDR memory controllers.
396+
*/
397+
cfg->ddr_imc_num = imc_num;
398+
edac_dbg(2, "Set DDR MC number: %d", imc_num);
399+
}
400+
401+
return 0;
402+
default:
403+
/*
404+
* For other CPUs, the number of present DDR memory controllers
405+
* is statically pre-configured in cfg->ddr_imc_num.
406+
*/
407+
return 0;
408+
}
409+
}
410+
326411
static bool i10nm_check_2lm(struct res_config *cfg)
327412
{
328413
struct skx_dev *d;
@@ -445,6 +530,98 @@ static bool i10nm_mc_decode(struct decoded_addr *res)
445530
return true;
446531
}
447532

533+
/**
534+
* get_gnr_mdev() - Get the PCI device of the @logical_idx-th DDR memory controller.
535+
*
536+
* @d : The pointer to the structure of CPU socket EDAC device.
537+
* @logical_idx : The logical index of the present memory controller (0 ~ max present MC# - 1).
538+
* @physical_idx : To store the corresponding physical index of @logical_idx.
539+
*
540+
* RETURNS : The PCI device of the @logical_idx-th DDR memory controller, NULL on failure.
541+
*/
542+
static struct pci_dev *get_gnr_mdev(struct skx_dev *d, int logical_idx, int *physical_idx)
543+
{
544+
#define GNR_MAX_IMC_PCI_CNT 28
545+
546+
struct pci_dev *mdev;
547+
int i, logical = 0;
548+
549+
/*
550+
* Detect present memory controllers from { PCI device: 8-5, function 7-1 }
551+
*/
552+
for (i = 0; i < GNR_MAX_IMC_PCI_CNT; i++) {
553+
mdev = pci_get_dev_wrapper(d->seg,
554+
d->bus[res_cfg->ddr_mdev_bdf.bus],
555+
res_cfg->ddr_mdev_bdf.dev + i / 7,
556+
res_cfg->ddr_mdev_bdf.fun + i % 7);
557+
558+
if (mdev) {
559+
if (logical == logical_idx) {
560+
*physical_idx = i;
561+
return mdev;
562+
}
563+
564+
pci_dev_put(mdev);
565+
logical++;
566+
}
567+
}
568+
569+
return NULL;
570+
}
571+
572+
/**
573+
* get_ddr_munit() - Get the resource of the i-th DDR memory controller.
574+
*
575+
* @d : The pointer to the structure of CPU socket EDAC device.
576+
* @i : The index of the CPU socket relative DDR memory controller.
577+
* @offset : To store the MMIO offset of the i-th DDR memory controller.
578+
* @size : To store the MMIO size of the i-th DDR memory controller.
579+
*
580+
* RETURNS : The PCI device of the i-th DDR memory controller, NULL on failure.
581+
*/
582+
static struct pci_dev *get_ddr_munit(struct skx_dev *d, int i, u32 *offset, unsigned long *size)
583+
{
584+
struct pci_dev *mdev;
585+
int physical_idx;
586+
u32 reg;
587+
588+
switch (res_cfg->type) {
589+
case GNR:
590+
if (I10NM_GET_IMC_BAR(d, 0, reg)) {
591+
i10nm_printk(KERN_ERR, "Failed to get mc0 bar\n");
592+
return NULL;
593+
}
594+
595+
mdev = get_gnr_mdev(d, i, &physical_idx);
596+
if (!mdev)
597+
return NULL;
598+
599+
*offset = I10NM_GET_IMC_MMIO_OFFSET(reg) +
600+
I10NM_GNR_IMC_MMIO_OFFSET +
601+
physical_idx * I10NM_GNR_IMC_MMIO_SIZE;
602+
*size = I10NM_GNR_IMC_MMIO_SIZE;
603+
604+
break;
605+
default:
606+
if (I10NM_GET_IMC_BAR(d, i, reg)) {
607+
i10nm_printk(KERN_ERR, "Failed to get mc%d bar\n", i);
608+
return NULL;
609+
}
610+
611+
mdev = pci_get_dev_wrapper(d->seg,
612+
d->bus[res_cfg->ddr_mdev_bdf.bus],
613+
res_cfg->ddr_mdev_bdf.dev + i,
614+
res_cfg->ddr_mdev_bdf.fun);
615+
if (!mdev)
616+
return NULL;
617+
618+
*offset = I10NM_GET_IMC_MMIO_OFFSET(reg);
619+
*size = I10NM_GET_IMC_MMIO_SIZE(reg);
620+
}
621+
622+
return mdev;
623+
}
624+
448625
static int i10nm_get_ddr_munits(void)
449626
{
450627
struct pci_dev *mdev;
@@ -478,9 +655,8 @@ static int i10nm_get_ddr_munits(void)
478655
j++, base, reg);
479656

480657
for (i = 0; i < res_cfg->ddr_imc_num; i++) {
481-
mdev = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->ddr_mdev_bdf.bus],
482-
res_cfg->ddr_mdev_bdf.dev + i,
483-
res_cfg->ddr_mdev_bdf.fun);
658+
mdev = get_ddr_munit(d, i, &off, &size);
659+
484660
if (i == 0 && !mdev) {
485661
i10nm_printk(KERN_ERR, "No IMC found\n");
486662
return -ENODEV;
@@ -490,13 +666,6 @@ static int i10nm_get_ddr_munits(void)
490666

491667
d->imc[i].mdev = mdev;
492668

493-
if (I10NM_GET_IMC_BAR(d, i, reg)) {
494-
i10nm_printk(KERN_ERR, "Failed to get mc bar\n");
495-
return -ENODEV;
496-
}
497-
498-
off = I10NM_GET_IMC_MMIO_OFFSET(reg);
499-
size = I10NM_GET_IMC_MMIO_SIZE(reg);
500669
edac_dbg(2, "mc%d mmio base 0x%llx size 0x%lx (reg 0x%x)\n",
501670
i, base + off, size, reg);
502671

@@ -536,9 +705,6 @@ static int i10nm_get_hbm_munits(void)
536705
u64 base;
537706

538707
list_for_each_entry(d, i10nm_edac_list, list) {
539-
d->pcu_cr3 = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->pcu_cr3_bdf.bus],
540-
res_cfg->pcu_cr3_bdf.dev,
541-
res_cfg->pcu_cr3_bdf.fun);
542708
if (!d->pcu_cr3)
543709
return -ENODEV;
544710

@@ -678,6 +844,23 @@ static struct res_config spr_cfg = {
678844
.offsets_demand_hbm1 = offsets_demand_spr_hbm1,
679845
};
680846

847+
static struct res_config gnr_cfg = {
848+
.type = GNR,
849+
.decs_did = 0x3252,
850+
.busno_cfg_offset = 0xd0,
851+
.ddr_imc_num = 12,
852+
.ddr_chan_num = 1,
853+
.ddr_dimm_num = 2,
854+
.ddr_chan_mmio_sz = 0x4000,
855+
.support_ddr5 = true,
856+
.sad_all_bdf = {0, 13, 0},
857+
.pcu_cr3_bdf = {0, 5, 0},
858+
.util_all_bdf = {0, 13, 1},
859+
.uracu_bdf = {0, 0, 1},
860+
.ddr_mdev_bdf = {0, 5, 1},
861+
.sad_all_offset = 0x300,
862+
};
863+
681864
static const struct x86_cpu_id i10nm_cpuids[] = {
682865
X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPINGS(0x0, 0x3), &i10nm_cfg0),
683866
X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPINGS(0x4, 0xf), &i10nm_cfg1),
@@ -686,6 +869,7 @@ static const struct x86_cpu_id i10nm_cpuids[] = {
686869
X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_D, X86_STEPPINGS(0x0, 0xf), &i10nm_cfg1),
687870
X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SAPPHIRERAPIDS_X, X86_STEPPINGS(0x0, 0xf), &spr_cfg),
688871
X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(EMERALDRAPIDS_X, X86_STEPPINGS(0x0, 0xf), &spr_cfg),
872+
X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(GRANITERAPIDS_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg),
689873
{}
690874
};
691875
MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids);
@@ -705,7 +889,7 @@ static int i10nm_get_dimm_config(struct mem_ctl_info *mci,
705889
{
706890
struct skx_pvt *pvt = mci->pvt_info;
707891
struct skx_imc *imc = pvt->imc;
708-
u32 mtr, amap, mcddrtcfg;
892+
u32 mtr, amap, mcddrtcfg = 0;
709893
struct dimm_info *dimm;
710894
int i, j, ndimms;
711895

@@ -715,7 +899,10 @@ static int i10nm_get_dimm_config(struct mem_ctl_info *mci,
715899

716900
ndimms = 0;
717901
amap = I10NM_GET_AMAP(imc, i);
718-
mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i);
902+
903+
if (res_cfg->type != GNR)
904+
mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i);
905+
719906
for (j = 0; j < imc->num_dimms; j++) {
720907
dimm = edac_get_dimm(mci, i, j, 0);
721908
mtr = I10NM_GET_DIMMMTR(imc, i, j);
@@ -834,6 +1021,10 @@ static int __init i10nm_init(void)
8341021
return -ENODEV;
8351022
}
8361023

1024+
rc = i10nm_get_imc_num(cfg);
1025+
if (rc < 0)
1026+
goto fail;
1027+
8371028
mem_cfg_2lm = i10nm_check_2lm(cfg);
8381029
skx_set_mem_cfg(mem_cfg_2lm);
8391030

drivers/edac/skx_common.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
#define SKX_NUM_CHANNELS 3 /* Channels per memory controller */
3434
#define SKX_NUM_DIMMS 2 /* Max DIMMS per channel */
3535

36-
#define I10NM_NUM_DDR_IMC 4
36+
#define I10NM_NUM_DDR_IMC 12
3737
#define I10NM_NUM_DDR_CHANNELS 2
3838
#define I10NM_NUM_DDR_DIMMS 2
3939

@@ -129,7 +129,8 @@ struct skx_pvt {
129129
enum type {
130130
SKX,
131131
I10NM,
132-
SPR
132+
SPR,
133+
GNR
133134
};
134135

135136
enum {

0 commit comments

Comments
 (0)