Skip to content

Commit 5176a93

Browse files
yghannam authored and suryasaimadhu committed
x86/MCE/AMD, EDAC/mce_amd: Add new SMCA bank types
Add HWID and McaType values for new SMCA bank types, and add their error descriptions to edac_mce_amd.

The "PHY" bank types all have the same error descriptions, and the NBIF and SHUB bank types have the same error descriptions. So reuse the same arrays where appropriate.

[ bp: Remove useless comments over hwid types. ]

Signed-off-by: Yazen Ghannam <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
1 parent 1acd85f commit 5176a93

File tree

3 files changed

+151
-12
lines changed

3 files changed

+151
-12
lines changed

arch/x86/include/asm/mce.h

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -313,12 +313,19 @@ enum smca_bank_types {
313313
SMCA_SMU, /* System Management Unit */
314314
SMCA_SMU_V2,
315315
SMCA_MP5, /* Microprocessor 5 Unit */
316+
SMCA_MPDMA, /* MPDMA Unit */
316317
SMCA_NBIO, /* Northbridge IO Unit */
317318
SMCA_PCIE, /* PCI Express Unit */
318319
SMCA_PCIE_V2,
319320
SMCA_XGMI_PCS, /* xGMI PCS Unit */
321+
SMCA_NBIF, /* NBIF Unit */
322+
SMCA_SHUB, /* System HUB Unit */
323+
SMCA_SATA, /* SATA Unit */
324+
SMCA_USB, /* USB Unit */
325+
SMCA_GMI_PCS, /* GMI PCS Unit */
320326
SMCA_XGMI_PHY, /* xGMI PHY Unit */
321327
SMCA_WAFL_PHY, /* WAFL PHY Unit */
328+
SMCA_GMI_PHY, /* GMI PHY Unit */
322329
N_SMCA_BANK_TYPES
323330
};
324331

arch/x86/kernel/cpu/mce/amd.c

Lines changed: 16 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -95,11 +95,18 @@ static struct smca_bank_name smca_names[] = {
9595
[SMCA_PSP ... SMCA_PSP_V2] = { "psp", "Platform Security Processor" },
9696
[SMCA_SMU ... SMCA_SMU_V2] = { "smu", "System Management Unit" },
9797
[SMCA_MP5] = { "mp5", "Microprocessor 5 Unit" },
98+
[SMCA_MPDMA] = { "mpdma", "MPDMA Unit" },
9899
[SMCA_NBIO] = { "nbio", "Northbridge IO Unit" },
99100
[SMCA_PCIE ... SMCA_PCIE_V2] = { "pcie", "PCI Express Unit" },
100101
[SMCA_XGMI_PCS] = { "xgmi_pcs", "Ext Global Memory Interconnect PCS Unit" },
102+
[SMCA_NBIF] = { "nbif", "NBIF Unit" },
103+
[SMCA_SHUB] = { "shub", "System Hub Unit" },
104+
[SMCA_SATA] = { "sata", "SATA Unit" },
105+
[SMCA_USB] = { "usb", "USB Unit" },
106+
[SMCA_GMI_PCS] = { "gmi_pcs", "Global Memory Interconnect PCS Unit" },
101107
[SMCA_XGMI_PHY] = { "xgmi_phy", "Ext Global Memory Interconnect PHY Unit" },
102108
[SMCA_WAFL_PHY] = { "wafl_phy", "WAFL PHY Unit" },
109+
[SMCA_GMI_PHY] = { "gmi_phy", "Global Memory Interconnect PHY Unit" },
103110
};
104111

105112
static const char *smca_get_name(enum smca_bank_types t)
@@ -174,21 +181,25 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
174181
/* Microprocessor 5 Unit MCA type */
175182
{ SMCA_MP5, HWID_MCATYPE(0x01, 0x2) },
176183

184+
/* MPDMA MCA type */
185+
{ SMCA_MPDMA, HWID_MCATYPE(0x01, 0x3) },
186+
177187
/* Northbridge IO Unit MCA type */
178188
{ SMCA_NBIO, HWID_MCATYPE(0x18, 0x0) },
179189

180190
/* PCI Express Unit MCA type */
181191
{ SMCA_PCIE, HWID_MCATYPE(0x46, 0x0) },
182192
{ SMCA_PCIE_V2, HWID_MCATYPE(0x46, 0x1) },
183193

184-
/* xGMI PCS MCA type */
185194
{ SMCA_XGMI_PCS, HWID_MCATYPE(0x50, 0x0) },
186-
187-
/* xGMI PHY MCA type */
195+
{ SMCA_NBIF, HWID_MCATYPE(0x6C, 0x0) },
196+
{ SMCA_SHUB, HWID_MCATYPE(0x80, 0x0) },
197+
{ SMCA_SATA, HWID_MCATYPE(0xA8, 0x0) },
198+
{ SMCA_USB, HWID_MCATYPE(0xAA, 0x0) },
199+
{ SMCA_GMI_PCS, HWID_MCATYPE(0x241, 0x0) },
188200
{ SMCA_XGMI_PHY, HWID_MCATYPE(0x259, 0x0) },
189-
190-
/* WAFL PHY MCA type */
191201
{ SMCA_WAFL_PHY, HWID_MCATYPE(0x267, 0x0) },
202+
{ SMCA_GMI_PHY, HWID_MCATYPE(0x269, 0x0) },
192203
};
193204

194205
struct smca_bank smca_banks[MAX_NR_BANKS];

drivers/edac/mce_amd.c

Lines changed: 128 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -399,6 +399,63 @@ static const char * const smca_mp5_mce_desc[] = {
399399
"Instruction Tag Cache Bank B ECC or parity error",
400400
};
401401

402+
static const char * const smca_mpdma_mce_desc[] = {
403+
"Main SRAM [31:0] bank ECC or parity error",
404+
"Main SRAM [63:32] bank ECC or parity error",
405+
"Main SRAM [95:64] bank ECC or parity error",
406+
"Main SRAM [127:96] bank ECC or parity error",
407+
"Data Cache Bank A ECC or parity error",
408+
"Data Cache Bank B ECC or parity error",
409+
"Data Tag Cache Bank A ECC or parity error",
410+
"Data Tag Cache Bank B ECC or parity error",
411+
"Instruction Cache Bank A ECC or parity error",
412+
"Instruction Cache Bank B ECC or parity error",
413+
"Instruction Tag Cache Bank A ECC or parity error",
414+
"Instruction Tag Cache Bank B ECC or parity error",
415+
"Data Cache Bank A ECC or parity error",
416+
"Data Cache Bank B ECC or parity error",
417+
"Data Tag Cache Bank A ECC or parity error",
418+
"Data Tag Cache Bank B ECC or parity error",
419+
"Instruction Cache Bank A ECC or parity error",
420+
"Instruction Cache Bank B ECC or parity error",
421+
"Instruction Tag Cache Bank A ECC or parity error",
422+
"Instruction Tag Cache Bank B ECC or parity error",
423+
"Data Cache Bank A ECC or parity error",
424+
"Data Cache Bank B ECC or parity error",
425+
"Data Tag Cache Bank A ECC or parity error",
426+
"Data Tag Cache Bank B ECC or parity error",
427+
"Instruction Cache Bank A ECC or parity error",
428+
"Instruction Cache Bank B ECC or parity error",
429+
"Instruction Tag Cache Bank A ECC or parity error",
430+
"Instruction Tag Cache Bank B ECC or parity error",
431+
"System Hub Read Buffer ECC or parity error",
432+
"MPDMA TVF DVSEC Memory ECC or parity error",
433+
"MPDMA TVF MMIO Mailbox0 ECC or parity error",
434+
"MPDMA TVF MMIO Mailbox1 ECC or parity error",
435+
"MPDMA TVF Doorbell Memory ECC or parity error",
436+
"MPDMA TVF SDP Slave Memory 0 ECC or parity error",
437+
"MPDMA TVF SDP Slave Memory 1 ECC or parity error",
438+
"MPDMA TVF SDP Slave Memory 2 ECC or parity error",
439+
"MPDMA TVF SDP Master Memory 0 ECC or parity error",
440+
"MPDMA TVF SDP Master Memory 1 ECC or parity error",
441+
"MPDMA TVF SDP Master Memory 2 ECC or parity error",
442+
"MPDMA TVF SDP Master Memory 3 ECC or parity error",
443+
"MPDMA TVF SDP Master Memory 4 ECC or parity error",
444+
"MPDMA TVF SDP Master Memory 5 ECC or parity error",
445+
"MPDMA TVF SDP Master Memory 6 ECC or parity error",
446+
"MPDMA PTE Command FIFO ECC or parity error",
447+
"MPDMA PTE Hub Data FIFO ECC or parity error",
448+
"MPDMA PTE Internal Data FIFO ECC or parity error",
449+
"MPDMA PTE Command Memory DMA ECC or parity error",
450+
"MPDMA PTE Command Memory Internal ECC or parity error",
451+
"MPDMA PTE DMA Completion FIFO ECC or parity error",
452+
"MPDMA PTE Tablewalk Completion FIFO ECC or parity error",
453+
"MPDMA PTE Descriptor Completion FIFO ECC or parity error",
454+
"MPDMA PTE ReadOnly Completion FIFO ECC or parity error",
455+
"MPDMA PTE DirectWrite Completion FIFO ECC or parity error",
456+
"SDP Watchdog Timer expired",
457+
};
458+
402459
static const char * const smca_nbio_mce_desc[] = {
403460
"ECC or Parity error",
404461
"PCIE error",
@@ -448,7 +505,7 @@ static const char * const smca_xgmipcs_mce_desc[] = {
448505
"Rx Replay Timeout Error",
449506
"LinkSub Tx Timeout Error",
450507
"LinkSub Rx Timeout Error",
451-
"Rx CMD Pocket Error",
508+
"Rx CMD Packet Error",
452509
};
453510

454511
static const char * const smca_xgmiphy_mce_desc[] = {
@@ -458,11 +515,66 @@ static const char * const smca_xgmiphy_mce_desc[] = {
458515
"PHY APB error",
459516
};
460517

461-
static const char * const smca_waflphy_mce_desc[] = {
462-
"RAM ECC Error",
463-
"ARC instruction buffer parity error",
464-
"ARC data buffer parity error",
465-
"PHY APB error",
518+
static const char * const smca_nbif_mce_desc[] = {
519+
"Timeout error from GMI",
520+
"SRAM ECC error",
521+
"NTB Error Event",
522+
"SDP Parity error",
523+
};
524+
525+
static const char * const smca_sata_mce_desc[] = {
526+
"Parity error for port 0",
527+
"Parity error for port 1",
528+
"Parity error for port 2",
529+
"Parity error for port 3",
530+
"Parity error for port 4",
531+
"Parity error for port 5",
532+
"Parity error for port 6",
533+
"Parity error for port 7",
534+
};
535+
536+
static const char * const smca_usb_mce_desc[] = {
537+
"Parity error or ECC error for S0 RAM0",
538+
"Parity error or ECC error for S0 RAM1",
539+
"Parity error or ECC error for S0 RAM2",
540+
"Parity error for PHY RAM0",
541+
"Parity error for PHY RAM1",
542+
"AXI Slave Response error",
543+
};
544+
545+
static const char * const smca_gmipcs_mce_desc[] = {
546+
"Data Loss Error",
547+
"Training Error",
548+
"Replay Parity Error",
549+
"Rx Fifo Underflow Error",
550+
"Rx Fifo Overflow Error",
551+
"CRC Error",
552+
"BER Exceeded Error",
553+
"Tx Fifo Underflow Error",
554+
"Replay Buffer Parity Error",
555+
"Tx Overflow Error",
556+
"Replay Fifo Overflow Error",
557+
"Replay Fifo Underflow Error",
558+
"Elastic Fifo Overflow Error",
559+
"Deskew Error",
560+
"Offline Error",
561+
"Data Startup Limit Error",
562+
"FC Init Timeout Error",
563+
"Recovery Timeout Error",
564+
"Ready Serial Timeout Error",
565+
"Ready Serial Attempt Error",
566+
"Recovery Attempt Error",
567+
"Recovery Relock Attempt Error",
568+
"Deskew Abort Error",
569+
"Rx Buffer Error",
570+
"Rx LFDS Fifo Overflow Error",
571+
"Rx LFDS Fifo Underflow Error",
572+
"LinkSub Tx Timeout Error",
573+
"LinkSub Rx Timeout Error",
574+
"Rx CMD Packet Error",
575+
"LFDS Training Timeout Error",
576+
"LFDS FC Init Timeout Error",
577+
"Data Loss Error",
466578
};
467579

468580
struct smca_mce_desc {
@@ -490,12 +602,21 @@ static struct smca_mce_desc smca_mce_descs[] = {
490602
[SMCA_SMU] = { smca_smu_mce_desc, ARRAY_SIZE(smca_smu_mce_desc) },
491603
[SMCA_SMU_V2] = { smca_smu2_mce_desc, ARRAY_SIZE(smca_smu2_mce_desc) },
492604
[SMCA_MP5] = { smca_mp5_mce_desc, ARRAY_SIZE(smca_mp5_mce_desc) },
605+
[SMCA_MPDMA] = { smca_mpdma_mce_desc, ARRAY_SIZE(smca_mpdma_mce_desc) },
493606
[SMCA_NBIO] = { smca_nbio_mce_desc, ARRAY_SIZE(smca_nbio_mce_desc) },
494607
[SMCA_PCIE] = { smca_pcie_mce_desc, ARRAY_SIZE(smca_pcie_mce_desc) },
495608
[SMCA_PCIE_V2] = { smca_pcie2_mce_desc, ARRAY_SIZE(smca_pcie2_mce_desc) },
496609
[SMCA_XGMI_PCS] = { smca_xgmipcs_mce_desc, ARRAY_SIZE(smca_xgmipcs_mce_desc) },
610+
/* NBIF and SHUB have the same error descriptions, for now. */
611+
[SMCA_NBIF] = { smca_nbif_mce_desc, ARRAY_SIZE(smca_nbif_mce_desc) },
612+
[SMCA_SHUB] = { smca_nbif_mce_desc, ARRAY_SIZE(smca_nbif_mce_desc) },
613+
[SMCA_SATA] = { smca_sata_mce_desc, ARRAY_SIZE(smca_sata_mce_desc) },
614+
[SMCA_USB] = { smca_usb_mce_desc, ARRAY_SIZE(smca_usb_mce_desc) },
615+
[SMCA_GMI_PCS] = { smca_gmipcs_mce_desc, ARRAY_SIZE(smca_gmipcs_mce_desc) },
616+
/* All the PHY bank types have the same error descriptions, for now. */
497617
[SMCA_XGMI_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) },
498-
[SMCA_WAFL_PHY] = { smca_waflphy_mce_desc, ARRAY_SIZE(smca_waflphy_mce_desc) },
618+
[SMCA_WAFL_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) },
619+
[SMCA_GMI_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) },
499620
};
500621

501622
static bool f12h_mc0_mce(u16 ec, u8 xec)

0 commit comments

Comments
 (0)