Skip to content

Commit 485d0a6

Browse files
shijujose4mchehab
authored and committed
rasdaemon: Add support for the CXL memory sparing events
Add support for logging the CXL memory sparing events. Recording of CXL memory sparing events is not enabled because the sparing event record is used to start the CXL memory sparing operation. Signed-off-by: Shiju Jose <[email protected]> Signed-off-by: Mauro Carvalho Chehab <[email protected]>
1 parent 7bbc112 commit 485d0a6

File tree

5 files changed

+188
-1
lines changed

5 files changed

+188
-1
lines changed

ras-cxl-handler.c

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1518,3 +1518,157 @@ int ras_cxl_memory_module_event_handler(struct trace_seq *s,
15181518

15191519
return 0;
15201520
}
1521+
1522+
/*
1523+
* Memory Sparing Event Record - MSER
1524+
*
1525+
* CXL rev 3.2 section 8.2.10.2.1.4; Table 8-60
1526+
*/
1527+
#define CXL_MSER_VALID_CHANNEL BIT(0)
1528+
#define CXL_MSER_VALID_RANK BIT(1)
1529+
#define CXL_MSER_VALID_NIBBLE BIT(2)
1530+
#define CXL_MSER_VALID_BANK_GROUP BIT(3)
1531+
#define CXL_MSER_VALID_BANK BIT(4)
1532+
#define CXL_MSER_VALID_ROW BIT(5)
1533+
#define CXL_MSER_VALID_COLUMN BIT(6)
1534+
#define CXL_MSER_VALID_COMPONENT_ID BIT(7)
1535+
#define CXL_MSER_VALID_COMPONENT_ID_FORMAT BIT(8)
1536+
#define CXL_MSER_VALID_SUB_CHANNEL BIT(9)
1537+
1538+
#define CXL_MSER_QUERY_RES_FLAG BIT(0)
1539+
#define CXL_MSER_HARD_SPARING_FLAG BIT(1)
1540+
#define CXL_MSER_DEV_INITIATED_FLAG BIT(2)
1541+
1542+
static const struct cxl_event_flags cxl_mser_flags[] = {
1543+
{ .bit = CXL_MSER_QUERY_RES_FLAG, .flag = "QUERY_RESOURCES" },
1544+
{ .bit = CXL_MSER_HARD_SPARING_FLAG, .flag = "HARD_SPARING" },
1545+
{ .bit = CXL_MSER_DEV_INITIATED_FLAG, .flag = "DEVICE_INITIATED" },
1546+
};
1547+
1548+
int ras_cxl_memory_sparing_event_handler(struct trace_seq *s,
1549+
struct tep_record *record,
1550+
struct tep_event *event, void *context)
1551+
{
1552+
int len, i, rc;
1553+
unsigned long long val;
1554+
struct ras_cxl_memory_sparing_event ev;
1555+
1556+
memset(&ev, 0, sizeof(ev));
1557+
if (handle_ras_cxl_common_hdr(s, record, event, context, &ev.hdr) < 0)
1558+
return -1;
1559+
1560+
if (tep_get_field_val(s, event, "flags", record, &val, 1) < 0)
1561+
return -1;
1562+
ev.flags = val;
1563+
if (trace_seq_printf(s, "flags:0x%x ", ev.flags) <= 0)
1564+
return -1;
1565+
if (decode_cxl_event_flags(s, ev.flags, cxl_mser_flags,
1566+
ARRAY_SIZE(cxl_mser_flags)) < 0)
1567+
return -1;
1568+
1569+
if (tep_get_field_val(s, event, "result", record, &val, 1) < 0)
1570+
return -1;
1571+
ev.result = val;
1572+
if (trace_seq_printf(s, "result:0x%x ", ev.result) <= 0)
1573+
return -1;
1574+
1575+
if (tep_get_field_val(s, event, "validity_flags", record, &val, 1) < 0)
1576+
return -1;
1577+
ev.validity_flags = val;
1578+
1579+
if (tep_get_field_val(s, event, "res_avail", record, &val, 1) < 0)
1580+
return -1;
1581+
ev.res_avail = val;
1582+
if (trace_seq_printf(s, "spare resources available:%u ", ev.res_avail) <= 0)
1583+
return -1;
1584+
1585+
if (ev.validity_flags & CXL_MSER_VALID_CHANNEL) {
1586+
if (tep_get_field_val(s, event, "channel", record, &val, 1) < 0)
1587+
return -1;
1588+
ev.channel = val;
1589+
if (trace_seq_printf(s, "channel:%u ", ev.channel) <= 0)
1590+
return -1;
1591+
}
1592+
1593+
if (ev.validity_flags & CXL_MSER_VALID_SUB_CHANNEL) {
1594+
if (tep_get_field_val(s, event, "sub_channel", record, &val, 1) < 0)
1595+
return -1;
1596+
ev.sub_channel = val;
1597+
if (trace_seq_printf(s, "sub_channel:%u ", ev.sub_channel) <= 0)
1598+
return -1;
1599+
}
1600+
1601+
if (ev.validity_flags & CXL_MSER_VALID_RANK) {
1602+
if (tep_get_field_val(s, event, "rank", record, &val, 1) < 0)
1603+
return -1;
1604+
ev.rank = val;
1605+
if (trace_seq_printf(s, "rank:%u ", ev.rank) <= 0)
1606+
return -1;
1607+
}
1608+
1609+
if (ev.validity_flags & CXL_MSER_VALID_NIBBLE) {
1610+
if (tep_get_field_val(s, event, "nibble_mask", record, &val, 1) < 0)
1611+
return -1;
1612+
ev.nibble_mask = val;
1613+
if (trace_seq_printf(s, "nibble_mask:%u ", ev.nibble_mask) <= 0)
1614+
return -1;
1615+
}
1616+
1617+
if (ev.validity_flags & CXL_MSER_VALID_BANK_GROUP) {
1618+
if (tep_get_field_val(s, event, "bank_group", record, &val, 1) < 0)
1619+
return -1;
1620+
ev.bank_group = val;
1621+
if (trace_seq_printf(s, "bank_group:%u ", ev.bank_group) <= 0)
1622+
return -1;
1623+
}
1624+
1625+
if (ev.validity_flags & CXL_MSER_VALID_BANK) {
1626+
if (tep_get_field_val(s, event, "bank", record, &val, 1) < 0)
1627+
return -1;
1628+
ev.bank = val;
1629+
if (trace_seq_printf(s, "bank:%u ", ev.bank) <= 0)
1630+
return -1;
1631+
}
1632+
1633+
if (ev.validity_flags & CXL_MSER_VALID_ROW) {
1634+
if (tep_get_field_val(s, event, "row", record, &val, 1) < 0)
1635+
return -1;
1636+
ev.row = val;
1637+
if (trace_seq_printf(s, "row:%u ", ev.row) <= 0)
1638+
return -1;
1639+
}
1640+
1641+
if (ev.validity_flags & CXL_MSER_VALID_COLUMN) {
1642+
if (tep_get_field_val(s, event, "column", record, &val, 1) < 0)
1643+
return -1;
1644+
ev.column = val;
1645+
if (trace_seq_printf(s, "column:%u ", ev.column) <= 0)
1646+
return -1;
1647+
}
1648+
1649+
if (ev.validity_flags & CXL_MSER_VALID_COMPONENT_ID) {
1650+
ev.comp_id = tep_get_field_raw(s, event, "comp_id", record, &len, 1);
1651+
if (!ev.comp_id)
1652+
return -1;
1653+
if (trace_seq_printf(s, "comp_id:") <= 0)
1654+
return -1;
1655+
for (i = 0; i < CXL_EVENT_GEN_MED_COMP_ID_SIZE; i++) {
1656+
if (trace_seq_printf(s, "%02x ", ev.comp_id[i]) <= 0)
1657+
break;
1658+
}
1659+
1660+
if (ev.validity_flags & CXL_MSER_VALID_COMPONENT_ID_FORMAT) {
1661+
if (trace_seq_printf(s, "comp_id_pldm_valid_flags:") <= 0)
1662+
return -1;
1663+
if (decode_cxl_event_flags(s, ev.comp_id[0], cxl_pldm_comp_id_flags,
1664+
ARRAY_SIZE(cxl_pldm_comp_id_flags)) < 0)
1665+
return -1;
1666+
1667+
rc = ras_cxl_print_component_id(s, ev.comp_id, ev.entity_id, ev.res_id);
1668+
if (rc)
1669+
return rc;
1670+
}
1671+
}
1672+
1673+
return 0;
1674+
}

ras-cxl-handler.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,4 +37,7 @@ int ras_cxl_dram_event_handler(struct trace_seq *s,
3737
int ras_cxl_memory_module_event_handler(struct trace_seq *s,
3838
struct tep_record *record,
3939
struct tep_event *event, void *context);
40+
int ras_cxl_memory_sparing_event_handler(struct trace_seq *s,
41+
struct tep_record *record,
42+
struct tep_event *event, void *context);
4043
#endif

ras-events.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,7 @@ int toggle_ras_mc_event(int enable)
318318
rc |= __toggle_ras_mc_event(ras, "cxl", "cxl_general_media", enable);
319319
rc |= __toggle_ras_mc_event(ras, "cxl", "cxl_dram", enable);
320320
rc |= __toggle_ras_mc_event(ras, "cxl", "cxl_memory_module", enable);
321+
rc |= __toggle_ras_mc_event(ras, "cxl", "cxl_memory_sparing", enable);
321322
#endif
322323

323324
#ifdef HAVE_SIGNAL
@@ -1215,6 +1216,14 @@ int handle_ras_events(int record_events, int enable_ipmitool)
12151216
else if (rc != EVENT_DISABLED)
12161217
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
12171218
"cxl", "memory_module");
1219+
1220+
rc = add_event_handler(ras, pevent, page_size, "cxl", "cxl_memory_sparing",
1221+
ras_cxl_memory_sparing_event_handler, NULL, CXL_MEMORY_SPARING_EVENT);
1222+
if (!rc)
1223+
num_events++;
1224+
else if (rc != EVENT_DISABLED)
1225+
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
1226+
"cxl", "cxl_memory_sparing");
12181227
#endif
12191228

12201229
#ifdef HAVE_SIGNAL

ras-events.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ enum {
2727
DEVLINK_EVENT,
2828
DISKERROR_EVENT,
2929
MF_EVENT,
30+
SIGNAL_EVENT,
3031
CXL_POISON_EVENT,
3132
CXL_AER_UE_EVENT,
3233
CXL_AER_CE_EVENT,
@@ -35,7 +36,7 @@ enum {
3536
CXL_GENERAL_MEDIA_EVENT,
3637
CXL_DRAM_EVENT,
3738
CXL_MEMORY_MODULE_EVENT,
38-
SIGNAL_EVENT,
39+
CXL_MEMORY_SPARING_EVENT,
3940
NR_EVENTS
4041
};
4142

ras-record.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,25 @@ struct ras_signal_event {
279279
int result;
280280
};
281281

282+
struct ras_cxl_memory_sparing_event {
283+
struct ras_cxl_event_common_hdr hdr;
284+
uint8_t flags;
285+
uint8_t result;
286+
uint16_t validity_flags;
287+
uint16_t res_avail;
288+
uint8_t channel;
289+
uint8_t rank;
290+
uint32_t nibble_mask;
291+
uint8_t bank_group;
292+
uint8_t bank;
293+
uint32_t row;
294+
uint16_t column;
295+
uint8_t sub_channel;
296+
uint8_t *comp_id;
297+
uint8_t entity_id[CXL_PLDM_ENTITY_ID_LEN];
298+
uint8_t res_id[CXL_PLDM_RES_ID_LEN];
299+
};
300+
282301
struct ras_mc_event;
283302
struct ras_aer_event;
284303
struct ras_extlog_event;
@@ -297,6 +316,7 @@ struct ras_cxl_general_media_event;
297316
struct ras_cxl_dram_event;
298317
struct ras_cxl_memory_module_event;
299318
struct ras_signal_event;
319+
struct ras_cxl_memory_sparing_event;
300320

301321
#ifdef HAVE_SQLITE3
302322

0 commit comments

Comments
 (0)