Skip to content

Commit 3a32c5b

Browse files
committed
Merge branch 'for-6.17/cxl-events-updates' into cxl-for-next
Update Common Event Record to CXL r3.2 definition. Add additional validity check for event records. Add memory sparing event record tracing.
2 parents 49d6e65 + f10f46a commit 3a32c5b

File tree

4 files changed

+195
-7
lines changed

4 files changed

+195
-7
lines changed

drivers/cxl/core/mbox.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -899,6 +899,10 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
899899
trace_cxl_generic_event(cxlmd, type, uuid, &evt->generic);
900900
return;
901901
}
902+
if (event_type == CXL_CPER_EVENT_MEM_SPARING) {
903+
trace_cxl_memory_sparing(cxlmd, type, &evt->mem_sparing);
904+
return;
905+
}
902906

903907
if (trace_cxl_general_media_enabled() || trace_cxl_dram_enabled()) {
904908
u64 dpa, hpa = ULLONG_MAX, hpa_alias = ULLONG_MAX;
@@ -926,12 +930,30 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
926930
if (cxl_store_rec_gen_media((struct cxl_memdev *)cxlmd, evt))
927931
dev_dbg(&cxlmd->dev, "CXL store rec_gen_media failed\n");
928932

933+
if (evt->gen_media.media_hdr.descriptor &
934+
CXL_GMER_EVT_DESC_THRESHOLD_EVENT)
935+
WARN_ON_ONCE((evt->gen_media.media_hdr.type &
936+
CXL_GMER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE) &&
937+
!get_unaligned_le24(evt->gen_media.cme_count));
938+
else
939+
WARN_ON_ONCE(evt->gen_media.media_hdr.type &
940+
CXL_GMER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE);
941+
929942
trace_cxl_general_media(cxlmd, type, cxlr, hpa,
930943
hpa_alias, &evt->gen_media);
931944
} else if (event_type == CXL_CPER_EVENT_DRAM) {
932945
if (cxl_store_rec_dram((struct cxl_memdev *)cxlmd, evt))
933946
dev_dbg(&cxlmd->dev, "CXL store rec_dram failed\n");
934947

948+
if (evt->dram.media_hdr.descriptor &
949+
CXL_GMER_EVT_DESC_THRESHOLD_EVENT)
950+
WARN_ON_ONCE((evt->dram.media_hdr.type &
951+
CXL_DER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE) &&
952+
!get_unaligned_le24(evt->dram.cvme_count));
953+
else
954+
WARN_ON_ONCE(evt->dram.media_hdr.type &
955+
CXL_DER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE);
956+
935957
trace_cxl_dram(cxlmd, type, cxlr, hpa, hpa_alias,
936958
&evt->dram);
937959
}
@@ -952,6 +974,8 @@ static void __cxl_event_trace_record(const struct cxl_memdev *cxlmd,
952974
ev_type = CXL_CPER_EVENT_DRAM;
953975
else if (uuid_equal(uuid, &CXL_EVENT_MEM_MODULE_UUID))
954976
ev_type = CXL_CPER_EVENT_MEM_MODULE;
977+
else if (uuid_equal(uuid, &CXL_EVENT_MEM_SPARING_UUID))
978+
ev_type = CXL_CPER_EVENT_MEM_SPARING;
955979

956980
cxl_event_trace_record(cxlmd, type, ev_type, uuid, &record->event);
957981
}

drivers/cxl/core/trace.h

Lines changed: 127 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -214,12 +214,16 @@ TRACE_EVENT(cxl_overflow,
214214
#define CXL_EVENT_RECORD_FLAG_PERF_DEGRADED BIT(4)
215215
#define CXL_EVENT_RECORD_FLAG_HW_REPLACE BIT(5)
216216
#define CXL_EVENT_RECORD_FLAG_MAINT_OP_SUB_CLASS_VALID BIT(6)
217+
#define CXL_EVENT_RECORD_FLAG_LD_ID_VALID BIT(7)
218+
#define CXL_EVENT_RECORD_FLAG_HEAD_ID_VALID BIT(8)
217219
#define show_hdr_flags(flags) __print_flags(flags, " | ", \
218220
{ CXL_EVENT_RECORD_FLAG_PERMANENT, "PERMANENT_CONDITION" }, \
219221
{ CXL_EVENT_RECORD_FLAG_MAINT_NEEDED, "MAINTENANCE_NEEDED" }, \
220222
{ CXL_EVENT_RECORD_FLAG_PERF_DEGRADED, "PERFORMANCE_DEGRADED" }, \
221223
{ CXL_EVENT_RECORD_FLAG_HW_REPLACE, "HARDWARE_REPLACEMENT_NEEDED" }, \
222-
{ CXL_EVENT_RECORD_FLAG_MAINT_OP_SUB_CLASS_VALID, "MAINT_OP_SUB_CLASS_VALID" } \
224+
{ CXL_EVENT_RECORD_FLAG_MAINT_OP_SUB_CLASS_VALID, "MAINT_OP_SUB_CLASS_VALID" }, \
225+
{ CXL_EVENT_RECORD_FLAG_LD_ID_VALID, "LD_ID_VALID" }, \
226+
{ CXL_EVENT_RECORD_FLAG_HEAD_ID_VALID, "HEAD_ID_VALID" } \
223227
)
224228

225229
/*
@@ -247,7 +251,9 @@ TRACE_EVENT(cxl_overflow,
247251
__field(u64, hdr_timestamp) \
248252
__field(u8, hdr_length) \
249253
__field(u8, hdr_maint_op_class) \
250-
__field(u8, hdr_maint_op_sub_class)
254+
__field(u8, hdr_maint_op_sub_class) \
255+
__field(u16, hdr_ld_id) \
256+
__field(u8, hdr_head_id)
251257

252258
#define CXL_EVT_TP_fast_assign(cxlmd, l, hdr) \
253259
__assign_str(memdev); \
@@ -260,18 +266,22 @@ TRACE_EVENT(cxl_overflow,
260266
__entry->hdr_related_handle = le16_to_cpu((hdr).related_handle); \
261267
__entry->hdr_timestamp = le64_to_cpu((hdr).timestamp); \
262268
__entry->hdr_maint_op_class = (hdr).maint_op_class; \
263-
__entry->hdr_maint_op_sub_class = (hdr).maint_op_sub_class
269+
__entry->hdr_maint_op_sub_class = (hdr).maint_op_sub_class; \
270+
__entry->hdr_ld_id = le16_to_cpu((hdr).ld_id); \
271+
__entry->hdr_head_id = (hdr).head_id
264272

265273
#define CXL_EVT_TP_printk(fmt, ...) \
266274
TP_printk("memdev=%s host=%s serial=%lld log=%s : time=%llu uuid=%pUb " \
267275
"len=%d flags='%s' handle=%x related_handle=%x " \
268-
"maint_op_class=%u maint_op_sub_class=%u : " fmt, \
276+
"maint_op_class=%u maint_op_sub_class=%u " \
277+
"ld_id=%x head_id=%x : " fmt, \
269278
__get_str(memdev), __get_str(host), __entry->serial, \
270279
cxl_event_log_type_str(__entry->log), \
271280
__entry->hdr_timestamp, &__entry->hdr_uuid, __entry->hdr_length,\
272281
show_hdr_flags(__entry->hdr_flags), __entry->hdr_handle, \
273282
__entry->hdr_related_handle, __entry->hdr_maint_op_class, \
274283
__entry->hdr_maint_op_sub_class, \
284+
__entry->hdr_ld_id, __entry->hdr_head_id, \
275285
##__VA_ARGS__)
276286

277287
TRACE_EVENT(cxl_generic_event,
@@ -496,7 +506,10 @@ TRACE_EVENT(cxl_general_media,
496506
uuid_copy(&__entry->region_uuid, &uuid_null);
497507
}
498508
__entry->cme_threshold_ev_flags = rec->cme_threshold_ev_flags;
499-
__entry->cme_count = get_unaligned_le24(rec->cme_count);
509+
if (rec->media_hdr.descriptor & CXL_GMER_EVT_DESC_THRESHOLD_EVENT)
510+
__entry->cme_count = get_unaligned_le24(rec->cme_count);
511+
else
512+
__entry->cme_count = 0;
500513
),
501514

502515
CXL_EVT_TP_printk("dpa=%llx dpa_flags='%s' " \
@@ -648,7 +661,10 @@ TRACE_EVENT(cxl_dram,
648661
CXL_EVENT_GEN_MED_COMP_ID_SIZE);
649662
__entry->sub_channel = rec->sub_channel;
650663
__entry->cme_threshold_ev_flags = rec->cme_threshold_ev_flags;
651-
__entry->cvme_count = get_unaligned_le24(rec->cvme_count);
664+
if (rec->media_hdr.descriptor & CXL_GMER_EVT_DESC_THRESHOLD_EVENT)
665+
__entry->cvme_count = get_unaligned_le24(rec->cvme_count);
666+
else
667+
__entry->cvme_count = 0;
652668
),
653669

654670
CXL_EVT_TP_printk("dpa=%llx dpa_flags='%s' descriptor='%s' type='%s' sub_type='%s' " \
@@ -871,6 +887,111 @@ TRACE_EVENT(cxl_memory_module,
871887
)
872888
);
873889

890+
/*
891+
* Memory Sparing Event Record - MSER
892+
*
893+
* CXL rev 3.2 section 8.2.10.2.1.4; Table 8-60
894+
*/
895+
#define CXL_MSER_QUERY_RESOURCE_FLAG BIT(0)
896+
#define CXL_MSER_HARD_SPARING_FLAG BIT(1)
897+
#define CXL_MSER_DEV_INITED_FLAG BIT(2)
898+
#define show_mem_sparing_flags(flags) __print_flags(flags, "|", \
899+
{ CXL_MSER_QUERY_RESOURCE_FLAG, "Query Resources" }, \
900+
{ CXL_MSER_HARD_SPARING_FLAG, "Hard Sparing" }, \
901+
{ CXL_MSER_DEV_INITED_FLAG, "Device Initiated Sparing" } \
902+
)
903+
904+
#define CXL_MSER_VALID_CHANNEL BIT(0)
905+
#define CXL_MSER_VALID_RANK BIT(1)
906+
#define CXL_MSER_VALID_NIBBLE BIT(2)
907+
#define CXL_MSER_VALID_BANK_GROUP BIT(3)
908+
#define CXL_MSER_VALID_BANK BIT(4)
909+
#define CXL_MSER_VALID_ROW BIT(5)
910+
#define CXL_MSER_VALID_COLUMN BIT(6)
911+
#define CXL_MSER_VALID_COMPONENT_ID BIT(7)
912+
#define CXL_MSER_VALID_COMPONENT_ID_FORMAT BIT(8)
913+
#define CXL_MSER_VALID_SUB_CHANNEL BIT(9)
914+
#define show_mem_sparing_valid_flags(flags) __print_flags(flags, "|", \
915+
{ CXL_MSER_VALID_CHANNEL, "CHANNEL" }, \
916+
{ CXL_MSER_VALID_RANK, "RANK" }, \
917+
{ CXL_MSER_VALID_NIBBLE, "NIBBLE" }, \
918+
{ CXL_MSER_VALID_BANK_GROUP, "BANK GROUP" }, \
919+
{ CXL_MSER_VALID_BANK, "BANK" }, \
920+
{ CXL_MSER_VALID_ROW, "ROW" }, \
921+
{ CXL_MSER_VALID_COLUMN, "COLUMN" }, \
922+
{ CXL_MSER_VALID_COMPONENT_ID, "COMPONENT ID" }, \
923+
{ CXL_MSER_VALID_COMPONENT_ID_FORMAT, "COMPONENT ID PLDM FORMAT" }, \
924+
{ CXL_MSER_VALID_SUB_CHANNEL, "SUB CHANNEL" } \
925+
)
926+
927+
TRACE_EVENT(cxl_memory_sparing,
928+
929+
TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log,
930+
struct cxl_event_mem_sparing *rec),
931+
932+
TP_ARGS(cxlmd, log, rec),
933+
934+
TP_STRUCT__entry(
935+
CXL_EVT_TP_entry
936+
937+
/* Memory Sparing Event */
938+
__field(u8, flags)
939+
__field(u8, result)
940+
__field(u16, validity_flags)
941+
__field(u16, res_avail)
942+
__field(u8, channel)
943+
__field(u8, rank)
944+
__field(u32, nibble_mask)
945+
__field(u8, bank_group)
946+
__field(u8, bank)
947+
__field(u32, row)
948+
__field(u16, column)
949+
__field(u8, sub_channel)
950+
__array(u8, comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE)
951+
),
952+
953+
TP_fast_assign(
954+
CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr);
955+
__entry->hdr_uuid = CXL_EVENT_MEM_SPARING_UUID;
956+
957+
/* Memory Sparing Event */
958+
__entry->flags = rec->flags;
959+
__entry->result = rec->result;
960+
__entry->validity_flags = le16_to_cpu(rec->validity_flags);
961+
__entry->res_avail = le16_to_cpu(rec->res_avail);
962+
__entry->channel = rec->channel;
963+
__entry->rank = rec->rank;
964+
__entry->nibble_mask = get_unaligned_le24(rec->nibble_mask);
965+
__entry->bank_group = rec->bank_group;
966+
__entry->bank = rec->bank;
967+
__entry->row = get_unaligned_le24(rec->row);
968+
__entry->column = le16_to_cpu(rec->column);
969+
__entry->sub_channel = rec->sub_channel;
970+
memcpy(__entry->comp_id, &rec->component_id,
971+
CXL_EVENT_GEN_MED_COMP_ID_SIZE);
972+
),
973+
974+
CXL_EVT_TP_printk("flags='%s' result=%u validity_flags='%s' " \
975+
"spare resource avail=%u channel=%u rank=%u " \
976+
"nibble_mask=%x bank_group=%u bank=%u " \
977+
"row=%u column=%u sub_channel=%u " \
978+
"comp_id=%s comp_id_pldm_valid_flags='%s' " \
979+
"pldm_entity_id=%s pldm_resource_id=%s",
980+
show_mem_sparing_flags(__entry->flags),
981+
__entry->result,
982+
show_mem_sparing_valid_flags(__entry->validity_flags),
983+
__entry->res_avail, __entry->channel, __entry->rank,
984+
__entry->nibble_mask, __entry->bank_group, __entry->bank,
985+
__entry->row, __entry->column, __entry->sub_channel,
986+
__print_hex(__entry->comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE),
987+
show_comp_id_pldm_flags(__entry->comp_id[0]),
988+
show_pldm_entity_id(__entry->validity_flags, CXL_MSER_VALID_COMPONENT_ID,
989+
CXL_MSER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id),
990+
show_pldm_resource_id(__entry->validity_flags, CXL_MSER_VALID_COMPONENT_ID,
991+
CXL_MSER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id)
992+
)
993+
);
994+
874995
#define show_poison_trace_type(type) \
875996
__print_symbolic(type, \
876997
{ CXL_POISON_TRACE_LIST, "List" }, \

drivers/cxl/cxlmem.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -633,6 +633,14 @@ struct cxl_mbox_identify {
633633
UUID_INIT(0xfe927475, 0xdd59, 0x4339, 0xa5, 0x86, 0x79, 0xba, 0xb1, \
634634
0x13, 0xb7, 0x74)
635635

636+
/*
637+
* Memory Sparing Event Record UUID
638+
* CXL rev 3.2 section 8.2.10.2.1.4; Table 8-60
639+
*/
640+
#define CXL_EVENT_MEM_SPARING_UUID \
641+
UUID_INIT(0xe71f3a40, 0x2d29, 0x4092, 0x8a, 0x39, 0x4d, 0x1c, 0x96, \
642+
0x6c, 0x7c, 0x65)
643+
636644
/*
637645
* Get Event Records output payload
638646
* CXL rev 3.0 section 8.2.9.2.2; Table 8-50

include/cxl/event.h

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@ struct cxl_event_record_hdr {
1919
__le64 timestamp;
2020
u8 maint_op_class;
2121
u8 maint_op_sub_class;
22-
u8 reserved[14];
22+
__le16 ld_id;
23+
u8 head_id;
24+
u8 reserved[11];
2325
} __packed;
2426

2527
struct cxl_event_media_hdr {
@@ -108,11 +110,43 @@ struct cxl_event_mem_module {
108110
u8 reserved[0x2a];
109111
} __packed;
110112

113+
/*
114+
* Memory Sparing Event Record - MSER
115+
* CXL rev 3.2 section 8.2.10.2.1.4; Table 8-60
116+
*/
117+
struct cxl_event_mem_sparing {
118+
struct cxl_event_record_hdr hdr;
119+
/*
120+
* The maintenance operation class and maintenance operation subclass
121+
* fields defined in the Memory Sparing Event Record duplicate those in
122+
* the common event record. They are therefore defined as reserved here
123+
* and are to be removed once the spec is corrected.
124+
*/
125+
u8 rsv1;
126+
u8 rsv2;
127+
u8 flags;
128+
u8 result;
129+
__le16 validity_flags;
130+
u8 reserved1[6];
131+
__le16 res_avail;
132+
u8 channel;
133+
u8 rank;
134+
u8 nibble_mask[3];
135+
u8 bank_group;
136+
u8 bank;
137+
u8 row[3];
138+
__le16 column;
139+
u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE];
140+
u8 sub_channel;
141+
u8 reserved2[0x25];
142+
} __packed;
143+
111144
union cxl_event {
112145
struct cxl_event_generic generic;
113146
struct cxl_event_gen_media gen_media;
114147
struct cxl_event_dram dram;
115148
struct cxl_event_mem_module mem_module;
149+
struct cxl_event_mem_sparing mem_sparing;
116150
/* dram & gen_media event header */
117151
struct cxl_event_media_hdr media_hdr;
118152
} __packed;
@@ -131,6 +165,7 @@ enum cxl_event_type {
131165
CXL_CPER_EVENT_GEN_MEDIA,
132166
CXL_CPER_EVENT_DRAM,
133167
CXL_CPER_EVENT_MEM_MODULE,
168+
CXL_CPER_EVENT_MEM_SPARING,
134169
};
135170

136171
#define CPER_CXL_DEVICE_ID_VALID BIT(0)

0 commit comments

Comments
 (0)