Skip to content

Commit 95b4947

Browse files
weiny2 authored and djbw committed
cxl/mem: Trace Memory Module Event Record
CXL rev 3.0 section 8.2.9.2.1.3 defines the Memory Module Event Record. Determine if the event read is a memory module record and if so trace the record. Reviewed-by: Dan Williams <[email protected]> Reviewed-by: Jonathan Cameron <[email protected]> Signed-off-by: Ira Weiny <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Dan Williams <[email protected]>
1 parent 2d6c1e6 commit 95b4947

File tree

3 files changed

+182
-0
lines changed

3 files changed

+182
-0
lines changed

drivers/cxl/core/mbox.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -734,6 +734,14 @@ static const uuid_t dram_event_uuid =
734734
UUID_INIT(0x601dcbb3, 0x9c06, 0x4eab,
735735
0xb8, 0xaf, 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24);
736736

737+
/*
738+
* Memory Module Event Record
739+
* CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45
740+
*/
741+
/* Matched with uuid_equal() in cxl_event_trace_record() to route a record to trace_cxl_memory_module(). */
static const uuid_t mem_mod_event_uuid =
742+
UUID_INIT(0xfe927475, 0xdd59, 0x4339,
743+
0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74);
744+
737745
static void cxl_event_trace_record(const struct device *dev,
738746
enum cxl_event_log_type type,
739747
struct cxl_event_record_raw *record)
@@ -749,6 +757,11 @@ static void cxl_event_trace_record(const struct device *dev,
749757
struct cxl_event_dram *rec = (struct cxl_event_dram *)record;
750758

751759
trace_cxl_dram(dev, type, rec);
760+
} else if (uuid_equal(id, &mem_mod_event_uuid)) {
761+
struct cxl_event_mem_module *rec =
762+
(struct cxl_event_mem_module *)record;
763+
764+
trace_cxl_memory_module(dev, type, rec);
752765
} else {
753766
/* For unknown record types print just the header */
754767
trace_cxl_generic_event(dev, type, record);

drivers/cxl/core/trace.h

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -438,6 +438,149 @@ TRACE_EVENT(cxl_dram,
438438
)
439439
);
440440

441+
/*
442+
* Memory Module Event Record - MMER
443+
*
444+
* CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45
445+
*/
446+
/* Values of the event_type byte of struct cxl_event_mem_module. */
#define CXL_MMER_HEALTH_STATUS_CHANGE 0x00
447+
#define CXL_MMER_MEDIA_STATUS_CHANGE 0x01
448+
#define CXL_MMER_LIFE_USED_CHANGE 0x02
449+
#define CXL_MMER_TEMP_CHANGE 0x03
450+
#define CXL_MMER_DATA_PATH_ERROR 0x04
451+
#define CXL_MMER_LSA_ERROR 0x05
452+
/* Maps an event_type value to its name; used for the event_type='%s' field of the cxl_memory_module tracepoint. */
#define show_dev_evt_type(type) __print_symbolic(type, \
453+
{ CXL_MMER_HEALTH_STATUS_CHANGE, "Health Status Change" }, \
454+
{ CXL_MMER_MEDIA_STATUS_CHANGE, "Media Status Change" }, \
455+
{ CXL_MMER_LIFE_USED_CHANGE, "Life Used Change" }, \
456+
{ CXL_MMER_TEMP_CHANGE, "Temperature Change" }, \
457+
{ CXL_MMER_DATA_PATH_ERROR, "Data Path Error" }, \
458+
{ CXL_MMER_LSA_ERROR, "LSA Error" } \
459+
)
460+
461+
/*
462+
* Device Health Information - DHI
463+
*
464+
* CXL rev 3.0 section 8.2.9.8.3.1; Table 8-100
465+
*/
466+
/* Bit flags carried in the health_status byte of struct cxl_get_health_info. */
#define CXL_DHI_HS_MAINTENANCE_NEEDED BIT(0)
467+
#define CXL_DHI_HS_PERFORMANCE_DEGRADED BIT(1)
468+
#define CXL_DHI_HS_HW_REPLACEMENT_NEEDED BIT(2)
469+
/*
 * Renders health_status through __print_flags() with "|" as the delimiter;
 * used for the health_status='%s' field of the cxl_memory_module tracepoint.
 * NOTE(review): the string for CXL_DHI_HS_HW_REPLACEMENT_NEEDED omits the
 * "HW_" prefix of its constant; it is emitted at runtime, so left as-is.
 */
#define show_health_status_flags(flags) __print_flags(flags, "|", \
470+
{ CXL_DHI_HS_MAINTENANCE_NEEDED, "MAINTENANCE_NEEDED" }, \
471+
{ CXL_DHI_HS_PERFORMANCE_DEGRADED, "PERFORMANCE_DEGRADED" }, \
472+
{ CXL_DHI_HS_HW_REPLACEMENT_NEEDED, "REPLACEMENT_NEEDED" } \
473+
)
474+
475+
/*
 * Values of the media_status byte of struct cxl_get_health_info.
 * NOTE(review): the last three names read "WRITE_ALL_DATA_LOSS" while their
 * strings say "All Data Loss"; the "WRITE_" looks like a naming slip, but the
 * names are kept because other code may reference them.
 */
#define CXL_DHI_MS_NORMAL 0x00
476+
#define CXL_DHI_MS_NOT_READY 0x01
477+
#define CXL_DHI_MS_WRITE_PERSISTENCY_LOST 0x02
478+
#define CXL_DHI_MS_ALL_DATA_LOST 0x03
479+
#define CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_POWER_LOSS 0x04
480+
#define CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_SHUTDOWN 0x05
481+
#define CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_IMMINENT 0x06
482+
#define CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_POWER_LOSS 0x07
483+
#define CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_SHUTDOWN 0x08
484+
#define CXL_DHI_MS_WRITE_ALL_DATA_LOSS_IMMINENT 0x09
485+
/*
 * Maps a media_status value to its description; used for the
 * media_status='%s' field of the cxl_memory_module tracepoint.
 * NOTE(review): the strings vary in wording ("in the Event of" vs
 * "in Event of", "Loss" vs "loss"); they are runtime output and are left
 * untouched here.
 */
#define show_media_status(ms) __print_symbolic(ms, \
486+
{ CXL_DHI_MS_NORMAL, \
487+
"Normal" }, \
488+
{ CXL_DHI_MS_NOT_READY, \
489+
"Not Ready" }, \
490+
{ CXL_DHI_MS_WRITE_PERSISTENCY_LOST, \
491+
"Write Persistency Lost" }, \
492+
{ CXL_DHI_MS_ALL_DATA_LOST, \
493+
"All Data Lost" }, \
494+
{ CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_POWER_LOSS, \
495+
"Write Persistency Loss in the Event of Power Loss" }, \
496+
{ CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_SHUTDOWN, \
497+
"Write Persistency Loss in Event of Shutdown" }, \
498+
{ CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_IMMINENT, \
499+
"Write Persistency Loss Imminent" }, \
500+
{ CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_POWER_LOSS, \
501+
"All Data Loss in Event of Power Loss" }, \
502+
{ CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_SHUTDOWN, \
503+
"All Data loss in the Event of Shutdown" }, \
504+
{ CXL_DHI_MS_WRITE_ALL_DATA_LOSS_IMMINENT, \
505+
"All Data Loss Imminent" } \
506+
)
507+
508+
/* Values taken by the individual fields extracted from the add_status byte. */
#define CXL_DHI_AS_NORMAL 0x0
509+
#define CXL_DHI_AS_WARNING 0x1
510+
#define CXL_DHI_AS_CRITICAL 0x2
511+
/* Names a two-bit add_status field (life used, device temperature in the tracepoint). */
#define show_two_bit_status(as) __print_symbolic(as, \
512+
{ CXL_DHI_AS_NORMAL, "Normal" }, \
513+
{ CXL_DHI_AS_WARNING, "Warning" }, \
514+
{ CXL_DHI_AS_CRITICAL, "Critical" } \
515+
)
516+
/* Names a one-bit add_status field (the two corrected-error-count fields); only Normal/Warning apply. */
#define show_one_bit_status(as) __print_symbolic(as, \
517+
{ CXL_DHI_AS_NORMAL, "Normal" }, \
518+
{ CXL_DHI_AS_WARNING, "Warning" } \
519+
)
520+
521+
/*
 * Field extractors for the add_status byte of the Device Health Info:
 *   bits [1:0] life used status, bits [3:2] device temperature status,
 *   bit 4 corrected volatile error count status,
 *   bit 5 corrected persistent error count status.
 * Each yields the field shifted down to bit 0, ready for
 * show_two_bit_status() / show_one_bit_status().
 *
 * The argument is parenthesized so that a compound expression such as
 * (a | b) is masked as a whole; '&' binds tighter than '|', so without the
 * parentheses only the right-hand operand would be masked.
 */
#define CXL_DHI_AS_LIFE_USED(as)		((as) & 0x3)
#define CXL_DHI_AS_DEV_TEMP(as)			(((as) & 0xC) >> 2)
#define CXL_DHI_AS_COR_VOL_ERR_CNT(as)		(((as) & 0x10) >> 4)
#define CXL_DHI_AS_COR_PER_ERR_CNT(as)		(((as) & 0x20) >> 5)
525+
526+
/*
 * cxl_memory_module tracepoint.
 *
 * Invoked as trace_cxl_memory_module(dev, type, rec) from
 * cxl_event_trace_record() when the record matches mem_mod_event_uuid.
 * Captures the record's event type and its Device Health Info fields, and
 * prints them after the common event fields handled by the CXL_EVT_TP_*
 * helpers (defined elsewhere in this header).
 */
TRACE_EVENT(cxl_memory_module,
527+
528+
TP_PROTO(const struct device *dev, enum cxl_event_log_type log,
529+
struct cxl_event_mem_module *rec),
530+
531+
TP_ARGS(dev, log, rec),
532+
533+
TP_STRUCT__entry(
534+
CXL_EVT_TP_entry
535+
536+
/* Memory Module Event */
537+
__field(u8, event_type)
538+
539+
/* Device Health Info */
540+
__field(u8, health_status)
541+
__field(u8, media_status)
542+
__field(u8, life_used)
543+
__field(u32, dirty_shutdown_cnt)
544+
__field(u32, cor_vol_err_cnt)
545+
__field(u32, cor_per_err_cnt)
546+
__field(s16, device_temp)
547+
/* Stored raw; split into its fields by the CXL_DHI_AS_*() macros at print time. */
__field(u8, add_status)
548+
),
549+
550+
TP_fast_assign(
551+
CXL_EVT_TP_fast_assign(dev, log, rec->hdr);
552+
553+
/* Memory Module Event */
554+
__entry->event_type = rec->event_type;
555+
556+
/* Device Health Info */
557+
__entry->health_status = rec->info.health_status;
558+
__entry->media_status = rec->info.media_status;
559+
__entry->life_used = rec->info.life_used;
560+
/* The counters and temperature are byte arrays in the record; read them as unaligned little-endian values. */
__entry->dirty_shutdown_cnt = get_unaligned_le32(rec->info.dirty_shutdown_cnt);
561+
__entry->cor_vol_err_cnt = get_unaligned_le32(rec->info.cor_vol_err_cnt);
562+
__entry->cor_per_err_cnt = get_unaligned_le32(rec->info.cor_per_err_cnt);
563+
/* Assigned to an s16 field and printed with %d. */
__entry->device_temp = get_unaligned_le16(rec->info.device_temp);
564+
__entry->add_status = rec->info.add_status;
565+
),
566+
567+
/* The as_* fields are the decoded add_status sub-fields; the remaining values are printed numerically. */
CXL_EVT_TP_printk("event_type='%s' health_status='%s' media_status='%s' " \
568+
"as_life_used=%s as_dev_temp=%s as_cor_vol_err_cnt=%s " \
569+
"as_cor_per_err_cnt=%s life_used=%u device_temp=%d " \
570+
"dirty_shutdown_cnt=%u cor_vol_err_cnt=%u cor_per_err_cnt=%u",
571+
show_dev_evt_type(__entry->event_type),
572+
show_health_status_flags(__entry->health_status),
573+
show_media_status(__entry->media_status),
574+
show_two_bit_status(CXL_DHI_AS_LIFE_USED(__entry->add_status)),
575+
show_two_bit_status(CXL_DHI_AS_DEV_TEMP(__entry->add_status)),
576+
show_one_bit_status(CXL_DHI_AS_COR_VOL_ERR_CNT(__entry->add_status)),
577+
show_one_bit_status(CXL_DHI_AS_COR_PER_ERR_CNT(__entry->add_status)),
578+
__entry->life_used, __entry->device_temp,
579+
__entry->dirty_shutdown_cnt, __entry->cor_vol_err_cnt,
580+
__entry->cor_per_err_cnt
581+
)
582+
);
583+
441584
#endif /* _CXL_EVENTS_H */
442585

443586
#define TRACE_INCLUDE_FILE trace

drivers/cxl/cxlmem.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,32 @@ struct cxl_event_dram {
486486
u8 reserved[0x17];
487487
} __packed;
488488

489+
/*
490+
* Get Health Info Record
491+
* CXL rev 3.0 section 8.2.9.8.3.1; Table 8-100
492+
*/
493+
/*
 * Packed layout of the health info payload embedded in
 * struct cxl_event_mem_module. Multi-byte members are declared as byte
 * arrays; the cxl_memory_module tracepoint reads them with
 * get_unaligned_le16()/get_unaligned_le32().
 */
struct cxl_get_health_info {
494+
u8 health_status; /* traced via show_health_status_flags() (CXL_DHI_HS_* bits) */
495+
u8 media_status; /* traced via show_media_status() (CXL_DHI_MS_* values) */
496+
u8 add_status; /* split into fields by the CXL_DHI_AS_*() macros */
497+
u8 life_used; /* traced as an unsigned number */
498+
u8 device_temp[2]; /* 16-bit little-endian; traced as a signed value */
499+
u8 dirty_shutdown_cnt[4]; /* 32-bit little-endian */
500+
u8 cor_vol_err_cnt[4]; /* 32-bit little-endian */
501+
u8 cor_per_err_cnt[4]; /* 32-bit little-endian */
502+
} __packed;
503+
504+
/*
505+
* Memory Module Event Record
506+
* CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45
507+
*/
508+
/*
 * Packed view of a memory module event record. cxl_event_trace_record()
 * casts a struct cxl_event_record_raw pointer to this type when the record
 * matches mem_mod_event_uuid.
 */
struct cxl_event_mem_module {
509+
struct cxl_event_record_hdr hdr; /* common header, consumed by CXL_EVT_TP_fast_assign() */
510+
u8 event_type; /* one of the CXL_MMER_* values named by show_dev_evt_type() */
511+
struct cxl_get_health_info info; /* device health info captured with the event */
512+
u8 reserved[0x3d]; /* presumably pads to the fixed event record size - TODO confirm against the spec */
513+
} __packed;
514+
489515
struct cxl_mbox_get_partition_info {
490516
__le64 active_volatile_cap;
491517
__le64 active_persistent_cap;

0 commit comments

Comments
 (0)