Skip to content

Commit 6ebe28f

Browse files
weiny2 authored and djbw committed
cxl/mem: Read, trace, and clear events on driver load
CXL devices have multiple event logs which can be queried for CXL event records. Devices are required to support the storage of at least one event record in each event log type. Devices track event log overflow by incrementing a counter and tracking the time of the first and last overflow event seen. Software queries events via the Get Event Records mailbox command (CXL rev 3.0 section 8.2.9.2.2) and clears events via the Clear Event Records mailbox command (CXL rev 3.0 section 8.2.9.2.3). If the result of negotiating CXL Error Reporting Control is OS control, read and clear all event logs on driver load. Ensure a clean slate of events by reading and clearing the events on driver load. The status register is not used because a device may continue to trigger events and the only requirement is to empty the log at least once. This allows for the required transition from empty to non-empty for interrupt generation. Handling of interrupts is in a follow-on patch. The device can return up to 1MB worth of event records per query. Allocate a shared large buffer to handle the max number of records based on the mailbox payload size. This patch traces a raw event record and leaves specific event record type tracing to subsequent patches. Macros are created to aid in tracing the common CXL Event header fields. Each record is cleared explicitly. A "clear all" bit is specified but is only valid when the log overflows. Reviewed-by: Jonathan Cameron <[email protected]> Signed-off-by: Ira Weiny <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Dan Williams <[email protected]>
1 parent 172738b commit 6ebe28f

File tree

5 files changed

+392
-1
lines changed

5 files changed

+392
-1
lines changed

drivers/cxl/core/mbox.c

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <cxl.h>
99

1010
#include "core.h"
11+
#include "trace.h"
1112

1213
static bool cxl_raw_allow_all;
1314

@@ -717,6 +718,152 @@ int cxl_enumerate_cmds(struct cxl_dev_state *cxlds)
717718
}
718719
EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL);
719720

721+
static int cxl_clear_event_record(struct cxl_dev_state *cxlds,
722+
enum cxl_event_log_type log,
723+
struct cxl_get_event_payload *get_pl)
724+
{
725+
struct cxl_mbox_clear_event_payload *payload;
726+
u16 total = le16_to_cpu(get_pl->record_count);
727+
u8 max_handles = CXL_CLEAR_EVENT_MAX_HANDLES;
728+
size_t pl_size = struct_size(payload, handles, max_handles);
729+
struct cxl_mbox_cmd mbox_cmd;
730+
u16 cnt;
731+
int rc = 0;
732+
int i;
733+
734+
/* Payload size may limit the max handles */
735+
if (pl_size > cxlds->payload_size) {
736+
max_handles = (cxlds->payload_size - sizeof(*payload)) /
737+
sizeof(__le16);
738+
pl_size = struct_size(payload, handles, max_handles);
739+
}
740+
741+
payload = kvzalloc(pl_size, GFP_KERNEL);
742+
if (!payload)
743+
return -ENOMEM;
744+
745+
*payload = (struct cxl_mbox_clear_event_payload) {
746+
.event_log = log,
747+
};
748+
749+
mbox_cmd = (struct cxl_mbox_cmd) {
750+
.opcode = CXL_MBOX_OP_CLEAR_EVENT_RECORD,
751+
.payload_in = payload,
752+
.size_in = pl_size,
753+
};
754+
755+
/*
756+
* Clear Event Records uses u8 for the handle cnt while Get Event
757+
* Record can return up to 0xffff records.
758+
*/
759+
i = 0;
760+
for (cnt = 0; cnt < total; cnt++) {
761+
payload->handles[i++] = get_pl->records[cnt].hdr.handle;
762+
dev_dbg(cxlds->dev, "Event log '%d': Clearing %u\n",
763+
log, le16_to_cpu(payload->handles[i]));
764+
765+
if (i == max_handles) {
766+
payload->nr_recs = i;
767+
rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
768+
if (rc)
769+
goto free_pl;
770+
i = 0;
771+
}
772+
}
773+
774+
/* Clear what is left if any */
775+
if (i) {
776+
payload->nr_recs = i;
777+
mbox_cmd.size_in = struct_size(payload, handles, i);
778+
rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
779+
if (rc)
780+
goto free_pl;
781+
}
782+
783+
free_pl:
784+
kvfree(payload);
785+
return rc;
786+
}
787+
788+
static void cxl_mem_get_records_log(struct cxl_dev_state *cxlds,
789+
enum cxl_event_log_type type)
790+
{
791+
struct cxl_get_event_payload *payload;
792+
struct cxl_mbox_cmd mbox_cmd;
793+
u8 log_type = type;
794+
u16 nr_rec;
795+
796+
mutex_lock(&cxlds->event.log_lock);
797+
payload = cxlds->event.buf;
798+
799+
mbox_cmd = (struct cxl_mbox_cmd) {
800+
.opcode = CXL_MBOX_OP_GET_EVENT_RECORD,
801+
.payload_in = &log_type,
802+
.size_in = sizeof(log_type),
803+
.payload_out = payload,
804+
.size_out = cxlds->payload_size,
805+
.min_out = struct_size(payload, records, 0),
806+
};
807+
808+
do {
809+
int rc, i;
810+
811+
rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
812+
if (rc) {
813+
dev_err_ratelimited(cxlds->dev,
814+
"Event log '%d': Failed to query event records : %d",
815+
type, rc);
816+
break;
817+
}
818+
819+
nr_rec = le16_to_cpu(payload->record_count);
820+
if (!nr_rec)
821+
break;
822+
823+
for (i = 0; i < nr_rec; i++)
824+
trace_cxl_generic_event(cxlds->dev, type,
825+
&payload->records[i]);
826+
827+
if (payload->flags & CXL_GET_EVENT_FLAG_OVERFLOW)
828+
trace_cxl_overflow(cxlds->dev, type, payload);
829+
830+
rc = cxl_clear_event_record(cxlds, type, payload);
831+
if (rc) {
832+
dev_err_ratelimited(cxlds->dev,
833+
"Event log '%d': Failed to clear events : %d",
834+
type, rc);
835+
break;
836+
}
837+
} while (nr_rec);
838+
839+
mutex_unlock(&cxlds->event.log_lock);
840+
}
841+
842+
/**
843+
* cxl_mem_get_event_records - Get Event Records from the device
844+
* @cxlds: The device data for the operation
845+
*
846+
* Retrieve all event records available on the device, report them as trace
847+
* events, and clear them.
848+
*
849+
* See CXL rev 3.0 @8.2.9.2.2 Get Event Records
850+
* See CXL rev 3.0 @8.2.9.2.3 Clear Event Records
851+
*/
852+
void cxl_mem_get_event_records(struct cxl_dev_state *cxlds, u32 status)
853+
{
854+
dev_dbg(cxlds->dev, "Reading event logs: %x\n", status);
855+
856+
if (status & CXLDEV_EVENT_STATUS_FATAL)
857+
cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_FATAL);
858+
if (status & CXLDEV_EVENT_STATUS_FAIL)
859+
cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_FAIL);
860+
if (status & CXLDEV_EVENT_STATUS_WARN)
861+
cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_WARN);
862+
if (status & CXLDEV_EVENT_STATUS_INFO)
863+
cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_INFO);
864+
}
865+
EXPORT_SYMBOL_NS_GPL(cxl_mem_get_event_records, CXL);
866+
720867
/**
721868
* cxl_mem_get_partition_info - Get partition info
722869
* @cxlds: The device data for the operation
@@ -868,6 +1015,7 @@ struct cxl_dev_state *cxl_dev_state_create(struct device *dev)
8681015
}
8691016

8701017
mutex_init(&cxlds->mbox_mutex);
1018+
mutex_init(&cxlds->event.log_lock);
8711019
cxlds->dev = dev;
8721020

8731021
return cxlds;

drivers/cxl/core/trace.h

Lines changed: 120 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,11 @@
66
#if !defined(_CXL_EVENTS_H) || defined(TRACE_HEADER_MULTI_READ)
77
#define _CXL_EVENTS_H
88

9-
#include <cxl.h>
109
#include <linux/tracepoint.h>
10+
#include <asm-generic/unaligned.h>
11+
12+
#include <cxl.h>
13+
#include <cxlmem.h>
1114

1215
#define CXL_RAS_UC_CACHE_DATA_PARITY BIT(0)
1316
#define CXL_RAS_UC_CACHE_ADDR_PARITY BIT(1)
@@ -103,6 +106,122 @@ TRACE_EVENT(cxl_aer_correctable_error,
103106
)
104107
);
105108

109+
/*
 * Translate a CXL_EVENT_TYPE_* log identifier into a human-readable name
 * for use in tracepoint output.
 */
#define cxl_event_log_type_str(type) \
110+
__print_symbolic(type, \
111+
{ CXL_EVENT_TYPE_INFO, "Informational" }, \
112+
{ CXL_EVENT_TYPE_WARN, "Warning" }, \
113+
{ CXL_EVENT_TYPE_FAIL, "Failure" }, \
114+
{ CXL_EVENT_TYPE_FATAL, "Fatal" })
115+
116+
/*
 * cxl_overflow - report the overflow state of an event log
 *
 * Records the device name, the log type, the overflow error count, and the
 * first and last overflow timestamps taken from a Get Event Records payload.
 */
TRACE_EVENT(cxl_overflow,
117+
118+
TP_PROTO(const struct device *dev, enum cxl_event_log_type log,
119+
struct cxl_get_event_payload *payload),
120+
121+
TP_ARGS(dev, log, payload),
122+
123+
TP_STRUCT__entry(
124+
__string(dev_name, dev_name(dev))
125+
__field(int, log)
126+
__field(u64, first_ts)
127+
__field(u64, last_ts)
128+
__field(u16, count)
129+
),
130+
131+
TP_fast_assign(
132+
__assign_str(dev_name, dev_name(dev));
133+
__entry->log = log;
134+
/* Payload fields are little-endian; store them in CPU byte order */
__entry->count = le16_to_cpu(payload->overflow_err_count);
135+
__entry->first_ts = le64_to_cpu(payload->first_overflow_timestamp);
136+
__entry->last_ts = le64_to_cpu(payload->last_overflow_timestamp);
137+
),
138+
139+
TP_printk("%s: log=%s : %u records from %llu to %llu",
140+
__get_str(dev_name), cxl_event_log_type_str(__entry->log),
141+
__entry->count, __entry->first_ts, __entry->last_ts)
142+
143+
);
144+
145+
/*
 * Common Event Record Format
 * CXL 3.0 section 8.2.9.2.1; Table 8-42
 *
 * Bits of the event record header flags field that are decoded for tracing.
 */
149+
#define CXL_EVENT_RECORD_FLAG_PERMANENT BIT(2)
150+
#define CXL_EVENT_RECORD_FLAG_MAINT_NEEDED BIT(3)
151+
#define CXL_EVENT_RECORD_FLAG_PERF_DEGRADED BIT(4)
152+
#define CXL_EVENT_RECORD_FLAG_HW_REPLACE BIT(5)
153+
/* Print the header flags as a " | " separated list of the names below */
#define show_hdr_flags(flags) __print_flags(flags, " | ", \
154+
{ CXL_EVENT_RECORD_FLAG_PERMANENT, "PERMANENT_CONDITION" }, \
155+
{ CXL_EVENT_RECORD_FLAG_MAINT_NEEDED, "MAINTENANCE_NEEDED" }, \
156+
{ CXL_EVENT_RECORD_FLAG_PERF_DEGRADED, "PERFORMANCE_DEGRADED" }, \
157+
{ CXL_EVENT_RECORD_FLAG_HW_REPLACE, "HARDWARE_REPLACEMENT_NEEDED" } \
158+
)
159+
160+
/*
 * Define macros for the common header of each CXL event.
 *
 * Tracepoints using these macros must do 3 things:
 *
 *	1) Add CXL_EVT_TP_entry to TP_STRUCT__entry
 *	2) Use CXL_EVT_TP_fast_assign within TP_fast_assign;
 *	   pass the dev, log, and CXL event header
 *	3) Use CXL_EVT_TP_printk() instead of TP_printk()
 *
 * See the cxl_generic_event tracepoint as an example.
 */
172+
/* Trace entry fields shared by every CXL event: device, log, header */
#define CXL_EVT_TP_entry \
173+
__string(dev_name, dev_name(dev)) \
174+
__field(int, log) \
175+
__field_struct(uuid_t, hdr_uuid) \
176+
__field(u32, hdr_flags) \
177+
__field(u16, hdr_handle) \
178+
__field(u16, hdr_related_handle) \
179+
__field(u64, hdr_timestamp) \
180+
__field(u8, hdr_length) \
181+
__field(u8, hdr_maint_op_class)
182+
183+
/*
 * Fill the common fields from the event header.  Multi-byte header values
 * are converted from little-endian; flags is read as a 24-bit value with
 * get_unaligned_le24().
 */
#define CXL_EVT_TP_fast_assign(dev, l, hdr) \
184+
__assign_str(dev_name, dev_name(dev)); \
185+
__entry->log = (l); \
186+
memcpy(&__entry->hdr_uuid, &(hdr).id, sizeof(uuid_t)); \
187+
__entry->hdr_length = (hdr).length; \
188+
__entry->hdr_flags = get_unaligned_le24((hdr).flags); \
189+
__entry->hdr_handle = le16_to_cpu((hdr).handle); \
190+
__entry->hdr_related_handle = le16_to_cpu((hdr).related_handle); \
191+
__entry->hdr_timestamp = le64_to_cpu((hdr).timestamp); \
192+
__entry->hdr_maint_op_class = (hdr).maint_op_class
193+
194+
/*
 * Print the common header fields first, then append the caller's format
 * string and its arguments after a " : " separator.
 */
#define CXL_EVT_TP_printk(fmt, ...) \
195+
TP_printk("%s log=%s : time=%llu uuid=%pUb len=%d flags='%s' " \
196+
"handle=%x related_handle=%x maint_op_class=%u" \
197+
" : " fmt, \
198+
__get_str(dev_name), cxl_event_log_type_str(__entry->log), \
199+
__entry->hdr_timestamp, &__entry->hdr_uuid, __entry->hdr_length,\
200+
show_hdr_flags(__entry->hdr_flags), __entry->hdr_handle, \
201+
__entry->hdr_related_handle, __entry->hdr_maint_op_class, \
202+
##__VA_ARGS__)
203+
204+
/*
 * cxl_generic_event - trace a raw CXL event record
 *
 * Records the common event header via the CXL_EVT_TP_* macros and copies the
 * CXL_EVENT_RECORD_DATA_LENGTH bytes of record data, printed as a hex dump.
 */
TRACE_EVENT(cxl_generic_event,
205+
206+
TP_PROTO(const struct device *dev, enum cxl_event_log_type log,
207+
struct cxl_event_record_raw *rec),
208+
209+
TP_ARGS(dev, log, rec),
210+
211+
TP_STRUCT__entry(
212+
CXL_EVT_TP_entry
213+
__array(u8, data, CXL_EVENT_RECORD_DATA_LENGTH)
214+
),
215+
216+
TP_fast_assign(
217+
CXL_EVT_TP_fast_assign(dev, log, rec->hdr);
218+
/* Record data is kept as uninterpreted bytes */
memcpy(__entry->data, &rec->data, CXL_EVENT_RECORD_DATA_LENGTH);
219+
),
220+
221+
CXL_EVT_TP_printk("%s",
222+
__print_hex(__entry->data, CXL_EVENT_RECORD_DATA_LENGTH))
223+
);
224+
106225
#endif /* _CXL_EVENTS_H */
107226

108227
#define TRACE_INCLUDE_FILE trace

drivers/cxl/cxl.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,18 @@ static inline int ways_to_eiw(unsigned int ways, u8 *eiw)
156156
#define CXLDEV_CAP_CAP_ID_SECONDARY_MAILBOX 0x3
157157
#define CXLDEV_CAP_CAP_ID_MEMDEV 0x4000
158158

159+
/*
 * CXL 3.0 8.2.8.3.1 Event Status Register
 *
 * One status bit per event log.  cxl_mem_get_event_records() reads the log
 * matching each bit that is set in its status argument.
 */
160+
#define CXLDEV_DEV_EVENT_STATUS_OFFSET 0x00
161+
#define CXLDEV_EVENT_STATUS_INFO BIT(0)
162+
#define CXLDEV_EVENT_STATUS_WARN BIT(1)
163+
#define CXLDEV_EVENT_STATUS_FAIL BIT(2)
164+
#define CXLDEV_EVENT_STATUS_FATAL BIT(3)
165+
166+
/* Mask covering all four event log status bits */
#define CXLDEV_EVENT_STATUS_ALL (CXLDEV_EVENT_STATUS_INFO | \
167+
CXLDEV_EVENT_STATUS_WARN | \
168+
CXLDEV_EVENT_STATUS_FAIL | \
169+
CXLDEV_EVENT_STATUS_FATAL)
170+
159171
/* CXL 2.0 8.2.8.4 Mailbox Registers */
160172
#define CXLDEV_MBOX_CAPS_OFFSET 0x00
161173
#define CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK GENMASK(4, 0)

0 commit comments

Comments
 (0)