Skip to content

Commit 9c8c7a7

Browse files
zhanjun authored and mattrope committed
drm/xe/guc: Prepare GuC register list and update ADS size for error capture
Add the referenced register defines and the list of registers. Update GuC ADS size allocation to include space for the lists of error state capture register descriptors. Then, populate GuC ADS with the lists of registers we want GuC to report back to host on engine reset events. This list should include global, engine-class and engine-instance registers for every engine-class type on the current hardware. Ensure we allocate persistent storage for the register lists that are populated into ADS so that we don't need to allocate memory during GT resets when GuC is reloaded and ADS population happens again. Signed-off-by: Zhanjun Dong <[email protected]> Reviewed-by: Alan Previn <[email protected]> Signed-off-by: Matt Roper <[email protected]> Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent d6d87a1 commit 9c8c7a7

File tree

12 files changed

+979
-37
lines changed

12 files changed

+979
-37
lines changed

drivers/gpu/drm/xe/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ xe-y += xe_bb.o \
5656
xe_gt_topology.o \
5757
xe_guc.o \
5858
xe_guc_ads.o \
59+
xe_guc_capture.o \
5960
xe_guc_ct.o \
6061
xe_guc_db_mgr.o \
6162
xe_guc_hwconfig.o \
Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
/* SPDX-License-Identifier: MIT */
2+
/*
3+
* Copyright © 2024 Intel Corporation
4+
*/
5+
6+
#ifndef _ABI_GUC_CAPTURE_ABI_H
7+
#define _ABI_GUC_CAPTURE_ABI_H
8+
9+
#include <linux/types.h>
10+
11+
/* Capture list index: PF or VF */
12+
enum guc_capture_list_index_type {
13+
GUC_CAPTURE_LIST_INDEX_PF = 0,
14+
GUC_CAPTURE_LIST_INDEX_VF = 1,
15+
};
16+
17+
#define GUC_CAPTURE_LIST_INDEX_MAX (GUC_CAPTURE_LIST_INDEX_VF + 1) /* number of capture list indices */
18+
19+
/* Register types of GuC capture register lists: global, engine-class, engine-instance */
20+
enum guc_state_capture_type {
21+
GUC_STATE_CAPTURE_TYPE_GLOBAL = 0,
22+
GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
23+
GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE
24+
};
25+
26+
#define GUC_STATE_CAPTURE_TYPE_MAX (GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE + 1) /* number of capture types */
27+
28+
/* Class indices for capture_class and capture_instance arrays */
29+
enum guc_capture_list_class_type {
30+
GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE = 0,
31+
GUC_CAPTURE_LIST_CLASS_VIDEO = 1,
32+
GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE = 2,
33+
GUC_CAPTURE_LIST_CLASS_BLITTER = 3,
34+
GUC_CAPTURE_LIST_CLASS_GSC_OTHER = 4,
35+
};
36+
37+
#define GUC_CAPTURE_LIST_CLASS_MAX (GUC_CAPTURE_LIST_CLASS_GSC_OTHER + 1) /* number of capture list classes */
38+
39+
/**
40+
* struct guc_mmio_reg - GuC MMIO reg state struct
41+
*
42+
* One MMIO register entry: the host fills in @offset, the firmware stores the value in @value.
43+
*/
44+
struct guc_mmio_reg {
45+
/** @offset: MMIO Offset - filled in by Host */
46+
u32 offset;
47+
/** @value: MMIO Value - Used by Firmware to store value */
48+
u32 value;
49+
/** @flags: Flags for accessing the MMIO (presumably the GUC_REGSET_* bits defined below - TODO confirm) */
50+
u32 flags;
51+
/** @mask: Value of a mask to apply if mask-with-value is set (presumably GUC_REGSET_MASKED_WITH_VALUE - TODO confirm) */
52+
u32 mask;
53+
#define GUC_REGSET_MASKED BIT(0)
54+
#define GUC_REGSET_STEERING_NEEDED BIT(1)
55+
#define GUC_REGSET_MASKED_WITH_VALUE BIT(2)
56+
#define GUC_REGSET_RESTORE_ONLY BIT(3)
57+
#define GUC_REGSET_STEERING_GROUP GENMASK(16, 12)
58+
#define GUC_REGSET_STEERING_INSTANCE GENMASK(23, 20)
59+
} __packed;
60+
61+
/**
62+
* struct guc_mmio_reg_set - GuC register sets
63+
*
64+
* Describes a GuC register set by its @address and register @count.
65+
*/
66+
struct guc_mmio_reg_set {
67+
/** @address: register address */
68+
u32 address;
69+
/** @count: register count */
70+
u16 count;
71+
/** @reserved: reserved */
72+
u16 reserved;
73+
} __packed;
74+
75+
/**
76+
* struct guc_debug_capture_list_header - Debug capture list header.
77+
*
78+
* Header placed in front of the MMIO descriptors of a debug capture list.
79+
*/
80+
struct guc_debug_capture_list_header {
81+
/** @info: contains number of MMIO descriptors in the capture list (GUC_CAPTURELISTHDR_NUMDESCR, bits 15:0). */
82+
u32 info;
83+
#define GUC_CAPTURELISTHDR_NUMDESCR GENMASK(15, 0)
84+
} __packed;
85+
86+
/**
87+
* struct guc_debug_capture_list - Debug capture list
88+
*
89+
* As part of ADS registration, these header structures (followed by
90+
* an array of 'struct guc_mmio_reg' entries) are used to register with
91+
* GuC microkernel the list of registers we want it to dump out prior
92+
* to an engine reset.
93+
*/
94+
struct guc_debug_capture_list {
95+
/** @header: Debug capture list header. */
96+
struct guc_debug_capture_list_header header;
97+
/** @regs: MMIO descriptors in the capture list. */
98+
struct guc_mmio_reg regs[];
99+
} __packed;
100+
101+
/**
102+
* struct guc_state_capture_header_t - State capture header.
103+
*
104+
* Prior to resetting engines that have hung or faulted, GuC microkernel
105+
* reports the engine error-state (register values that were read) by
106+
* logging them into the shared GuC log buffer using this hierarchy
107+
* of structures.
108+
*/
109+
struct guc_state_capture_header_t {
110+
/**
111+
* @owner: VFID
112+
* BR[ 7: 0] MBZ when SRIOV is disabled. When SRIOV is enabled
113+
* VFID is an integer in range [0, 63] where 0 means the state capture
114+
* is corresponding to the PF and an integer N in range [1, 63] means
115+
* the state capture is for VF N.
116+
*/
117+
u32 owner;
118+
#define GUC_STATE_CAPTURE_HEADER_VFID GENMASK(7, 0)
119+
/** @info: Engine class/instance and capture type info */
120+
u32 info;
121+
#define GUC_STATE_CAPTURE_HEADER_CAPTURE_TYPE GENMASK(3, 0) /* see guc_state_capture_type */
122+
#define GUC_STATE_CAPTURE_HEADER_ENGINE_CLASS GENMASK(7, 4) /* see guc_capture_list_class_type */
123+
#define GUC_STATE_CAPTURE_HEADER_ENGINE_INSTANCE GENMASK(11, 8)
124+
/**
125+
* @lrca: logical ring context address.
126+
* if type-instance, LRCA (address) that hung, else set to ~0
127+
*/
128+
u32 lrca;
129+
/**
130+
* @guc_id: context_index.
131+
* if type-instance, context index of hung context, else set to ~0
132+
*/
133+
u32 guc_id;
134+
/** @num_mmio_entries: Number of captured MMIO entries (GUC_STATE_CAPTURE_HEADER_NUM_MMIO_ENTRIES, bits 9:0). */
135+
u32 num_mmio_entries;
136+
#define GUC_STATE_CAPTURE_HEADER_NUM_MMIO_ENTRIES GENMASK(9, 0)
137+
} __packed;
138+
139+
/**
140+
* struct guc_state_capture_t - State capture.
141+
*
142+
* A state capture header followed by an array of captured guc_mmio_reg entries.
143+
*/
144+
struct guc_state_capture_t {
145+
/** @header: State capture header. */
146+
struct guc_state_capture_header_t header;
147+
/** @mmio_entries: Array of captured guc_mmio_reg entries. */
148+
struct guc_mmio_reg mmio_entries[];
149+
} __packed;
150+
151+
/* State capture group type: full or partial */
152+
enum guc_state_capture_group_type {
153+
GUC_STATE_CAPTURE_GROUP_TYPE_FULL = 0,
154+
GUC_STATE_CAPTURE_GROUP_TYPE_PARTIAL
155+
};
156+
157+
#define GUC_STATE_CAPTURE_GROUP_TYPE_MAX (GUC_STATE_CAPTURE_GROUP_TYPE_PARTIAL + 1) /* number of group types */
158+
159+
/**
160+
* struct guc_state_capture_group_header_t - State capture group header
161+
*
162+
* Header placed in front of the state captures of a capture group.
163+
*/
164+
struct guc_state_capture_group_header_t {
165+
/** @owner: VFID (GUC_STATE_CAPTURE_GROUP_HEADER_VFID, bits 7:0) */
166+
u32 owner;
167+
#define GUC_STATE_CAPTURE_GROUP_HEADER_VFID GENMASK(7, 0)
168+
/** @info: Number of captures and capture group type info */
169+
u32 info;
170+
#define GUC_STATE_CAPTURE_GROUP_HEADER_NUM_CAPTURES GENMASK(7, 0)
171+
#define GUC_STATE_CAPTURE_GROUP_HEADER_CAPTURE_GROUP_TYPE GENMASK(15, 8) /* presumably see guc_state_capture_group_type - TODO confirm */
172+
} __packed;
173+
174+
/**
175+
* struct guc_state_capture_group_t - State capture group.
176+
*
177+
* This is the top-level structure where an error-capture dump starts.
178+
*/
179+
struct guc_state_capture_group_t {
180+
/** @grp_header: State capture group header. */
181+
struct guc_state_capture_group_header_t grp_header;
182+
/** @capture_entries: Array of state captures */
183+
struct guc_state_capture_t capture_entries[];
184+
} __packed;
185+
186+
#endif

drivers/gpu/drm/xe/regs/xe_gt_regs.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -567,4 +567,6 @@
567567
#define GT_PERF_STATUS XE_REG(0x1381b4)
568568
#define VOLTAGE_MASK REG_GENMASK(10, 0)
569569

570+
#define SFC_DONE(n) XE_REG(0x1cc000 + (n) * 0x1000)
571+
570572
#endif

drivers/gpu/drm/xe/xe_guc.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "xe_gt_sriov_vf.h"
2424
#include "xe_gt_throttle.h"
2525
#include "xe_guc_ads.h"
26+
#include "xe_guc_capture.h"
2627
#include "xe_guc_ct.h"
2728
#include "xe_guc_db_mgr.h"
2829
#include "xe_guc_hwconfig.h"
@@ -339,6 +340,10 @@ int xe_guc_init(struct xe_guc *guc)
339340
if (ret)
340341
goto out;
341342

343+
ret = xe_guc_capture_init(guc);
344+
if (ret)
345+
goto out;
346+
342347
ret = xe_guc_ads_init(&guc->ads);
343348
if (ret)
344349
goto out;

drivers/gpu/drm/xe/xe_guc.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,4 +82,9 @@ static inline struct xe_device *guc_to_xe(struct xe_guc *guc)
8282
return gt_to_xe(guc_to_gt(guc));
8383
}
8484

85+
/* Return the address of the drm member of the xe_device that guc_to_xe() yields for @guc. */
static inline struct drm_device *guc_to_drm(struct xe_guc *guc)
86+
{
87+
return &guc_to_xe(guc)->drm;
88+
}
89+
8590
#endif

0 commit comments

Comments
 (0)