Skip to content

Commit 0f1fdf5

Browse files
zhanjun authored and mattrope committed
drm/xe/guc: Save manual engine capture into capture list
Save manual engine capture into capture list. This removes duplicate register definitions across manual-capture vs guc-err-capture. Signed-off-by: Zhanjun Dong <[email protected]> Reviewed-by: Alan Previn <[email protected]> Signed-off-by: Matt Roper <[email protected]> Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent ecb6336 commit 0f1fdf5

File tree

6 files changed

+163
-310
lines changed

6 files changed

+163
-310
lines changed

drivers/gpu/drm/xe/xe_devcoredump.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count,
117117
drm_puts(&p, "\n**** HW Engines ****\n");
118118
for (i = 0; i < XE_NUM_HW_ENGINES; i++)
119119
if (ss->hwe[i])
120-
xe_hw_engine_snapshot_print(ss->hwe[i], &p);
120+
xe_engine_snapshot_print(ss->hwe[i], &p);
121121

122122
drm_puts(&p, "\n**** VM state ****\n");
123123
xe_vm_snapshot_print(ss->vm, &p);

drivers/gpu/drm/xe/xe_guc_capture.c

Lines changed: 149 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -935,20 +935,21 @@ guc_capture_init_node(struct xe_guc *guc, struct __guc_capture_parsed_output *no
935935
* guc->capture->cachelist and populated with the error-capture
936936
* data from GuC and then it's added into guc->capture->outlist linked
937937
* list. This list is used for matchup and printout by xe_devcoredump_read
938-
* and xe_hw_engine_snapshot_print, (when user invokes the devcoredump sysfs).
938+
* and xe_engine_snapshot_print, (when user invokes the devcoredump sysfs).
939939
*
940940
* GUC --> notify context reset:
941941
* -----------------------------
942942
* --> guc_exec_queue_timedout_job
943943
* L--> xe_devcoredump
944944
* L--> devcoredump_snapshot
945945
* --> xe_hw_engine_snapshot_capture
946+
* --> xe_engine_manual_capture(For manual capture)
946947
*
947948
* User Sysfs / Debugfs
948949
* --------------------
949950
* --> xe_devcoredump_read->
950951
* L--> xxx_snapshot_print
951-
* L--> xe_hw_engine_snapshot_print
952+
* L--> xe_engine_snapshot_print
952953
* Print register lists values saved at
953954
* guc->capture->outlist
954955
*
@@ -1524,6 +1525,129 @@ guc_capture_create_prealloc_nodes(struct xe_guc *guc)
15241525
__guc_capture_create_prealloc_nodes(guc);
15251526
}
15261527

1528+
static void
1529+
read_reg_to_node(struct xe_hw_engine *hwe, const struct __guc_mmio_reg_descr_group *list,
1530+
struct guc_mmio_reg *regs)
1531+
{
1532+
int i;
1533+
1534+
if (!list || list->num_regs == 0)
1535+
return;
1536+
1537+
if (!regs)
1538+
return;
1539+
1540+
for (i = 0; i < list->num_regs; i++) {
1541+
struct __guc_mmio_reg_descr desc = list->list[i];
1542+
u32 value;
1543+
1544+
if (!list->list)
1545+
return;
1546+
1547+
if (list->type == GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE) {
1548+
value = xe_hw_engine_mmio_read32(hwe, desc.reg);
1549+
} else {
1550+
if (list->type == GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS &&
1551+
FIELD_GET(GUC_REGSET_STEERING_NEEDED, desc.flags)) {
1552+
int group, instance;
1553+
1554+
group = FIELD_GET(GUC_REGSET_STEERING_GROUP, desc.flags);
1555+
instance = FIELD_GET(GUC_REGSET_STEERING_INSTANCE, desc.flags);
1556+
value = xe_gt_mcr_unicast_read(hwe->gt, XE_REG_MCR(desc.reg.addr),
1557+
group, instance);
1558+
} else {
1559+
value = xe_mmio_read32(&hwe->gt->mmio, desc.reg);
1560+
}
1561+
}
1562+
1563+
regs[i].value = value;
1564+
regs[i].offset = desc.reg.addr;
1565+
regs[i].flags = desc.flags;
1566+
regs[i].mask = desc.mask;
1567+
}
1568+
}
1569+
1570+
/**
1571+
* xe_engine_manual_capture - Take a manual engine snapshot from engine.
1572+
* @hwe: Xe HW Engine.
1573+
* @snapshot: The engine snapshot
1574+
*
1575+
* Take engine snapshot from engine read.
1576+
*
1577+
* Returns: None
1578+
*/
1579+
void
1580+
xe_engine_manual_capture(struct xe_hw_engine *hwe, struct xe_hw_engine_snapshot *snapshot)
1581+
{
1582+
struct xe_gt *gt = hwe->gt;
1583+
struct xe_device *xe = gt_to_xe(gt);
1584+
struct xe_guc *guc = &gt->uc.guc;
1585+
struct xe_devcoredump *devcoredump = &xe->devcoredump;
1586+
enum guc_capture_list_class_type capture_class;
1587+
const struct __guc_mmio_reg_descr_group *list;
1588+
struct __guc_capture_parsed_output *new;
1589+
enum guc_state_capture_type type;
1590+
u16 guc_id = 0;
1591+
u32 lrca = 0;
1592+
1593+
new = guc_capture_get_prealloc_node(guc);
1594+
if (!new)
1595+
return;
1596+
1597+
capture_class = xe_engine_class_to_guc_capture_class(hwe->class);
1598+
for (type = GUC_STATE_CAPTURE_TYPE_GLOBAL; type < GUC_STATE_CAPTURE_TYPE_MAX; type++) {
1599+
struct gcap_reg_list_info *reginfo = &new->reginfo[type];
1600+
/*
1601+
* regsinfo->regs is allocated based on guc->capture->max_mmio_per_node
1602+
* which is based on the descriptor list driving the population so
1603+
* should not overflow
1604+
*/
1605+
1606+
/* Get register list for the type/class */
1607+
list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF, type,
1608+
capture_class, false);
1609+
if (!list) {
1610+
xe_gt_dbg(gt, "Empty GuC capture register descriptor for %s",
1611+
hwe->name);
1612+
continue;
1613+
}
1614+
1615+
read_reg_to_node(hwe, list, reginfo->regs);
1616+
reginfo->num_regs = list->num_regs;
1617+
1618+
/* Capture steering registers for rcs/ccs */
1619+
if (capture_class == GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE) {
1620+
list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF,
1621+
type, capture_class, true);
1622+
if (list) {
1623+
read_reg_to_node(hwe, list, &reginfo->regs[reginfo->num_regs]);
1624+
reginfo->num_regs += list->num_regs;
1625+
}
1626+
}
1627+
}
1628+
1629+
if (devcoredump && devcoredump->captured) {
1630+
struct xe_guc_submit_exec_queue_snapshot *ge = devcoredump->snapshot.ge;
1631+
1632+
if (ge) {
1633+
guc_id = ge->guc.id;
1634+
if (ge->lrc[0])
1635+
lrca = ge->lrc[0]->context_desc;
1636+
}
1637+
}
1638+
1639+
new->eng_class = xe_engine_class_to_guc_class(hwe->class);
1640+
new->eng_inst = hwe->instance;
1641+
new->guc_id = guc_id;
1642+
new->lrca = lrca;
1643+
new->is_partial = 0;
1644+
new->locked = 1;
1645+
new->source = XE_ENGINE_CAPTURE_SOURCE_MANUAL;
1646+
1647+
guc_capture_add_node_to_outlist(guc->capture, new);
1648+
devcoredump->snapshot.matched_node = new;
1649+
}
1650+
15271651
static struct guc_mmio_reg *
15281652
guc_capture_find_reg(struct gcap_reg_list_info *reginfo, u32 addr, u32 flags)
15291653
{
@@ -1609,7 +1733,7 @@ snapshot_print_by_list_order(struct xe_hw_engine_snapshot *snapshot, struct drm_
16091733
*
16101734
* This function prints out a given Xe HW Engine snapshot object.
16111735
*/
1612-
void xe_engine_guc_capture_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p)
1736+
void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p)
16131737
{
16141738
const char *grptype[GUC_STATE_CAPTURE_GROUP_TYPE_MAX] = {
16151739
"full-capture",
@@ -1648,6 +1772,8 @@ void xe_engine_guc_capture_print(struct xe_hw_engine_snapshot *snapshot, struct
16481772
drm_printf(p, "\tCoverage: %s\n", grptype[devcore_snapshot->matched_node->is_partial]);
16491773
drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n",
16501774
snapshot->forcewake.domain, snapshot->forcewake.ref);
1775+
drm_printf(p, "\tReserved: %s\n",
1776+
str_yes_no(snapshot->kernel_reserved));
16511777

16521778
for (type = GUC_STATE_CAPTURE_TYPE_GLOBAL; type < GUC_STATE_CAPTURE_TYPE_MAX; type++) {
16531779
list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF, type,
@@ -1757,8 +1883,27 @@ xe_engine_snapshot_capture_for_job(struct xe_sched_job *job)
17571883
continue;
17581884
}
17591885

1760-
if (!coredump->snapshot.hwe[id])
1886+
if (!coredump->snapshot.hwe[id]) {
17611887
coredump->snapshot.hwe[id] = xe_hw_engine_snapshot_capture(hwe, job);
1888+
} else {
1889+
struct __guc_capture_parsed_output *new;
1890+
1891+
new = xe_guc_capture_get_matching_and_lock(job);
1892+
if (new) {
1893+
struct xe_guc *guc = &q->gt->uc.guc;
1894+
1895+
/*
1896+
* If we are in here, it means we found a fresh
1897+
* GuC-err-capture node for this engine after
1898+
* previously failing to find a match in the
1899+
* early part of guc_exec_queue_timedout_job.
1900+
* Thus we must free the manually captured node
1901+
*/
1902+
guc_capture_free_outlist_node(guc->capture,
1903+
coredump->snapshot.matched_node);
1904+
coredump->snapshot.matched_node = new;
1905+
}
1906+
}
17621907

17631908
break;
17641909
}

drivers/gpu/drm/xe/xe_guc_capture.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,9 @@ const struct __guc_mmio_reg_descr_group *
5151
xe_guc_capture_get_reg_desc_list(struct xe_gt *gt, u32 owner, u32 type,
5252
enum guc_capture_list_class_type capture_class, bool is_ext);
5353
struct __guc_capture_parsed_output *xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job);
54+
void xe_engine_manual_capture(struct xe_hw_engine *hwe, struct xe_hw_engine_snapshot *snapshot);
55+
void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p);
5456
void xe_engine_snapshot_capture_for_job(struct xe_sched_job *job);
55-
void xe_engine_guc_capture_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p);
5657
void xe_guc_capture_steered_list_init(struct xe_guc *guc);
5758
void xe_guc_capture_put_matched_nodes(struct xe_guc *guc);
5859
int xe_guc_capture_init(struct xe_guc *guc);

0 commit comments

Comments
 (0)