Skip to content

Commit 84d15f4

Browse files
zhanjunmattrope
authored andcommitted
drm/xe/guc: Add capture size check in GuC log buffer
Capture-nodes generated by GuC are placed in the GuC capture ring buffer which is a sub-region of the larger GuC-Log-buffer. Add capture output size check before allocating the shared buffer. Signed-off-by: Zhanjun Dong <[email protected]> Reviewed-by: Alan Previn <[email protected]> Signed-off-by: Matt Roper <[email protected]> Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent b170d69 commit 84d15f4

File tree

4 files changed

+174
-2
lines changed

4 files changed

+174
-2
lines changed

drivers/gpu/drm/xe/abi/guc_log_abi.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
/* SPDX-License-Identifier: MIT */
2+
/*
3+
* Copyright © 2024 Intel Corporation
4+
*/
5+
6+
#ifndef _ABI_GUC_LOG_ABI_H
7+
#define _ABI_GUC_LOG_ABI_H
8+
9+
#include <linux/types.h>
10+
11+
/* GuC logging buffer types */
12+
enum guc_log_buffer_type {
	/* Values are spelled out; GUC_LOG_BUFFER_TYPE_MAX must stay one past the last. */
	GUC_LOG_BUFFER_CRASH_DUMP = 0,	/* crash dump section */
	GUC_LOG_BUFFER_DEBUG = 1,	/* debug log section */
	GUC_LOG_BUFFER_CAPTURE = 2,	/* error-state capture section */
};
17+
18+
#define GUC_LOG_BUFFER_TYPE_MAX 3
19+
20+
#endif

drivers/gpu/drm/xe/xe_guc_capture.c

Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "xe_gt_mcr.h"
2323
#include "xe_gt_printk.h"
2424
#include "xe_guc.h"
25+
#include "xe_guc_ads.h"
2526
#include "xe_guc_capture.h"
2627
#include "xe_guc_capture_types.h"
2728
#include "xe_guc_ct.h"
@@ -669,6 +670,85 @@ size_t xe_guc_capture_ads_input_worst_size(struct xe_guc *guc)
669670
return PAGE_ALIGN(total_size);
670671
}
671672

673+
static int guc_capture_output_size_est(struct xe_guc *guc)
674+
{
675+
struct xe_gt *gt = guc_to_gt(guc);
676+
struct xe_hw_engine *hwe;
677+
enum xe_hw_engine_id id;
678+
679+
int capture_size = 0;
680+
size_t tmp = 0;
681+
682+
if (!guc->capture)
683+
return -ENODEV;
684+
685+
/*
686+
* If every single engine-instance suffered a failure in quick succession but
687+
* were all unrelated, then a burst of multiple error-capture events would dump
688+
* registers for every one engine instance, one at a time. In this case, GuC
689+
* would even dump the global-registers repeatedly.
690+
*
691+
* For each engine instance, there would be 1 x guc_state_capture_group_t output
692+
* followed by 3 x guc_state_capture_t lists. The latter is how the register
693+
* dumps are split across different register types (where the '3' are global vs class
694+
* vs instance).
695+
*/
696+
for_each_hw_engine(hwe, gt, id) {
697+
enum guc_capture_list_class_type capture_class;
698+
699+
capture_class = xe_engine_class_to_guc_capture_class(hwe->class);
700+
capture_size += sizeof(struct guc_state_capture_group_header_t) +
701+
(3 * sizeof(struct guc_state_capture_header_t));
702+
703+
if (!guc_capture_getlistsize(guc, 0, GUC_STATE_CAPTURE_TYPE_GLOBAL,
704+
0, &tmp, true))
705+
capture_size += tmp;
706+
if (!guc_capture_getlistsize(guc, 0, GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
707+
capture_class, &tmp, true))
708+
capture_size += tmp;
709+
if (!guc_capture_getlistsize(guc, 0, GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE,
710+
capture_class, &tmp, true))
711+
capture_size += tmp;
712+
}
713+
714+
return capture_size;
715+
}
716+
717+
/*
718+
* Add on a 3x multiplier to allow for multiple back-to-back captures occurring
719+
* before the Xe can read the data out and process it
720+
*/
721+
#define GUC_CAPTURE_OVERBUFFER_MULTIPLIER 3
722+
723+
static void check_guc_capture_size(struct xe_guc *guc)
724+
{
725+
int capture_size = guc_capture_output_size_est(guc);
726+
int spare_size = capture_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER;
727+
u32 buffer_size = xe_guc_log_section_size_capture(&guc->log);
728+
729+
/*
730+
* NOTE: capture_size is much smaller than the capture region
731+
* allocation (DG2: <80K vs 1MB).
732+
* Additionally, its based on space needed to fit all engines getting
733+
* reset at once within the same G2H handler task slot. This is very
734+
* unlikely. However, if GuC really does run out of space for whatever
735+
* reason, we will see an separate warning message when processing the
736+
* G2H event capture-notification, search for:
737+
* xe_guc_STATE_CAPTURE_EVENT_STATUS_NOSPACE.
738+
*/
739+
if (capture_size < 0)
740+
xe_gt_dbg(guc_to_gt(guc),
741+
"Failed to calculate error state capture buffer minimum size: %d!\n",
742+
capture_size);
743+
if (capture_size > buffer_size)
744+
xe_gt_dbg(guc_to_gt(guc), "Error state capture buffer maybe small: %d < %d\n",
745+
buffer_size, capture_size);
746+
else if (spare_size > buffer_size)
747+
xe_gt_dbg(guc_to_gt(guc),
748+
"Error state capture buffer lacks spare size: %d < %d (min = %d)\n",
749+
buffer_size, spare_size, capture_size);
750+
}
751+
672752
/*
673753
* xe_guc_capture_steered_list_init - Init steering register list
674754
* @guc: The GuC object
@@ -684,9 +764,10 @@ void xe_guc_capture_steered_list_init(struct xe_guc *guc)
684764
* the end of the pre-populated render list.
685765
*/
686766
guc_capture_alloc_steered_lists(guc);
767+
check_guc_capture_size(guc);
687768
}
688769

689-
/**
770+
/*
690771
* xe_guc_capture_init - Init for GuC register capture
691772
* @guc: The GuC object
692773
*

drivers/gpu/drm/xe/xe_guc_log.c

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,4 +270,70 @@ int xe_guc_log_init(struct xe_guc_log *log)
270270

271271
return 0;
272272
}
273+
273274
ALLOW_ERROR_INJECTION(xe_guc_log_init, ERRNO); /* See xe_pci_probe() */
275+
276+
static u32 xe_guc_log_section_size_crash(struct xe_guc_log *log)
277+
{
278+
return CRASH_BUFFER_SIZE;
279+
}
280+
281+
static u32 xe_guc_log_section_size_debug(struct xe_guc_log *log)
282+
{
283+
return DEBUG_BUFFER_SIZE;
284+
}
285+
286+
/**
287+
* xe_guc_log_section_size_capture - Get capture buffer size within log sections.
288+
* @log: The log object.
289+
*
290+
* This function will return the capture buffer size within log sections.
291+
*
292+
* Return: capture buffer size.
293+
*/
294+
u32 xe_guc_log_section_size_capture(struct xe_guc_log *log)
295+
{
296+
return CAPTURE_BUFFER_SIZE;
297+
}
298+
299+
/**
300+
* xe_guc_get_log_buffer_size - Get log buffer size for a type.
301+
* @log: The log object.
302+
* @type: The log buffer type
303+
*
304+
* Return: buffer size.
305+
*/
306+
u32 xe_guc_get_log_buffer_size(struct xe_guc_log *log, enum guc_log_buffer_type type)
307+
{
308+
switch (type) {
309+
case GUC_LOG_BUFFER_CRASH_DUMP:
310+
return xe_guc_log_section_size_crash(log);
311+
case GUC_LOG_BUFFER_DEBUG:
312+
return xe_guc_log_section_size_debug(log);
313+
case GUC_LOG_BUFFER_CAPTURE:
314+
return xe_guc_log_section_size_capture(log);
315+
}
316+
return 0;
317+
}
318+
319+
/**
320+
* xe_guc_get_log_buffer_offset - Get offset in log buffer for a type.
321+
* @log: The log object.
322+
* @type: The log buffer type
323+
*
324+
* This function will return the offset in the log buffer for a type.
325+
* Return: buffer offset.
326+
*/
327+
u32 xe_guc_get_log_buffer_offset(struct xe_guc_log *log, enum guc_log_buffer_type type)
328+
{
329+
enum guc_log_buffer_type i;
330+
u32 offset = PAGE_SIZE;/* for the log_buffer_states */
331+
332+
for (i = GUC_LOG_BUFFER_CRASH_DUMP; i < GUC_LOG_BUFFER_TYPE_MAX; ++i) {
333+
if (i == type)
334+
break;
335+
offset += xe_guc_get_log_buffer_size(log, i);
336+
}
337+
338+
return offset;
339+
}

drivers/gpu/drm/xe/xe_guc_log.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#define _XE_GUC_LOG_H_
88

99
#include "xe_guc_log_types.h"
10+
#include "abi/guc_log_abi.h"
1011

1112
struct drm_printer;
1213
struct xe_device;
@@ -18,7 +19,7 @@ struct xe_device;
1819
#else
1920
#define CRASH_BUFFER_SIZE SZ_8K
2021
#define DEBUG_BUFFER_SIZE SZ_64K
21-
#define CAPTURE_BUFFER_SIZE SZ_16K
22+
#define CAPTURE_BUFFER_SIZE SZ_1M
2223
#endif
2324
/*
2425
* While we're using plain log level in i915, GuC controls are much more...
@@ -50,4 +51,8 @@ xe_guc_log_get_level(struct xe_guc_log *log)
5051
return log->level;
5152
}
5253

54+
u32 xe_guc_log_section_size_capture(struct xe_guc_log *log);
55+
u32 xe_guc_get_log_buffer_size(struct xe_guc_log *log, enum guc_log_buffer_type type);
56+
u32 xe_guc_get_log_buffer_offset(struct xe_guc_log *log, enum guc_log_buffer_type type);
57+
5358
#endif

0 commit comments

Comments
 (0)