Skip to content

Commit cd5bbb2

Browse files
harishchegondi authored and ashutoshx committed
drm/xe/uapi: Add a device query to get EU stall sampling information
User space can get the EU stall data record size, EU stall capabilities, EU stall sampling rates, and per XeCore buffer size with the query IOCTL DRM_IOCTL_XE_DEVICE_QUERY with .query set to DRM_XE_DEVICE_QUERY_EU_STALL. A struct drm_xe_query_eu_stall will be returned to user space along with an array of supported sampling rates sorted in fastest-first order. sampling_rates in struct drm_xe_query_eu_stall is a flexible array that holds the sampling rates. Any capabilities in EU stall sampling as of this patch are considered base capabilities. New capability bits will be added for any new functionality added later.
v12: Rename has_eu_stall_sampling_support() to xe_eu_stall_supported_on_platform() and move it to the header file.
v11: Check if EU stall sampling is supported on the platform.
v10: Change comments and variable names as per feedback.
v9: Move reserved fields above num_sampling_rates in struct drm_xe_query_eu_stall.
v7: Change sampling_rates from a pointer to a flexible array.
v6: Include EU stall sampling rates information and per XeCore buffer size in the query information.
Reviewed-by: Ashutosh Dixit <[email protected]>
Signed-off-by: Harish Chegondi <[email protected]>
Signed-off-by: Ashutosh Dixit <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/67ba42796a5a99d648239c315694cd222812a49b.1740533885.git.harish.chegondi@intel.com
1 parent e827cf3 commit cd5bbb2

File tree

4 files changed

+131
-9
lines changed

4 files changed

+131
-9
lines changed

drivers/gpu/drm/xe/xe_eu_stall.c

Lines changed: 41 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,46 @@ struct xe_eu_stall_data_xe2 {
120120
__u64 unused[6];
121121
} __packed;
122122

123-
static size_t xe_eu_stall_data_record_size(struct xe_device *xe)
123+
const u64 eu_stall_sampling_rates[] = {251, 251 * 2, 251 * 3, 251 * 4, 251 * 5, 251 * 6, 251 * 7};
124+
125+
/**
126+
* xe_eu_stall_get_sampling_rates - get EU stall sampling rates information.
127+
*
128+
* @num_rates: Pointer to a u32 to return the number of sampling rates.
129+
* @rates: Pointer to a const u64 pointer; set to point to the array of sampling rates.
130+
*
131+
* Stores the number of sampling rates and pointer to the array of
132+
* sampling rates in the input pointers.
133+
*
134+
* Returns: Size of the EU stall sampling rates array in bytes.
135+
*/
136+
size_t xe_eu_stall_get_sampling_rates(u32 *num_rates, const u64 **rates)
137+
{
138+
*num_rates = ARRAY_SIZE(eu_stall_sampling_rates);
139+
*rates = eu_stall_sampling_rates;
140+
141+
return sizeof(eu_stall_sampling_rates);
142+
}
143+
144+
/**
145+
* xe_eu_stall_get_per_xecore_buf_size - get per XeCore buffer size.
146+
*
147+
* Returns: The per XeCore buffer size used to allocate the per GT
148+
* EU stall data buffer.
149+
*/
150+
size_t xe_eu_stall_get_per_xecore_buf_size(void)
151+
{
152+
return per_xecore_buf_size;
153+
}
154+
155+
/**
156+
* xe_eu_stall_data_record_size - get EU stall data record size.
157+
*
158+
* @xe: Pointer to a Xe device.
159+
*
160+
* Returns: EU stall data record size.
161+
*/
162+
size_t xe_eu_stall_data_record_size(struct xe_device *xe)
124163
{
125164
size_t record_size = 0;
126165

@@ -812,11 +851,6 @@ static const struct file_operations fops_eu_stall = {
812851
.compat_ioctl = xe_eu_stall_stream_ioctl,
813852
};
814853

815-
static inline bool has_eu_stall_sampling_support(struct xe_device *xe)
816-
{
817-
return xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20;
818-
}
819-
820854
static int xe_eu_stall_stream_open_locked(struct drm_device *dev,
821855
struct eu_stall_open_properties *props,
822856
struct drm_file *file)
@@ -885,7 +919,7 @@ int xe_eu_stall_stream_open(struct drm_device *dev, u64 data, struct drm_file *f
885919
struct eu_stall_open_properties props = {};
886920
int ret;
887921

888-
if (!has_eu_stall_sampling_support(xe)) {
922+
if (!xe_eu_stall_supported_on_platform(xe)) {
889923
drm_dbg(&xe->drm, "EU stall monitoring is not supported on this platform\n");
890924
return -ENODEV;
891925
}

drivers/gpu/drm/xe/xe_eu_stall.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,17 @@
88

99
#include "xe_gt_types.h"
1010

11+
size_t xe_eu_stall_get_per_xecore_buf_size(void);
12+
size_t xe_eu_stall_data_record_size(struct xe_device *xe);
13+
size_t xe_eu_stall_get_sampling_rates(u32 *num_rates, const u64 **rates);
14+
1115
int xe_eu_stall_init(struct xe_gt *gt);
1216
int xe_eu_stall_stream_open(struct drm_device *dev,
1317
u64 data,
1418
struct drm_file *file);
19+
20+
static inline bool xe_eu_stall_supported_on_platform(struct xe_device *xe)
21+
{
22+
return xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20;
23+
}
1524
#endif

drivers/gpu/drm/xe/xe_query.c

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "regs/xe_gt_regs.h"
1717
#include "xe_bo.h"
1818
#include "xe_device.h"
19+
#include "xe_eu_stall.h"
1920
#include "xe_exec_queue.h"
2021
#include "xe_force_wake.h"
2122
#include "xe_ggtt.h"
@@ -729,6 +730,47 @@ static int query_pxp_status(struct xe_device *xe, struct drm_xe_device_query *qu
729730
return 0;
730731
}
731732

733+
static int query_eu_stall(struct xe_device *xe,
734+
struct drm_xe_device_query *query)
735+
{
736+
void __user *query_ptr = u64_to_user_ptr(query->data);
737+
struct drm_xe_query_eu_stall *info;
738+
size_t size, array_size;
739+
const u64 *rates;
740+
u32 num_rates;
741+
int ret;
742+
743+
if (!xe_eu_stall_supported_on_platform(xe)) {
744+
drm_dbg(&xe->drm, "EU stall monitoring is not supported on this platform\n");
745+
return -ENODEV;
746+
}
747+
748+
array_size = xe_eu_stall_get_sampling_rates(&num_rates, &rates);
749+
size = sizeof(struct drm_xe_query_eu_stall) + array_size;
750+
751+
if (query->size == 0) {
752+
query->size = size;
753+
return 0;
754+
} else if (XE_IOCTL_DBG(xe, query->size != size)) {
755+
return -EINVAL;
756+
}
757+
758+
info = kzalloc(size, GFP_KERNEL);
759+
if (!info)
760+
return -ENOMEM;
761+
762+
info->num_sampling_rates = num_rates;
763+
info->capabilities = DRM_XE_EU_STALL_CAPS_BASE;
764+
info->record_size = xe_eu_stall_data_record_size(xe);
765+
info->per_xecore_buf_size = xe_eu_stall_get_per_xecore_buf_size();
766+
memcpy(info->sampling_rates, rates, array_size);
767+
768+
ret = copy_to_user(query_ptr, info, size);
769+
kfree(info);
770+
771+
return ret ? -EFAULT : 0;
772+
}
773+
732774
static int (* const xe_query_funcs[])(struct xe_device *xe,
733775
struct drm_xe_device_query *query) = {
734776
query_engines,
@@ -741,6 +783,7 @@ static int (* const xe_query_funcs[])(struct xe_device *xe,
741783
query_uc_fw_version,
742784
query_oa_units,
743785
query_pxp_status,
786+
query_eu_stall,
744787
};
745788

746789
int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)

include/uapi/drm/xe_drm.h

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -735,6 +735,7 @@ struct drm_xe_device_query {
735735
#define DRM_XE_DEVICE_QUERY_UC_FW_VERSION 7
736736
#define DRM_XE_DEVICE_QUERY_OA_UNITS 8
737737
#define DRM_XE_DEVICE_QUERY_PXP_STATUS 9
738+
#define DRM_XE_DEVICE_QUERY_EU_STALL 10
738739
/** @query: The type of data to query */
739740
__u32 query;
740741

@@ -1873,8 +1874,8 @@ enum drm_xe_eu_stall_property_id {
18731874
DRM_XE_EU_STALL_PROP_GT_ID = 1,
18741875

18751876
/**
1876-
* @DRM_XE_EU_STALL_PROP_SAMPLE_RATE: Sampling rate
1877-
* in GPU cycles.
1877+
* @DRM_XE_EU_STALL_PROP_SAMPLE_RATE: Sampling rate in
1878+
* GPU cycles from @sampling_rates in struct @drm_xe_query_eu_stall
18781879
*/
18791880
DRM_XE_EU_STALL_PROP_SAMPLE_RATE,
18801881

@@ -1886,6 +1887,41 @@ enum drm_xe_eu_stall_property_id {
18861887
DRM_XE_EU_STALL_PROP_WAIT_NUM_REPORTS,
18871888
};
18881889

1890+
/**
1891+
* struct drm_xe_query_eu_stall - Information about EU stall sampling.
1892+
*
1893+
* If a query is made with a struct @drm_xe_device_query where .query
1894+
* is equal to @DRM_XE_DEVICE_QUERY_EU_STALL, then the reply uses
1895+
* struct @drm_xe_query_eu_stall in .data.
1896+
*/
1897+
struct drm_xe_query_eu_stall {
1898+
/** @extensions: Pointer to the first extension struct, if any */
1899+
__u64 extensions;
1900+
1901+
/** @capabilities: EU stall capabilities bit-mask */
1902+
__u64 capabilities;
1903+
#define DRM_XE_EU_STALL_CAPS_BASE (1 << 0)
1904+
1905+
/** @record_size: size of each EU stall data record */
1906+
__u64 record_size;
1907+
1908+
/** @per_xecore_buf_size: internal per XeCore buffer size */
1909+
__u64 per_xecore_buf_size;
1910+
1911+
/** @reserved: Reserved */
1912+
__u64 reserved[5];
1913+
1914+
/** @num_sampling_rates: Number of sampling rates in @sampling_rates array */
1915+
__u64 num_sampling_rates;
1916+
1917+
/**
1918+
* @sampling_rates: Flexible array of sampling rates
1919+
* sorted from fastest to slowest.
1920+
* Sampling rates are specified in GPU clock cycles.
1921+
*/
1922+
__u64 sampling_rates[];
1923+
};
1924+
18891925
#if defined(__cplusplus)
18901926
}
18911927
#endif

0 commit comments

Comments
 (0)