Skip to content

Commit 10acca9

Browse files
committed
Merge tag 'drm-xe-next-fixes-2025-08-06' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next
- SRIOV: PF fixes and removal of the need for a module param (Michal)
- Fix driver unbind around devcoredump (Bala)
- Mark the xe driver as BROKEN if the kernel page size is not 4kB (Simon)

Signed-off-by: Dave Airlie <[email protected]>
From: Rodrigo Vivi <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
2 parents 48bb97c + 022906a commit 10acca9

File tree

10 files changed

+112
-8
lines changed

10 files changed

+112
-8
lines changed

drivers/gpu/drm/xe/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ config DRM_XE
55
depends on KUNIT || !KUNIT
66
depends on INTEL_VSEC || !INTEL_VSEC
77
depends on X86_PLATFORM_DEVICES || !(X86 && ACPI)
8+
depends on PAGE_SIZE_4KB || COMPILE_TEST || BROKEN
89
select INTERVAL_TREE
910
# we need shmfs for the swappable backing store, and in particular
1011
# the shmem_readpage() which depends upon tmpfs

drivers/gpu/drm/xe/xe_device.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -802,10 +802,6 @@ int xe_device_probe(struct xe_device *xe)
802802
return err;
803803
}
804804

805-
err = xe_devcoredump_init(xe);
806-
if (err)
807-
return err;
808-
809805
/*
810806
* From here on, if a step fails, make sure a Driver-FLR is triggered
811807
*/
@@ -870,6 +866,10 @@ int xe_device_probe(struct xe_device *xe)
870866
XE_WA(xe->tiles->media_gt, 15015404425_disable))
871867
XE_DEVICE_WA_DISABLE(xe, 15015404425);
872868

869+
err = xe_devcoredump_init(xe);
870+
if (err)
871+
return err;
872+
873873
xe_nvm_init(xe);
874874

875875
err = xe_heci_gsc_init(xe);

drivers/gpu/drm/xe/xe_gt_sriov_pf.c

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "xe_gt_sriov_pf_migration.h"
1717
#include "xe_gt_sriov_pf_service.h"
1818
#include "xe_gt_sriov_printk.h"
19+
#include "xe_guc_submit.h"
1920
#include "xe_mmio.h"
2021
#include "xe_pm.h"
2122

@@ -47,9 +48,16 @@ static int pf_alloc_metadata(struct xe_gt *gt)
4748

4849
/*
 * pf_init_workers() - set up the per-GT PF restart work item.
 *
 * Asserts that the device owning @gt is an SR-IOV PF, then initializes
 * gt->sriov.pf.workers.restart with pf_worker_restart_func as its handler.
 */
static void pf_init_workers(struct xe_gt *gt)
4950
{
51+
xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
5052
INIT_WORK(&gt->sriov.pf.workers.restart, pf_worker_restart_func);
5153
}
5254

55+
/*
 * pf_fini_workers() - tear down the per-GT PF restart work item.
 *
 * Counterpart of pf_init_workers(). Asserts that the device owning @gt is an
 * SR-IOV PF and calls disable_work_sync() on gt->sriov.pf.workers.restart.
 * NOTE(review): per the workqueue API this should both prevent further
 * queuing and wait for a running instance to finish - confirm.
 */
static void pf_fini_workers(struct xe_gt *gt)
56+
{
57+
xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
58+
disable_work_sync(&gt->sriov.pf.workers.restart);
59+
}
60+
5361
/**
5462
* xe_gt_sriov_pf_init_early - Prepare SR-IOV PF data structures on PF.
5563
* @gt: the &xe_gt to initialize
@@ -79,6 +87,21 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt)
7987
return 0;
8088
}
8189

90+
/*
 * pf_fini_action() - device-managed cleanup callback for a GT's PF state.
 * @arg: the &xe_gt that pf_init_late() passed when registering this action
 *
 * Forwards to pf_fini_workers() for that GT.
 */
static void pf_fini_action(void *arg)
91+
{
92+
struct xe_gt *gt = arg;
93+
94+
pf_fini_workers(gt);
95+
}
96+
97+
/*
 * pf_init_late() - register the PF cleanup action for @gt.
 *
 * Asserts that the device is an SR-IOV PF and registers pf_fini_action()
 * with @gt as its argument on xe->drm.dev via devm_add_action_or_reset().
 *
 * Return: the result of devm_add_action_or_reset().
 * NOTE(review): per the devres API that is 0 on success or a negative error
 * code, with the action run immediately on failure - confirm.
 */
static int pf_init_late(struct xe_gt *gt)
98+
{
99+
struct xe_device *xe = gt_to_xe(gt);
100+
101+
xe_gt_assert(gt, IS_SRIOV_PF(xe));
102+
return devm_add_action_or_reset(xe->drm.dev, pf_fini_action, gt);
103+
}
104+
82105
/**
83106
* xe_gt_sriov_pf_init - Prepare SR-IOV PF data structures on PF.
84107
* @gt: the &xe_gt to initialize
@@ -95,7 +118,15 @@ int xe_gt_sriov_pf_init(struct xe_gt *gt)
95118
if (err)
96119
return err;
97120

98-
return xe_gt_sriov_pf_migration_init(gt);
121+
err = xe_gt_sriov_pf_migration_init(gt);
122+
if (err)
123+
return err;
124+
125+
err = pf_init_late(gt);
126+
if (err)
127+
return err;
128+
129+
return 0;
99130
}
100131

101132
static bool pf_needs_enable_ggtt_guest_update(struct xe_device *xe)
@@ -230,3 +261,27 @@ void xe_gt_sriov_pf_restart(struct xe_gt *gt)
230261
{
231262
pf_queue_restart(gt);
232263
}
264+
265+
/*
 * pf_flush_restart() - flush the PF restart work item of @gt.
 *
 * Asserts that the device owning @gt is an SR-IOV PF and calls flush_work()
 * on gt->sriov.pf.workers.restart.
 * NOTE(review): per the workqueue API this should return only once any
 * queued or running restart work has completed - confirm.
 */
static void pf_flush_restart(struct xe_gt *gt)
266+
{
267+
xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
268+
flush_work(&gt->sriov.pf.workers.restart);
269+
}
270+
271+
/**
272+
* xe_gt_sriov_pf_wait_ready() - Wait until per-GT PF SR-IOV support is ready.
273+
* @gt: the &xe_gt
274+
*
275+
* This function can only be called on PF.
276+
*
277+
* Return: 0 once the PF restart work of @gt has been flushed, or -EBUSY
* (without waiting) if xe_guc_read_stopped() reports the GuC as stopped.
278+
*/
279+
int xe_gt_sriov_pf_wait_ready(struct xe_gt *gt)
280+
{
281+
/* don't wait if there is another ongoing reset */
282+
if (xe_guc_read_stopped(&gt->uc.guc))
283+
return -EBUSY;
284+
285+
/* flush the PF restart worker of this GT before reporting success */
pf_flush_restart(gt);
286+
return 0;
287+
}

drivers/gpu/drm/xe/xe_gt_sriov_pf.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ struct xe_gt;
1111
#ifdef CONFIG_PCI_IOV
1212
int xe_gt_sriov_pf_init_early(struct xe_gt *gt);
1313
int xe_gt_sriov_pf_init(struct xe_gt *gt);
14+
int xe_gt_sriov_pf_wait_ready(struct xe_gt *gt);
1415
void xe_gt_sriov_pf_init_hw(struct xe_gt *gt);
1516
void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid);
1617
void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt);

drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "xe_gt_sriov_pf_policy.h"
2323
#include "xe_gt_sriov_pf_service.h"
2424
#include "xe_pm.h"
25+
#include "xe_sriov_pf.h"
2526

2627
/*
2728
* /sys/kernel/debug/dri/0/
@@ -205,7 +206,8 @@ static int CONFIG##_set(void *data, u64 val) \
205206
return -EOVERFLOW; \
206207
\
207208
xe_pm_runtime_get(xe); \
208-
err = xe_gt_sriov_pf_config_set_##CONFIG(gt, vfid, val); \
209+
err = xe_sriov_pf_wait_ready(xe) ?: \
210+
xe_gt_sriov_pf_config_set_##CONFIG(gt, vfid, val); \
209211
xe_pm_runtime_put(xe); \
210212
\
211213
return err; \

drivers/gpu/drm/xe/xe_guc_capture.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1817,6 +1817,12 @@ void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm
18171817
str_yes_no(snapshot->kernel_reserved));
18181818

18191819
for (type = GUC_STATE_CAPTURE_TYPE_GLOBAL; type < GUC_STATE_CAPTURE_TYPE_MAX; type++) {
1820+
/*
1821+
* FIXME: During devcoredump print we should avoid accessing the
1822+
* driver pointers for gt or engine. Printing should be done only
1823+
* using the snapshot captured. Here we are accessing the gt
1824+
* pointer. It should be fixed.
1825+
*/
18201826
list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF, type,
18211827
capture_class, false);
18221828
snapshot_print_by_list_order(snapshot, p, type, list);

drivers/gpu/drm/xe/xe_module.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,18 @@
2727
#define DEFAULT_PROBE_DISPLAY true
2828
#define DEFAULT_VRAM_BAR_SIZE 0
2929
#define DEFAULT_FORCE_PROBE CONFIG_DRM_XE_FORCE_PROBE
30+
#define DEFAULT_MAX_VFS ~0
31+
#define DEFAULT_MAX_VFS_STR "unlimited"
3032
#define DEFAULT_WEDGED_MODE 1
3133
#define DEFAULT_SVM_NOTIFIER_SIZE 512
3234

3335
struct xe_modparam xe_modparam = {
3436
.probe_display = DEFAULT_PROBE_DISPLAY,
3537
.guc_log_level = DEFAULT_GUC_LOG_LEVEL,
3638
.force_probe = DEFAULT_FORCE_PROBE,
39+
#ifdef CONFIG_PCI_IOV
40+
.max_vfs = DEFAULT_MAX_VFS,
41+
#endif
3742
.wedged_mode = DEFAULT_WEDGED_MODE,
3843
.svm_notifier_size = DEFAULT_SVM_NOTIFIER_SIZE,
3944
/* the rest are 0 by default */
@@ -79,7 +84,8 @@ MODULE_PARM_DESC(force_probe,
7984
module_param_named(max_vfs, xe_modparam.max_vfs, uint, 0400);
8085
MODULE_PARM_DESC(max_vfs,
8186
"Limit number of Virtual Functions (VFs) that could be managed. "
82-
"(0 = no VFs [default]; N = allow up to N VFs)");
87+
"(0=no VFs; N=allow up to N VFs "
88+
"[default=" DEFAULT_MAX_VFS_STR "])");
8389
#endif
8490

8591
module_param_named_unsafe(wedged_mode, xe_modparam.wedged_mode, int, 0600);

drivers/gpu/drm/xe/xe_pci_sriov.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "xe_pci_sriov.h"
1313
#include "xe_pm.h"
1414
#include "xe_sriov.h"
15+
#include "xe_sriov_pf.h"
1516
#include "xe_sriov_pf_helpers.h"
1617
#include "xe_sriov_printk.h"
1718

@@ -138,6 +139,10 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs)
138139
xe_assert(xe, num_vfs <= total_vfs);
139140
xe_sriov_dbg(xe, "enabling %u VF%s\n", num_vfs, str_plural(num_vfs));
140141

142+
err = xe_sriov_pf_wait_ready(xe);
143+
if (err)
144+
goto out;
145+
141146
/*
142147
* We must hold additional reference to the runtime PM to keep PF in D0
143148
* during VFs lifetime, as our VFs do not implement the PM capability.
@@ -169,7 +174,7 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs)
169174
failed:
170175
pf_unprovision_vfs(xe, num_vfs);
171176
xe_pm_runtime_put(xe);
172-
177+
out:
173178
xe_sriov_notice(xe, "Failed to enable %u VF%s (%pe)\n",
174179
num_vfs, str_plural(num_vfs), ERR_PTR(err));
175180
return err;

drivers/gpu/drm/xe/xe_sriov_pf.c

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
#include "xe_assert.h"
1111
#include "xe_device.h"
12+
#include "xe_gt_sriov_pf.h"
1213
#include "xe_module.h"
1314
#include "xe_sriov.h"
1415
#include "xe_sriov_pf.h"
@@ -102,6 +103,32 @@ int xe_sriov_pf_init_early(struct xe_device *xe)
102103
return 0;
103104
}
104105

106+
/**
107+
* xe_sriov_pf_wait_ready() - Wait until PF is ready to operate.
108+
* @xe: the &xe_device to test
109+
*
110+
* This function can only be called on PF.
111+
*
112+
* Return: 0 on success, -ECANCELED if the device is wedged, or the first
* error returned by xe_gt_sriov_pf_wait_ready() for any GT.
113+
*/
114+
int xe_sriov_pf_wait_ready(struct xe_device *xe)
115+
{
116+
struct xe_gt *gt;
117+
unsigned int id;
118+
int err;
119+
120+
/* a wedged device is reported as not ready without waiting on any GT */
if (xe_device_wedged(xe))
121+
return -ECANCELED;
122+
123+
/* wait on every GT in turn; stop at the first one that reports an error */
for_each_gt(gt, xe, id) {
124+
err = xe_gt_sriov_pf_wait_ready(gt);
125+
if (err)
126+
return err;
127+
}
128+
129+
return 0;
130+
}
131+
105132
/**
106133
* xe_sriov_pf_print_vfs_summary - Print SR-IOV PF information.
107134
* @xe: the &xe_device to print info from

drivers/gpu/drm/xe/xe_sriov_pf.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ struct xe_device;
1515
#ifdef CONFIG_PCI_IOV
1616
bool xe_sriov_pf_readiness(struct xe_device *xe);
1717
int xe_sriov_pf_init_early(struct xe_device *xe);
18+
int xe_sriov_pf_wait_ready(struct xe_device *xe);
1819
void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root);
1920
void xe_sriov_pf_print_vfs_summary(struct xe_device *xe, struct drm_printer *p);
2021
#else

0 commit comments

Comments
 (0)