Skip to content

Commit 9924db4

Browse files
committed
Merge tag 'drm-xe-next-2025-04-28-1' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next
Core Changes: - Add drm_coredump_printer_is_full() (Matt Brost) Driver Changes: - Do not queue unneeded terminations from debugfs (Daniele) - Fix out-of-bound while enabling engine activity stats (Michal) - Use GT oriented message to report engine activity error (Michal) - Some fault-injection additions (Satyanarayana) - Fix an error pointer dereference (Harshit) - Fix capture of steering registers (John) - Use the steering flag when printing registers (John) - Cache DSS info when creating capture register list (John) - Backup VRAM in PM notifier instead of in the suspend / freeze callbacks (Matt Auld) - Fix CFI violation when accessing sysfs files (Jeevaka) - Fix kernel version docs for temperature and fan speed (Lucas) - Add devcoredump chunking (Matt Brost) - Update xe_ttm_access_memory to use GPU for non-visible access (Matt Brost) - Abort printing coredump in VM printer output if full (Matt Brost) - Resolve a possible circular locking dependency (Harish) - Don't support EU stall on SRIOV VF (Harish) - Drop force_alloc from xe_bo_evict in selftests (Matt Brost) Signed-off-by: Dave Airlie <[email protected]> From: Thomas Hellstrom <[email protected]> Link: https://lore.kernel.org/r/aA-mvTb6s909V8hu@fedora
2 parents d2b9e2f + 1bb53d0 commit 9924db4

32 files changed

+756
-244
lines changed

Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -111,39 +111,39 @@ Description: RO. Package current voltage in millivolt.
111111

112112
What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/temp2_input
113113
Date: March 2025
114-
KernelVersion: 6.14
114+
KernelVersion: 6.15
115115
116116
Description: RO. Package temperature in millidegree Celsius.
117117

118118
Only supported for particular Intel Xe graphics platforms.
119119

120120
What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/temp3_input
121121
Date: March 2025
122-
KernelVersion: 6.14
122+
KernelVersion: 6.15
123123
124124
Description: RO. VRAM temperature in millidegree Celsius.
125125

126126
Only supported for particular Intel Xe graphics platforms.
127127

128128
What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/fan1_input
129129
Date: March 2025
130-
KernelVersion: 6.14
130+
KernelVersion: 6.16
131131
132132
Description: RO. Fan 1 speed in RPM.
133133

134134
Only supported for particular Intel Xe graphics platforms.
135135

136136
What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/fan2_input
137137
Date: March 2025
138-
KernelVersion: 6.14
138+
KernelVersion: 6.16
139139
140140
Description: RO. Fan 2 speed in RPM.
141141

142142
Only supported for particular Intel Xe graphics platforms.
143143

144144
What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/fan3_input
145145
Date: March 2025
146-
KernelVersion: 6.14
146+
KernelVersion: 6.16
147147
148148
Description: RO. Fan 3 speed in RPM.
149149

drivers/gpu/drm/xe/tests/xe_bo.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo,
6060
}
6161

6262
/* Evict to system. CCS data should be copied. */
63-
ret = xe_bo_evict(bo, true);
63+
ret = xe_bo_evict(bo);
6464
if (ret) {
6565
KUNIT_FAIL(test, "Failed to evict bo.\n");
6666
return ret;

drivers/gpu/drm/xe/tests/xe_dma_buf.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported,
6565
* the exporter and the importer should be the same bo.
6666
*/
6767
swap(exported->ttm.base.dma_buf, dmabuf);
68-
ret = xe_bo_evict(exported, true);
68+
ret = xe_bo_evict(exported);
6969
swap(exported->ttm.base.dma_buf, dmabuf);
7070
if (ret) {
7171
if (ret != -EINTR && ret != -ERESTARTSYS)

drivers/gpu/drm/xe/tests/xe_migrate.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -509,7 +509,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile,
509509
dma_fence_put(fence);
510510

511511
kunit_info(test, "Evict vram buffer object\n");
512-
ret = xe_bo_evict(vram_bo, true);
512+
ret = xe_bo_evict(vram_bo);
513513
if (ret) {
514514
KUNIT_FAIL(test, "Failed to evict bo.\n");
515515
return;

drivers/gpu/drm/xe/xe_bo.c

Lines changed: 124 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1084,6 +1084,80 @@ long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo,
10841084
return lret;
10851085
}
10861086

1087+
/**
1088+
* xe_bo_notifier_prepare_pinned() - Prepare a pinned VRAM object to be backed
1089+
* up in system memory.
1090+
* @bo: The buffer object to prepare.
1091+
*
1092+
* On successful completion, the object backup pages are allocated. Expectation
1093+
* is that this is called from the PM notifier, prior to suspend/hibernation.
1094+
*
1095+
* Return: 0 on success. Negative error code on failure.
1096+
*/
1097+
int xe_bo_notifier_prepare_pinned(struct xe_bo *bo)
1098+
{
1099+
struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
1100+
struct xe_bo *backup;
1101+
int ret = 0;
1102+
1103+
xe_bo_lock(bo, false);
1104+
1105+
xe_assert(xe, !bo->backup_obj);
1106+
1107+
/*
1108+
* Since this is called from the PM notifier we might have raced with
1109+
* someone unpinning this after we dropped the pinned list lock and
1110+
* before grabbing the above bo lock.
1111+
*/
1112+
if (!xe_bo_is_pinned(bo))
1113+
goto out_unlock_bo;
1114+
1115+
if (!xe_bo_is_vram(bo))
1116+
goto out_unlock_bo;
1117+
1118+
if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
1119+
goto out_unlock_bo;
1120+
1121+
backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, bo->size,
1122+
DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
1123+
XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
1124+
XE_BO_FLAG_PINNED);
1125+
if (IS_ERR(backup)) {
1126+
ret = PTR_ERR(backup);
1127+
goto out_unlock_bo;
1128+
}
1129+
1130+
backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */
1131+
ttm_bo_pin(&backup->ttm);
1132+
bo->backup_obj = backup;
1133+
1134+
out_unlock_bo:
1135+
xe_bo_unlock(bo);
1136+
return ret;
1137+
}
1138+
1139+
/**
1140+
* xe_bo_notifier_unprepare_pinned() - Undo the previous prepare operation.
1141+
* @bo: The buffer object to undo the prepare for.
1142+
*
1143+
* Always returns 0. The backup object is removed, if still present. Expectation
1144+
* is that this is called from the PM notifier when undoing the prepare step.
1145+
*
1146+
* Return: Always returns 0.
1147+
*/
1148+
int xe_bo_notifier_unprepare_pinned(struct xe_bo *bo)
1149+
{
1150+
xe_bo_lock(bo, false);
1151+
if (bo->backup_obj) {
1152+
ttm_bo_unpin(&bo->backup_obj->ttm);
1153+
xe_bo_put(bo->backup_obj);
1154+
bo->backup_obj = NULL;
1155+
}
1156+
xe_bo_unlock(bo);
1157+
1158+
return 0;
1159+
}
1160+
10871161
/**
10881162
* xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
10891163
* @bo: The buffer object to move.
@@ -1098,7 +1172,8 @@ long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo,
10981172
int xe_bo_evict_pinned(struct xe_bo *bo)
10991173
{
11001174
struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
1101-
struct xe_bo *backup;
1175+
struct xe_bo *backup = bo->backup_obj;
1176+
bool backup_created = false;
11021177
bool unmap = false;
11031178
int ret = 0;
11041179

@@ -1120,12 +1195,17 @@ int xe_bo_evict_pinned(struct xe_bo *bo)
11201195
if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
11211196
goto out_unlock_bo;
11221197

1123-
backup = xe_bo_create_locked(xe, NULL, NULL, bo->size, ttm_bo_type_kernel,
1124-
XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
1125-
XE_BO_FLAG_PINNED);
1126-
if (IS_ERR(backup)) {
1127-
ret = PTR_ERR(backup);
1128-
goto out_unlock_bo;
1198+
if (!backup) {
1199+
backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, bo->size,
1200+
DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
1201+
XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
1202+
XE_BO_FLAG_PINNED);
1203+
if (IS_ERR(backup)) {
1204+
ret = PTR_ERR(backup);
1205+
goto out_unlock_bo;
1206+
}
1207+
backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */
1208+
backup_created = true;
11291209
}
11301210

11311211
if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) {
@@ -1173,12 +1253,12 @@ int xe_bo_evict_pinned(struct xe_bo *bo)
11731253
bo->size);
11741254
}
11751255

1176-
bo->backup_obj = backup;
1256+
if (!bo->backup_obj)
1257+
bo->backup_obj = backup;
11771258

11781259
out_backup:
11791260
xe_bo_vunmap(backup);
1180-
xe_bo_unlock(backup);
1181-
if (ret)
1261+
if (ret && backup_created)
11821262
xe_bo_put(backup);
11831263
out_unlock_bo:
11841264
if (unmap)
@@ -1212,15 +1292,12 @@ int xe_bo_restore_pinned(struct xe_bo *bo)
12121292
if (!backup)
12131293
return 0;
12141294

1215-
xe_bo_lock(backup, false);
1216-
1217-
ret = ttm_bo_validate(&backup->ttm, &backup->placement, &ctx);
1218-
if (ret)
1219-
goto out_backup;
1295+
xe_bo_lock(bo, false);
12201296

1221-
if (WARN_ON(!dma_resv_trylock(bo->ttm.base.resv))) {
1222-
ret = -EBUSY;
1223-
goto out_backup;
1297+
if (!xe_bo_is_pinned(backup)) {
1298+
ret = ttm_bo_validate(&backup->ttm, &backup->placement, &ctx);
1299+
if (ret)
1300+
goto out_unlock_bo;
12241301
}
12251302

12261303
if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) {
@@ -1261,7 +1338,7 @@ int xe_bo_restore_pinned(struct xe_bo *bo)
12611338
if (iosys_map_is_null(&bo->vmap)) {
12621339
ret = xe_bo_vmap(bo);
12631340
if (ret)
1264-
goto out_unlock_bo;
1341+
goto out_backup;
12651342
unmap = true;
12661343
}
12671344

@@ -1271,15 +1348,17 @@ int xe_bo_restore_pinned(struct xe_bo *bo)
12711348

12721349
bo->backup_obj = NULL;
12731350

1351+
out_backup:
1352+
xe_bo_vunmap(backup);
1353+
if (!bo->backup_obj) {
1354+
if (xe_bo_is_pinned(backup))
1355+
ttm_bo_unpin(&backup->ttm);
1356+
xe_bo_put(backup);
1357+
}
12741358
out_unlock_bo:
12751359
if (unmap)
12761360
xe_bo_vunmap(bo);
12771361
xe_bo_unlock(bo);
1278-
out_backup:
1279-
xe_bo_vunmap(backup);
1280-
xe_bo_unlock(backup);
1281-
if (!bo->backup_obj)
1282-
xe_bo_put(backup);
12831362
return ret;
12841363
}
12851364

@@ -1455,16 +1534,22 @@ static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo,
14551534
struct xe_res_cursor cursor;
14561535
struct xe_vram_region *vram;
14571536
int bytes_left = len;
1537+
int err = 0;
14581538

14591539
xe_bo_assert_held(bo);
14601540
xe_device_assert_mem_access(xe);
14611541

14621542
if (!mem_type_is_vram(ttm_bo->resource->mem_type))
14631543
return -EIO;
14641544

1465-
/* FIXME: Use GPU for non-visible VRAM */
1466-
if (!xe_ttm_resource_visible(ttm_bo->resource))
1467-
return -EIO;
1545+
if (!xe_ttm_resource_visible(ttm_bo->resource) || len >= SZ_16K) {
1546+
struct xe_migrate *migrate =
1547+
mem_type_to_migrate(xe, ttm_bo->resource->mem_type);
1548+
1549+
err = xe_migrate_access_memory(migrate, bo, offset, buf, len,
1550+
write);
1551+
goto out;
1552+
}
14681553

14691554
vram = res_to_mem_region(ttm_bo->resource);
14701555
xe_res_first(ttm_bo->resource, offset & PAGE_MASK,
@@ -1488,7 +1573,8 @@ static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo,
14881573
xe_res_next(&cursor, PAGE_SIZE);
14891574
} while (bytes_left);
14901575

1491-
return len;
1576+
out:
1577+
return err ?: len;
14921578
}
14931579

14941580
const struct ttm_device_funcs xe_ttm_funcs = {
@@ -1532,6 +1618,9 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
15321618
if (bo->vm && xe_bo_is_user(bo))
15331619
xe_vm_put(bo->vm);
15341620

1621+
if (bo->parent_obj)
1622+
xe_bo_put(bo->parent_obj);
1623+
15351624
mutex_lock(&xe->mem_access.vram_userfault.lock);
15361625
if (!list_empty(&bo->vram_userfault_link))
15371626
list_del(&bo->vram_userfault_link);
@@ -2306,6 +2395,13 @@ void xe_bo_unpin(struct xe_bo *bo)
23062395
xe_assert(xe, !list_empty(&bo->pinned_link));
23072396
list_del_init(&bo->pinned_link);
23082397
spin_unlock(&xe->pinned.lock);
2398+
2399+
if (bo->backup_obj) {
2400+
if (xe_bo_is_pinned(bo->backup_obj))
2401+
ttm_bo_unpin(&bo->backup_obj->ttm);
2402+
xe_bo_put(bo->backup_obj);
2403+
bo->backup_obj = NULL;
2404+
}
23092405
}
23102406
ttm_bo_unpin(&bo->ttm);
23112407
if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))

drivers/gpu/drm/xe/xe_bo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,8 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type);
277277
int xe_bo_evict(struct xe_bo *bo);
278278

279279
int xe_bo_evict_pinned(struct xe_bo *bo);
280+
int xe_bo_notifier_prepare_pinned(struct xe_bo *bo);
281+
int xe_bo_notifier_unprepare_pinned(struct xe_bo *bo);
280282
int xe_bo_restore_pinned(struct xe_bo *bo);
281283

282284
int xe_bo_dma_unmap_pinned(struct xe_bo *bo);

0 commit comments

Comments
 (0)