Skip to content

Commit 46f1f4b

Browse files
matt-auld authored and lucasdemarchi committed
drm/xe: improve hibernation on igpu
The GGTT looks to be stored inside stolen memory on igpu which is not treated as normal RAM. The core kernel skips this memory range when creating the hibernation image, therefore when coming back from hibernation the GGTT programming is lost. This seems to cause issues with broken resume where GuC FW fails to load:

[drm] *ERROR* GT0: load failed: status = 0x400000A0, time = 10ms, freq = 1250MHz (req 1300MHz), done = -1
[drm] *ERROR* GT0: load failed: status: Reset = 0, BootROM = 0x50, UKernel = 0x00, MIA = 0x00, Auth = 0x01
[drm] *ERROR* GT0: firmware signature verification failed
[drm] *ERROR* CRITICAL: Xe has declared device 0000:00:02.0 as wedged.

Current GGTT users are kernel internal and tracked as pinned, so it should be possible to hook into the existing save/restore logic that we use for dgpu, where the actual evict is skipped but on restore we importantly restore the GGTT programming. This has been confirmed to fix hibernation on at least ADL and MTL, though likely all igpu platforms are affected.

This also means we have a hole in our testing, where the existing s4 tests only really test the driver hooks, and don't go as far as actually rebooting and restoring from the hibernation image and in turn powering down RAM (and therefore losing the contents of stolen).

v2 (Brost)
 - Remove extra newline and drop unnecessary parentheses.

Fixes: dd08ebf ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/3275
Signed-off-by: Matthew Auld <[email protected]>
Cc: Matthew Brost <[email protected]>
Cc: <[email protected]> # v6.8+
Reviewed-by: Matthew Brost <[email protected]>
Reviewed-by: Lucas De Marchi <[email protected]>
Signed-off-by: Matthew Brost <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
(cherry picked from commit f2a6b8e)
Signed-off-by: Lucas De Marchi <[email protected]>
1 parent dd886a6 commit 46f1f4b

File tree

2 files changed

+16
-27
lines changed

2 files changed

+16
-27
lines changed

drivers/gpu/drm/xe/xe_bo.c

Lines changed: 16 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -948,7 +948,10 @@ int xe_bo_restore_pinned(struct xe_bo *bo)
948948
if (WARN_ON(!xe_bo_is_pinned(bo)))
949949
return -EINVAL;
950950

951-
if (WARN_ON(xe_bo_is_vram(bo) || !bo->ttm.ttm))
951+
if (WARN_ON(xe_bo_is_vram(bo)))
952+
return -EINVAL;
953+
954+
if (WARN_ON(!bo->ttm.ttm && !xe_bo_is_stolen(bo)))
952955
return -EINVAL;
953956

954957
if (!mem_type_is_vram(place->mem_type))
@@ -1723,6 +1726,7 @@ int xe_bo_pin_external(struct xe_bo *bo)
17231726

17241727
int xe_bo_pin(struct xe_bo *bo)
17251728
{
1729+
struct ttm_place *place = &bo->placements[0];
17261730
struct xe_device *xe = xe_bo_device(bo);
17271731
int err;
17281732

@@ -1753,22 +1757,19 @@ int xe_bo_pin(struct xe_bo *bo)
17531757
*/
17541758
if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
17551759
bo->flags & XE_BO_FLAG_INTERNAL_TEST)) {
1756-
struct ttm_place *place = &(bo->placements[0]);
1757-
17581760
if (mem_type_is_vram(place->mem_type)) {
17591761
xe_assert(xe, place->flags & TTM_PL_FLAG_CONTIGUOUS);
17601762

17611763
place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE) -
17621764
vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT;
17631765
place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT);
17641766
}
1767+
}
17651768

1766-
if (mem_type_is_vram(place->mem_type) ||
1767-
bo->flags & XE_BO_FLAG_GGTT) {
1768-
spin_lock(&xe->pinned.lock);
1769-
list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
1770-
spin_unlock(&xe->pinned.lock);
1771-
}
1769+
if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
1770+
spin_lock(&xe->pinned.lock);
1771+
list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
1772+
spin_unlock(&xe->pinned.lock);
17721773
}
17731774

17741775
ttm_bo_pin(&bo->ttm);
@@ -1816,24 +1817,18 @@ void xe_bo_unpin_external(struct xe_bo *bo)
18161817

18171818
void xe_bo_unpin(struct xe_bo *bo)
18181819
{
1820+
struct ttm_place *place = &bo->placements[0];
18191821
struct xe_device *xe = xe_bo_device(bo);
18201822

18211823
xe_assert(xe, !bo->ttm.base.import_attach);
18221824
xe_assert(xe, xe_bo_is_pinned(bo));
18231825

1824-
if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
1825-
bo->flags & XE_BO_FLAG_INTERNAL_TEST)) {
1826-
struct ttm_place *place = &(bo->placements[0]);
1827-
1828-
if (mem_type_is_vram(place->mem_type) ||
1829-
bo->flags & XE_BO_FLAG_GGTT) {
1830-
spin_lock(&xe->pinned.lock);
1831-
xe_assert(xe, !list_empty(&bo->pinned_link));
1832-
list_del_init(&bo->pinned_link);
1833-
spin_unlock(&xe->pinned.lock);
1834-
}
1826+
if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
1827+
spin_lock(&xe->pinned.lock);
1828+
xe_assert(xe, !list_empty(&bo->pinned_link));
1829+
list_del_init(&bo->pinned_link);
1830+
spin_unlock(&xe->pinned.lock);
18351831
}
1836-
18371832
ttm_bo_unpin(&bo->ttm);
18381833
}
18391834

drivers/gpu/drm/xe/xe_bo_evict.c

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,6 @@ int xe_bo_evict_all(struct xe_device *xe)
3434
u8 id;
3535
int ret;
3636

37-
if (!IS_DGFX(xe))
38-
return 0;
39-
4037
/* User memory */
4138
for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) {
4239
struct ttm_resource_manager *man =
@@ -125,9 +122,6 @@ int xe_bo_restore_kernel(struct xe_device *xe)
125122
struct xe_bo *bo;
126123
int ret;
127124

128-
if (!IS_DGFX(xe))
129-
return 0;
130-
131125
spin_lock(&xe->pinned.lock);
132126
for (;;) {
133127
bo = list_first_entry_or_null(&xe->pinned.evicted,

0 commit comments

Comments
 (0)