
Commit 00c8efc

Author: Thomas Hellström
drm/xe: Add a shrinker for xe bos
Rather than relying on the TTM watermark accounting, add a shrinker
for xe_bos in TT or system memory. Leverage the newly added TTM
per-page shrinking and shmem backup support.

Although xe doesn't fully support WONTNEED (purgeable) bos yet,
introduce and add shrinker support for purgeable ttm_tts.

v2:
- Cleanups, bugfixes and a KUNIT shrinker test.
- Add writeback support, and activate if kswapd.
v3:
- Move the try_shrink() helper to core TTM.
- Minor cleanups.
v4:
- Add runtime pm for the shrinker. Shrinking may require an active
  device for CCS metadata copying.
v5:
- Separately purge ghost- and zombie objects in the shrinker.
- Fix a format specifier - type inconsistency. (Kernel test robot)
v7:
- s/long/s64/ (Christian König)
- s/sofar/progress/ (Matt Brost)
v8:
- Rebase on Xe KUNIT update.
- Add content verifying to the shrinker kunit test.
- Split out TTM changes to a separate patch.
- Get rid of multiple bool arguments for clarity (Matt Brost)
- Avoid an error pointer dereference (Matt Brost)
- Avoid an integer overflow (Matt Auld)
- Address misc review comments by Matt Brost.
v9:
- Fix a compilation error.
- Rebase.
v10:
- Update to new LRU walk interface.
- Rework ghost-, zombie- and purged-object shrinking.
- Rebase.
v11:
- Use additional TTM helpers.
- Honor __GFP_FS and __GFP_IO.
- Rebase.
v13:
- Use ttm_tt_setup_backup().
v14:
- Don't set up backup on imported bos.
v15:
- Rebase on backup interface changes.

Cc: Christian König <[email protected]>
Cc: Somalapuram Amaranath <[email protected]>
Cc: Matthew Brost <[email protected]>
Cc: <[email protected]>
Signed-off-by: Thomas Hellström <[email protected]>
Reviewed-by: Matthew Brost <[email protected]>
Acked-by: Christian König <[email protected]>
Link: https://lore.kernel.org/intel-xe/[email protected]
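Background sketch, not part of the commit: the new xe_shrinker.c is among the 8 files changed but is not expanded in this view. A driver shrinker of this kind typically registers with the kernel's shrinker API roughly as below. shrinker_alloc()/shrinker_register() and the callback signatures are real kernel API; the struct, field names and the shrinker name string are illustrative assumptions.

#include <linux/atomic.h>
#include <linux/shrinker.h>

/* Illustrative counters, fed by xe_ttm_tt_account_add()/_subtract() below. */
struct xe_shrinker_example {
	atomic64_t shrinkable_pages;	/* backed up to shmem when shrunk */
	atomic64_t purgeable_pages;	/* WONTNEED: dropped without backup */
	struct shrinker *shrink;
};

static unsigned long example_count(struct shrinker *shrink,
				   struct shrink_control *sc)
{
	struct xe_shrinker_example *s = shrink->private_data;
	s64 num = atomic64_read(&s->shrinkable_pages) +
		  atomic64_read(&s->purgeable_pages);

	return num > 0 ? num : SHRINK_EMPTY;
}

static unsigned long example_scan(struct shrinker *shrink,
				  struct shrink_control *sc)
{
	/* The real scan walks the TTM LRUs and calls xe_bo_shrink(),
	 * added in the xe_bo.c diff below.
	 */
	return SHRINK_STOP;
}

static int example_register(struct xe_shrinker_example *s)
{
	s->shrink = shrinker_alloc(0, "xe-bo-shrinker");	/* name is a guess */
	if (!s->shrink)
		return -ENOMEM;

	s->shrink->count_objects = example_count;
	s->shrink->scan_objects = example_scan;
	s->shrink->private_data = s;
	shrinker_register(s->shrink);
	return 0;
}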
1 parent 70d645d commit 00c8efc

File tree: 8 files changed (+513, −18 lines)


drivers/gpu/drm/xe/Makefile

Lines changed: 1 addition & 0 deletions
@@ -94,6 +94,7 @@ xe-y += xe_bb.o \
 	xe_ring_ops.o \
 	xe_sa.o \
 	xe_sched_job.o \
+	xe_shrinker.o \
 	xe_step.o \
 	xe_sync.o \
 	xe_tile.o \

drivers/gpu/drm/xe/tests/xe_bo.c

Lines changed: 5 additions & 1 deletion
@@ -514,8 +514,13 @@ static int shrink_test_run_device(struct xe_device *xe)
 	 * other way around, they may not be subject to swapping...
 	 */
 	if (alloced < purgeable) {
+		xe_ttm_tt_account_subtract(&xe_tt->ttm);
 		xe_tt->purgeable = true;
+		xe_ttm_tt_account_add(&xe_tt->ttm);
 		bo->ttm.priority = 0;
+		spin_lock(&bo->ttm.bdev->lru_lock);
+		ttm_bo_move_to_lru_tail(&bo->ttm);
+		spin_unlock(&bo->ttm.bdev->lru_lock);
 	} else {
 		int ret = shrink_test_fill_random(bo, &prng, link);
 
@@ -570,7 +575,6 @@ static int shrink_test_run_device(struct xe_device *xe)
 		if (ret == -EINTR)
 			intr = true;
 	} while (ret == -EINTR && !signal_pending(current));
-
 	if (!ret && !purgeable)
 		failed = shrink_test_verify(test, bo, count, &prng, link);
 
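The test flips the purgeable state with balanced accounting: the pages are subtracted under the old state and re-added under the new one, then the bo is bumped on its LRU list so the shrinker will consider it. A hypothetical helper (not in the commit) capturing the flip pattern:

/* Hypothetical helper, not part of the commit: flip the purgeable state
 * while keeping the shrinker's shrinkable/purgeable page counts balanced.
 */
static void xe_tt_set_purgeable(struct xe_ttm_tt *xe_tt, bool purgeable)
{
	xe_ttm_tt_account_subtract(&xe_tt->ttm);	/* remove under old state */
	xe_tt->purgeable = purgeable;
	xe_ttm_tt_account_add(&xe_tt->ttm);		/* re-add under new state */
}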

drivers/gpu/drm/xe/xe_bo.c

Lines changed: 185 additions & 17 deletions
@@ -10,6 +10,7 @@
 #include <drm/drm_drv.h>
 #include <drm/drm_gem_ttm_helper.h>
 #include <drm/drm_managed.h>
+#include <drm/ttm/ttm_backup.h>
 #include <drm/ttm/ttm_device.h>
 #include <drm/ttm/ttm_placement.h>
 #include <drm/ttm/ttm_tt.h>
@@ -25,6 +26,7 @@
 #include "xe_pm.h"
 #include "xe_preempt_fence.h"
 #include "xe_res_cursor.h"
+#include "xe_shrinker.h"
 #include "xe_trace_bo.h"
 #include "xe_ttm_stolen_mgr.h"
 #include "xe_vm.h"
@@ -281,9 +283,11 @@ static void xe_evict_flags(struct ttm_buffer_object *tbo,
 	}
 }
 
+/* struct xe_ttm_tt - Subclassed ttm_tt for xe */
 struct xe_ttm_tt {
 	struct ttm_tt ttm;
-	struct device *dev;
+	/** @xe - The xe device */
+	struct xe_device *xe;
 	struct sg_table sgt;
 	struct sg_table *sg;
 	/** @purgeable: Whether the content of the pages of @ttm is purgeable. */
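The new @xe member plus container_of() is the standard TTM subclassing pattern: TTM hands back the embedded ttm_tt, and the driver recovers its wrapper. The commit open-codes this at every call site; each lookup expands to the equivalent of this hypothetical helper:

static inline struct xe_ttm_tt *to_xe_ttm_tt(struct ttm_tt *tt)
{
	/* Valid because xe_ttm_tt_create() only ever allocates xe_ttm_tt. */
	return container_of(tt, struct xe_ttm_tt, ttm);
}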
@@ -296,21 +300,22 @@ static int xe_tt_map_sg(struct ttm_tt *tt)
 	unsigned long num_pages = tt->num_pages;
 	int ret;
 
-	XE_WARN_ON(tt->page_flags & TTM_TT_FLAG_EXTERNAL);
+	XE_WARN_ON((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
+		   !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE));
 
 	if (xe_tt->sg)
 		return 0;
 
 	ret = sg_alloc_table_from_pages_segment(&xe_tt->sgt, tt->pages,
 						num_pages, 0,
 						(u64)num_pages << PAGE_SHIFT,
-						xe_sg_segment_size(xe_tt->dev),
+						xe_sg_segment_size(xe_tt->xe->drm.dev),
 						GFP_KERNEL);
 	if (ret)
 		return ret;
 
 	xe_tt->sg = &xe_tt->sgt;
-	ret = dma_map_sgtable(xe_tt->dev, xe_tt->sg, DMA_BIDIRECTIONAL,
+	ret = dma_map_sgtable(xe_tt->xe->drm.dev, xe_tt->sg, DMA_BIDIRECTIONAL,
 			      DMA_ATTR_SKIP_CPU_SYNC);
 	if (ret) {
 		sg_free_table(xe_tt->sg);
@@ -326,7 +331,7 @@ static void xe_tt_unmap_sg(struct ttm_tt *tt)
 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
 
 	if (xe_tt->sg) {
-		dma_unmap_sgtable(xe_tt->dev, xe_tt->sg,
+		dma_unmap_sgtable(xe_tt->xe->drm.dev, xe_tt->sg,
 				  DMA_BIDIRECTIONAL, 0);
 		sg_free_table(xe_tt->sg);
 		xe_tt->sg = NULL;
@@ -341,21 +346,47 @@ struct sg_table *xe_bo_sg(struct xe_bo *bo)
 	return xe_tt->sg;
 }
 
+/*
+ * Account ttm pages against the device shrinker's shrinkable and
+ * purgeable counts.
+ */
+static void xe_ttm_tt_account_add(struct ttm_tt *tt)
+{
+	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
+
+	if (xe_tt->purgeable)
+		xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, 0, tt->num_pages);
+	else
+		xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, tt->num_pages, 0);
+}
+
+static void xe_ttm_tt_account_subtract(struct ttm_tt *tt)
+{
+	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
+
+	if (xe_tt->purgeable)
+		xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, 0, -(long)tt->num_pages);
+	else
+		xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, -(long)tt->num_pages, 0);
+}
+
 static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
 				       u32 page_flags)
 {
 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
 	struct xe_device *xe = xe_bo_device(bo);
-	struct xe_ttm_tt *tt;
+	struct xe_ttm_tt *xe_tt;
+	struct ttm_tt *tt;
 	unsigned long extra_pages;
 	enum ttm_caching caching = ttm_cached;
 	int err;
 
-	tt = kzalloc(sizeof(*tt), GFP_KERNEL);
-	if (!tt)
+	xe_tt = kzalloc(sizeof(*xe_tt), GFP_KERNEL);
+	if (!xe_tt)
 		return NULL;
 
-	tt->dev = xe->drm.dev;
+	tt = &xe_tt->ttm;
+	xe_tt->xe = xe;
 
 	extra_pages = 0;
 	if (xe_bo_needs_ccs_pages(bo))
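xe_shrinker_mod_pages() lives in the new xe_shrinker.c, which this view doesn't expand. Judging only from the call sites above, a plausible shape is a pair of signed page-count deltas, one per reclaim class; the body below is an assumption, not the commit's code:

/* Assumed shape, inferred from the call sites above; the real
 * implementation in xe_shrinker.c may differ.
 */
void xe_shrinker_mod_pages(struct xe_shrinker *shrinker,
			   s64 shrinkable, s64 purgeable)
{
	atomic64_add(shrinkable, &shrinker->shrinkable_pages);
	atomic64_add(purgeable, &shrinker->purgeable_pages);
}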
@@ -401,42 +432,66 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
 		caching = ttm_uncached;
 	}
 
-	err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages);
+	if (ttm_bo->type != ttm_bo_type_sg)
+		page_flags |= TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE;
+
+	err = ttm_tt_init(tt, &bo->ttm, page_flags, caching, extra_pages);
 	if (err) {
-		kfree(tt);
+		kfree(xe_tt);
 		return NULL;
 	}
 
-	return &tt->ttm;
+	if (ttm_bo->type != ttm_bo_type_sg) {
+		err = ttm_tt_setup_backup(tt);
+		if (err) {
+			ttm_tt_fini(tt);
+			kfree(xe_tt);
+			return NULL;
+		}
+	}
+
+	return tt;
 }
 
 static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt,
 			      struct ttm_operation_ctx *ctx)
 {
+	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
 	int err;
 
 	/*
 	 * dma-bufs are not populated with pages, and the dma-
 	 * addresses are set up when moved to XE_PL_TT.
 	 */
-	if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
+	if ((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
+	    !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE))
 		return 0;
 
-	err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx);
+	if (ttm_tt_is_backed_up(tt) && !xe_tt->purgeable) {
+		err = ttm_tt_restore(ttm_dev, tt, ctx);
+	} else {
+		ttm_tt_clear_backed_up(tt);
+		err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx);
+	}
 	if (err)
 		return err;
 
-	return err;
+	xe_tt->purgeable = false;
+	xe_ttm_tt_account_add(tt);
+
+	return 0;
 }
 
 static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt)
 {
-	if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
+	if ((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
+	    !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE))
 		return;
 
 	xe_tt_unmap_sg(tt);
 
-	return ttm_pool_free(&ttm_dev->pool, tt);
+	ttm_pool_free(&ttm_dev->pool, tt);
+	xe_ttm_tt_account_subtract(tt);
 }
 
 static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt)
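The populate/unpopulate hunks above repeat the test (EXTERNAL && !EXTERNAL_MAPPABLE). Since xe now tags its own non-sg ttm_tts EXTERNAL | EXTERNAL_MAPPABLE, that test singles out dma-buf imports, whose pages xe never allocates. A hypothetical predicate spelling out the intent:

/* Hypothetical helper: true for ttm_tts whose pages xe does not manage
 * (dma-buf imports), false for xe's own EXTERNAL_MAPPABLE-tagged tts.
 */
static inline bool xe_tt_is_imported(struct ttm_tt *tt)
{
	return (tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
	       !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE);
}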
@@ -871,6 +926,111 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 	return ret;
 }
 
+static long xe_bo_shrink_purge(struct ttm_operation_ctx *ctx,
+			       struct ttm_buffer_object *bo,
+			       unsigned long *scanned)
+{
+	long lret;
+
+	/* Fake move to system, without copying data. */
+	if (bo->resource->mem_type != XE_PL_SYSTEM) {
+		struct ttm_resource *new_resource;
+
+		lret = ttm_bo_wait_ctx(bo, ctx);
+		if (lret)
+			return lret;
+
+		lret = ttm_bo_mem_space(bo, &sys_placement, &new_resource, ctx);
+		if (lret)
+			return lret;
+
+		xe_tt_unmap_sg(bo->ttm);
+		ttm_bo_move_null(bo, new_resource);
+	}
+
+	*scanned += bo->ttm->num_pages;
+	lret = ttm_bo_shrink(ctx, bo, (struct ttm_bo_shrink_flags)
+			     {.purge = true,
+			      .writeback = false,
+			      .allow_move = false});
+
+	if (lret > 0)
+		xe_ttm_tt_account_subtract(bo->ttm);
+
+	return lret;
+}
+
+/**
+ * xe_bo_shrink() - Try to shrink an xe bo.
+ * @ctx: The struct ttm_operation_ctx used for shrinking.
+ * @bo: The TTM buffer object whose pages to shrink.
+ * @flags: Flags governing the shrink behaviour.
+ * @scanned: Pointer to a counter of the number of pages
+ * attempted to shrink.
+ *
+ * Try to shrink- or purge a bo, and if it succeeds, unmap dma.
+ * Note that we need to be able to handle also non xe bos
+ * (ghost bos), but only if the struct ttm_tt is embedded in
+ * a struct xe_ttm_tt. When the function attempts to shrink
+ * the pages of a buffer object, the value pointed to by @scanned
+ * is updated.
+ *
+ * Return: The number of pages shrunken or purged, or negative error
+ * code on failure.
+ */
+long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo,
+		  const struct xe_bo_shrink_flags flags,
+		  unsigned long *scanned)
+{
+	struct ttm_tt *tt = bo->ttm;
+	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
+	struct ttm_place place = {.mem_type = bo->resource->mem_type};
+	struct xe_bo *xe_bo = ttm_to_xe_bo(bo);
+	struct xe_device *xe = xe_tt->xe;
+	bool needs_rpm;
+	long lret = 0L;
+
+	if (!(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE) ||
+	    (flags.purge && !xe_tt->purgeable))
+		return -EBUSY;
+
+	if (!ttm_bo_eviction_valuable(bo, &place))
+		return -EBUSY;
+
+	if (!xe_bo_is_xe_bo(bo) || !xe_bo_get_unless_zero(xe_bo))
+		return xe_bo_shrink_purge(ctx, bo, scanned);
+
+	if (xe_tt->purgeable) {
+		if (bo->resource->mem_type != XE_PL_SYSTEM)
+			lret = xe_bo_move_notify(xe_bo, ctx);
+		if (!lret)
+			lret = xe_bo_shrink_purge(ctx, bo, scanned);
+		goto out_unref;
+	}
+
+	/* System CCS needs gpu copy when moving PL_TT -> PL_SYSTEM */
+	needs_rpm = (!IS_DGFX(xe) && bo->resource->mem_type != XE_PL_SYSTEM &&
+		     xe_bo_needs_ccs_pages(xe_bo));
+	if (needs_rpm && !xe_pm_runtime_get_if_active(xe))
+		goto out_unref;
+
+	*scanned += tt->num_pages;
+	lret = ttm_bo_shrink(ctx, bo, (struct ttm_bo_shrink_flags)
+			     {.purge = false,
+			      .writeback = flags.writeback,
+			      .allow_move = true});
+	if (needs_rpm)
+		xe_pm_runtime_put(xe);
+
+	if (lret > 0)
+		xe_ttm_tt_account_subtract(tt);
+
+out_unref:
+	xe_bo_put(xe_bo);
+
+	return lret;
+}
+
 /**
  * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
  * @bo: The buffer object to move.
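The loop that drives xe_bo_shrink() sits in xe_shrinker.c and is not shown here. A hedged sketch of a per-bo call, reflecting the changelog notes "activate writeback if kswapd" and "honor __GFP_FS and __GFP_IO"; the gating policy shown is an assumption, and only the xe_bo_shrink() signature and flag fields come from the diff:

#include <linux/shrinker.h>	/* struct shrink_control */
#include <linux/swap.h>		/* current_is_kswapd() */

/* Illustrative only: shrink one LRU bo that the walk managed to lock. */
static long example_shrink_one(struct ttm_buffer_object *bo,
			       struct shrink_control *sc,
			       unsigned long *scanned)
{
	struct ttm_operation_ctx ctx = {
		.interruptible = false,
		.no_wait_gpu = true,
	};
	struct xe_bo_shrink_flags flags = {
		.purge = false,
		/* v2 changelog: "Add writeback support, and activate if kswapd." */
		.writeback = current_is_kswapd(),
	};

	/* Backing pages up to shmem can recurse into the FS; bail out if the
	 * reclaim context forbids it (v11: "Honor __GFP_FS and __GFP_IO").
	 */
	if (!(sc->gfp_mask & __GFP_FS))
		return -EBUSY;

	return xe_bo_shrink(&ctx, bo, flags, scanned);
}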
@@ -1885,6 +2045,8 @@ int xe_bo_pin_external(struct xe_bo *bo)
 	}
 
 	ttm_bo_pin(&bo->ttm);
+	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
+		xe_ttm_tt_account_subtract(bo->ttm.ttm);
 
 	/*
 	 * FIXME: If we always use the reserve / unreserve functions for locking
@@ -1944,6 +2106,8 @@ int xe_bo_pin(struct xe_bo *bo)
 	}
 
 	ttm_bo_pin(&bo->ttm);
+	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
+		xe_ttm_tt_account_subtract(bo->ttm.ttm);
 
 	/*
 	 * FIXME: If we always use the reserve / unreserve functions for locking
@@ -1978,6 +2142,8 @@ void xe_bo_unpin_external(struct xe_bo *bo)
 	spin_unlock(&xe->pinned.lock);
 
 	ttm_bo_unpin(&bo->ttm);
+	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
+		xe_ttm_tt_account_add(bo->ttm.ttm);
 
 	/*
 	 * FIXME: If we always use the reserve / unreserve functions for locking
@@ -2001,6 +2167,8 @@ void xe_bo_unpin(struct xe_bo *bo)
 		spin_unlock(&xe->pinned.lock);
 	}
 	ttm_bo_unpin(&bo->ttm);
+	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
+		xe_ttm_tt_account_add(bo->ttm.ttm);
 }
 
 /**
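The four pin/unpin hunks above repeat one idea: pinned pages cannot be reclaimed, so they leave the shrinker's counts on pin and rejoin them on unpin. Each site open-codes the equivalent of this hypothetical helper:

/* Hypothetical helper, not in the commit: pinned pages are unreclaimable,
 * so drop them from (or return them to) the shrinker's page counts.
 */
static void xe_bo_account_pin(struct xe_bo *bo, bool pin)
{
	struct ttm_tt *tt = bo->ttm.ttm;

	if (!tt || !ttm_tt_is_populated(tt))
		return;

	if (pin)
		xe_ttm_tt_account_subtract(tt);
	else
		xe_ttm_tt_account_add(tt);
}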
