Skip to content

Commit 5cc3325

Browse files
author
Thomas Hellström
committed
drm/xe: Rework eviction rejection of bound external bos
For preempt_fence mode VM's we're rejecting eviction of shared bos during VM_BIND. However, since we do this in the move() callback, we're getting an eviction failure warning from TTM. The TTM callback intended for these things is eviction_valuable(). However, the latter doesn't pass in the struct ttm_operation_ctx needed to determine whether the caller needs this. Instead, attach the needed information to the vm under the vm->resv, until we've been able to update TTM to provide the needed information. And add sufficient lockdep checks to prevent misuse and races. v2: - Fix a copy-paste error in xe_vm_clear_validating() v3: - Fix kerneldoc errors. Signed-off-by: Thomas Hellström <[email protected]> Fixes: 0af944f ("drm/xe: Reject BO eviction if BO is bound to current VM") Reviewed-by: Matthew Brost <[email protected]> Link: https://lore.kernel.org/r/[email protected] (cherry picked from commit 9d55586) Signed-off-by: Thomas Hellström <[email protected]>
1 parent 2182f35 commit 5cc3325

File tree

3 files changed

+105
-18
lines changed

3 files changed

+105
-18
lines changed

drivers/gpu/drm/xe/xe_bo.c

Lines changed: 28 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -841,21 +841,6 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
841841
goto out;
842842
}
843843

844-
/* Reject BO eviction if BO is bound to current VM. */
845-
if (evict && ctx->resv) {
846-
struct drm_gpuvm_bo *vm_bo;
847-
848-
drm_gem_for_each_gpuvm_bo(vm_bo, &bo->ttm.base) {
849-
struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
850-
851-
if (xe_vm_resv(vm) == ctx->resv &&
852-
xe_vm_in_preempt_fence_mode(vm)) {
853-
ret = -EBUSY;
854-
goto out;
855-
}
856-
}
857-
}
858-
859844
/*
860845
* Failed multi-hop where the old_mem is still marked as
861846
* TTM_PL_FLAG_TEMPORARY, should just be a dummy move.
@@ -1013,6 +998,25 @@ static long xe_bo_shrink_purge(struct ttm_operation_ctx *ctx,
1013998
return lret;
1014999
}
10151000

1001+
/*
* xe_bo_eviction_valuable() - xe implementation of the TTM eviction_valuable()
* callback; also called directly from xe_bo_shrink().
*
* Defers to ttm_bo_eviction_valuable() first. A bo that is not an xe bo is
* then reported as valuable to evict. Otherwise eviction is rejected if any
* vm the bo is bound to is currently being validated by the current task,
* as reported by xe_vm_is_validating().
*/
static bool
1002+
xe_bo_eviction_valuable(struct ttm_buffer_object *bo, const struct ttm_place *place)
1003+
{
1004+
struct drm_gpuvm_bo *vm_bo;
1005+
1006+
if (!ttm_bo_eviction_valuable(bo, place))
1007+
return false;
1008+
1009+
if (!xe_bo_is_xe_bo(bo))
1010+
return true;
1011+
1012+
drm_gem_for_each_gpuvm_bo(vm_bo, &bo->base) {
1013+
if (xe_vm_is_validating(gpuvm_to_vm(vm_bo->vm)))
1014+
return false;
1015+
}
1016+
1017+
return true;
1018+
}
1019+
10161020
/**
10171021
* xe_bo_shrink() - Try to shrink an xe bo.
10181022
* @ctx: The struct ttm_operation_ctx used for shrinking.
@@ -1047,7 +1051,7 @@ long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo,
10471051
(flags.purge && !xe_tt->purgeable))
10481052
return -EBUSY;
10491053

1050-
if (!ttm_bo_eviction_valuable(bo, &place))
1054+
if (!xe_bo_eviction_valuable(bo, &place))
10511055
return -EBUSY;
10521056

10531057
if (!xe_bo_is_xe_bo(bo) || !xe_bo_get_unless_zero(xe_bo))
@@ -1588,7 +1592,7 @@ const struct ttm_device_funcs xe_ttm_funcs = {
15881592
.io_mem_pfn = xe_ttm_io_mem_pfn,
15891593
.access_memory = xe_ttm_access_memory,
15901594
.release_notify = xe_ttm_bo_release_notify,
1591-
.eviction_valuable = ttm_bo_eviction_valuable,
1595+
.eviction_valuable = xe_bo_eviction_valuable,
15921596
.delete_mem_notify = xe_ttm_bo_delete_mem_notify,
15931597
.swap_notify = xe_ttm_bo_swap_notify,
15941598
};
@@ -2431,6 +2435,8 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
24312435
.no_wait_gpu = false,
24322436
.gfp_retry_mayfail = true,
24332437
};
2438+
struct pin_cookie cookie;
2439+
int ret;
24342440

24352441
if (vm) {
24362442
lockdep_assert_held(&vm->lock);
@@ -2440,8 +2446,12 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
24402446
ctx.resv = xe_vm_resv(vm);
24412447
}
24422448

2449+
cookie = xe_vm_set_validating(vm, allow_res_evict);
24432450
trace_xe_bo_validate(bo);
2444-
return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
2451+
ret = ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
2452+
xe_vm_clear_validating(vm, allow_res_evict, cookie);
2453+
2454+
return ret;
24452455
}
24462456

24472457
bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo)

drivers/gpu/drm/xe/xe_vm.h

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,75 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap);
301301
void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p);
302302
void xe_vm_snapshot_free(struct xe_vm_snapshot *snap);
303303

304+
/**
305+
* xe_vm_set_validating() - Register this task as currently making bos resident
306+
* @vm: Pointer to the vm or NULL.
307+
* @allow_res_evict: Allow eviction of buffer objects bound to @vm when
308+
* validating.
309+
*
310+
* Register this task as currently making bos resident for the vm. Intended
311+
* to avoid eviction by the same task of shared bos bound to the vm.
312+
* Call with the vm's resv lock held.
* If @vm is NULL or @allow_res_evict is true, nothing is registered and a
* zero-initialized cookie is returned.
313+
*
314+
* Return: A pin cookie that should be used for xe_vm_clear_validating().
315+
*/
316+
static inline struct pin_cookie xe_vm_set_validating(struct xe_vm *vm,
317+
bool allow_res_evict)
318+
{
319+
struct pin_cookie cookie = {};
320+
321+
if (vm && !allow_res_evict) {
322+
xe_vm_assert_held(vm);
323+
cookie = lockdep_pin_lock(&xe_vm_resv(vm)->lock.base);
324+
/* Pairs with READ_ONCE in xe_vm_is_validating() */
325+
WRITE_ONCE(vm->validating, current);
326+
}
327+
328+
return cookie;
329+
}
330+
331+
/**
332+
* xe_vm_clear_validating() - Unregister this task as currently making bos resident
333+
* @vm: Pointer to the vm or NULL
334+
* @allow_res_evict: Eviction from @vm was allowed. Must be set to the same
335+
* value as for xe_vm_set_validating().
336+
* @cookie: Cookie obtained from xe_vm_set_validating().
337+
*
338+
* Unregister this task as currently making bos resident for the vm. Pairs
339+
* with a previous call to xe_vm_set_validating() on the same vm.
340+
* Call with the vm's resv lock held.
341+
*/
342+
static inline void xe_vm_clear_validating(struct xe_vm *vm, bool allow_res_evict,
343+
struct pin_cookie cookie)
344+
{
345+
if (vm && !allow_res_evict) {
346+
lockdep_unpin_lock(&xe_vm_resv(vm)->lock.base, cookie);
347+
/* Pairs with READ_ONCE in xe_vm_is_validating() */
348+
WRITE_ONCE(vm->validating, NULL);
349+
}
350+
}
351+
352+
/**
353+
* xe_vm_is_validating() - Whether bos bound to the vm are currently being made resident
354+
* by the current task.
355+
* @vm: Pointer to the vm.
356+
*
357+
* If this function returns %true, we should be in a vm resv locked region, since
358+
* the current process is the same task that called xe_vm_set_validating().
359+
* The function asserts that that's indeed the case.
360+
*
361+
* Return: %true if the task is currently making bos resident, %false otherwise.
362+
*/
363+
static inline bool xe_vm_is_validating(struct xe_vm *vm)
364+
{
365+
/* Pairs with WRITE_ONCE in xe_vm_set_validating() and xe_vm_clear_validating() */
366+
if (READ_ONCE(vm->validating) == current) {
367+
xe_vm_assert_held(vm);
368+
return true;
369+
}
370+
return false;
371+
}
372+
304373
#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
305374
void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma);
306375
#else

drivers/gpu/drm/xe/xe_vm_types.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,14 @@ struct xe_vm {
310310
* protected by the vm resv.
311311
*/
312312
u64 tlb_flush_seqno;
313+
/**
314+
* @validating: The task that is currently making bos resident for this vm.
315+
* Protected by the VM's resv for writing. Opportunistic reading can be done
316+
* using READ_ONCE. Note: This is a workaround for the
317+
* TTM eviction_valuable() callback not being passed a struct
318+
* ttm_operation_ctx. Future work might want to address this.
319+
*/
320+
struct task_struct *validating;
313321
/** @batch_invalidate_tlb: Always invalidate TLB before batch start */
314322
bool batch_invalidate_tlb;
315323
/** @xef: XE file handle for tracking this VM's drm client */

0 commit comments

Comments
 (0)