Skip to content

Commit 4a31741

Browse files
committed
drm/i915/gem: Refine occupancy test in kill_context()
Don't just look at the very last request in a queue when deciding if we
need to evict the context from the GPU, as that request may still be in
the submission queue while the rest of the context is running! Instead,
walk back along the queued requests looking for the active request and
checking that.

Fixes: 2e0986a ("drm/i915/gem: Cancel contexts when hangchecking is disabled")
Testcase: igt/gem_ctx_persistence/queued
Signed-off-by: Chris Wilson <[email protected]>
Cc: Tvrtko Ursulin <[email protected]>
Cc: Mika Kuoppala <[email protected]>
Cc: Matthew Auld <[email protected]>
Reviewed-by: Matthew Auld <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent 2b73b35 commit 4a31741

File tree

1 file changed

+32
-15
lines changed

1 file changed

+32
-15
lines changed

drivers/gpu/drm/i915/gem/i915_gem_context.c

Lines changed: 32 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -333,10 +333,8 @@ static bool __cancel_engine(struct intel_engine_cs *engine)
333333
return __reset_engine(engine);
334334
}
335335

336-
static struct intel_engine_cs *
337-
active_engine(struct dma_fence *fence, struct intel_context *ce)
336+
static struct intel_engine_cs *__active_engine(struct i915_request *rq)
338337
{
339-
struct i915_request *rq = to_request(fence);
340338
struct intel_engine_cs *engine, *locked;
341339

342340
/*
@@ -360,6 +358,29 @@ active_engine(struct dma_fence *fence, struct intel_context *ce)
360358
return engine;
361359
}
362360

361+
static struct intel_engine_cs *active_engine(struct intel_context *ce)
362+
{
363+
struct intel_engine_cs *engine = NULL;
364+
struct i915_request *rq;
365+
366+
if (!ce->timeline)
367+
return NULL;
368+
369+
rcu_read_lock();
370+
list_for_each_entry_reverse(rq, &ce->timeline->requests, link) {
371+
if (i915_request_completed(rq))
372+
break;
373+
374+
/* Check with the backend if the request is inflight */
375+
engine = __active_engine(rq);
376+
if (engine)
377+
break;
378+
}
379+
rcu_read_unlock();
380+
381+
return engine;
382+
}
383+
363384
static void kill_context(struct i915_gem_context *ctx)
364385
{
365386
struct i915_gem_engines_iter it;
@@ -383,17 +404,15 @@ static void kill_context(struct i915_gem_context *ctx)
383404
*/
384405
for_each_gem_engine(ce, __context_engines_static(ctx), it) {
385406
struct intel_engine_cs *engine;
386-
struct dma_fence *fence;
387-
388-
if (!ce->timeline)
389-
continue;
390407

391-
fence = i915_active_fence_get(&ce->timeline->last_request);
392-
if (!fence)
393-
continue;
394-
395-
/* Check with the backend if the request is still inflight */
396-
engine = active_engine(fence, ce);
408+
/*
409+
* Check the current active state of this context; if we
410+
* are currently executing on the GPU we need to evict
411+
* ourselves. On the other hand, if we haven't yet been
412+
* submitted to the GPU or if everything is complete,
413+
* we have nothing to do.
414+
*/
415+
engine = active_engine(ce);
397416

398417
/* First attempt to gracefully cancel the context */
399418
if (engine && !__cancel_engine(engine))
@@ -403,8 +422,6 @@ static void kill_context(struct i915_gem_context *ctx)
403422
* reset. We hope the collateral damage is worth it.
404423
*/
405424
__reset_context(ctx, engine);
406-
407-
dma_fence_put(fence);
408425
}
409426
}
410427

0 commit comments

Comments
 (0)