Skip to content

Commit 86d8ddc

Browse files
johnharr-intelrodrigovivi
authored andcommitted
drm/i915: Fix request ref counting during error capture & debugfs dump
When GuC support was added to error capture, the reference counting around the request object was broken. Fix it up. The context based search manages the spinlocking around the search internally. So it needs to grab the reference count internally as well. The execlist only request based search relies on external locking, so it needs an external reference count but within the spinlock not outside it. The only other caller of the context based search is the code for dumping engine state to debugfs. That code wasn't previously getting an explicit reference at all as it does everything while holding the execlist specific spinlock. So, that needs updaing as well as that spinlock doesn't help when using GuC submission. Rather than trying to conditionally get/put depending on submission model, just change it to always do the get/put. v2: Explicitly document adding an extra blank line in some dense code (Andy Shevchenko). Fix multiple potential null pointer derefs in case of no request found (some spotted by Tvrtko, but there was more!). Also fix a leaked request in case of !started and another in __guc_reset_context now that intel_context_find_active_request is actually reference counting the returned request. v3: Add a _get suffix to intel_context_find_active_request now that it grabs a reference (Daniele). v4: Split the intel_guc_find_hung_context change to a separate patch and rename intel_context_find_active_request_get to intel_context_get_active_request (Tvrtko). v5: s/locking/reference counting/ in commit message (Tvrtko) Fixes: dc0dad3 ("drm/i915/guc: Fix for error capture after full GPU reset with GuC") Fixes: 573ba12 ("drm/i915/guc: Capture error state on context reset") Signed-off-by: John Harrison <[email protected]> Reviewed-by: Daniele Ceraolo Spurio <[email protected]> Acked-by: Tvrtko Ursulin <[email protected]> Cc: Matthew Brost <[email protected]> Cc: Jani Nikula <[email protected]> Cc: Joonas Lahtinen <[email protected]> Cc: Rodrigo Vivi <[email protected]> Cc: Andrzej Hajda <[email protected]> Cc: Matthew Auld <[email protected]> Cc: Matt Roper <[email protected]> Cc: Umesh Nerlige Ramappa <[email protected]> Cc: Michael Cheng <[email protected]> Cc: Lucas De Marchi <[email protected]> Cc: Tejas Upadhyay <[email protected]> Cc: Andy Shevchenko <[email protected]> Cc: Aravind Iddamsetty <[email protected]> Cc: Alan Previn <[email protected]> Cc: Bruce Chang <[email protected]> Link: https://patchwork.freedesktop.org/patch/msgid/[email protected] (cherry picked from commit 3700e35) Signed-off-by: Rodrigo Vivi <[email protected]>
1 parent 87b04e5 commit 86d8ddc

File tree

5 files changed

+17
-12
lines changed

5 files changed

+17
-12
lines changed

drivers/gpu/drm/i915/gt/intel_context.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -528,7 +528,7 @@ struct i915_request *intel_context_create_request(struct intel_context *ce)
528528
return rq;
529529
}
530530

531-
struct i915_request *intel_context_find_active_request(struct intel_context *ce)
531+
struct i915_request *intel_context_get_active_request(struct intel_context *ce)
532532
{
533533
struct intel_context *parent = intel_context_to_parent(ce);
534534
struct i915_request *rq, *active = NULL;
@@ -552,6 +552,8 @@ struct i915_request *intel_context_find_active_request(struct intel_context *ce)
552552

553553
active = rq;
554554
}
555+
if (active)
556+
active = i915_request_get_rcu(active);
555557
spin_unlock_irqrestore(&parent->guc_state.lock, flags);
556558

557559
return active;

drivers/gpu/drm/i915/gt/intel_context.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -268,8 +268,7 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
268268

269269
struct i915_request *intel_context_create_request(struct intel_context *ce);
270270

271-
struct i915_request *
272-
intel_context_find_active_request(struct intel_context *ce);
271+
struct i915_request *intel_context_get_active_request(struct intel_context *ce);
273272

274273
static inline bool intel_context_is_barrier(const struct intel_context *ce)
275274
{

drivers/gpu/drm/i915/gt/intel_engine_cs.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2217,9 +2217,11 @@ static void engine_dump_active_requests(struct intel_engine_cs *engine, struct d
22172217
if (guc) {
22182218
ce = intel_engine_get_hung_context(engine);
22192219
if (ce)
2220-
hung_rq = intel_context_find_active_request(ce);
2220+
hung_rq = intel_context_get_active_request(ce);
22212221
} else {
22222222
hung_rq = intel_engine_execlist_find_hung_request(engine);
2223+
if (hung_rq)
2224+
hung_rq = i915_request_get_rcu(hung_rq);
22232225
}
22242226

22252227
if (hung_rq)
@@ -2230,6 +2232,8 @@ static void engine_dump_active_requests(struct intel_engine_cs *engine, struct d
22302232
else
22312233
intel_engine_dump_active_requests(&engine->sched_engine->requests,
22322234
hung_rq, m);
2235+
if (hung_rq)
2236+
i915_request_put(hung_rq);
22332237
}
22342238

22352239
void intel_engine_dump(struct intel_engine_cs *engine,

drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1702,7 +1702,7 @@ static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t st
17021702
goto next_context;
17031703

17041704
guilty = false;
1705-
rq = intel_context_find_active_request(ce);
1705+
rq = intel_context_get_active_request(ce);
17061706
if (!rq) {
17071707
head = ce->ring->tail;
17081708
goto out_replay;
@@ -1715,6 +1715,7 @@ static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t st
17151715
head = intel_ring_wrap(ce->ring, rq->head);
17161716

17171717
__i915_request_reset(rq, guilty);
1718+
i915_request_put(rq);
17181719
out_replay:
17191720
guc_reset_state(ce, head, guilty);
17201721
next_context:

drivers/gpu/drm/i915/i915_gpu_error.c

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1607,7 +1607,7 @@ capture_engine(struct intel_engine_cs *engine,
16071607
ce = intel_engine_get_hung_context(engine);
16081608
if (ce) {
16091609
intel_engine_clear_hung_context(engine);
1610-
rq = intel_context_find_active_request(ce);
1610+
rq = intel_context_get_active_request(ce);
16111611
if (!rq || !i915_request_started(rq))
16121612
goto no_request_capture;
16131613
} else {
@@ -1618,21 +1618,18 @@ capture_engine(struct intel_engine_cs *engine,
16181618
if (!intel_uc_uses_guc_submission(&engine->gt->uc)) {
16191619
spin_lock_irqsave(&engine->sched_engine->lock, flags);
16201620
rq = intel_engine_execlist_find_hung_request(engine);
1621+
if (rq)
1622+
rq = i915_request_get_rcu(rq);
16211623
spin_unlock_irqrestore(&engine->sched_engine->lock,
16221624
flags);
16231625
}
16241626
}
1625-
if (rq)
1626-
rq = i915_request_get_rcu(rq);
1627-
16281627
if (!rq)
16291628
goto no_request_capture;
16301629

16311630
capture = intel_engine_coredump_add_request(ee, rq, ATOMIC_MAYFAIL);
1632-
if (!capture) {
1633-
i915_request_put(rq);
1631+
if (!capture)
16341632
goto no_request_capture;
1635-
}
16361633
if (dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE)
16371634
intel_guc_capture_get_matching_node(engine->gt, ee, ce);
16381635

@@ -1642,6 +1639,8 @@ capture_engine(struct intel_engine_cs *engine,
16421639
return ee;
16431640

16441641
no_request_capture:
1642+
if (rq)
1643+
i915_request_put(rq);
16451644
kfree(ee);
16461645
return NULL;
16471646
}

0 commit comments

Comments
 (0)