Skip to content

Commit 5bc4b43

Browse files
johnharr-intelrodrigovivi
authored andcommitted
drm/i915: Fix up locking around dumping requests lists
The debugfs dump of requests was confused about what state requires the execlist lock versus the GuC lock. There was also a bunch of duplicated messy code between it and the error capture code. So refactor the hung request search into a re-usable function. And reduce the span of the execlist state lock to only the execlist specific code paths. In order to do that, also move the report of hold count (which is an execlist only concept) from the top level dump function to the lower level execlist specific function. Also, move the execlist specific code into the execlist source file. v2: Rename some functions and move to more appropriate files (Daniele). v3: Rename new execlist dump function (Daniele) Fixes: dc0dad3 ("drm/i915/guc: Fix for error capture after full GPU reset with GuC") Signed-off-by: John Harrison <[email protected]> Reviewed-by: Daniele Ceraolo Spurio <[email protected]> Acked-by: Tvrtko Ursulin <[email protected]> Cc: Matthew Brost <[email protected]> Cc: Jani Nikula <[email protected]> Cc: Joonas Lahtinen <[email protected]> Cc: Rodrigo Vivi <[email protected]> Cc: Matt Roper <[email protected]> Cc: Umesh Nerlige Ramappa <[email protected]> Cc: Michael Cheng <[email protected]> Cc: Lucas De Marchi <[email protected]> Cc: Bruce Chang <[email protected]> Cc: Alan Previn <[email protected]> Cc: Matthew Auld <[email protected]> Link: https://patchwork.freedesktop.org/patch/msgid/[email protected] (cherry picked from commit a4be3dc) Signed-off-by: Rodrigo Vivi <[email protected]>
1 parent 86d8ddc commit 5bc4b43

File tree

5 files changed

+73
-62
lines changed

5 files changed

+73
-62
lines changed

drivers/gpu/drm/i915/gt/intel_engine.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -248,8 +248,8 @@ void intel_engine_dump_active_requests(struct list_head *requests,
248248
ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine,
249249
ktime_t *now);
250250

251-
struct i915_request *
252-
intel_engine_execlist_find_hung_request(struct intel_engine_cs *engine);
251+
void intel_engine_get_hung_entity(struct intel_engine_cs *engine,
252+
struct intel_context **ce, struct i915_request **rq);
253253

254254
u32 intel_engine_context_size(struct intel_gt *gt, u8 class);
255255
struct intel_context *

drivers/gpu/drm/i915/gt/intel_engine_cs.c

Lines changed: 37 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -2094,17 +2094,6 @@ static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
20942094
}
20952095
}
20962096

2097-
static unsigned long list_count(struct list_head *list)
2098-
{
2099-
struct list_head *pos;
2100-
unsigned long count = 0;
2101-
2102-
list_for_each(pos, list)
2103-
count++;
2104-
2105-
return count;
2106-
}
2107-
21082097
static unsigned long read_ul(void *p, size_t x)
21092098
{
21102099
return *(unsigned long *)(p + x);
@@ -2196,11 +2185,11 @@ void intel_engine_dump_active_requests(struct list_head *requests,
21962185
}
21972186
}
21982187

2199-
static void engine_dump_active_requests(struct intel_engine_cs *engine, struct drm_printer *m)
2188+
static void engine_dump_active_requests(struct intel_engine_cs *engine,
2189+
struct drm_printer *m)
22002190
{
2191+
struct intel_context *hung_ce = NULL;
22012192
struct i915_request *hung_rq = NULL;
2202-
struct intel_context *ce;
2203-
bool guc;
22042193

22052194
/*
22062195
* No need for an engine->irq_seqno_barrier() before the seqno reads.
@@ -2209,29 +2198,20 @@ static void engine_dump_active_requests(struct intel_engine_cs *engine, struct d
22092198
* But the intention here is just to report an instantaneous snapshot
22102199
* so that's fine.
22112200
*/
2212-
lockdep_assert_held(&engine->sched_engine->lock);
2201+
intel_engine_get_hung_entity(engine, &hung_ce, &hung_rq);
22132202

22142203
drm_printf(m, "\tRequests:\n");
22152204

2216-
guc = intel_uc_uses_guc_submission(&engine->gt->uc);
2217-
if (guc) {
2218-
ce = intel_engine_get_hung_context(engine);
2219-
if (ce)
2220-
hung_rq = intel_context_get_active_request(ce);
2221-
} else {
2222-
hung_rq = intel_engine_execlist_find_hung_request(engine);
2223-
if (hung_rq)
2224-
hung_rq = i915_request_get_rcu(hung_rq);
2225-
}
2226-
22272205
if (hung_rq)
22282206
engine_dump_request(hung_rq, m, "\t\thung");
2207+
else if (hung_ce)
2208+
drm_printf(m, "\t\tGot hung ce but no hung rq!\n");
22292209

2230-
if (guc)
2210+
if (intel_uc_uses_guc_submission(&engine->gt->uc))
22312211
intel_guc_dump_active_requests(engine, hung_rq, m);
22322212
else
2233-
intel_engine_dump_active_requests(&engine->sched_engine->requests,
2234-
hung_rq, m);
2213+
intel_execlists_dump_active_requests(engine, hung_rq, m);
2214+
22352215
if (hung_rq)
22362216
i915_request_put(hung_rq);
22372217
}
@@ -2243,7 +2223,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
22432223
struct i915_gpu_error * const error = &engine->i915->gpu_error;
22442224
struct i915_request *rq;
22452225
intel_wakeref_t wakeref;
2246-
unsigned long flags;
22472226
ktime_t dummy;
22482227

22492228
if (header) {
@@ -2280,13 +2259,8 @@ void intel_engine_dump(struct intel_engine_cs *engine,
22802259
i915_reset_count(error));
22812260
print_properties(engine, m);
22822261

2283-
spin_lock_irqsave(&engine->sched_engine->lock, flags);
22842262
engine_dump_active_requests(engine, m);
22852263

2286-
drm_printf(m, "\tOn hold?: %lu\n",
2287-
list_count(&engine->sched_engine->hold));
2288-
spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
2289-
22902264
drm_printf(m, "\tMMIO base: 0x%08x\n", engine->mmio_base);
22912265
wakeref = intel_runtime_pm_get_if_in_use(engine->uncore->rpm);
22922266
if (wakeref) {
@@ -2332,8 +2306,7 @@ intel_engine_create_virtual(struct intel_engine_cs **siblings,
23322306
return siblings[0]->cops->create_virtual(siblings, count, flags);
23332307
}
23342308

2335-
struct i915_request *
2336-
intel_engine_execlist_find_hung_request(struct intel_engine_cs *engine)
2309+
static struct i915_request *engine_execlist_find_hung_request(struct intel_engine_cs *engine)
23372310
{
23382311
struct i915_request *request, *active = NULL;
23392312

@@ -2385,6 +2358,33 @@ intel_engine_execlist_find_hung_request(struct intel_engine_cs *engine)
23852358
return active;
23862359
}
23872360

2361+
void intel_engine_get_hung_entity(struct intel_engine_cs *engine,
2362+
struct intel_context **ce, struct i915_request **rq)
2363+
{
2364+
unsigned long flags;
2365+
2366+
*ce = intel_engine_get_hung_context(engine);
2367+
if (*ce) {
2368+
intel_engine_clear_hung_context(engine);
2369+
2370+
*rq = intel_context_get_active_request(*ce);
2371+
return;
2372+
}
2373+
2374+
/*
2375+
* Getting here with GuC enabled means it is a forced error capture
2376+
* with no actual hang. So, no need to attempt the execlist search.
2377+
*/
2378+
if (intel_uc_uses_guc_submission(&engine->gt->uc))
2379+
return;
2380+
2381+
spin_lock_irqsave(&engine->sched_engine->lock, flags);
2382+
*rq = engine_execlist_find_hung_request(engine);
2383+
if (*rq)
2384+
*rq = i915_request_get_rcu(*rq);
2385+
spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
2386+
}
2387+
23882388
void xehp_enable_ccs_engines(struct intel_engine_cs *engine)
23892389
{
23902390
/*

drivers/gpu/drm/i915/gt/intel_execlists_submission.c

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4148,6 +4148,33 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
41484148
spin_unlock_irqrestore(&sched_engine->lock, flags);
41494149
}
41504150

4151+
static unsigned long list_count(struct list_head *list)
4152+
{
4153+
struct list_head *pos;
4154+
unsigned long count = 0;
4155+
4156+
list_for_each(pos, list)
4157+
count++;
4158+
4159+
return count;
4160+
}
4161+
4162+
void intel_execlists_dump_active_requests(struct intel_engine_cs *engine,
4163+
struct i915_request *hung_rq,
4164+
struct drm_printer *m)
4165+
{
4166+
unsigned long flags;
4167+
4168+
spin_lock_irqsave(&engine->sched_engine->lock, flags);
4169+
4170+
intel_engine_dump_active_requests(&engine->sched_engine->requests, hung_rq, m);
4171+
4172+
drm_printf(m, "\tOn hold?: %lu\n",
4173+
list_count(&engine->sched_engine->hold));
4174+
4175+
spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
4176+
}
4177+
41514178
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
41524179
#include "selftest_execlists.c"
41534180
#endif

drivers/gpu/drm/i915/gt/intel_execlists_submission.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
3232
int indent),
3333
unsigned int max);
3434

35+
void intel_execlists_dump_active_requests(struct intel_engine_cs *engine,
36+
struct i915_request *hung_rq,
37+
struct drm_printer *m);
38+
3539
bool
3640
intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine);
3741

drivers/gpu/drm/i915/i915_gpu_error.c

Lines changed: 3 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1596,35 +1596,15 @@ capture_engine(struct intel_engine_cs *engine,
15961596
{
15971597
struct intel_engine_capture_vma *capture = NULL;
15981598
struct intel_engine_coredump *ee;
1599-
struct intel_context *ce;
1599+
struct intel_context *ce = NULL;
16001600
struct i915_request *rq = NULL;
1601-
unsigned long flags;
16021601

16031602
ee = intel_engine_coredump_alloc(engine, ALLOW_FAIL, dump_flags);
16041603
if (!ee)
16051604
return NULL;
16061605

1607-
ce = intel_engine_get_hung_context(engine);
1608-
if (ce) {
1609-
intel_engine_clear_hung_context(engine);
1610-
rq = intel_context_get_active_request(ce);
1611-
if (!rq || !i915_request_started(rq))
1612-
goto no_request_capture;
1613-
} else {
1614-
/*
1615-
* Getting here with GuC enabled means it is a forced error capture
1616-
* with no actual hang. So, no need to attempt the execlist search.
1617-
*/
1618-
if (!intel_uc_uses_guc_submission(&engine->gt->uc)) {
1619-
spin_lock_irqsave(&engine->sched_engine->lock, flags);
1620-
rq = intel_engine_execlist_find_hung_request(engine);
1621-
if (rq)
1622-
rq = i915_request_get_rcu(rq);
1623-
spin_unlock_irqrestore(&engine->sched_engine->lock,
1624-
flags);
1625-
}
1626-
}
1627-
if (!rq)
1606+
intel_engine_get_hung_entity(engine, &ce, &rq);
1607+
if (!rq || !i915_request_started(rq))
16281608
goto no_request_capture;
16291609

16301610
capture = intel_engine_coredump_add_request(ee, rq, ATOMIC_MAYFAIL);

0 commit comments

Comments
 (0)