Skip to content

Commit a857add

Browse files
ickle-intel authored and Andi Shyti committed
drm/i915/gt: Mark the GT as dead when mmio is unreliable
After we detect that mmio is returning all 0xff, we believe that the GPU has dropped off the pci bus and is dead. Mark the device as wedged such that we can propagate the failure back to userspace and wait for recovery.

Signed-off-by: Chris Wilson <[email protected]>
Signed-off-by: Andi Shyti <[email protected]>
Reviewed-by: Jonathan Cavitt <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent b7b930d commit a857add

File tree

4 files changed

+24
-3
lines changed

4 files changed

+24
-3
lines changed

drivers/gpu/drm/i915/gt/intel_gt.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,4 +208,10 @@ enum i915_map_type intel_gt_coherent_map_type(struct intel_gt *gt,
208208
void intel_gt_bind_context_set_ready(struct intel_gt *gt);
209209
void intel_gt_bind_context_set_unready(struct intel_gt *gt);
210210
bool intel_gt_is_bind_context_ready(struct intel_gt *gt);
211+
212+
/*
 * intel_gt_set_wedged_async - request that @gt be marked wedged from a worker
 * @gt: the GT whose wedge work item should be queued
 *
 * Queues gt->wedge on system_highpri_wq and returns; the wedging itself is
 * performed later by the work handler (set_wedged_work(), installed by
 * INIT_WORK() in intel_gt_init_reset()). In this patch the caller is the
 * forcewake ack-clear path in intel_uncore.c, which uses it when the
 * forcewake register reads back as all ones.
 */
static inline void intel_gt_set_wedged_async(struct intel_gt *gt)
213+
{
214+
queue_work(system_highpri_wq, &gt->wedge);
215+
}
216+
211217
#endif /* __INTEL_GT_H__ */

drivers/gpu/drm/i915/gt/intel_gt_types.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,8 @@ struct intel_gt {
292292
struct gt_defaults defaults;
293293
struct kobject *sysfs_defaults;
294294

295+
struct work_struct wedge;
296+
295297
struct i915_perf_gt perf;
296298

297299
/** link: &ggtt.gt_list */

drivers/gpu/drm/i915/gt/intel_reset.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1013,6 +1013,15 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
10131013
GT_TRACE(gt, "end\n");
10141014
}
10151015

1016+
/*
 * set_wedged_work - work handler that marks the owning GT as wedged
 * @w: the work_struct embedded in struct intel_gt as the 'wedge' member
 *
 * Used as the handler for gt->wedge (see INIT_WORK() in
 * intel_gt_init_reset()) and also called directly from intel_wedge_me().
 *
 * NOTE(review): this calls __intel_gt_set_wedged() directly rather than
 * going through intel_gt_set_wedged(); confirm that anything the public
 * entry point does around that call (its body is not shown here) is not
 * required on this path.
 */
static void set_wedged_work(struct work_struct *w)
1017+
{
1018+
/* Recover the enclosing intel_gt from its embedded work item. */
struct intel_gt *gt = container_of(w, struct intel_gt, wedge);
1019+
intel_wakeref_t wf;
1020+
1021+
/* Run the wedge under the runtime-PM scope for this GT's uncore. */
with_intel_runtime_pm(gt->uncore->rpm, wf)
1022+
__intel_gt_set_wedged(gt);
1023+
}
1024+
10161025
void intel_gt_set_wedged(struct intel_gt *gt)
10171026
{
10181027
intel_wakeref_t wakeref;
@@ -1614,6 +1623,7 @@ void intel_gt_init_reset(struct intel_gt *gt)
16141623
init_waitqueue_head(&gt->reset.queue);
16151624
mutex_init(&gt->reset.mutex);
16161625
init_srcu_struct(&gt->reset.backoff_srcu);
1626+
INIT_WORK(&gt->wedge, set_wedged_work);
16171627

16181628
/*
16191629
* While undesirable to wait inside the shrinker, complain anyway.
@@ -1640,7 +1650,7 @@ static void intel_wedge_me(struct work_struct *work)
16401650
struct intel_wedge_me *w = container_of(work, typeof(*w), work.work);
16411651

16421652
gt_err(w->gt, "%s timed out, cancelling all in-flight rendering.\n", w->name);
1643-
intel_gt_set_wedged(w->gt);
1653+
set_wedged_work(&w->gt->wedge);
16441654
}
16451655

16461656
void __intel_init_wedge(struct intel_wedge_me *w,

drivers/gpu/drm/i915/intel_uncore.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <drm/drm_managed.h>
2525
#include <linux/pm_runtime.h>
2626

27+
#include "gt/intel_gt.h"
2728
#include "gt/intel_engine_regs.h"
2829
#include "gt/intel_gt_regs.h"
2930

@@ -180,14 +181,16 @@ fw_domain_wait_ack_clear(const struct intel_uncore_forcewake_domain *d)
180181
if (!wait_ack_clear(d, FORCEWAKE_KERNEL))
181182
return;
182183

183-
if (fw_ack(d) == ~0)
184+
if (fw_ack(d) == ~0) {
184185
drm_err(&d->uncore->i915->drm,
185186
"%s: MMIO unreliable (forcewake register returns 0xFFFFFFFF)!\n",
186187
intel_uncore_forcewake_domain_to_str(d->id));
187-
else
188+
intel_gt_set_wedged_async(d->uncore->gt);
189+
} else {
188190
drm_err(&d->uncore->i915->drm,
189191
"%s: timed out waiting for forcewake ack to clear.\n",
190192
intel_uncore_forcewake_domain_to_str(d->id));
193+
}
191194

192195
add_taint_for_CI(d->uncore->i915, TAINT_WARN); /* CI now unreliable */
193196
}

0 commit comments

Comments
 (0)