Skip to content

Commit 9b4d059

Browse files
tursulin authored and danvet committed
drm/i915: Request watchdog infrastructure
Prepares the plumbing for setting request/fence expiration time. All code is put in place but is never activated, because the ability to actually configure the timer is still missing. Outline of the basic operation: A timer is started when a request is ready for execution. If the request completes (retires) before the timer fires, the timer is cancelled and nothing further happens. If the timer fires, the request is added to a lockless list and a worker is queued. The purpose of this is twofold: a) it allows request cancellation from a more friendly context and b) it coalesces multiple expirations into a single event of consuming the list. The worker locklessly consumes the list of expired requests and cancels them all using the previously added i915_request_cancel(). The associated timeout value is stored in rq->context->watchdog.timeout_us. v2: * Log expiration. v3: * Include more information about user timeline in the log message. v4: * Remove obsolete comment and fix formatting. (Matt) Signed-off-by: Tvrtko Ursulin <[email protected]> Cc: Daniel Vetter <[email protected]> Reviewed-by: Matthew Auld <[email protected]> Signed-off-by: Daniel Vetter <[email protected]> Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent 90a79a9 commit 9b4d059

File tree

8 files changed

+106
-0
lines changed

8 files changed

+106
-0
lines changed

drivers/gpu/drm/i915/gt/intel_context_types.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,10 @@ struct intel_context {
9797
#define CONTEXT_FORCE_SINGLE_SUBMISSION 7
9898
#define CONTEXT_NOPREEMPT 8
9999

100+
struct {
101+
u64 timeout_us;
102+
} watchdog;
103+
100104
u32 *lrc_reg_state;
101105
union {
102106
struct {

drivers/gpu/drm/i915/gt/intel_execlists_submission.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,15 @@
66
#ifndef __INTEL_EXECLISTS_SUBMISSION_H__
77
#define __INTEL_EXECLISTS_SUBMISSION_H__
88

9+
#include <linux/llist.h>
910
#include <linux/types.h>
1011

1112
struct drm_printer;
1213

1314
struct i915_request;
1415
struct intel_context;
1516
struct intel_engine_cs;
17+
struct intel_gt;
1618

1719
enum {
1820
INTEL_CONTEXT_SCHEDULE_IN = 0,

drivers/gpu/drm/i915/gt/intel_gt.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
2929
INIT_LIST_HEAD(&gt->closed_vma);
3030
spin_lock_init(&gt->closed_lock);
3131

32+
init_llist_head(&gt->watchdog.list);
33+
INIT_WORK(&gt->watchdog.work, intel_gt_watchdog_work);
34+
3235
intel_gt_init_buffer_pool(gt);
3336
intel_gt_init_reset(gt);
3437
intel_gt_init_requests(gt);

drivers/gpu/drm/i915/gt/intel_gt.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,4 +77,6 @@ static inline bool intel_gt_is_wedged(const struct intel_gt *gt)
7777
void intel_gt_info_print(const struct intel_gt_info *info,
7878
struct drm_printer *p);
7979

80+
void intel_gt_watchdog_work(struct work_struct *work);
81+
8082
#endif /* __INTEL_GT_H__ */

drivers/gpu/drm/i915/gt/intel_gt_requests.c

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "i915_drv.h" /* for_each_engine() */
1010
#include "i915_request.h"
1111
#include "intel_engine_heartbeat.h"
12+
#include "intel_execlists_submission.h"
1213
#include "intel_gt.h"
1314
#include "intel_gt_pm.h"
1415
#include "intel_gt_requests.h"
@@ -243,4 +244,31 @@ void intel_gt_fini_requests(struct intel_gt *gt)
243244
{
244245
/* Wait until the work is marked as finished before unloading! */
245246
cancel_delayed_work_sync(&gt->requests.retire_work);
247+
248+
flush_work(&gt->watchdog.work);
249+
}
250+
251+
void intel_gt_watchdog_work(struct work_struct *work)
252+
{
253+
struct intel_gt *gt =
254+
container_of(work, typeof(*gt), watchdog.work);
255+
struct i915_request *rq, *rn;
256+
struct llist_node *first;
257+
258+
first = llist_del_all(&gt->watchdog.list);
259+
if (!first)
260+
return;
261+
262+
llist_for_each_entry_safe(rq, rn, first, watchdog.link) {
263+
if (!i915_request_completed(rq)) {
264+
struct dma_fence *f = &rq->fence;
265+
266+
pr_notice("Fence expiration time out i915-%s:%s:%llx!\n",
267+
f->ops->get_driver_name(f),
268+
f->ops->get_timeline_name(f),
269+
f->seqno);
270+
i915_request_cancel(rq, -EINTR);
271+
}
272+
i915_request_put(rq);
273+
}
246274
}

drivers/gpu/drm/i915/gt/intel_gt_types.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,12 @@
88

99
#include <linux/ktime.h>
1010
#include <linux/list.h>
11+
#include <linux/llist.h>
1112
#include <linux/mutex.h>
1213
#include <linux/notifier.h>
1314
#include <linux/spinlock.h>
1415
#include <linux/types.h>
16+
#include <linux/workqueue.h>
1517

1618
#include "uc/intel_uc.h"
1719

@@ -52,6 +54,11 @@ struct intel_gt {
5254
struct delayed_work retire_work;
5355
} requests;
5456

57+
struct {
58+
struct llist_head list;
59+
struct work_struct work;
60+
} watchdog;
61+
5562
struct intel_wakeref wakeref;
5663
atomic_t user_wakeref;
5764

drivers/gpu/drm/i915/i915_request.c

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,53 @@ static void remove_from_engine(struct i915_request *rq)
321321
__notify_execute_cb_imm(rq);
322322
}
323323

324+
static void __rq_init_watchdog(struct i915_request *rq)
325+
{
326+
rq->watchdog.timer.function = NULL;
327+
}
328+
329+
static enum hrtimer_restart __rq_watchdog_expired(struct hrtimer *hrtimer)
330+
{
331+
struct i915_request *rq =
332+
container_of(hrtimer, struct i915_request, watchdog.timer);
333+
struct intel_gt *gt = rq->engine->gt;
334+
335+
if (!i915_request_completed(rq)) {
336+
if (llist_add(&rq->watchdog.link, &gt->watchdog.list))
337+
schedule_work(&gt->watchdog.work);
338+
} else {
339+
i915_request_put(rq);
340+
}
341+
342+
return HRTIMER_NORESTART;
343+
}
344+
345+
static void __rq_arm_watchdog(struct i915_request *rq)
346+
{
347+
struct i915_request_watchdog *wdg = &rq->watchdog;
348+
struct intel_context *ce = rq->context;
349+
350+
if (!ce->watchdog.timeout_us)
351+
return;
352+
353+
hrtimer_init(&wdg->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
354+
wdg->timer.function = __rq_watchdog_expired;
355+
hrtimer_start_range_ns(&wdg->timer,
356+
ns_to_ktime(ce->watchdog.timeout_us *
357+
NSEC_PER_USEC),
358+
NSEC_PER_MSEC,
359+
HRTIMER_MODE_REL);
360+
i915_request_get(rq);
361+
}
362+
363+
static void __rq_cancel_watchdog(struct i915_request *rq)
364+
{
365+
struct i915_request_watchdog *wdg = &rq->watchdog;
366+
367+
if (wdg->timer.function && hrtimer_try_to_cancel(&wdg->timer) > 0)
368+
i915_request_put(rq);
369+
}
370+
324371
bool i915_request_retire(struct i915_request *rq)
325372
{
326373
if (!__i915_request_is_complete(rq))
@@ -332,6 +379,8 @@ bool i915_request_retire(struct i915_request *rq)
332379
trace_i915_request_retire(rq);
333380
i915_request_mark_complete(rq);
334381

382+
__rq_cancel_watchdog(rq);
383+
335384
/*
336385
* We know the GPU must have read the request to have
337386
* sent us the seqno + interrupt, so use the position
@@ -761,6 +810,8 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
761810

762811
if (unlikely(fence->error))
763812
i915_request_set_error_once(request, fence->error);
813+
else
814+
__rq_arm_watchdog(request);
764815

765816
/*
766817
* We need to serialize use of the submit_request() callback
@@ -947,6 +998,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
947998

948999
/* No zalloc, everything must be cleared after use */
9491000
rq->batch = NULL;
1001+
__rq_init_watchdog(rq);
9501002
GEM_BUG_ON(rq->capture_list);
9511003
GEM_BUG_ON(!llist_empty(&rq->execute_cb));
9521004

drivers/gpu/drm/i915/i915_request.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,9 @@
2626
#define I915_REQUEST_H
2727

2828
#include <linux/dma-fence.h>
29+
#include <linux/hrtimer.h>
2930
#include <linux/irq_work.h>
31+
#include <linux/llist.h>
3032
#include <linux/lockdep.h>
3133

3234
#include "gem/i915_gem_context_types.h"
@@ -277,6 +279,12 @@ struct i915_request {
277279
/** timeline->request entry for this request */
278280
struct list_head link;
279281

282+
/** Watchdog support fields. */
283+
struct i915_request_watchdog {
284+
struct llist_node link;
285+
struct hrtimer timer;
286+
} watchdog;
287+
280288
I915_SELFTEST_DECLARE(struct {
281289
struct list_head link;
282290
unsigned long delay;

0 commit comments

Comments
 (0)