Skip to content

Commit 38b237e

Browse files
ickle authored and danvet committed
drm/i915: Individual request cancellation
Currently, we cancel outstanding requests within a context when the context is closed. We may also want to cancel individual requests using the same graceful preemption mechanism. v2 (Tvrtko): * Cancel waiters carefully considering no timeline lock and RCU. * Fixed selftests. v3 (Tvrtko): * Remove error propagation to waiters for now. v4 (Tvrtko): * Rebase for extracted i915_request_active_engine. (Matt) Signed-off-by: Chris Wilson <[email protected]> Signed-off-by: Tvrtko Ursulin <[email protected]> Reviewed-by: Matthew Auld <[email protected]> [danvet: Resolve conflict because intel_engine_flush_scheduler is still called intel_engine_flush_submission] Signed-off-by: Daniel Vetter <[email protected]> Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent 7dbc19d commit 38b237e

File tree

5 files changed

+242
-6
lines changed

5 files changed

+242
-6
lines changed

drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,7 @@ int intel_engine_pulse(struct intel_engine_cs *engine)
279279
mutex_unlock(&ce->timeline->mutex);
280280
}
281281

282+
intel_engine_flush_submission(engine);
282283
intel_engine_pm_put(engine);
283284
return err;
284285
}

drivers/gpu/drm/i915/gt/intel_execlists_submission.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,11 @@ static void reset_active(struct i915_request *rq,
470470
ce->lrc.lrca = lrc_update_regs(ce, engine, head);
471471
}
472472

473+
static bool bad_request(const struct i915_request *rq)
474+
{
475+
return rq->fence.error && i915_request_started(rq);
476+
}
477+
473478
static struct intel_engine_cs *
474479
__execlists_schedule_in(struct i915_request *rq)
475480
{
@@ -482,7 +487,7 @@ __execlists_schedule_in(struct i915_request *rq)
482487
!intel_engine_has_heartbeat(engine)))
483488
intel_context_set_banned(ce);
484489

485-
if (unlikely(intel_context_is_banned(ce)))
490+
if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
486491
reset_active(rq, engine);
487492

488493
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
@@ -1208,7 +1213,7 @@ static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
12081213
return 0;
12091214

12101215
/* Force a fast reset for terminated contexts (ignoring sysfs!) */
1211-
if (unlikely(intel_context_is_banned(rq->context)))
1216+
if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq)))
12121217
return 1;
12131218

12141219
return READ_ONCE(engine->props.preempt_timeout_ms);

drivers/gpu/drm/i915/i915_request.c

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,10 @@
3333
#include "gem/i915_gem_context.h"
3434
#include "gt/intel_breadcrumbs.h"
3535
#include "gt/intel_context.h"
36+
#include "gt/intel_engine.h"
37+
#include "gt/intel_engine_heartbeat.h"
3638
#include "gt/intel_gpu_commands.h"
39+
#include "gt/intel_reset.h"
3740
#include "gt/intel_ring.h"
3841
#include "gt/intel_rps.h"
3942

@@ -542,20 +545,22 @@ void __i915_request_skip(struct i915_request *rq)
542545
rq->infix = rq->postfix;
543546
}
544547

545-
void i915_request_set_error_once(struct i915_request *rq, int error)
548+
bool i915_request_set_error_once(struct i915_request *rq, int error)
546549
{
547550
int old;
548551

549552
GEM_BUG_ON(!IS_ERR_VALUE((long)error));
550553

551554
if (i915_request_signaled(rq))
552-
return;
555+
return false;
553556

554557
old = READ_ONCE(rq->fence.error);
555558
do {
556559
if (fatal_error(old))
557-
return;
560+
return false;
558561
} while (!try_cmpxchg(&rq->fence.error, &old, error));
562+
563+
return true;
559564
}
560565

561566
void i915_request_mark_eio(struct i915_request *rq)
@@ -722,6 +727,28 @@ void i915_request_unsubmit(struct i915_request *request)
722727
spin_unlock_irqrestore(&engine->active.lock, flags);
723728
}
724729

730+
/*
 * Evict a cancelled request from the hardware. If the request is
 * currently active on an engine, send a heartbeat pulse to trigger
 * graceful preemption; should the pulse fail, escalate to an engine
 * reset to dislodge the request.
 */
static void __cancel_request(struct i915_request *rq)
{
	struct intel_engine_cs *engine = NULL;

	/* Engine is only set if the request is currently active on one. */
	i915_request_active_engine(rq, &engine);

	if (engine && intel_engine_pulse(engine))
		intel_gt_handle_error(engine->gt, engine->mask, 0,
				      "request cancellation by %s",
				      current->comm);
}
741+
742+
/*
 * Cancel an individual request with @error (a negative errno) using the
 * same graceful preemption mechanism as context closure. Only the first
 * caller to mark the request proceeds with the eviction.
 */
void i915_request_cancel(struct i915_request *rq, int error)
{
	/* Lost the race to set the error, or request already signaled. */
	if (!i915_request_set_error_once(rq, error))
		return;

	/*
	 * NOTE(review): SENTINEL appears to isolate the cancelled request
	 * during submission (no coalescing with other requests) — confirm
	 * against the scheduler's handling of I915_FENCE_FLAG_SENTINEL.
	 */
	set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags);

	__cancel_request(rq);
}
751+
725752
static int __i915_sw_fence_call
726753
submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
727754
{

drivers/gpu/drm/i915/i915_request.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,7 @@ struct i915_request * __must_check
300300
i915_request_create(struct intel_context *ce);
301301

302302
void __i915_request_skip(struct i915_request *rq);
303-
void i915_request_set_error_once(struct i915_request *rq, int error);
303+
bool i915_request_set_error_once(struct i915_request *rq, int error);
304304
void i915_request_mark_eio(struct i915_request *rq);
305305

306306
struct i915_request *__i915_request_commit(struct i915_request *request);
@@ -356,6 +356,8 @@ void i915_request_submit(struct i915_request *request);
356356
void __i915_request_unsubmit(struct i915_request *request);
357357
void i915_request_unsubmit(struct i915_request *request);
358358

359+
void i915_request_cancel(struct i915_request *rq, int error);
360+
359361
long i915_request_wait(struct i915_request *rq,
360362
unsigned int flags,
361363
long timeout)

drivers/gpu/drm/i915/selftests/i915_request.c

Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -609,6 +609,206 @@ static int live_nop_request(void *arg)
609609
return err;
610610
}
611611

612+
/*
 * Selftest: cancel a request *before* it is submitted (while inactive).
 * The request is cancelled prior to i915_request_add(); once submitted
 * it must complete promptly carrying the -EINTR fence error.
 */
static int __cancel_inactive(struct intel_engine_cs *engine)
{
	struct intel_context *ce;
	struct igt_spinner spin;
	struct i915_request *rq;
	int err = 0;

	if (igt_spinner_init(&spin, engine->gt))
		return -ENOMEM;

	ce = intel_context_create(engine);
	if (IS_ERR(ce)) {
		err = PTR_ERR(ce);
		goto out_spin;
	}

	rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_ce;
	}

	pr_debug("%s: Cancelling inactive request\n", engine->name);
	/* Cancel while still unsubmitted — the point of this subtest. */
	i915_request_cancel(rq, -EINTR);
	i915_request_get(rq);
	i915_request_add(rq);

	/* A cancelled request must still signal, well within HZ/5. */
	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		struct drm_printer p = drm_info_printer(engine->i915->drm.dev);

		pr_err("%s: Failed to cancel inactive request\n", engine->name);
		intel_engine_dump(engine, &p, "%s\n", engine->name);
		err = -ETIME;
		goto out_rq;
	}

	/* The fence must carry the error we cancelled with. */
	if (rq->fence.error != -EINTR) {
		pr_err("%s: fence not cancelled (%u)\n",
		       engine->name, rq->fence.error);
		err = -EINVAL;
	}

out_rq:
	i915_request_put(rq);
out_ce:
	intel_context_put(ce);
out_spin:
	igt_spinner_fini(&spin);
	if (err)
		pr_err("%s: %s error %d\n", __func__, engine->name, err);
	return err;
}
664+
665+
/*
 * Selftest: cancel a request while it is actively spinning on the GPU.
 * Cancellation must preempt the spinner and complete the fence with
 * -EINTR, without waiting for the spinner to end of its own accord.
 */
static int __cancel_active(struct intel_engine_cs *engine)
{
	struct intel_context *ce;
	struct igt_spinner spin;
	struct i915_request *rq;
	int err = 0;

	if (igt_spinner_init(&spin, engine->gt))
		return -ENOMEM;

	ce = intel_context_create(engine);
	if (IS_ERR(ce)) {
		err = PTR_ERR(ce);
		goto out_spin;
	}

	rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_ce;
	}

	pr_debug("%s: Cancelling active request\n", engine->name);
	i915_request_get(rq);
	i915_request_add(rq);
	/* Ensure the request is running on the GPU before cancelling. */
	if (!igt_wait_for_spinner(&spin, rq)) {
		struct drm_printer p = drm_info_printer(engine->i915->drm.dev);

		pr_err("Failed to start spinner on %s\n", engine->name);
		intel_engine_dump(engine, &p, "%s\n", engine->name);
		err = -ETIME;
		goto out_rq;
	}
	i915_request_cancel(rq, -EINTR);

	/* Cancellation of an active request must complete it promptly. */
	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		struct drm_printer p = drm_info_printer(engine->i915->drm.dev);

		pr_err("%s: Failed to cancel active request\n", engine->name);
		intel_engine_dump(engine, &p, "%s\n", engine->name);
		err = -ETIME;
		goto out_rq;
	}

	/* The fence must carry the error we cancelled with. */
	if (rq->fence.error != -EINTR) {
		pr_err("%s: fence not cancelled (%u)\n",
		       engine->name, rq->fence.error);
		err = -EINVAL;
	}

out_rq:
	i915_request_put(rq);
out_ce:
	intel_context_put(ce);
out_spin:
	igt_spinner_fini(&spin);
	if (err)
		pr_err("%s: %s error %d\n", __func__, engine->name, err);
	return err;
}
725+
726+
/*
 * Selftest: cancel a request that has already completed. Cancelling a
 * signaled request must be a no-op — in particular, no error may be
 * installed on the fence after the fact.
 */
static int __cancel_completed(struct intel_engine_cs *engine)
{
	struct intel_context *ce;
	struct igt_spinner spin;
	struct i915_request *rq;
	int err = 0;

	if (igt_spinner_init(&spin, engine->gt))
		return -ENOMEM;

	ce = intel_context_create(engine);
	if (IS_ERR(ce)) {
		err = PTR_ERR(ce);
		goto out_spin;
	}

	rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_ce;
	}
	/* End the spinner immediately so the request can run to completion. */
	igt_spinner_end(&spin);
	i915_request_get(rq);
	i915_request_add(rq);

	/* Wait for the request to complete before attempting cancellation. */
	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		err = -ETIME;
		goto out_rq;
	}

	pr_debug("%s: Cancelling completed request\n", engine->name);
	i915_request_cancel(rq, -EINTR);
	/* A completed request must be left untouched — no fence error. */
	if (rq->fence.error) {
		pr_err("%s: fence not cancelled (%u)\n",
		       engine->name, rq->fence.error);
		err = -EINVAL;
	}

out_rq:
	i915_request_put(rq);
out_ce:
	intel_context_put(ce);
out_spin:
	igt_spinner_fini(&spin);
	if (err)
		pr_err("%s: %s error %d\n", __func__, engine->name, err);
	return err;
}
774+
775+
static int live_cancel_request(void *arg)
776+
{
777+
struct drm_i915_private *i915 = arg;
778+
struct intel_engine_cs *engine;
779+
780+
/*
781+
* Check cancellation of requests. We expect to be able to immediately
782+
* cancel active requests, even if they are currently on the GPU.
783+
*/
784+
785+
for_each_uabi_engine(engine, i915) {
786+
struct igt_live_test t;
787+
int err, err2;
788+
789+
if (!intel_engine_has_preemption(engine))
790+
continue;
791+
792+
err = igt_live_test_begin(&t, i915, __func__, engine->name);
793+
if (err)
794+
return err;
795+
796+
err = __cancel_inactive(engine);
797+
if (err == 0)
798+
err = __cancel_active(engine);
799+
if (err == 0)
800+
err = __cancel_completed(engine);
801+
802+
err2 = igt_live_test_end(&t);
803+
if (err)
804+
return err;
805+
if (err2)
806+
return err2;
807+
}
808+
809+
return 0;
810+
}
811+
612812
static struct i915_vma *empty_batch(struct drm_i915_private *i915)
613813
{
614814
struct drm_i915_gem_object *obj;
@@ -1486,6 +1686,7 @@ int i915_request_live_selftests(struct drm_i915_private *i915)
14861686
SUBTEST(live_sequential_engines),
14871687
SUBTEST(live_parallel_engines),
14881688
SUBTEST(live_empty_request),
1689+
SUBTEST(live_cancel_request),
14891690
SUBTEST(live_breadcrumbs_smoketest),
14901691
};
14911692

0 commit comments

Comments
 (0)