Skip to content

Commit 8ab3a38

Browse files
icklejlahtine-intel
authored andcommitted
drm/i915/gt: Incrementally check for rewinding
In commit 5ba32c7 ("drm/i915/execlists: Always force a context reload when rewinding RING_TAIL"), we placed the check for rewinding a context on actually submitting the next request in that context. This was so that we only had to check once, and could do so with precision avoiding as many forced restores as possible. For example, to ensure that we can resubmit the same request a couple of times, we include a small wa_tail such that on the next submission, the ring->tail will appear to move forwards when resubmitting the same request. This is very common as it will happen for every lite-restore to fill the second port after a context switch. However, intel_ring_direction() is limited in precision to movements of upto half the ring size. The consequence being that if we tried to unwind many requests, we could exceed half the ring and flip the sense of the direction, so missing a force restore. As no request can be greater than half the ring (i.e. 2048 bytes in the smallest case), we can check for rollback incrementally. As we check against the tail that would be submitted, we do not lose any sensitivity and allow lite restores for the simple case. We still need to double check upon submitting the context, to allow for multiple preemptions and resubmissions. Fixes: 5ba32c7 ("drm/i915/execlists: Always force a context reload when rewinding RING_TAIL") Signed-off-by: Chris Wilson <[email protected]> Cc: Mika Kuoppala <[email protected]> Cc: <[email protected]> # v5.4+ Reviewed-by: Bruce Chang <[email protected]> Reviewed-by: Mika Kuoppala <[email protected]> Link: https://patchwork.freedesktop.org/patch/msgid/[email protected] (cherry picked from commit e36ba81) Signed-off-by: Joonas Lahtinen <[email protected]>
1 parent a43555a commit 8ab3a38

File tree

6 files changed

+154
-4
lines changed

6 files changed

+154
-4
lines changed

drivers/gpu/drm/i915/gt/intel_engine_cs.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -646,7 +646,7 @@ static int engine_setup_common(struct intel_engine_cs *engine)
646646
struct measure_breadcrumb {
647647
struct i915_request rq;
648648
struct intel_ring ring;
649-
u32 cs[1024];
649+
u32 cs[2048];
650650
};
651651

652652
static int measure_breadcrumb_dw(struct intel_context *ce)
@@ -668,6 +668,8 @@ static int measure_breadcrumb_dw(struct intel_context *ce)
668668

669669
frame->ring.vaddr = frame->cs;
670670
frame->ring.size = sizeof(frame->cs);
671+
frame->ring.wrap =
672+
BITS_PER_TYPE(frame->ring.size) - ilog2(frame->ring.size);
671673
frame->ring.effective_size = frame->ring.size;
672674
intel_ring_update_space(&frame->ring);
673675
frame->rq.ring = &frame->ring;

drivers/gpu/drm/i915/gt/intel_lrc.c

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1134,6 +1134,13 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
11341134
list_move(&rq->sched.link, pl);
11351135
set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
11361136

1137+
/* Check in case we rollback so far we wrap [size/2] */
1138+
if (intel_ring_direction(rq->ring,
1139+
intel_ring_wrap(rq->ring,
1140+
rq->tail),
1141+
rq->ring->tail) > 0)
1142+
rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE;
1143+
11371144
active = rq;
11381145
} else {
11391146
struct intel_engine_cs *owner = rq->context->engine;
@@ -1498,8 +1505,9 @@ static u64 execlists_update_context(struct i915_request *rq)
14981505
* HW has a tendency to ignore us rewinding the TAIL to the end of
14991506
* an earlier request.
15001507
*/
1508+
GEM_BUG_ON(ce->lrc_reg_state[CTX_RING_TAIL] != rq->ring->tail);
1509+
prev = rq->ring->tail;
15011510
tail = intel_ring_set_tail(rq->ring, rq->tail);
1502-
prev = ce->lrc_reg_state[CTX_RING_TAIL];
15031511
if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0))
15041512
desc |= CTX_DESC_FORCE_RESTORE;
15051513
ce->lrc_reg_state[CTX_RING_TAIL] = tail;
@@ -4758,6 +4766,14 @@ static int gen12_emit_flush(struct i915_request *request, u32 mode)
47584766
return 0;
47594767
}
47604768

4769+
static void assert_request_valid(struct i915_request *rq)
4770+
{
4771+
struct intel_ring *ring __maybe_unused = rq->ring;
4772+
4773+
/* Can we unwind this request without appearing to go forwards? */
4774+
GEM_BUG_ON(intel_ring_direction(ring, rq->wa_tail, rq->head) <= 0);
4775+
}
4776+
47614777
/*
47624778
* Reserve space for 2 NOOPs at the end of each request to be
47634779
* used as a workaround for not being allowed to do lite
@@ -4770,6 +4786,9 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
47704786
*cs++ = MI_NOOP;
47714787
request->wa_tail = intel_ring_offset(request, cs);
47724788

4789+
/* Check that entire request is less than half the ring */
4790+
assert_request_valid(request);
4791+
47734792
return cs;
47744793
}
47754794

drivers/gpu/drm/i915/gt/intel_ring.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,3 +315,7 @@ int intel_ring_cacheline_align(struct i915_request *rq)
315315
GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
316316
return 0;
317317
}
318+
319+
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
320+
#include "selftest_ring.c"
321+
#endif

drivers/gpu/drm/i915/gt/selftest_mocs.c

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,20 @@ struct live_mocs {
1818
void *vaddr;
1919
};
2020

21+
static struct intel_context *mocs_context_create(struct intel_engine_cs *engine)
22+
{
23+
struct intel_context *ce;
24+
25+
ce = intel_context_create(engine);
26+
if (IS_ERR(ce))
27+
return ce;
28+
29+
/* We build large requests to read the registers from the ring */
30+
ce->ring = __intel_context_ring_size(SZ_16K);
31+
32+
return ce;
33+
}
34+
2135
static int request_add_sync(struct i915_request *rq, int err)
2236
{
2337
i915_request_get(rq);
@@ -301,7 +315,7 @@ static int live_mocs_clean(void *arg)
301315
for_each_engine(engine, gt, id) {
302316
struct intel_context *ce;
303317

304-
ce = intel_context_create(engine);
318+
ce = mocs_context_create(engine);
305319
if (IS_ERR(ce)) {
306320
err = PTR_ERR(ce);
307321
break;
@@ -395,7 +409,7 @@ static int live_mocs_reset(void *arg)
395409
for_each_engine(engine, gt, id) {
396410
struct intel_context *ce;
397411

398-
ce = intel_context_create(engine);
412+
ce = mocs_context_create(engine);
399413
if (IS_ERR(ce)) {
400414
err = PTR_ERR(ce);
401415
break;
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/*
3+
* Copyright © 2020 Intel Corporation
4+
*/
5+
6+
static struct intel_ring *mock_ring(unsigned long sz)
7+
{
8+
struct intel_ring *ring;
9+
10+
ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL);
11+
if (!ring)
12+
return NULL;
13+
14+
kref_init(&ring->ref);
15+
ring->size = sz;
16+
ring->wrap = BITS_PER_TYPE(ring->size) - ilog2(sz);
17+
ring->effective_size = sz;
18+
ring->vaddr = (void *)(ring + 1);
19+
atomic_set(&ring->pin_count, 1);
20+
21+
intel_ring_update_space(ring);
22+
23+
return ring;
24+
}
25+
26+
static void mock_ring_free(struct intel_ring *ring)
27+
{
28+
kfree(ring);
29+
}
30+
31+
static int check_ring_direction(struct intel_ring *ring,
32+
u32 next, u32 prev,
33+
int expected)
34+
{
35+
int result;
36+
37+
result = intel_ring_direction(ring, next, prev);
38+
if (result < 0)
39+
result = -1;
40+
else if (result > 0)
41+
result = 1;
42+
43+
if (result != expected) {
44+
pr_err("intel_ring_direction(%u, %u):%d != %d\n",
45+
next, prev, result, expected);
46+
return -EINVAL;
47+
}
48+
49+
return 0;
50+
}
51+
52+
static int check_ring_step(struct intel_ring *ring, u32 x, u32 step)
53+
{
54+
u32 prev = x, next = intel_ring_wrap(ring, x + step);
55+
int err = 0;
56+
57+
err |= check_ring_direction(ring, next, next, 0);
58+
err |= check_ring_direction(ring, prev, prev, 0);
59+
err |= check_ring_direction(ring, next, prev, 1);
60+
err |= check_ring_direction(ring, prev, next, -1);
61+
62+
return err;
63+
}
64+
65+
static int check_ring_offset(struct intel_ring *ring, u32 x, u32 step)
66+
{
67+
int err = 0;
68+
69+
err |= check_ring_step(ring, x, step);
70+
err |= check_ring_step(ring, intel_ring_wrap(ring, x + 1), step);
71+
err |= check_ring_step(ring, intel_ring_wrap(ring, x - 1), step);
72+
73+
return err;
74+
}
75+
76+
static int igt_ring_direction(void *dummy)
77+
{
78+
struct intel_ring *ring;
79+
unsigned int half = 2048;
80+
int step, err = 0;
81+
82+
ring = mock_ring(2 * half);
83+
if (!ring)
84+
return -ENOMEM;
85+
86+
GEM_BUG_ON(ring->size != 2 * half);
87+
88+
/* Precision of wrap detection is limited to ring->size / 2 */
89+
for (step = 1; step < half; step <<= 1) {
90+
err |= check_ring_offset(ring, 0, step);
91+
err |= check_ring_offset(ring, half, step);
92+
}
93+
err |= check_ring_step(ring, 0, half - 64);
94+
95+
/* And check unwrapped handling for good measure */
96+
err |= check_ring_offset(ring, 0, 2 * half + 64);
97+
err |= check_ring_offset(ring, 3 * half, 1);
98+
99+
mock_ring_free(ring);
100+
return err;
101+
}
102+
103+
int intel_ring_mock_selftests(void)
104+
{
105+
static const struct i915_subtest tests[] = {
106+
SUBTEST(igt_ring_direction),
107+
};
108+
109+
return i915_subtests(tests, NULL);
110+
}

drivers/gpu/drm/i915/selftests/i915_mock_selftests.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ selftest(fence, i915_sw_fence_mock_selftests)
2121
selftest(scatterlist, scatterlist_mock_selftests)
2222
selftest(syncmap, i915_syncmap_mock_selftests)
2323
selftest(uncore, intel_uncore_mock_selftests)
24+
selftest(ring, intel_ring_mock_selftests)
2425
selftest(engine, intel_engine_cs_mock_selftests)
2526
selftest(timelines, intel_timeline_mock_selftests)
2627
selftest(requests, i915_request_mock_selftests)

0 commit comments

Comments
 (0)