Skip to content

Commit ff20afc

Browse files
author
Thomas Hellström
committed
drm/i915: Update error capture code to avoid using the current vma state
With asynchronous migrations, the vma state may be several migrations ahead of the state that matches the request we're capturing. Address that by introducing an i915_vma_snapshot structure that can be used to snapshot relevant state at request submission. In order to make sure we access the correct memory, the snapshots take references on relevant sg-tables and memory regions. Also move the capture list allocation out of the fence signaling critical path and use the CONFIG_DRM_I915_CAPTURE_ERROR define to avoid compiling in members and functions used for error capture when they're not used. Finally, Introduce lockdep annotation. v4: - Break out the capture allocation mode change to a separate patch. v5: - Fix compilation error in the !CONFIG_DRM_I915_CAPTURE_ERROR case (kernel test robot) v6: - Use #if IS_ENABLED() instead of #ifdef to match driver style. - Move yet another change of allocation mode to the separate patch. - Commit message rework due to patch reordering. v7: - Adjust for removal of region refcounting. Signed-off-by: Thomas Hellström <[email protected]> Reviewed-by: Ramalingam C <[email protected]> Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent 49a8bf5 commit ff20afc

File tree

8 files changed

+554
-95
lines changed

8 files changed

+554
-95
lines changed

drivers/gpu/drm/i915/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,7 @@ i915-y += \
173173
i915_trace_points.o \
174174
i915_ttm_buddy_manager.o \
175175
i915_vma.o \
176+
i915_vma_snapshot.o \
176177
intel_wopcm.o
177178

178179
# general-purpose microcontroller (GuC) support

drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c

Lines changed: 112 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "i915_gem_ioctls.h"
3030
#include "i915_trace.h"
3131
#include "i915_user_extensions.h"
32+
#include "i915_vma_snapshot.h"
3233

3334
struct eb_vma {
3435
struct i915_vma *vma;
@@ -307,11 +308,15 @@ struct i915_execbuffer {
307308

308309
struct eb_fence *fences;
309310
unsigned long num_fences;
311+
#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
312+
struct i915_capture_list *capture_lists[MAX_ENGINE_INSTANCE + 1];
313+
#endif
310314
};
311315

312316
static int eb_parse(struct i915_execbuffer *eb);
313317
static int eb_pin_engine(struct i915_execbuffer *eb, bool throttle);
314318
static void eb_unpin_engine(struct i915_execbuffer *eb);
319+
static void eb_capture_release(struct i915_execbuffer *eb);
315320

316321
static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
317322
{
@@ -1043,6 +1048,7 @@ static void eb_release_vmas(struct i915_execbuffer *eb, bool final)
10431048
i915_vma_put(vma);
10441049
}
10451050

1051+
eb_capture_release(eb);
10461052
eb_unpin_engine(eb);
10471053
}
10481054

@@ -1880,36 +1886,113 @@ eb_find_first_request_added(struct i915_execbuffer *eb)
18801886
return NULL;
18811887
}
18821888

1883-
static int eb_move_to_gpu(struct i915_execbuffer *eb)
1889+
#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
1890+
1891+
/* Stage with GFP_KERNEL allocations before we enter the signaling critical path */
1892+
static void eb_capture_stage(struct i915_execbuffer *eb)
18841893
{
18851894
const unsigned int count = eb->buffer_count;
1886-
unsigned int i = count;
1887-
int err = 0, j;
1895+
unsigned int i = count, j;
1896+
struct i915_vma_snapshot *vsnap;
18881897

18891898
while (i--) {
18901899
struct eb_vma *ev = &eb->vma[i];
18911900
struct i915_vma *vma = ev->vma;
18921901
unsigned int flags = ev->flags;
1893-
struct drm_i915_gem_object *obj = vma->obj;
18941902

1895-
assert_vma_held(vma);
1903+
if (!(flags & EXEC_OBJECT_CAPTURE))
1904+
continue;
18961905

1897-
if (flags & EXEC_OBJECT_CAPTURE) {
1906+
vsnap = i915_vma_snapshot_alloc(GFP_KERNEL);
1907+
if (!vsnap)
1908+
continue;
1909+
1910+
i915_vma_snapshot_init(vsnap, vma, "user");
1911+
for_each_batch_create_order(eb, j) {
18981912
struct i915_capture_list *capture;
18991913

1900-
for_each_batch_create_order(eb, j) {
1901-
if (!eb->requests[j])
1902-
break;
1914+
capture = kmalloc(sizeof(*capture), GFP_KERNEL);
1915+
if (!capture)
1916+
continue;
19031917

1904-
capture = kmalloc(sizeof(*capture), GFP_KERNEL);
1905-
if (capture) {
1906-
capture->next =
1907-
eb->requests[j]->capture_list;
1908-
capture->vma = vma;
1909-
eb->requests[j]->capture_list = capture;
1910-
}
1911-
}
1918+
capture->next = eb->capture_lists[j];
1919+
capture->vma_snapshot = i915_vma_snapshot_get(vsnap);
1920+
eb->capture_lists[j] = capture;
1921+
}
1922+
i915_vma_snapshot_put(vsnap);
1923+
}
1924+
}
1925+
1926+
/* Commit once we're in the critical path */
1927+
static void eb_capture_commit(struct i915_execbuffer *eb)
1928+
{
1929+
unsigned int j;
1930+
1931+
for_each_batch_create_order(eb, j) {
1932+
struct i915_request *rq = eb->requests[j];
1933+
1934+
if (!rq)
1935+
break;
1936+
1937+
rq->capture_list = eb->capture_lists[j];
1938+
eb->capture_lists[j] = NULL;
1939+
}
1940+
}
1941+
1942+
/*
1943+
* Release anything that didn't get committed due to errors.
1944+
* The capture_list will otherwise be freed at request retire.
1945+
*/
1946+
static void eb_capture_release(struct i915_execbuffer *eb)
1947+
{
1948+
unsigned int j;
1949+
1950+
for_each_batch_create_order(eb, j) {
1951+
if (eb->capture_lists[j]) {
1952+
i915_request_free_capture_list(eb->capture_lists[j]);
1953+
eb->capture_lists[j] = NULL;
19121954
}
1955+
}
1956+
}
1957+
1958+
static void eb_capture_list_clear(struct i915_execbuffer *eb)
1959+
{
1960+
memset(eb->capture_lists, 0, sizeof(eb->capture_lists));
1961+
}
1962+
1963+
#else
1964+
1965+
static void eb_capture_stage(struct i915_execbuffer *eb)
1966+
{
1967+
}
1968+
1969+
static void eb_capture_commit(struct i915_execbuffer *eb)
1970+
{
1971+
}
1972+
1973+
static void eb_capture_release(struct i915_execbuffer *eb)
1974+
{
1975+
}
1976+
1977+
static void eb_capture_list_clear(struct i915_execbuffer *eb)
1978+
{
1979+
}
1980+
1981+
#endif
1982+
1983+
static int eb_move_to_gpu(struct i915_execbuffer *eb)
1984+
{
1985+
const unsigned int count = eb->buffer_count;
1986+
unsigned int i = count;
1987+
int err = 0, j;
1988+
1989+
while (i--) {
1990+
struct eb_vma *ev = &eb->vma[i];
1991+
struct i915_vma *vma = ev->vma;
1992+
unsigned int flags = ev->flags;
1993+
struct drm_i915_gem_object *obj = vma->obj;
1994+
1995+
assert_vma_held(vma);
19131996

19141997
/*
19151998
* If the GPU is not _reading_ through the CPU cache, we need
@@ -1990,6 +2073,8 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
19902073

19912074
/* Unconditionally flush any chipset caches (for streaming writes). */
19922075
intel_gt_chipset_flush(eb->gt);
2076+
eb_capture_commit(eb);
2077+
19932078
return 0;
19942079

19952080
err_skip:
@@ -3132,13 +3217,14 @@ eb_requests_create(struct i915_execbuffer *eb, struct dma_fence *in_fence,
31323217
}
31333218

31343219
/*
3135-
* Whilst this request exists, batch_obj will be on the
3136-
* active_list, and so will hold the active reference. Only when
3137-
* this request is retired will the batch_obj be moved onto
3138-
* the inactive_list and lose its active reference. Hence we do
3139-
* not need to explicitly hold another reference here.
3220+
* Not really on stack, but we don't want to call
3221+
* kfree on the batch_snapshot when we put it, so use the
3222+
* _onstack interface.
31403223
*/
3141-
eb->requests[i]->batch = eb->batches[i]->vma;
3224+
if (eb->batches[i]->vma)
3225+
i915_vma_snapshot_init_onstack(&eb->requests[i]->batch_snapshot,
3226+
eb->batches[i]->vma,
3227+
"batch");
31423228
if (eb->batch_pool) {
31433229
GEM_BUG_ON(intel_context_is_parallel(eb->context));
31443230
intel_gt_buffer_pool_mark_active(eb->batch_pool,
@@ -3187,6 +3273,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
31873273
eb.fences = NULL;
31883274
eb.num_fences = 0;
31893275

3276+
eb_capture_list_clear(&eb);
3277+
31903278
memset(eb.requests, 0, sizeof(struct i915_request *) *
31913279
ARRAY_SIZE(eb.requests));
31923280
eb.composite_fence = NULL;
@@ -3273,6 +3361,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
32733361
}
32743362

32753363
ww_acquire_done(&eb.ww.ctx);
3364+
eb_capture_stage(&eb);
32763365

32773366
out_fence = eb_requests_create(&eb, in_fence, out_fence_fd);
32783367
if (IS_ERR(out_fence)) {

drivers/gpu/drm/i915/gt/intel_engine_cs.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1676,14 +1676,18 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
16761676

16771677
static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
16781678
{
1679+
struct i915_vma_snapshot *vsnap = &rq->batch_snapshot;
16791680
void *ring;
16801681
int size;
16811682

1683+
if (!i915_vma_snapshot_present(vsnap))
1684+
vsnap = NULL;
1685+
16821686
drm_printf(m,
16831687
"[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n",
16841688
rq->head, rq->postfix, rq->tail,
1685-
rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u,
1686-
rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u);
1689+
vsnap ? upper_32_bits(vsnap->gtt_offset) : ~0u,
1690+
vsnap ? lower_32_bits(vsnap->gtt_offset) : ~0u);
16871691

16881692
size = rq->tail - rq->head;
16891693
if (rq->tail < rq->head)

0 commit comments

Comments
 (0)