Commit 1193081

Author: Thomas Hellström (committed)
drm/i915: Avoid using the i915_fence_array when collecting dependencies
Since the gt migration code was using only a single fence for
dependencies, these were collected in a dma_fence_array. However, it
turns out that it's illegal to use some dma_fences in a
dma_fence_array, in particular other dma_fence_arrays and
dma_fence_chains, and this causes trouble for us moving forward.

Have the gt migration code instead take a const struct i915_deps for
dependencies. This means we can skip the dma_fence_array creation and
pass the struct i915_deps directly, circumventing the problem.

v2:
- Make the prev_deps() function static. (kernel test robot <[email protected]>)
- Update the struct i915_deps kerneldoc.
v4:
- Rebase.

Fixes: 5652df8 ("drm/i915/ttm: Update i915_gem_obj_copy_ttm() to be asynchronous")
Signed-off-by: Thomas Hellström <[email protected]>
Reviewed-by: Matthew Auld <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent 1c40d40 commit 1193081
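
The new calling convention can be read off the diffs below: the caller initializes a struct i915_deps on the stack, collects the dependency fences into it, hands the collection to the GT migration code by const pointer, and finalizes it afterwards. A minimal sketch of that flow as it appears in i915_ttm_move() below, using only identifiers visible in the diff (declarations and error paths abbreviated):

	struct i915_deps deps;
	struct dma_fence *migration_fence;
	int ret;

	/* Don't try too hard to allocate; collection falls back to syncing. */
	i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);

	/* Collect bo->moving plus the reservation-object fences. */
	ret = prev_deps(bo, ctx, &deps);
	if (ret)
		goto out;

	/* Pass the whole collection instead of a single coalesced fence. */
	migration_fence = __i915_ttm_move(bo, ctx, clear, dst_mem, bo->ttm,
					  dst_rsgt, true, &deps);
out:
	i915_deps_fini(&deps);

Note that the struct stays valid until the caller calls i915_deps_fini(); none of the consuming functions finalize it anymore.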

File tree: 6 files changed, +91 -112 lines changed

drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c

Lines changed: 33 additions & 96 deletions
@@ -3,8 +3,6 @@
  * Copyright © 2021 Intel Corporation
  */
 
-#include <linux/dma-fence-array.h>
-
 #include <drm/ttm/ttm_bo_driver.h>
 
 #include "i915_drv.h"
@@ -44,17 +42,12 @@ void i915_ttm_migrate_set_failure_modes(bool gpu_migration,
 #endif
 
 /**
- * DOC: Set of utilities to dynamically collect dependencies and
- * eventually coalesce them into a single fence which is fed into
- * the GT migration code, since it only accepts a single dependency
- * fence.
- * The single fence returned from these utilities, in the case of
- * dependencies from multiple fence contexts, a struct dma_fence_array,
- * since the i915 request code can break that up and await the individual
- * fences.
+ * DOC: Set of utilities to dynamically collect dependencies into a
+ * structure which is fed into the GT migration code.
  *
  * Once we can do async unbinding, this is also needed to coalesce
- * the migration fence with the unbind fences.
+ * the migration fence with the unbind fences if these are coalesced
+ * post-migration.
  *
  * While collecting the individual dependencies, we store the refcounted
  * struct dma_fence pointers in a realloc-managed pointer array, since
@@ -65,32 +58,13 @@ void i915_ttm_migrate_set_failure_modes(bool gpu_migration,
  * A struct i915_deps need to be initialized using i915_deps_init().
  * If i915_deps_add_dependency() or i915_deps_add_resv() return an
  * error code they will internally call i915_deps_fini(), which frees
- * all internal references and allocations. After a call to
- * i915_deps_to_fence(), or i915_deps_sync(), the struct should similarly
- * be viewed as uninitialized.
+ * all internal references and allocations.
  *
  * We might want to break this out into a separate file as a utility.
  */
 
 #define I915_DEPS_MIN_ALLOC_CHUNK 8U
 
-/**
- * struct i915_deps - Collect dependencies into a single dma-fence
- * @single: Storage for pointer if the collection is a single fence.
- * @fence: Allocated array of fence pointers if more than a single fence;
- *	otherwise points to the address of @single.
- * @num_deps: Current number of dependency fences.
- * @fences_size: Size of the @fences array in number of pointers.
- * @gfp: Allocation mode.
- */
-struct i915_deps {
-	struct dma_fence *single;
-	struct dma_fence **fences;
-	unsigned int num_deps;
-	unsigned int fences_size;
-	gfp_t gfp;
-};
-
 static void i915_deps_reset_fences(struct i915_deps *deps)
 {
 	if (deps->fences != &deps->single)
@@ -163,7 +137,7 @@ static int i915_deps_grow(struct i915_deps *deps, struct dma_fence *fence,
 	return ret;
 }
 
-static int i915_deps_sync(struct i915_deps *deps,
+static int i915_deps_sync(const struct i915_deps *deps,
 			  const struct ttm_operation_ctx *ctx)
 {
 	struct dma_fence **fences = deps->fences;
@@ -183,7 +157,6 @@ static int i915_deps_sync(struct i915_deps *deps,
 		break;
 	}
 
-	i915_deps_fini(deps);
 	return ret;
 }
 
@@ -221,34 +194,6 @@ static int i915_deps_add_dependency(struct i915_deps *deps,
 	return i915_deps_grow(deps, fence, ctx);
 }
 
-static struct dma_fence *i915_deps_to_fence(struct i915_deps *deps,
-					    const struct ttm_operation_ctx *ctx)
-{
-	struct dma_fence_array *array;
-
-	if (deps->num_deps == 0)
-		return NULL;
-
-	if (deps->num_deps == 1) {
-		deps->num_deps = 0;
-		return deps->fences[0];
-	}
-
-	/*
-	 * TODO: Alter the allocation mode here to not try too hard to
-	 * make things async.
-	 */
-	array = dma_fence_array_create(deps->num_deps, deps->fences, 0, 0,
-				       false);
-	if (!array)
-		return ERR_PTR(i915_deps_sync(deps, ctx));
-
-	deps->fences = NULL;
-	i915_deps_reset_fences(deps);
-
-	return &array->base;
-}
-
 static int i915_deps_add_resv(struct i915_deps *deps, struct dma_resv *resv,
 			      bool all, const bool no_excl,
 			      const struct ttm_operation_ctx *ctx)
@@ -387,7 +332,7 @@ static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo,
 					     struct ttm_resource *dst_mem,
 					     struct ttm_tt *dst_ttm,
 					     struct sg_table *dst_st,
-					     struct dma_fence *dep)
+					     const struct i915_deps *deps)
 {
 	struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915),
 						     bdev);
@@ -411,7 +356,7 @@ static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo,
 			return ERR_PTR(-EINVAL);
 
 		intel_engine_pm_get(to_gt(i915)->migrate.context->engine);
-		ret = intel_context_migrate_clear(to_gt(i915)->migrate.context, dep,
+		ret = intel_context_migrate_clear(to_gt(i915)->migrate.context, deps,
						  dst_st->sgl, dst_level,
						  i915_ttm_gtt_binds_lmem(dst_mem),
						  0, &rq);
@@ -425,7 +370,7 @@ static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo,
 		src_level = i915_ttm_cache_level(i915, bo->resource, src_ttm);
 		intel_engine_pm_get(to_gt(i915)->migrate.context->engine);
 		ret = intel_context_migrate_copy(to_gt(i915)->migrate.context,
-						 dep, src_rsgt->table.sgl,
+						 deps, src_rsgt->table.sgl,
						 src_level,
						 i915_ttm_gtt_binds_lmem(bo->resource),
						 dst_st->sgl, dst_level,
@@ -610,18 +555,19 @@ i915_ttm_memcpy_work_arm(struct i915_ttm_memcpy_work *work,
 }
 
 static struct dma_fence *
-__i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
+__i915_ttm_move(struct ttm_buffer_object *bo,
+		const struct ttm_operation_ctx *ctx, bool clear,
		struct ttm_resource *dst_mem, struct ttm_tt *dst_ttm,
		struct i915_refct_sgt *dst_rsgt, bool allow_accel,
-		struct dma_fence *move_dep)
+		const struct i915_deps *move_deps)
 {
 	struct i915_ttm_memcpy_work *copy_work = NULL;
 	struct i915_ttm_memcpy_arg _arg, *arg = &_arg;
 	struct dma_fence *fence = ERR_PTR(-EINVAL);
 
 	if (allow_accel) {
 		fence = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm,
-					    &dst_rsgt->table, move_dep);
+					    &dst_rsgt->table, move_deps);
 
 		/*
		 * We only need to intercept the error when moving to lmem.
@@ -655,8 +601,8 @@ __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
 
 		if (!IS_ERR(fence))
 			goto out;
-	} else if (move_dep) {
-		int err = dma_fence_wait(move_dep, true);
+	} else if (move_deps) {
+		int err = i915_deps_sync(move_deps, ctx);
 
 		if (err)
 			return ERR_PTR(err);
@@ -680,29 +626,21 @@ __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
 	return fence;
 }
 
-static struct dma_fence *prev_fence(struct ttm_buffer_object *bo,
-				    struct ttm_operation_ctx *ctx)
+static int
+prev_deps(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
+	  struct i915_deps *deps)
 {
-	struct i915_deps deps;
 	int ret;
 
-	/*
-	 * Instead of trying hard with GFP_KERNEL to allocate memory,
-	 * the dependency collection will just sync if it doesn't
-	 * succeed.
-	 */
-	i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
-	ret = i915_deps_add_dependency(&deps, bo->moving, ctx);
+	ret = i915_deps_add_dependency(deps, bo->moving, ctx);
 	if (!ret)
 		/*
		 * TODO: Only await excl fence here, and shared fences before
		 * signaling the migration fence.
		 */
-		ret = i915_deps_add_resv(&deps, bo->base.resv, true, false, ctx);
-	if (ret)
-		return ERR_PTR(ret);
+		ret = i915_deps_add_resv(deps, bo->base.resv, true, false, ctx);
 
-	return i915_deps_to_fence(&deps, ctx);
+	return ret;
 }
 
 /**
@@ -756,16 +694,18 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
 
 	clear = !i915_ttm_cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm));
 	if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC))) {
-		struct dma_fence *dep = prev_fence(bo, ctx);
+		struct i915_deps deps;
 
-		if (IS_ERR(dep)) {
+		i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
+		ret = prev_deps(bo, ctx, &deps);
+		if (ret) {
 			i915_refct_sgt_put(dst_rsgt);
-			return PTR_ERR(dep);
+			return ret;
 		}
 
-		migration_fence = __i915_ttm_move(bo, clear, dst_mem, bo->ttm,
-						  dst_rsgt, true, dep);
-		dma_fence_put(dep);
+		migration_fence = __i915_ttm_move(bo, ctx, clear, dst_mem, bo->ttm,
						  dst_rsgt, true, &deps);
+		i915_deps_fini(&deps);
 	}
 
 	/* We can possibly get an -ERESTARTSYS here */
@@ -826,7 +766,7 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
 		.interruptible = intr,
 	};
 	struct i915_refct_sgt *dst_rsgt;
-	struct dma_fence *copy_fence, *dep_fence;
+	struct dma_fence *copy_fence;
 	struct i915_deps deps;
 	int ret, shared_err;
 
@@ -847,15 +787,12 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
 	if (ret)
 		return ret;
 
-	dep_fence = i915_deps_to_fence(&deps, &ctx);
-	if (IS_ERR(dep_fence))
-		return PTR_ERR(dep_fence);
-
 	dst_rsgt = i915_ttm_resource_get_st(dst, dst_bo->resource);
-	copy_fence = __i915_ttm_move(src_bo, false, dst_bo->resource,
+	copy_fence = __i915_ttm_move(src_bo, &ctx, false, dst_bo->resource,
				     dst_bo->ttm, dst_rsgt, allow_accel,
-				     dep_fence);
+				     &deps);
 
+	i915_deps_fini(&deps);
 	i915_refct_sgt_put(dst_rsgt);
 	if (IS_ERR_OR_NULL(copy_fence))
 		return PTR_ERR_OR_ZERO(copy_fence);
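
Note the ownership change in i915_deps_sync() above: it now takes a const pointer and no longer calls i915_deps_fini() internally, so the caller always finalizes the struct, whether the dependencies were consumed by a request or waited on synchronously. Only fragments of its body are visible in the hunks above; a plausible reconstruction of the sync fallback, with the ttm_operation_ctx checks inferred rather than shown in this excerpt:

static int i915_deps_sync(const struct i915_deps *deps,
			  const struct ttm_operation_ctx *ctx)
{
	struct dma_fence **fences = deps->fences;
	unsigned int i;
	int ret = 0;

	for (i = 0; i < deps->num_deps; ++i, ++fences) {
		/* Refuse to block if the operation context forbids it. */
		if (ctx->no_wait_gpu) {
			ret = -EBUSY;
			break;
		}

		/* Wait on each dependency, honoring interruptibility. */
		ret = dma_fence_wait(*fences, ctx->interruptible);
		if (ret)
			break;
	}

	return ret;
}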

drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h

Lines changed: 17 additions & 0 deletions
@@ -18,6 +18,23 @@ struct ttm_tt;
 struct drm_i915_gem_object;
 struct i915_refct_sgt;
 
+/**
+ * struct i915_deps - Collect dependencies into a single dma-fence
+ * @single: Storage for pointer if the collection is a single fence.
+ * @fences: Allocated array of fence pointers if more than a single fence;
+ *	otherwise points to the address of @single.
+ * @num_deps: Current number of dependency fences.
+ * @fences_size: Size of the @fences array in number of pointers.
+ * @gfp: Allocation mode.
+ */
+struct i915_deps {
+	struct dma_fence *single;
+	struct dma_fence **fences;
+	unsigned int num_deps;
+	unsigned int fences_size;
+	gfp_t gfp;
+};
+
 int i915_ttm_move_notify(struct ttm_buffer_object *bo);
 
 I915_SELFTEST_DECLARE(void i915_ttm_migrate_set_failure_modes(bool gpu_migration,
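
Moving the struct definition into this header is what lets callers place a struct i915_deps on the stack while the GT migration code consumes it by const pointer. The @single member is a small-collection optimization: for at most one dependency, @fences points at the address of @single and no array allocation is needed, as the `deps->fences != &deps->single` check in i915_deps_reset_fences() above suggests. A sketch of what resetting plausibly looks like under that scheme; the field assignments are inferred, not shown in this excerpt:

static void i915_deps_reset_fences(struct i915_deps *deps)
{
	/* Free the array only if one was actually allocated. */
	if (deps->fences != &deps->single)
		kfree(deps->fences);
	deps->num_deps = 0;
	deps->fences_size = 1;
	deps->fences = &deps->single;
}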

drivers/gpu/drm/i915/gt/intel_migrate.c

Lines changed: 12 additions & 12 deletions
@@ -404,7 +404,7 @@ static int emit_copy(struct i915_request *rq, int size)
 
 int
 intel_context_migrate_copy(struct intel_context *ce,
-			   struct dma_fence *await,
+			   const struct i915_deps *deps,
			   struct scatterlist *src,
			   enum i915_cache_level src_cache_level,
			   bool src_is_lmem,
@@ -431,8 +431,8 @@ intel_context_migrate_copy(struct intel_context *ce,
 		goto out_ce;
 	}
 
-	if (await) {
-		err = i915_request_await_dma_fence(rq, await);
+	if (deps) {
+		err = i915_request_await_deps(rq, deps);
 		if (err)
 			goto out_rq;
 
@@ -442,7 +442,7 @@ intel_context_migrate_copy(struct intel_context *ce,
 				goto out_rq;
 		}
 
-		await = NULL;
+		deps = NULL;
 	}
 
 	/* The PTE updates + copy must not be interrupted. */
@@ -525,7 +525,7 @@ static int emit_clear(struct i915_request *rq, int size, u32 value)
 
 int
 intel_context_migrate_clear(struct intel_context *ce,
-			    struct dma_fence *await,
+			    const struct i915_deps *deps,
			    struct scatterlist *sg,
			    enum i915_cache_level cache_level,
			    bool is_lmem,
@@ -550,8 +550,8 @@ intel_context_migrate_clear(struct intel_context *ce,
 		goto out_ce;
 	}
 
-	if (await) {
-		err = i915_request_await_dma_fence(rq, await);
+	if (deps) {
+		err = i915_request_await_deps(rq, deps);
 		if (err)
 			goto out_rq;
 
@@ -561,7 +561,7 @@ intel_context_migrate_clear(struct intel_context *ce,
 				goto out_rq;
 		}
 
-		await = NULL;
+		deps = NULL;
 	}
 
 	/* The PTE updates + clear must not be interrupted. */
@@ -599,7 +599,7 @@ intel_context_migrate_clear(struct intel_context *ce,
 
 int intel_migrate_copy(struct intel_migrate *m,
 		       struct i915_gem_ww_ctx *ww,
-		       struct dma_fence *await,
+		       const struct i915_deps *deps,
		       struct scatterlist *src,
		       enum i915_cache_level src_cache_level,
		       bool src_is_lmem,
@@ -624,7 +624,7 @@ int intel_migrate_copy(struct intel_migrate *m,
 	if (err)
 		goto out;
 
-	err = intel_context_migrate_copy(ce, await,
+	err = intel_context_migrate_copy(ce, deps,
					 src, src_cache_level, src_is_lmem,
					 dst, dst_cache_level, dst_is_lmem,
					 out);
@@ -638,7 +638,7 @@ int intel_migrate_copy(struct intel_migrate *m,
 int
 intel_migrate_clear(struct intel_migrate *m,
 		    struct i915_gem_ww_ctx *ww,
-		    struct dma_fence *await,
+		    const struct i915_deps *deps,
		    struct scatterlist *sg,
		    enum i915_cache_level cache_level,
		    bool is_lmem,
@@ -661,7 +661,7 @@ intel_migrate_clear(struct intel_migrate *m,
 	if (err)
 		goto out;
 
-	err = intel_context_migrate_clear(ce, await, sg, cache_level,
+	err = intel_context_migrate_clear(ce, deps, sg, cache_level,
					  is_lmem, value, out);
 
 	intel_context_unpin(ce);
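
On the request side, i915_request_await_dma_fence() is replaced by i915_request_await_deps(), which lives in one of the six changed files not shown in this excerpt. Since the i915 request code can break up composite fences and await the individual ones, awaiting the collection presumably amounts to awaiting each stored fence in turn; a plausible sketch of that helper, assuming a signature matching the call sites above:

int i915_request_await_deps(struct i915_request *rq,
			    const struct i915_deps *deps)
{
	int err, i;

	/* Await each collected dependency fence individually. */
	for (i = 0; i < deps->num_deps; ++i) {
		err = i915_request_await_dma_fence(rq, deps->fences[i]);
		if (err)
			return err;
	}

	return 0;
}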
