Skip to content

Commit 343dd24

Browse files
committed
drm/xe/oa: Signal output fences
Introduce 'struct xe_oa_fence' which includes the dma_fence used to signal
output fences in the xe_sync array. The fences are signaled asynchronously.
When there are no output fences to signal, the OA configuration wait is
synchronously re-introduced into the ioctl.

v2: Don't wait in the work, use callback + delayed work (Matt B)
    Use a single, not a per-fence spinlock (Matt Brost)
v3: Move ofence alloc before job submission (Matt)
    Assert, don't fail, from dma_fence_add_callback (Matt)
    Additional dma_fence_get for dma_fence_wait (Matt)
    Change dma_fence_wait to non-interruptible (Matt)
v4: Introduce last_fence to prevent uaf if stream is closed with pending
    OA config jobs
v5: Remove oa_fence_lock, move spinlock back into xe_oa_fence to prevent uaf

Suggested-by: Matthew Brost <[email protected]>
Reviewed-by: Jonathan Cavitt <[email protected]>
Signed-off-by: Ashutosh Dixit <[email protected]>
Reviewed-by: Matthew Brost <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent 2fb4350 commit 343dd24

File tree

2 files changed

+105
-17
lines changed

2 files changed

+105
-17
lines changed

drivers/gpu/drm/xe/xe_oa.c

Lines changed: 102 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,17 @@ struct xe_oa_config_bo {
100100
struct xe_bb *bb;
101101
};
102102

103+
/**
 * struct xe_oa_fence - fence used to signal output fences of an OA
 * configuration submission
 *
 * Allocated per OA config job. @base is attached to the output fences in the
 * stream's xe_sync array and is signaled asynchronously from @work, which is
 * scheduled by @cb once the OA config job fence signals.
 */
struct xe_oa_fence {
	/* @base: dma fence base */
	struct dma_fence base;
	/* @lock: lock for the fence */
	spinlock_t lock;
	/* @work: work to signal @base */
	struct delayed_work work;
	/* @cb: callback to schedule @work */
	struct dma_fence_cb cb;
};
113+
103114
#define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x
104115

105116
static const struct xe_oa_format oa_formats[] = {
@@ -172,10 +183,10 @@ static struct xe_oa_config *xe_oa_get_oa_config(struct xe_oa *oa, int metrics_se
172183
return oa_config;
173184
}
174185

175-
/*
 * Free an OA config BO: drop the config reference and release the batch
 * buffer. @last_fence is passed through to xe_bb_free() — presumably it keeps
 * the BB alive until pending OA config jobs complete (matches the uaf fix this
 * parameter was introduced for); confirm against xe_bb_free().
 */
static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo, struct dma_fence *last_fence)
{
	xe_oa_config_put(oa_bo->oa_config);
	xe_bb_free(oa_bo->bb, last_fence);
	kfree(oa_bo);
}
181192

@@ -655,7 +666,8 @@ static void xe_oa_free_configs(struct xe_oa_stream *stream)
655666

656667
xe_oa_config_put(stream->oa_config);
657668
llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node)
658-
free_oa_config_bo(oa_bo);
669+
free_oa_config_bo(oa_bo, stream->last_fence);
670+
dma_fence_put(stream->last_fence);
659671
}
660672

661673
static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc,
@@ -951,40 +963,113 @@ xe_oa_alloc_config_buffer(struct xe_oa_stream *stream, struct xe_oa_config *oa_c
951963
return oa_bo;
952964
}
953965

966+
static void xe_oa_update_last_fence(struct xe_oa_stream *stream, struct dma_fence *fence)
967+
{
968+
dma_fence_put(stream->last_fence);
969+
stream->last_fence = dma_fence_get(fence);
970+
}
971+
972+
static void xe_oa_fence_work_fn(struct work_struct *w)
973+
{
974+
struct xe_oa_fence *ofence = container_of(w, typeof(*ofence), work.work);
975+
976+
/* Signal fence to indicate new OA configuration is active */
977+
dma_fence_signal(&ofence->base);
978+
dma_fence_put(&ofence->base);
979+
}
980+
981+
/*
 * dma_fence callback installed on the OA config job fence by
 * xe_oa_emit_oa_config() (also invoked directly there when the job fence had
 * already signaled). Defers signaling of ofence->base via delayed work so the
 * empirical post-programming NOA delay elapses first.
 */
static void xe_oa_config_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
{
/* Additional empirical delay needed for NOA programming after registers are written */
#define NOA_PROGRAM_ADDITIONAL_DELAY_US 500

	struct xe_oa_fence *ofence = container_of(cb, typeof(*ofence), cb);

	/* Work is initialized here, in fence-signal context, not at alloc time */
	INIT_DELAYED_WORK(&ofence->work, xe_oa_fence_work_fn);
	queue_delayed_work(system_unbound_wq, &ofence->work,
			   usecs_to_jiffies(NOA_PROGRAM_ADDITIONAL_DELAY_US));
	/* Drop the job fence reference returned by xe_oa_submit_bb() */
	dma_fence_put(fence);
}
993+
994+
/* dma_fence_ops.get_driver_name hook: fixed driver name for OA fences */
static const char *xe_oa_get_driver_name(struct dma_fence *fence)
{
	return "xe_oa";
}
998+
999+
/* dma_fence_ops.get_timeline_name hook: OA fences are not on a timeline */
static const char *xe_oa_get_timeline_name(struct dma_fence *fence)
{
	return "unbound";
}
1003+
1004+
/*
 * Minimal fence ops for xe_oa_fence. No custom .release: the default
 * dma_fence release path frees via @base — relies on @base being the first
 * member of struct xe_oa_fence.
 */
static const struct dma_fence_ops xe_oa_fence_ops = {
	.get_driver_name = xe_oa_get_driver_name,
	.get_timeline_name = xe_oa_get_timeline_name,
};
1008+
9541009
static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config *config)
9551010
{
9561011
#define NOA_PROGRAM_ADDITIONAL_DELAY_US 500
9571012
struct xe_oa_config_bo *oa_bo;
958-
int err = 0, us = NOA_PROGRAM_ADDITIONAL_DELAY_US;
1013+
struct xe_oa_fence *ofence;
1014+
int i, err, num_signal = 0;
9591015
struct dma_fence *fence;
960-
long timeout;
9611016

962-
/* Emit OA configuration batch */
1017+
ofence = kzalloc(sizeof(*ofence), GFP_KERNEL);
1018+
if (!ofence) {
1019+
err = -ENOMEM;
1020+
goto exit;
1021+
}
1022+
9631023
oa_bo = xe_oa_alloc_config_buffer(stream, config);
9641024
if (IS_ERR(oa_bo)) {
9651025
err = PTR_ERR(oa_bo);
9661026
goto exit;
9671027
}
9681028

1029+
/* Emit OA configuration batch */
9691030
fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_ADD_DEPS, oa_bo->bb);
9701031
if (IS_ERR(fence)) {
9711032
err = PTR_ERR(fence);
9721033
goto exit;
9731034
}
9741035

975-
/* Wait till all previous batches have executed */
976-
timeout = dma_fence_wait_timeout(fence, false, 5 * HZ);
977-
dma_fence_put(fence);
978-
if (timeout < 0)
979-
err = timeout;
980-
else if (!timeout)
981-
err = -ETIME;
982-
if (err)
983-
drm_dbg(&stream->oa->xe->drm, "dma_fence_wait_timeout err %d\n", err);
1036+
/* Point of no return: initialize and set fence to signal */
1037+
spin_lock_init(&ofence->lock);
1038+
dma_fence_init(&ofence->base, &xe_oa_fence_ops, &ofence->lock, 0, 0);
9841039

985-
/* Additional empirical delay needed for NOA programming after registers are written */
986-
usleep_range(us, 2 * us);
1040+
for (i = 0; i < stream->num_syncs; i++) {
1041+
if (stream->syncs[i].flags & DRM_XE_SYNC_FLAG_SIGNAL)
1042+
num_signal++;
1043+
xe_sync_entry_signal(&stream->syncs[i], &ofence->base);
1044+
}
1045+
1046+
/* Additional dma_fence_get in case we dma_fence_wait */
1047+
if (!num_signal)
1048+
dma_fence_get(&ofence->base);
1049+
1050+
/* Update last fence too before adding callback */
1051+
xe_oa_update_last_fence(stream, fence);
1052+
1053+
/* Add job fence callback to schedule work to signal ofence->base */
1054+
err = dma_fence_add_callback(fence, &ofence->cb, xe_oa_config_cb);
1055+
xe_gt_assert(stream->gt, !err || err == -ENOENT);
1056+
if (err == -ENOENT)
1057+
xe_oa_config_cb(fence, &ofence->cb);
1058+
1059+
/* If nothing needs to be signaled we wait synchronously */
1060+
if (!num_signal) {
1061+
dma_fence_wait(&ofence->base, false);
1062+
dma_fence_put(&ofence->base);
1063+
}
1064+
1065+
/* Done with syncs */
1066+
for (i = 0; i < stream->num_syncs; i++)
1067+
xe_sync_entry_cleanup(&stream->syncs[i]);
1068+
kfree(stream->syncs);
1069+
1070+
return 0;
9871071
exit:
1072+
kfree(ofence);
9881073
return err;
9891074
}
9901075

drivers/gpu/drm/xe/xe_oa_types.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,9 @@ struct xe_oa_stream {
239239
/** @no_preempt: Whether preemption and timeslicing is disabled for stream exec_q */
240240
u32 no_preempt;
241241

242+
/** @last_fence: fence to use in stream destroy when needed */
243+
struct dma_fence *last_fence;
244+
242245
/** @num_syncs: size of @syncs array */
243246
u32 num_syncs;
244247

0 commit comments

Comments
 (0)