Skip to content

Commit 817c70e

Browse files
unerligerodrigovivi
authored andcommitted
drm/xe: Fix use after free when client stats are captured
xe_file_close triggers an asynchronous queue cleanup and then frees up the xef object. Since queue cleanup flushes all pending jobs and the KMD stores client usage stats into the xef object after jobs are flushed, we see a use-after-free for the xef object. Resolve this by taking a reference to xef from xe_exec_queue. While at it, revert an earlier change that contained a partial work around for this issue. v2: - Take a ref to xef even for the VM bind queue (Matt) - Squash patches relevant to that fix and work around (Lucas) v3: Fix typo (Lucas) Fixes: ce62827 ("drm/xe: Do not access xe file when updating exec queue run_ticks") Fixes: 6109f24 ("drm/xe: Add helper to accumulate exec queue runtime") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/issues/1908 Signed-off-by: Umesh Nerlige Ramappa <[email protected]> Reviewed-by: Matthew Brost <[email protected]> Reviewed-by: Lucas De Marchi <[email protected]> Link: https://patchwork.freedesktop.org/patch/msgid/[email protected] Signed-off-by: Lucas De Marchi <[email protected]> (cherry picked from commit 2149ded) Signed-off-by: Rodrigo Vivi <[email protected]>
1 parent 6309f9b commit 817c70e

File tree

3 files changed

+13
-9
lines changed

3 files changed

+13
-9
lines changed

drivers/gpu/drm/xe/xe_drm_client.c

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -251,11 +251,8 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file)
251251

252252
/* Accumulate all the exec queues from this client */
253253
mutex_lock(&xef->exec_queue.lock);
254-
xa_for_each(&xef->exec_queue.xa, i, q) {
254+
xa_for_each(&xef->exec_queue.xa, i, q)
255255
xe_exec_queue_update_run_ticks(q);
256-
xef->run_ticks[q->class] += q->run_ticks - q->old_run_ticks;
257-
q->old_run_ticks = q->run_ticks;
258-
}
259256
mutex_unlock(&xef->exec_queue.lock);
260257

261258
/* Get the total GPU cycles */

drivers/gpu/drm/xe/xe_exec_queue.c

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@ static void __xe_exec_queue_free(struct xe_exec_queue *q)
3737
{
3838
if (q->vm)
3939
xe_vm_put(q->vm);
40+
41+
if (q->xef)
42+
xe_file_put(q->xef);
43+
4044
kfree(q);
4145
}
4246

@@ -649,6 +653,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
649653
goto kill_exec_queue;
650654

651655
args->exec_queue_id = id;
656+
q->xef = xe_file_get(xef);
652657

653658
return 0;
654659

@@ -762,6 +767,7 @@ bool xe_exec_queue_is_idle(struct xe_exec_queue *q)
762767
*/
763768
void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
764769
{
770+
struct xe_file *xef;
765771
struct xe_lrc *lrc;
766772
u32 old_ts, new_ts;
767773

@@ -773,6 +779,8 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
773779
if (!q->vm || !q->vm->xef)
774780
return;
775781

782+
xef = q->vm->xef;
783+
776784
/*
777785
* Only sample the first LRC. For parallel submission, all of them are
778786
* scheduled together and we compensate that below by multiplying by
@@ -783,7 +791,7 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
783791
*/
784792
lrc = q->lrc[0];
785793
new_ts = xe_lrc_update_timestamp(lrc, &old_ts);
786-
q->run_ticks += (new_ts - old_ts) * q->width;
794+
xef->run_ticks[q->class] += (new_ts - old_ts) * q->width;
787795
}
788796

789797
void xe_exec_queue_kill(struct xe_exec_queue *q)

drivers/gpu/drm/xe/xe_exec_queue_types.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@ enum xe_exec_queue_priority {
3838
* a kernel object.
3939
*/
4040
struct xe_exec_queue {
41+
/** @xef: Back pointer to xe file if this is user created exec queue */
42+
struct xe_file *xef;
43+
4144
/** @gt: graphics tile this exec queue can submit to */
4245
struct xe_gt *gt;
4346
/**
@@ -139,10 +142,6 @@ struct xe_exec_queue {
139142
* Protected by @vm's resv. Unused if @vm == NULL.
140143
*/
141144
u64 tlb_flush_seqno;
142-
/** @old_run_ticks: prior hw engine class run time in ticks for this exec queue */
143-
u64 old_run_ticks;
144-
/** @run_ticks: hw engine class run time in ticks for this exec queue */
145-
u64 run_ticks;
146145
/** @lrc: logical ring context for this exec queue */
147146
struct xe_lrc *lrc[];
148147
};

0 commit comments

Comments
 (0)