Skip to content

Commit 3c88b8f

Browse files
Thomas Hellström authored and Lucas De Marchi committed
drm/xe: Use ring ops TLB invalidation for rebinds
For each rebind we insert a GuC TLB invalidation and add a corresponding
unordered TLB invalidation fence. This might add a huge number of TLB
invalidation fences to wait for so rather than doing that, defer the TLB
invalidation to the next ring ops for each affected exec queue. Since the
TLB is invalidated on exec_queue switch, we need to invalidate once for
each affected exec_queue.

v2:
- Simplify if-statements around the tlb_flush_seqno. (Matthew Brost)
- Add some comments and asserts.

Fixes: 5387e86 ("drm/xe: Add TLB invalidation fence after rebinds issued from execs")
Cc: Matthew Brost <[email protected]>
Cc: <[email protected]> # v6.8+
Signed-off-by: Thomas Hellström <[email protected]>
Reviewed-by: Matthew Brost <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
(cherry picked from commit 4fc4899)
Signed-off-by: Lucas De Marchi <[email protected]>
1 parent 39cd87c commit 3c88b8f

File tree

6 files changed

+30
-9
lines changed

6 files changed

+30
-9
lines changed

drivers/gpu/drm/xe/xe_exec_queue_types.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,11 @@ struct xe_exec_queue {
148148
const struct xe_ring_ops *ring_ops;
149149
/** @entity: DRM sched entity for this exec queue (1 to 1 relationship) */
150150
struct drm_sched_entity *entity;
151+
/**
152+
* @tlb_flush_seqno: The seqno of the last rebind tlb flush performed
153+
* Protected by @vm's resv. Unused if @vm == NULL.
154+
*/
155+
u64 tlb_flush_seqno;
151156
/** @lrc: logical ring context for this exec queue */
152157
struct xe_lrc lrc[];
153158
};

drivers/gpu/drm/xe/xe_pt.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1254,11 +1254,13 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
12541254
* non-faulting LR, in particular on user-space batch buffer chaining,
12551255
* it needs to be done here.
12561256
*/
1257-
if ((rebind && !xe_vm_in_lr_mode(vm) && !vm->batch_invalidate_tlb) ||
1258-
(!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) {
1257+
if ((!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) {
12591258
ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
12601259
if (!ifence)
12611260
return ERR_PTR(-ENOMEM);
1261+
} else if (rebind && !xe_vm_in_lr_mode(vm)) {
1262+
/* We bump also if batch_invalidate_tlb is true */
1263+
vm->tlb_flush_seqno++;
12621264
}
12631265

12641266
rfence = kzalloc(sizeof(*rfence), GFP_KERNEL);

drivers/gpu/drm/xe/xe_ring_ops.c

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -219,10 +219,9 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc
219219
{
220220
u32 dw[MAX_JOB_SIZE_DW], i = 0;
221221
u32 ppgtt_flag = get_ppgtt_flag(job);
222-
struct xe_vm *vm = job->q->vm;
223222
struct xe_gt *gt = job->q->gt;
224223

225-
if (vm && vm->batch_invalidate_tlb) {
224+
if (job->ring_ops_flush_tlb) {
226225
dw[i++] = preparser_disable(true);
227226
i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
228227
seqno, true, dw, i);
@@ -270,7 +269,6 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
270269
struct xe_gt *gt = job->q->gt;
271270
struct xe_device *xe = gt_to_xe(gt);
272271
bool decode = job->q->class == XE_ENGINE_CLASS_VIDEO_DECODE;
273-
struct xe_vm *vm = job->q->vm;
274272

275273
dw[i++] = preparser_disable(true);
276274

@@ -282,13 +280,13 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
282280
i = emit_aux_table_inv(gt, VE0_AUX_INV, dw, i);
283281
}
284282

285-
if (vm && vm->batch_invalidate_tlb)
283+
if (job->ring_ops_flush_tlb)
286284
i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
287285
seqno, true, dw, i);
288286

289287
dw[i++] = preparser_disable(false);
290288

291-
if (!vm || !vm->batch_invalidate_tlb)
289+
if (!job->ring_ops_flush_tlb)
292290
i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
293291
seqno, dw, i);
294292

@@ -317,7 +315,6 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
317315
struct xe_gt *gt = job->q->gt;
318316
struct xe_device *xe = gt_to_xe(gt);
319317
bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK);
320-
struct xe_vm *vm = job->q->vm;
321318
u32 mask_flags = 0;
322319

323320
dw[i++] = preparser_disable(true);
@@ -327,7 +324,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
327324
mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS;
328325

329326
/* See __xe_pt_bind_vma() for a discussion on TLB invalidations. */
330-
i = emit_pipe_invalidate(mask_flags, vm && vm->batch_invalidate_tlb, dw, i);
327+
i = emit_pipe_invalidate(mask_flags, job->ring_ops_flush_tlb, dw, i);
331328

332329
/* hsdes: 1809175790 */
333330
if (has_aux_ccs(xe))

drivers/gpu/drm/xe/xe_sched_job.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,16 @@ bool xe_sched_job_completed(struct xe_sched_job *job)
250250

251251
void xe_sched_job_arm(struct xe_sched_job *job)
252252
{
253+
struct xe_exec_queue *q = job->q;
254+
struct xe_vm *vm = q->vm;
255+
256+
if (vm && !xe_sched_job_is_migration(q) && !xe_vm_in_lr_mode(vm) &&
257+
(vm->batch_invalidate_tlb || vm->tlb_flush_seqno != q->tlb_flush_seqno)) {
258+
xe_vm_assert_held(vm);
259+
q->tlb_flush_seqno = vm->tlb_flush_seqno;
260+
job->ring_ops_flush_tlb = true;
261+
}
262+
253263
drm_sched_job_arm(&job->drm);
254264
}
255265

drivers/gpu/drm/xe/xe_sched_job_types.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ struct xe_sched_job {
3939
} user_fence;
4040
/** @migrate_flush_flags: Additional flush flags for migration jobs */
4141
u32 migrate_flush_flags;
42+
/** @ring_ops_flush_tlb: The ring ops need to flush TLB before payload. */
43+
bool ring_ops_flush_tlb;
4244
/** @batch_addr: batch buffer address of job */
4345
u64 batch_addr[];
4446
};

drivers/gpu/drm/xe/xe_vm_types.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,11 @@ struct xe_vm {
264264
bool capture_once;
265265
} error_capture;
266266

267+
/**
268+
* @tlb_flush_seqno: Required TLB flush seqno for the next exec.
269+
* protected by the vm resv.
270+
*/
271+
u64 tlb_flush_seqno;
267272
/** @batch_invalidate_tlb: Always invalidate TLB before batch start */
268273
bool batch_invalidate_tlb;
269274
/** @xef: XE file handle for tracking this VM's drm client */

0 commit comments

Comments (0)