Skip to content

Commit 58bfe66

Browse files
mbrost05 authored and rodrigovivi committed
drm/xe: Drop xe_gt_tlb_invalidation_wait
Having two methods to wait on GT TLB invalidations is not ideal. Remove xe_gt_tlb_invalidation_wait and only use GT TLB invalidation fences. In addition to two methods being less than ideal, once GT TLB invalidations are coalesced the seqno cannot be assigned during xe_gt_tlb_invalidation_ggtt/range. Thus xe_gt_tlb_invalidation_wait would not have a seqno to wait one. A fence however can be armed and later signaled. v3: - Add explaination about coalescing to commit message v4: - Don't put dma fence if defined on stack (CI) v5: - Initialize ret to zero (CI) v6: - Use invalidation_fence_signal helper in tlb timeout (Matthew Auld) Signed-off-by: Matthew Brost <[email protected]> Reviewed-by: Nirmoy Das <[email protected]> Link: https://patchwork.freedesktop.org/patch/msgid/[email protected] (cherry picked from commit 61ac035) Signed-off-by: Rodrigo Vivi <[email protected]>
1 parent 90be4cc commit 58bfe66

File tree

4 files changed

+80
-110
lines changed

4 files changed

+80
-110
lines changed

drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c

Lines changed: 55 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
#include "xe_trace.h"
1818
#include "regs/xe_guc_regs.h"
1919

20+
#define FENCE_STACK_BIT DMA_FENCE_FLAG_USER_BITS
21+
2022
/*
2123
* TLB inval depends on pending commands in the CT queue and then the real
2224
* invalidation time. Double up the time to process full CT queue
@@ -33,6 +35,23 @@ static long tlb_timeout_jiffies(struct xe_gt *gt)
3335
return hw_tlb_timeout + 2 * delay;
3436
}
3537

38+
static void
39+
__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
40+
{
41+
bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags);
42+
43+
trace_xe_gt_tlb_invalidation_fence_signal(xe, fence);
44+
dma_fence_signal(&fence->base);
45+
if (!stack)
46+
dma_fence_put(&fence->base);
47+
}
48+
49+
static void
50+
invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
51+
{
52+
list_del(&fence->link);
53+
__invalidation_fence_signal(xe, fence);
54+
}
3655

3756
static void xe_gt_tlb_fence_timeout(struct work_struct *work)
3857
{
@@ -54,10 +73,8 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work)
5473
xe_gt_err(gt, "TLB invalidation fence timeout, seqno=%d recv=%d",
5574
fence->seqno, gt->tlb_invalidation.seqno_recv);
5675

57-
list_del(&fence->link);
5876
fence->base.error = -ETIME;
59-
dma_fence_signal(&fence->base);
60-
dma_fence_put(&fence->base);
77+
invalidation_fence_signal(xe, fence);
6178
}
6279
if (!list_empty(&gt->tlb_invalidation.pending_fences))
6380
queue_delayed_work(system_wq,
@@ -87,21 +104,6 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
87104
return 0;
88105
}
89106

90-
static void
91-
__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
92-
{
93-
trace_xe_gt_tlb_invalidation_fence_signal(xe, fence);
94-
dma_fence_signal(&fence->base);
95-
dma_fence_put(&fence->base);
96-
}
97-
98-
static void
99-
invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
100-
{
101-
list_del(&fence->link);
102-
__invalidation_fence_signal(xe, fence);
103-
}
104-
105107
/**
106108
* xe_gt_tlb_invalidation_reset - Initialize GT TLB invalidation reset
107109
* @gt: graphics tile
@@ -111,7 +113,6 @@ invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fe
111113
void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
112114
{
113115
struct xe_gt_tlb_invalidation_fence *fence, *next;
114-
struct xe_guc *guc = &gt->uc.guc;
115116
int pending_seqno;
116117

117118
/*
@@ -134,7 +135,6 @@ void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
134135
else
135136
pending_seqno = gt->tlb_invalidation.seqno - 1;
136137
WRITE_ONCE(gt->tlb_invalidation.seqno_recv, pending_seqno);
137-
wake_up_all(&guc->ct.wq);
138138

139139
list_for_each_entry_safe(fence, next,
140140
&gt->tlb_invalidation.pending_fences, link)
@@ -165,6 +165,8 @@ static int send_tlb_invalidation(struct xe_guc *guc,
165165
int seqno;
166166
int ret;
167167

168+
xe_gt_assert(gt, fence);
169+
168170
/*
169171
* XXX: The seqno algorithm relies on TLB invalidation being processed
170172
* in order which they currently are, if that changes the algorithm will
@@ -173,10 +175,8 @@ static int send_tlb_invalidation(struct xe_guc *guc,
173175

174176
mutex_lock(&guc->ct.lock);
175177
seqno = gt->tlb_invalidation.seqno;
176-
if (fence) {
177-
fence->seqno = seqno;
178-
trace_xe_gt_tlb_invalidation_fence_send(xe, fence);
179-
}
178+
fence->seqno = seqno;
179+
trace_xe_gt_tlb_invalidation_fence_send(xe, fence);
180180
action[1] = seqno;
181181
ret = xe_guc_ct_send_locked(&guc->ct, action, len,
182182
G2H_LEN_DW_TLB_INVALIDATE, 1);
@@ -209,7 +209,6 @@ static int send_tlb_invalidation(struct xe_guc *guc,
209209
TLB_INVALIDATION_SEQNO_MAX;
210210
if (!gt->tlb_invalidation.seqno)
211211
gt->tlb_invalidation.seqno = 1;
212-
ret = seqno;
213212
}
214213
mutex_unlock(&guc->ct.lock);
215214

@@ -223,22 +222,24 @@ static int send_tlb_invalidation(struct xe_guc *guc,
223222
/**
224223
* xe_gt_tlb_invalidation_guc - Issue a TLB invalidation on this GT for the GuC
225224
* @gt: graphics tile
225+
* @fence: invalidation fence which will be signal on TLB invalidation
226+
* completion
226227
*
227228
* Issue a TLB invalidation for the GuC. Completion of TLB is asynchronous and
228-
* caller can use seqno + xe_gt_tlb_invalidation_wait to wait for completion.
229+
* caller can use the invalidation fence to wait for completion.
229230
*
230-
* Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
231-
* negative error code on error.
231+
* Return: 0 on success, negative error code on error
232232
*/
233-
static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt)
233+
static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt,
234+
struct xe_gt_tlb_invalidation_fence *fence)
234235
{
235236
u32 action[] = {
236237
XE_GUC_ACTION_TLB_INVALIDATION,
237238
0, /* seqno, replaced in send_tlb_invalidation */
238239
MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC),
239240
};
240241

241-
return send_tlb_invalidation(&gt->uc.guc, NULL, action,
242+
return send_tlb_invalidation(&gt->uc.guc, fence, action,
242243
ARRAY_SIZE(action));
243244
}
244245

@@ -257,13 +258,15 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
257258

258259
if (xe_guc_ct_enabled(&gt->uc.guc.ct) &&
259260
gt->uc.guc.submission_state.enabled) {
260-
int seqno;
261+
struct xe_gt_tlb_invalidation_fence fence;
262+
int ret;
261263

262-
seqno = xe_gt_tlb_invalidation_guc(gt);
263-
if (seqno <= 0)
264-
return seqno;
264+
xe_gt_tlb_invalidation_fence_init(gt, &fence, true);
265+
ret = xe_gt_tlb_invalidation_guc(gt, &fence);
266+
if (ret < 0)
267+
return ret;
265268

266-
xe_gt_tlb_invalidation_wait(gt, seqno);
269+
xe_gt_tlb_invalidation_fence_wait(&fence);
267270
} else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) {
268271
if (IS_SRIOV_VF(xe))
269272
return 0;
@@ -290,18 +293,16 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
290293
*
291294
* @gt: graphics tile
292295
* @fence: invalidation fence which will be signal on TLB invalidation
293-
* completion, can be NULL
296+
* completion
294297
* @start: start address
295298
* @end: end address
296299
* @asid: address space id
297300
*
298301
* Issue a range based TLB invalidation if supported, if not fallback to a full
299-
* TLB invalidation. Completion of TLB is asynchronous and caller can either use
300-
* the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for
301-
* completion.
302+
* TLB invalidation. Completion of TLB is asynchronous and caller can use
303+
* the invalidation fence to wait for completion.
302304
*
303-
* Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
304-
* negative error code on error.
305+
* Return: Negative error code on error, 0 on success
305306
*/
306307
int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
307308
struct xe_gt_tlb_invalidation_fence *fence,
@@ -312,11 +313,11 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
312313
u32 action[MAX_TLB_INVALIDATION_LEN];
313314
int len = 0;
314315

316+
xe_gt_assert(gt, fence);
317+
315318
/* Execlists not supported */
316319
if (gt_to_xe(gt)->info.force_execlist) {
317-
if (fence)
318-
__invalidation_fence_signal(xe, fence);
319-
320+
__invalidation_fence_signal(xe, fence);
320321
return 0;
321322
}
322323

@@ -382,12 +383,10 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
382383
* @vma: VMA to invalidate
383384
*
384385
* Issue a range based TLB invalidation if supported, if not fallback to a full
385-
* TLB invalidation. Completion of TLB is asynchronous and caller can either use
386-
* the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for
387-
* completion.
386+
* TLB invalidation. Completion of TLB is asynchronous and caller can use
387+
* the invalidation fence to wait for completion.
388388
*
389-
* Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
390-
* negative error code on error.
389+
* Return: Negative error code on error, 0 on success
391390
*/
392391
int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
393392
struct xe_gt_tlb_invalidation_fence *fence,
@@ -400,43 +399,6 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
400399
xe_vma_vm(vma)->usm.asid);
401400
}
402401

403-
/**
404-
* xe_gt_tlb_invalidation_wait - Wait for TLB to complete
405-
* @gt: graphics tile
406-
* @seqno: seqno to wait which was returned from xe_gt_tlb_invalidation
407-
*
408-
* Wait for tlb_timeout_jiffies() for a TLB invalidation to complete.
409-
*
410-
* Return: 0 on success, -ETIME on TLB invalidation timeout
411-
*/
412-
int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
413-
{
414-
struct xe_guc *guc = &gt->uc.guc;
415-
int ret;
416-
417-
/* Execlists not supported */
418-
if (gt_to_xe(gt)->info.force_execlist)
419-
return 0;
420-
421-
/*
422-
* XXX: See above, this algorithm only works if seqno are always in
423-
* order
424-
*/
425-
ret = wait_event_timeout(guc->ct.wq,
426-
tlb_invalidation_seqno_past(gt, seqno),
427-
tlb_timeout_jiffies(gt));
428-
if (!ret) {
429-
struct drm_printer p = xe_gt_err_printer(gt);
430-
431-
xe_gt_err(gt, "TLB invalidation time'd out, seqno=%d, recv=%d\n",
432-
seqno, gt->tlb_invalidation.seqno_recv);
433-
xe_guc_ct_print(&guc->ct, &p, true);
434-
return -ETIME;
435-
}
436-
437-
return 0;
438-
}
439-
440402
/**
441403
* xe_guc_tlb_invalidation_done_handler - TLB invalidation done handler
442404
* @guc: guc
@@ -480,12 +442,7 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
480442
return 0;
481443
}
482444

483-
/*
484-
* wake_up_all() and wait_event_timeout() already have the correct
485-
* barriers.
486-
*/
487445
WRITE_ONCE(gt->tlb_invalidation.seqno_recv, msg[0]);
488-
wake_up_all(&guc->ct.wq);
489446

490447
list_for_each_entry_safe(fence, next,
491448
&gt->tlb_invalidation.pending_fences, link) {
@@ -530,17 +487,22 @@ static const struct dma_fence_ops invalidation_fence_ops = {
530487
* xe_gt_tlb_invalidation_fence_init - Initialize TLB invalidation fence
531488
* @gt: GT
532489
* @fence: TLB invalidation fence to initialize
490+
* @stack: fence is stack variable
533491
*
534492
* Initialize TLB invalidation fence for use
535493
*/
536494
void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt,
537-
struct xe_gt_tlb_invalidation_fence *fence)
495+
struct xe_gt_tlb_invalidation_fence *fence,
496+
bool stack)
538497
{
539498
spin_lock_irq(&gt->tlb_invalidation.lock);
540499
dma_fence_init(&fence->base, &invalidation_fence_ops,
541500
&gt->tlb_invalidation.lock,
542501
dma_fence_context_alloc(1), 1);
543502
spin_unlock_irq(&gt->tlb_invalidation.lock);
544503
INIT_LIST_HEAD(&fence->link);
545-
dma_fence_get(&fence->base);
504+
if (stack)
505+
set_bit(FENCE_STACK_BIT, &fence->base.flags);
506+
else
507+
dma_fence_get(&fence->base);
546508
}

drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,16 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
2323
int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
2424
struct xe_gt_tlb_invalidation_fence *fence,
2525
u64 start, u64 end, u32 asid);
26-
int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno);
2726
int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
2827

2928
void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt,
30-
struct xe_gt_tlb_invalidation_fence *fence);
29+
struct xe_gt_tlb_invalidation_fence *fence,
30+
bool stack);
31+
32+
static inline void
33+
xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence)
34+
{
35+
dma_fence_wait(&fence->base, false);
36+
}
3137

3238
#endif /* _XE_GT_TLB_INVALIDATION_ */

drivers/gpu/drm/xe/xe_pt.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1153,7 +1153,7 @@ static int invalidation_fence_init(struct xe_gt *gt,
11531153

11541154
trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base);
11551155

1156-
xe_gt_tlb_invalidation_fence_init(gt, &ifence->base);
1156+
xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false);
11571157

11581158
ifence->fence = fence;
11591159
ifence->gt = gt;

drivers/gpu/drm/xe/xe_vm.c

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3341,10 +3341,10 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
33413341
{
33423342
struct xe_device *xe = xe_vma_vm(vma)->xe;
33433343
struct xe_tile *tile;
3344+
struct xe_gt_tlb_invalidation_fence fence[XE_MAX_TILES_PER_DEVICE];
33443345
u32 tile_needs_invalidate = 0;
3345-
int seqno[XE_MAX_TILES_PER_DEVICE];
33463346
u8 id;
3347-
int ret;
3347+
int ret = 0;
33483348

33493349
xe_assert(xe, !xe_vma_is_null(vma));
33503350
trace_xe_vma_invalidate(vma);
@@ -3369,29 +3369,31 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
33693369

33703370
for_each_tile(tile, xe, id) {
33713371
if (xe_pt_zap_ptes(tile, vma)) {
3372-
tile_needs_invalidate |= BIT(id);
33733372
xe_device_wmb(xe);
3373+
xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
3374+
&fence[id], true);
3375+
33743376
/*
33753377
* FIXME: We potentially need to invalidate multiple
33763378
* GTs within the tile
33773379
*/
3378-
seqno[id] = xe_gt_tlb_invalidation_vma(tile->primary_gt, NULL, vma);
3379-
if (seqno[id] < 0)
3380-
return seqno[id];
3381-
}
3382-
}
3383-
3384-
for_each_tile(tile, xe, id) {
3385-
if (tile_needs_invalidate & BIT(id)) {
3386-
ret = xe_gt_tlb_invalidation_wait(tile->primary_gt, seqno[id]);
3380+
ret = xe_gt_tlb_invalidation_vma(tile->primary_gt,
3381+
&fence[id], vma);
33873382
if (ret < 0)
3388-
return ret;
3383+
goto wait;
3384+
3385+
tile_needs_invalidate |= BIT(id);
33893386
}
33903387
}
33913388

3389+
wait:
3390+
for_each_tile(tile, xe, id)
3391+
if (tile_needs_invalidate & BIT(id))
3392+
xe_gt_tlb_invalidation_fence_wait(&fence[id]);
3393+
33923394
vma->tile_invalidated = vma->tile_mask;
33933395

3394-
return 0;
3396+
return ret;
33953397
}
33963398

33973399
struct xe_vm_snapshot {

0 commit comments

Comments
 (0)