@@ -17,6 +17,8 @@
 #include "xe_trace.h"
 #include "regs/xe_guc_regs.h"
 
+#define FENCE_STACK_BIT		DMA_FENCE_FLAG_USER_BITS
+
 /*
  * TLB inval depends on pending commands in the CT queue and then the real
  * invalidation time. Double up the time to process full CT queue
@@ -33,6 +35,23 @@ static long tlb_timeout_jiffies(struct xe_gt *gt)
 	return hw_tlb_timeout + 2 * delay;
 }
 
+static void
+__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
+{
+	bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags);
+
+	trace_xe_gt_tlb_invalidation_fence_signal(xe, fence);
+	dma_fence_signal(&fence->base);
+	if (!stack)
+		dma_fence_put(&fence->base);
+}
+
+static void
+invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
+{
+	list_del(&fence->link);
+	__invalidation_fence_signal(xe, fence);
+}
 
 static void xe_gt_tlb_fence_timeout(struct work_struct *work)
 {
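Note: the two signaling helpers above are moved up and extended for stack-allocated fences. dma_fence flag bits from DMA_FENCE_FLAG_USER_BITS upward are reserved for driver use, so the patch claims the first one as FENCE_STACK_BIT. A comment-form sketch of the ownership invariant the helpers rely on, derived from this diff:

	/*
	 * FENCE_STACK_BIT ownership invariant (sketch):
	 *
	 * fence_init(stack=true):  refcount == 1 (dma_fence_init only),
	 *                          FENCE_STACK_BIT set; the caller's stack
	 *                          frame owns the fence, so signaling must
	 *                          not call dma_fence_put().
	 * fence_init(stack=false): refcount == 2 (init + dma_fence_get);
	 *                          __invalidation_fence_signal() drops the
	 *                          extra reference when the fence signals.
	 */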
@@ -54,10 +73,8 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work)
 		xe_gt_err(gt, "TLB invalidation fence timeout, seqno=%d recv=%d",
 			  fence->seqno, gt->tlb_invalidation.seqno_recv);
 
-		list_del(&fence->link);
 		fence->base.error = -ETIME;
-		dma_fence_signal(&fence->base);
-		dma_fence_put(&fence->base);
+		invalidation_fence_signal(xe, fence);
 	}
 	if (!list_empty(&gt->tlb_invalidation.pending_fences))
 		queue_delayed_work(system_wq,
@@ -87,21 +104,6 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
 	return 0;
 }
 
-static void
-__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
-{
-	trace_xe_gt_tlb_invalidation_fence_signal(xe, fence);
-	dma_fence_signal(&fence->base);
-	dma_fence_put(&fence->base);
-}
-
-static void
-invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
-{
-	list_del(&fence->link);
-	__invalidation_fence_signal(xe, fence);
-}
-
 /**
  * xe_gt_tlb_invalidation_reset - Initialize GT TLB invalidation reset
  * @gt: graphics tile
@@ -111,7 +113,6 @@ invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fe
 void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
 {
 	struct xe_gt_tlb_invalidation_fence *fence, *next;
-	struct xe_guc *guc = &gt->uc.guc;
 	int pending_seqno;
 
 	/*
@@ -134,7 +135,6 @@ void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
 	else
 		pending_seqno = gt->tlb_invalidation.seqno - 1;
 	WRITE_ONCE(gt->tlb_invalidation.seqno_recv, pending_seqno);
-	wake_up_all(&guc->ct.wq);
 
 	list_for_each_entry_safe(fence, next,
 				 &gt->tlb_invalidation.pending_fences, link)
@@ -165,6 +165,8 @@ static int send_tlb_invalidation(struct xe_guc *guc,
 	int seqno;
 	int ret;
 
+	xe_gt_assert(gt, fence);
+
 	/*
 	 * XXX: The seqno algorithm relies on TLB invalidation being processed
 	 * in order which they currently are, if that changes the algorithm will
@@ -173,10 +175,8 @@ static int send_tlb_invalidation(struct xe_guc *guc,
 
 	mutex_lock(&guc->ct.lock);
 	seqno = gt->tlb_invalidation.seqno;
-	if (fence) {
-		fence->seqno = seqno;
-		trace_xe_gt_tlb_invalidation_fence_send(xe, fence);
-	}
+	fence->seqno = seqno;
+	trace_xe_gt_tlb_invalidation_fence_send(xe, fence);
 	action[1] = seqno;
 	ret = xe_guc_ct_send_locked(&guc->ct, action, len,
 				    G2H_LEN_DW_TLB_INVALIDATE, 1);
@@ -209,7 +209,6 @@ static int send_tlb_invalidation(struct xe_guc *guc,
 			TLB_INVALIDATION_SEQNO_MAX;
 		if (!gt->tlb_invalidation.seqno)
 			gt->tlb_invalidation.seqno = 1;
-		ret = seqno;
 	}
 	mutex_unlock(&guc->ct.lock);
 
@@ -223,22 +222,24 @@ static int send_tlb_invalidation(struct xe_guc *guc,
 /**
  * xe_gt_tlb_invalidation_guc - Issue a TLB invalidation on this GT for the GuC
  * @gt: graphics tile
+ * @fence: invalidation fence which will be signaled on TLB invalidation
+ * completion
  *
  * Issue a TLB invalidation for the GuC. Completion of TLB is asynchronous and
- * caller can use seqno + xe_gt_tlb_invalidation_wait to wait for completion.
+ * caller can use the invalidation fence to wait for completion.
  *
- * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
- * negative error code on error.
+ * Return: 0 on success, negative error code on error
  */
-static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt)
+static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt,
+				      struct xe_gt_tlb_invalidation_fence *fence)
 {
 	u32 action[] = {
 		XE_GUC_ACTION_TLB_INVALIDATION,
 		0,  /* seqno, replaced in send_tlb_invalidation */
 		MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC),
 	};
 
-	return send_tlb_invalidation(&gt->uc.guc, NULL, action,
+	return send_tlb_invalidation(&gt->uc.guc, fence, action,
 				     ARRAY_SIZE(action));
 }
 
@@ -257,13 +258,15 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
 
 	if (xe_guc_ct_enabled(&gt->uc.guc.ct) &&
 	    gt->uc.guc.submission_state.enabled) {
-		int seqno;
+		struct xe_gt_tlb_invalidation_fence fence;
+		int ret;
 
-		seqno = xe_gt_tlb_invalidation_guc(gt);
-		if (seqno <= 0)
-			return seqno;
+		xe_gt_tlb_invalidation_fence_init(gt, &fence, true);
+		ret = xe_gt_tlb_invalidation_guc(gt, &fence);
+		if (ret < 0)
+			return ret;
 
-		xe_gt_tlb_invalidation_wait(gt, seqno);
+		xe_gt_tlb_invalidation_fence_wait(&fence);
 	} else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) {
 		if (IS_SRIOV_VF(xe))
 			return 0;
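Note: xe_gt_tlb_invalidation_fence_wait() is new in this caller but not defined in this file; presumably the header gains a thin blocking wrapper around the embedded dma_fence. A sketch assuming only the dma_fence API, not the patch's verbatim header change:

	static inline void
	xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence)
	{
		dma_fence_wait(&fence->base, false);	/* uninterruptible */
	}

Since xe_gt_tlb_fence_timeout() signals stuck fences with -ETIME, the wait stays bounded even without a timeout of its own.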
@@ -290,18 +293,16 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
  *
  * @gt: graphics tile
  * @fence: invalidation fence which will be signaled on TLB invalidation
- * completion, can be NULL
+ * completion
  * @start: start address
  * @end: end address
  * @asid: address space id
  *
  * Issue a range based TLB invalidation if supported, if not fallback to a full
- * TLB invalidation. Completion of TLB is asynchronous and caller can either use
- * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for
- * completion.
+ * TLB invalidation. Completion of TLB is asynchronous and caller can use
+ * the invalidation fence to wait for completion.
  *
- * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
- * negative error code on error.
+ * Return: Negative error code on error, 0 on success
  */
 int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
 				 struct xe_gt_tlb_invalidation_fence *fence,
@@ -312,11 +313,11 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
 	u32 action[MAX_TLB_INVALIDATION_LEN];
 	int len = 0;
 
+	xe_gt_assert(gt, fence);
+
 	/* Execlists not supported */
 	if (gt_to_xe(gt)->info.force_execlist) {
-		if (fence)
-			__invalidation_fence_signal(xe, fence);
-
+		__invalidation_fence_signal(xe, fence);
 		return 0;
 	}
 
@@ -382,12 +383,10 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
  * @vma: VMA to invalidate
  *
  * Issue a range based TLB invalidation if supported, if not fallback to a full
- * TLB invalidation. Completion of TLB is asynchronous and caller can either use
- * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for
- * completion.
+ * TLB invalidation. Completion of TLB is asynchronous and caller can use
+ * the invalidation fence to wait for completion.
  *
- * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
- * negative error code on error.
+ * Return: Negative error code on error, 0 on success
  */
 int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
 			       struct xe_gt_tlb_invalidation_fence *fence,
@@ -400,43 +399,6 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
 					    xe_vma_vm(vma)->usm.asid);
 }
 
-/**
- * xe_gt_tlb_invalidation_wait - Wait for TLB to complete
- * @gt: graphics tile
- * @seqno: seqno to wait which was returned from xe_gt_tlb_invalidation
- *
- * Wait for tlb_timeout_jiffies() for a TLB invalidation to complete.
- *
- * Return: 0 on success, -ETIME on TLB invalidation timeout
- */
-int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
-{
-	struct xe_guc *guc = &gt->uc.guc;
-	int ret;
-
-	/* Execlists not supported */
-	if (gt_to_xe(gt)->info.force_execlist)
-		return 0;
-
-	/*
-	 * XXX: See above, this algorithm only works if seqno are always in
-	 * order
-	 */
-	ret = wait_event_timeout(guc->ct.wq,
-				 tlb_invalidation_seqno_past(gt, seqno),
-				 tlb_timeout_jiffies(gt));
-	if (!ret) {
-		struct drm_printer p = xe_gt_err_printer(gt);
-
-		xe_gt_err(gt, "TLB invalidation time'd out, seqno=%d, recv=%d\n",
-			  seqno, gt->tlb_invalidation.seqno_recv);
-		xe_guc_ct_print(&guc->ct, &p, true);
-		return -ETIME;
-	}
-
-	return 0;
-}
-
 /**
  * xe_guc_tlb_invalidation_done_handler - TLB invalidation done handler
  * @guc: guc
@@ -480,12 +442,7 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
 		return 0;
 	}
 
-	/*
-	 * wake_up_all() and wait_event_timeout() already have the correct
-	 * barriers.
-	 */
 	WRITE_ONCE(gt->tlb_invalidation.seqno_recv, msg[0]);
-	wake_up_all(&guc->ct.wq);
 
 	list_for_each_entry_safe(fence, next,
 				 &gt->tlb_invalidation.pending_fences, link) {
@@ -530,17 +487,22 @@ static const struct dma_fence_ops invalidation_fence_ops = {
  * xe_gt_tlb_invalidation_fence_init - Initialize TLB invalidation fence
  * @gt: GT
  * @fence: TLB invalidation fence to initialize
+ * @stack: fence is a stack variable
  *
  * Initialize TLB invalidation fence for use
  */
 void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt,
-				       struct xe_gt_tlb_invalidation_fence *fence)
+				       struct xe_gt_tlb_invalidation_fence *fence,
+				       bool stack)
 {
 	spin_lock_irq(&gt->tlb_invalidation.lock);
 	dma_fence_init(&fence->base, &invalidation_fence_ops,
 		       &gt->tlb_invalidation.lock,
 		       dma_fence_context_alloc(1), 1);
 	spin_unlock_irq(&gt->tlb_invalidation.lock);
 	INIT_LIST_HEAD(&fence->link);
-	dma_fence_get(&fence->base);
+	if (stack)
+		set_bit(FENCE_STACK_BIT, &fence->base.flags);
+	else
+		dma_fence_get(&fence->base);
 }
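Note: after this change xe_gt_tlb_invalidation_fence_init() supports two ownership modes. A caller-side sketch; "ifence" here is an illustrative heap-allocated wrapper, not a name from this patch:

	/* Synchronous, stack-allocated: no extra reference is taken, so the
	 * caller must wait before the fence goes out of scope. */
	struct xe_gt_tlb_invalidation_fence fence;

	xe_gt_tlb_invalidation_fence_init(gt, &fence, true);
	ret = xe_gt_tlb_invalidation_guc(gt, &fence);
	if (!ret)
		xe_gt_tlb_invalidation_fence_wait(&fence);

	/* Asynchronous, embedded in a heap object: init takes an extra
	 * reference and __invalidation_fence_signal() drops it on signal. */
	xe_gt_tlb_invalidation_fence_init(gt, &ifence->fence, false);
	ret = xe_gt_tlb_invalidation_range(gt, &ifence->fence, start, end, asid);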