Skip to content

Commit cfcbc05

Browse files
committed
drm/xe: fix unbalanced rpm put() with fence_fini()
Currently we can call fence_fini() twice if something goes wrong when sending the GuC CT for the tlb request, since we signal the fence and return an error, leading to the caller also calling fini() on the error path in the case of the stack version of the flow, which leads to an extra rpm put() which might later cause the device to enter suspend when it shouldn't. It looks like we can just drop the fini() call since the fence signaller side will already call this for us.

There are known mysterious splats with the device going to sleep even with an rpm ref, and this could be one candidate.

v2 (Matt B):
  - Prefer warning if we detect double fini()

Fixes: 0a382f9 ("drm/xe: Hold a PM ref when GT TLB invalidations are inflight")
Signed-off-by: Matthew Auld <[email protected]>
Cc: Matthew Brost <[email protected]>
Cc: Nirmoy Das <[email protected]>
Reviewed-by: Matthew Brost <[email protected]>
Reviewed-by: Nirmoy Das <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent 8fb1da9 commit cfcbc05

File tree

3 files changed

+15
-23
lines changed

3 files changed

+15
-23
lines changed

drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c

Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,15 @@ static long tlb_timeout_jiffies(struct xe_gt *gt)
3737
return hw_tlb_timeout + 2 * delay;
3838
}
3939

40+
static void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence)
41+
{
42+
if (WARN_ON_ONCE(!fence->gt))
43+
return;
44+
45+
xe_pm_runtime_put(gt_to_xe(fence->gt));
46+
fence->gt = NULL; /* fini() should be called once */
47+
}
48+
4049
static void
4150
__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
4251
{
@@ -204,7 +213,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
204213
tlb_timeout_jiffies(gt));
205214
}
206215
spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
207-
} else if (ret < 0) {
216+
} else {
208217
__invalidation_fence_signal(xe, fence);
209218
}
210219
if (!ret) {
@@ -267,10 +276,8 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
267276

268277
xe_gt_tlb_invalidation_fence_init(gt, &fence, true);
269278
ret = xe_gt_tlb_invalidation_guc(gt, &fence);
270-
if (ret < 0) {
271-
xe_gt_tlb_invalidation_fence_fini(&fence);
279+
if (ret)
272280
return ret;
273-
}
274281

275282
xe_gt_tlb_invalidation_fence_wait(&fence);
276283
} else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) {
@@ -498,7 +505,8 @@ static const struct dma_fence_ops invalidation_fence_ops = {
498505
* @stack: fence is stack variable
499506
*
500507
* Initialize TLB invalidation fence for use. xe_gt_tlb_invalidation_fence_fini
501-
* must be called if fence is not signaled.
508+
* will be automatically called when fence is signalled (all fences must signal),
509+
* even on error.
502510
*/
503511
void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt,
504512
struct xe_gt_tlb_invalidation_fence *fence,
@@ -518,14 +526,3 @@ void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt,
518526
dma_fence_get(&fence->base);
519527
fence->gt = gt;
520528
}
521-
522-
/**
523-
* xe_gt_tlb_invalidation_fence_fini - Finalize TLB invalidation fence
524-
* @fence: TLB invalidation fence to finalize
525-
*
526-
* Drop PM ref which fence took durinig init.
527-
*/
528-
void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence)
529-
{
530-
xe_pm_runtime_put(gt_to_xe(fence->gt));
531-
}

drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
2828
void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt,
2929
struct xe_gt_tlb_invalidation_fence *fence,
3030
bool stack);
31-
void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence);
3231

3332
static inline void
3433
xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence)

drivers/gpu/drm/xe/xe_vm.c

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3199,10 +3199,8 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
31993199

32003200
ret = xe_gt_tlb_invalidation_vma(tile->primary_gt,
32013201
&fence[fence_id], vma);
3202-
if (ret < 0) {
3203-
xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]);
3202+
if (ret)
32043203
goto wait;
3205-
}
32063204
++fence_id;
32073205

32083206
if (!tile->media_gt)
@@ -3214,10 +3212,8 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
32143212

32153213
ret = xe_gt_tlb_invalidation_vma(tile->media_gt,
32163214
&fence[fence_id], vma);
3217-
if (ret < 0) {
3218-
xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]);
3215+
if (ret)
32193216
goto wait;
3220-
}
32213217
++fence_id;
32223218
}
32233219
}

0 commit comments

Comments
 (0)