
Commit a33c969

drm/xe/pxp: Don't kill queues while holding PXP locks
xe_exec_queue_kill can sleep, so we can't call it from under a spinlock. We can instead move the queues to a separate list and then kill them all after we release the spinlock.

Furthermore, xe_exec_queue_kill can take the VM lock, so we can't call it while holding the PXP mutex, because the mutex is taken under the VM lock at queue creation time. Note that while it is safe to call the kill without holding the mutex, we must call it after the PXP state has been updated; otherwise an app might be able to create a queue between the invalidation and the state update, which would break the state machine.

Since being in the list is used to track whether RPM cleanup is needed, we can no longer defer that to queue_destroy, so we perform it immediately instead.

v2: also avoid calling kill() under pxp->mutex

Reported-by: Dan Carpenter <[email protected]>
Closes: https://lore.kernel.org/intel-xe/[email protected]/
Fixes: f8caa80 ("drm/xe/pxp: Add PXP queue tracking and session start")
Signed-off-by: Daniele Ceraolo Spurio <[email protected]>
Cc: John Harrison <[email protected]>
Cc: Matthew Brost <[email protected]>
Reviewed-by: Matthew Brost <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent e67a35b commit a33c969
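The key change is the pattern the first paragraph of the commit message describes: while the spinlock is held, queues are only moved onto a private list; the sleeping xe_exec_queue_kill() and the RPM put run only after the lock is dropped. Below is a minimal userspace sketch of that pattern, assuming a toy fake_queue type, a pthread spinlock, and stand-in kill_queue()/invalidate_queues() helpers; none of these names are part of the xe driver.

/*
 * Minimal sketch of "detach under spinlock, clean up after unlock".
 * Illustrative only; this is not the xe driver code.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

struct fake_queue {
	int id;
	struct fake_queue *next;
};

static pthread_spinlock_t queues_lock;
static struct fake_queue *queues;	/* shared list, protected by queues_lock */

/* Stand-in for xe_exec_queue_kill(): may block, so never call it under the lock. */
static void kill_queue(struct fake_queue *q)
{
	usleep(1000);			/* pretend this sleeps */
	printf("killed queue %d\n", q->id);
}

static void invalidate_queues(void)
{
	struct fake_queue *to_clean, *q;

	/* Step 1: detach everything onto a private list while holding the lock. */
	pthread_spin_lock(&queues_lock);
	to_clean = queues;
	queues = NULL;
	pthread_spin_unlock(&queues_lock);

	/* Step 2: the sleeping cleanup happens with no locks held. */
	while ((q = to_clean)) {
		to_clean = q->next;
		kill_queue(q);
		free(q);
	}
}

int main(void)
{
	pthread_spin_init(&queues_lock, PTHREAD_PROCESS_PRIVATE);

	for (int i = 0; i < 3; i++) {
		struct fake_queue *q = calloc(1, sizeof(*q));
		q->id = i;
		pthread_spin_lock(&queues_lock);
		q->next = queues;
		queues = q;
		pthread_spin_unlock(&queues_lock);
	}

	invalidate_queues();
	pthread_spin_destroy(&queues_lock);
	return 0;
}

The driver-side version in the diff below does the same thing with list_move_tail() onto a local LIST_HEAD(to_clean) plus per-queue reference counting, but the locking shape is identical: nothing that can sleep runs between spin_lock_irq() and spin_unlock_irq().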

File tree: 1 file changed, 44 insertions(+), 34 deletions(-)

drivers/gpu/drm/xe/xe_pxp.c

Lines changed: 44 additions & 34 deletions
@@ -132,14 +132,6 @@ static int pxp_wait_for_session_state(struct xe_pxp *pxp, u32 id, bool in_play)
 
 static void pxp_invalidate_queues(struct xe_pxp *pxp);
 
-static void pxp_invalidate_state(struct xe_pxp *pxp)
-{
-	pxp_invalidate_queues(pxp);
-
-	if (pxp->status == XE_PXP_ACTIVE)
-		pxp->key_instance++;
-}
-
 static int pxp_terminate_hw(struct xe_pxp *pxp)
 {
 	struct xe_gt *gt = pxp->gt;
@@ -193,7 +185,8 @@ static void pxp_terminate(struct xe_pxp *pxp)
 
 	mutex_lock(&pxp->mutex);
 
-	pxp_invalidate_state(pxp);
+	if (pxp->status == XE_PXP_ACTIVE)
+		pxp->key_instance++;
 
 	/*
 	 * we'll mark the status as needing termination on resume, so no need to
@@ -220,6 +213,8 @@ static void pxp_terminate(struct xe_pxp *pxp)
 
 	mutex_unlock(&pxp->mutex);
 
+	pxp_invalidate_queues(pxp);
+
 	ret = pxp_terminate_hw(pxp);
 	if (ret) {
 		drm_err(&xe->drm, "PXP termination failed: %pe\n", ERR_PTR(ret));
@@ -665,23 +660,15 @@ int xe_pxp_exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue *q)
 	return ret;
 }
 
-/**
- * xe_pxp_exec_queue_remove - remove a queue from the PXP list
- * @pxp: the xe->pxp pointer (it will be NULL if PXP is disabled)
- * @q: the queue to remove from the list
- *
- * If PXP is enabled and the exec_queue is in the list, the queue will be
- * removed from the list and its PM reference will be released. It is safe to
- * call this function multiple times for the same queue.
- */
-void xe_pxp_exec_queue_remove(struct xe_pxp *pxp, struct xe_exec_queue *q)
+static void __pxp_exec_queue_remove(struct xe_pxp *pxp, struct xe_exec_queue *q, bool lock)
 {
 	bool need_pm_put = false;
 
 	if (!xe_pxp_is_enabled(pxp))
 		return;
 
-	spin_lock_irq(&pxp->queues.lock);
+	if (lock)
+		spin_lock_irq(&pxp->queues.lock);
 
 	if (!list_empty(&q->pxp.link)) {
 		list_del_init(&q->pxp.link);
@@ -690,36 +677,54 @@ void xe_pxp_exec_queue_remove(struct xe_pxp *pxp, struct xe_exec_queue *q)
 
 	q->pxp.type = DRM_XE_PXP_TYPE_NONE;
 
-	spin_unlock_irq(&pxp->queues.lock);
+	if (lock)
+		spin_unlock_irq(&pxp->queues.lock);
 
 	if (need_pm_put)
 		xe_pm_runtime_put(pxp->xe);
 }
 
+/**
+ * xe_pxp_exec_queue_remove - remove a queue from the PXP list
+ * @pxp: the xe->pxp pointer (it will be NULL if PXP is disabled)
+ * @q: the queue to remove from the list
+ *
+ * If PXP is enabled and the exec_queue is in the list, the queue will be
+ * removed from the list and its PM reference will be released. It is safe to
+ * call this function multiple times for the same queue.
+ */
+void xe_pxp_exec_queue_remove(struct xe_pxp *pxp, struct xe_exec_queue *q)
+{
+	__pxp_exec_queue_remove(pxp, q, true);
+}
+
 static void pxp_invalidate_queues(struct xe_pxp *pxp)
 {
 	struct xe_exec_queue *tmp, *q;
+	LIST_HEAD(to_clean);
 
 	spin_lock_irq(&pxp->queues.lock);
 
-	/*
-	 * Removing a queue from the PXP list requires a put of the RPM ref that
-	 * the queue holds to keep the PXP session alive, which can't be done
-	 * under spinlock. Since it is safe to kill a queue multiple times, we
-	 * can leave the invalid queue in the list for now and postpone the
-	 * removal and associated RPM put to when the queue is destroyed.
-	 */
-	list_for_each_entry(tmp, &pxp->queues.list, pxp.link) {
-		q = xe_exec_queue_get_unless_zero(tmp);
-
+	list_for_each_entry_safe(q, tmp, &pxp->queues.list, pxp.link) {
+		q = xe_exec_queue_get_unless_zero(q);
 		if (!q)
 			continue;
 
+		list_move_tail(&q->pxp.link, &to_clean);
+	}
+	spin_unlock_irq(&pxp->queues.lock);
+
+	list_for_each_entry_safe(q, tmp, &to_clean, pxp.link) {
 		xe_exec_queue_kill(q);
+
+		/*
+		 * We hold a ref to the queue so there is no risk of racing with
+		 * the calls to exec_queue_remove coming from exec_queue_destroy.
+		 */
+		__pxp_exec_queue_remove(pxp, q, false);
+
 		xe_exec_queue_put(q);
 	}
-
-	spin_unlock_irq(&pxp->queues.lock);
 }
 
 /**
@@ -816,6 +821,7 @@ int xe_pxp_obj_key_check(struct xe_pxp *pxp, struct drm_gem_object *obj)
  */
 int xe_pxp_pm_suspend(struct xe_pxp *pxp)
 {
+	bool needs_queue_inval = false;
 	int ret = 0;
 
 	if (!xe_pxp_is_enabled(pxp))
@@ -848,7 +854,8 @@ int xe_pxp_pm_suspend(struct xe_pxp *pxp)
			break;
		fallthrough;
	case XE_PXP_ACTIVE:
-		pxp_invalidate_state(pxp);
+		pxp->key_instance++;
+		needs_queue_inval = true;
		break;
	default:
		drm_err(&pxp->xe->drm, "unexpected state during PXP suspend: %u",
@@ -865,6 +872,9 @@ int xe_pxp_pm_suspend(struct xe_pxp *pxp)
 
 	mutex_unlock(&pxp->mutex);
 
+	if (needs_queue_inval)
+		pxp_invalidate_queues(pxp);
+
 	/*
 	 * if there is a termination in progress, wait for it.
 	 * We need to wait outside the lock because the completion is done from
