66 changes: 37 additions & 29 deletions include/asm-generic/rqspinlock.h
@@ -129,8 +129,8 @@ static __always_inline void release_held_lock_entry(void)
* <error> for lock B
* release_held_lock_entry
*
* try_cmpxchg_acquire for lock A
* grab_held_lock_entry
* try_cmpxchg_acquire for lock A
*
* Lack of any ordering means reordering may occur such that dec, inc
* are done before entry is overwritten. This permits a remote lock
@@ -139,13 +139,8 @@ static __always_inline void release_held_lock_entry(void)
* CPU holds a lock it is attempting to acquire, leading to false ABBA
* diagnosis).
*
* In case of unlock, we will always do a release on the lock word after
* releasing the entry, ensuring that other CPUs cannot hold the lock
* (and make conclusions about deadlocks) until the entry has been
* cleared on the local CPU, preventing any anomalies. Reordering is
* still possible there, but a remote CPU cannot observe a lock in our
* table which it is already holding, since visibility entails our
* release store for the said lock has not retired.
* The case of unlock is treated differently due to NMI reentrancy, see
* comments in res_spin_unlock.
*
* In theory we don't have a problem if the dec and WRITE_ONCE above get
* reordered with each other, we either notice an empty NULL entry on
@@ -175,10 +170,16 @@ static __always_inline int res_spin_lock(rqspinlock_t *lock)
{
int val = 0;

if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL))) {
grab_held_lock_entry(lock);
/*
* Grab the deadlock detection entry before doing the cmpxchg, so that
* reentrancy due to NMIs between the succeeding cmpxchg and creation of
* held lock entry can correctly detect an acquisition attempt in the
* interrupted context.
*/
grab_held_lock_entry(lock);

if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)))
return 0;
}
return resilient_queued_spin_lock_slowpath(lock, val);
}
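
A minimal user-space sketch of the ordering above, assuming a simplified single-slot-per-attempt table: it shows why the held-lock entry is published before the cmpxchg, so an NMI landing right after a successful cmpxchg already sees the acquisition attempt in the interrupted context's table. The names held_locks, held_cnt, grab_entry, and aa_check are illustrative stand-ins, not the kernel's API.

/* Hypothetical user-space model, not kernel code. */
#include <stdbool.h>
#include <stdio.h>

#define NR_HELD 31			/* stands in for RES_NR_HELD */

static void *held_locks[NR_HELD];	/* models the per-CPU held-locks table */
static int held_cnt;
static int lock_word;			/* 0 = unlocked, 1 = locked */

/* Models grab_held_lock_entry(): publish the attempt first. */
static void grab_entry(void *lock)
{
	held_locks[held_cnt++] = lock;
}

/*
 * Models the AA heuristic an interrupting (NMI) context would run
 * before spinning on 'lock'.
 */
static bool aa_check(void *lock)
{
	for (int i = 0; i < held_cnt; i++)
		if (held_locks[i] == lock)
			return true;
	return false;
}

int main(void)
{
	int lock;

	grab_entry(&lock);	/* entry visible before ... */
	lock_word = 1;		/* ... the lock word flips */

	/*
	 * An NMI landing between the two statements above already finds
	 * the attempt in the table, so it reports AA immediately instead
	 * of spinning on lock_word until the timeout expires.
	 */
	printf("AA detected: %s\n", aa_check(&lock) ? "yes" : "no");
	return 0;
}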

@@ -192,28 +193,35 @@ static __always_inline void res_spin_unlock(rqspinlock_t *lock)
{
struct rqspinlock_held *rqh = this_cpu_ptr(&rqspinlock_held_locks);

if (unlikely(rqh->cnt > RES_NR_HELD))
goto unlock;
WRITE_ONCE(rqh->locks[rqh->cnt - 1], NULL);
unlock:
/*
* Release barrier, ensures correct ordering. See release_held_lock_entry
* for details. Perform release store instead of queued_spin_unlock,
* since we use this function for test-and-set fallback as well. When we
* have CONFIG_QUEUED_SPINLOCKS=n, we clear the full 4-byte lockword.
* Release barrier, ensures correct ordering. Perform release store
* instead of queued_spin_unlock, since we use this function for the TAS
* fallback as well. When we have CONFIG_QUEUED_SPINLOCKS=n, we clear
* the full 4-byte lockword.
*/
smp_store_release(&lock->locked, 0);
if (likely(rqh->cnt <= RES_NR_HELD))
WRITE_ONCE(rqh->locks[rqh->cnt - 1], NULL);
/*
* Unlike release_held_lock_entry, we do the lock word release before
* rewriting the entry back to NULL, and place no ordering between the
* WRITE_ONCE and dec, and possible reordering with grabbing an entry.
*
* This opens up a window where another CPU could acquire this lock, and
* then observe it in our table on the current CPU, leading to possible
* misdiagnosis of ABBA when we get reordered with a
* grab_held_lock_entry's writes (see the case described in
* release_held_lock_entry comments).
*
* Like release_held_lock_entry, we can do the release before the dec.
* We simply care about not seeing the 'lock' in our table from a remote
* CPU once the lock has been released, which doesn't rely on the dec.
* This could be avoided if we did the smp_store_release right before
* the dec, ensuring that the remote CPU could only acquire this lock
* and never observe this lock in our table.
*
* Unlike smp_wmb(), release is not a two way fence, hence it is
* possible for an inc to move up and reorder with our clearing of the
* entry. This isn't a problem however, as for a misdiagnosis of ABBA,
* the remote CPU needs to hold this lock, which won't be released until
* the store below is done, which would ensure the entry is overwritten
* to NULL, etc.
* However, that opens up a window where reentrant NMIs on this same
* CPU could have their AA heuristics fail to fire if they land between
* the WRITE_ONCE and unlock release store, which would result in a
* timeout.
*/
smp_store_release(&lock->locked, 0);
this_cpu_dec(rqspinlock_held_locks.cnt);
}
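
To make the unlock-side reasoning concrete, here is a sketch of the window the comment describes, again as a hypothetical single-CPU model rather than the kernel implementation (held_entry, lock_word, and nmi_attempt are illustrative names). Releasing the lock word before clearing the entry means a reentrant NMI in the window can simply take the now-free lock; with the opposite order it would see a held lock with no matching entry and spin until timeout.

/* Hypothetical single-CPU model of the unlock window, not kernel code. */
#include <stdio.h>

static void *held_entry;	/* last slot of the per-CPU table */
static int lock_word = 1;	/* lock currently held by this context */

/* What a reentrant NMI would conclude if it tried to take 'lock' now. */
static void nmi_attempt(void *lock)
{
	if (!lock_word)
		printf("NMI: lock free, acquire it\n");
	else if (held_entry == lock)
		printf("NMI: AA deadlock reported immediately\n");
	else
		printf("NMI: spins until timeout (heuristic missed)\n");
}

int main(void)
{
	int lock;

	held_entry = &lock;

	/* Order used above: release the lock word first ... */
	lock_word = 0;
	nmi_attempt(&lock);	/* NMI in this window can just acquire */
	/* ... then clear the table entry. */
	held_entry = NULL;

	/*
	 * With the opposite order (clear entry, then release), an NMI in
	 * the window would see lock_word == 1 but no entry, hitting the
	 * "spins until timeout" case the comment warns about.
	 */
	return 0;
}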

15 changes: 6 additions & 9 deletions kernel/bpf/rqspinlock.c
@@ -278,6 +278,10 @@ int __lockfunc resilient_tas_spin_lock(rqspinlock_t *lock)
int val, ret = 0;

RES_INIT_TIMEOUT(ts);
/*
* The fast path is not invoked for the TAS fallback, so we must grab
* the deadlock detection entry here.
*/
grab_held_lock_entry(lock);

/*
@@ -400,10 +404,7 @@ int __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val)
goto queue;
}

/*
* Grab an entry in the held locks array, to enable deadlock detection.
*/
grab_held_lock_entry(lock);
/* Deadlock detection entry already held after failing fast path. */

/*
* We're pending, wait for the owner to go away.
@@ -451,11 +452,7 @@ int __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val)
*/
queue:
lockevent_inc(lock_slowpath);
/*
* Grab deadlock detection entry for the queue path.
*/
grab_held_lock_entry(lock);

/* Deadlock detection entry already held after failing fast path. */
node = this_cpu_ptr(&rqnodes[0].mcs);
idx = node->count++;
tail = encode_tail(smp_processor_id(), idx);