
Commit 652fa53

Merge tag 'locking-urgent-2020-04-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking fixes from Thomas Gleixner:
 "Three small fixes/updates for the locking core code:

   - Plug a task struct reference leak in the percpu rwsem implementation.

   - Document the refcount interaction with PID_MAX_LIMIT

   - Improve the 'invalid wait context' data dump in lockdep so it
     contains all information which is required to decode the problem"

* tag 'locking-urgent-2020-04-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  locking/lockdep: Improve 'invalid wait context' splat
  locking/refcount: Document interaction with PID_MAX_LIMIT
  locking/percpu-rwsem: Fix a task_struct refcount
2 parents 4119bf9 + 9a019db commit 652fa53

3 files changed, +51 -26 lines

include/linux/refcount.h

Lines changed: 18 additions & 5 deletions
@@ -38,11 +38,24 @@
  * atomic operations, then the count will continue to edge closer to 0. If it
  * reaches a value of 1 before /any/ of the threads reset it to the saturated
  * value, then a concurrent refcount_dec_and_test() may erroneously free the
- * underlying object. Given the precise timing details involved with the
- * round-robin scheduling of each thread manipulating the refcount and the need
- * to hit the race multiple times in succession, there doesn't appear to be a
- * practical avenue of attack even if using refcount_add() operations with
- * larger increments.
+ * underlying object.
+ * Linux limits the maximum number of tasks to PID_MAX_LIMIT, which is currently
+ * 0x400000 (and can't easily be raised in the future beyond FUTEX_TID_MASK).
+ * With the current PID limit, if no batched refcounting operations are used and
+ * the attacker can't repeatedly trigger kernel oopses in the middle of refcount
+ * operations, this makes it impossible for a saturated refcount to leave the
+ * saturation range, even if it is possible for multiple uses of the same
+ * refcount to nest in the context of a single task:
+ *
+ *     (UINT_MAX+1-REFCOUNT_SATURATED) / PID_MAX_LIMIT =
+ *     0x40000000 / 0x400000 = 0x100 = 256
+ *
+ * If hundreds of references are added/removed with a single refcounting
+ * operation, it may potentially be possible to leave the saturation range; but
+ * given the precise timing details involved with the round-robin scheduling of
+ * each thread manipulating the refcount and the need to hit the race multiple
+ * times in succession, there doesn't appear to be a practical avenue of attack
+ * even if using refcount_add() operations with larger increments.
  *
  * Memory ordering
  * ===============
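
The headroom arithmetic quoted in the new comment can be checked with a small standalone sketch. The constants below are hard-coded with the values the comment cites rather than pulled from the kernel headers, so treat this as illustration only:

/* Standalone sketch: verify the saturation headroom quoted above. */
#include <stdio.h>
#include <stdint.h>

#define REFCOUNT_SATURATED	0xc0000000u	/* value the comment's arithmetic assumes */
#define PID_MAX_LIMIT		0x400000u	/* 4 * 1024 * 1024 tasks */

int main(void)
{
	/* Distance from the saturation value back up to the wrap point. */
	uint64_t headroom = (uint64_t)UINT32_MAX + 1 - REFCOUNT_SATURATED;

	/* 0x40000000 / 0x400000 = 0x100 = 256 single-increment ops per task. */
	printf("headroom = %#llx, ops per task = %llu\n",
	       (unsigned long long)headroom,
	       (unsigned long long)(headroom / PID_MAX_LIMIT));
	return 0;
}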

kernel/locking/lockdep.c

Lines changed: 31 additions & 20 deletions
@@ -3952,10 +3952,36 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
 	return ret;
 }
 
+static inline short task_wait_context(struct task_struct *curr)
+{
+	/*
+	 * Set appropriate wait type for the context; for IRQs we have to take
+	 * into account force_irqthread as that is implied by PREEMPT_RT.
+	 */
+	if (curr->hardirq_context) {
+		/*
+		 * Check if force_irqthreads will run us threaded.
+		 */
+		if (curr->hardirq_threaded || curr->irq_config)
+			return LD_WAIT_CONFIG;
+
+		return LD_WAIT_SPIN;
+	} else if (curr->softirq_context) {
+		/*
+		 * Softirqs are always threaded.
+		 */
+		return LD_WAIT_CONFIG;
+	}
+
+	return LD_WAIT_MAX;
+}
+
 static int
 print_lock_invalid_wait_context(struct task_struct *curr,
 				struct held_lock *hlock)
 {
+	short curr_inner;
+
 	if (!debug_locks_off())
 		return 0;
 	if (debug_locks_silent)
@@ -3971,6 +3997,10 @@ print_lock_invalid_wait_context(struct task_struct *curr,
 	print_lock(hlock);
 
 	pr_warn("other info that might help us debug this:\n");
+
+	curr_inner = task_wait_context(curr);
+	pr_warn("context-{%d:%d}\n", curr_inner, curr_inner);
+
 	lockdep_print_held_locks(curr);
 
 	pr_warn("stack backtrace:\n");
@@ -4017,26 +4047,7 @@ static int check_wait_context(struct task_struct *curr, struct held_lock *next)
 	}
 	depth++;
 
-	/*
-	 * Set appropriate wait type for the context; for IRQs we have to take
-	 * into account force_irqthread as that is implied by PREEMPT_RT.
-	 */
-	if (curr->hardirq_context) {
-		/*
-		 * Check if force_irqthreads will run us threaded.
-		 */
-		if (curr->hardirq_threaded || curr->irq_config)
-			curr_inner = LD_WAIT_CONFIG;
-		else
-			curr_inner = LD_WAIT_SPIN;
-	} else if (curr->softirq_context) {
-		/*
-		 * Softirqs are always threaded.
-		 */
-		curr_inner = LD_WAIT_CONFIG;
-	} else {
-		curr_inner = LD_WAIT_MAX;
-	}
+	curr_inner = task_wait_context(curr);
 
 	for (; depth < curr->lockdep_depth; depth++) {
 		struct held_lock *prev = curr->held_locks + depth;
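
As a rough illustration of what the new 'context-{%d:%d}' splat line encodes, here is a user-space sketch of the same decision tree with mocked task flags. The LD_WAIT_* numeric values are assumptions (the real definitions live in the lockdep headers), so the printed numbers are only indicative:

#include <stdio.h>
#include <stdbool.h>

/* Assumed numeric values for illustration only. */
enum { LD_WAIT_SPIN = 2, LD_WAIT_CONFIG = 3, LD_WAIT_MAX = 4 };

/* Mocked stand-in for the task_struct fields the helper inspects. */
struct mock_task {
	bool hardirq_context;
	bool hardirq_threaded;
	bool irq_config;
	bool softirq_context;
};

/* Mirrors the decision tree of the task_wait_context() helper added above. */
static short task_wait_context(const struct mock_task *curr)
{
	if (curr->hardirq_context) {
		/* force_irqthreads (implied by PREEMPT_RT) runs the handler threaded. */
		if (curr->hardirq_threaded || curr->irq_config)
			return LD_WAIT_CONFIG;
		return LD_WAIT_SPIN;
	} else if (curr->softirq_context) {
		/* Softirqs are always threaded. */
		return LD_WAIT_CONFIG;
	}
	return LD_WAIT_MAX;	/* plain task context */
}

int main(void)
{
	struct mock_task task = { 0 };
	struct mock_task hardirq = { .hardirq_context = true };

	/* The new splat line prints the task's wait context twice. */
	printf("context-{%d:%d}\n", task_wait_context(&task), task_wait_context(&task));
	printf("context-{%d:%d}\n", task_wait_context(&hardirq), task_wait_context(&hardirq));
	return 0;
}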

kernel/locking/percpu-rwsem.c

Lines changed: 2 additions & 1 deletion
@@ -118,14 +118,15 @@ static int percpu_rwsem_wake_function(struct wait_queue_entry *wq_entry,
 				      unsigned int mode, int wake_flags,
 				      void *key)
 {
-	struct task_struct *p = get_task_struct(wq_entry->private);
 	bool reader = wq_entry->flags & WQ_FLAG_CUSTOM;
 	struct percpu_rw_semaphore *sem = key;
+	struct task_struct *p;
 
 	/* concurrent against percpu_down_write(), can get stolen */
 	if (!__percpu_rwsem_trylock(sem, reader))
 		return 1;
 
+	p = get_task_struct(wq_entry->private);
 	list_del_init(&wq_entry->entry);
 	smp_store_release(&wq_entry->private, NULL);
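
The percpu-rwsem change fixes a reference leak on the early-return path: get_task_struct() was taken before the trylock-style check, so a failed trylock returned without a matching put. A minimal, generic before/after sketch of that shape (plain illustrative code, not the kernel API):

#include <stdio.h>

struct obj { int refcount; };

static void get_obj(struct obj *o) { o->refcount++; }
static void put_obj(struct obj *o) { o->refcount--; }

/* Old shape: the reference is taken before the bail-out check. */
static int wake_buggy(struct obj *o, int trylock_ok)
{
	get_obj(o);
	if (!trylock_ok)
		return 1;	/* leaked: no matching put on this path */
	/* ... use the object, then drop the reference ... */
	put_obj(o);
	return 0;
}

/* New shape: only take the reference once the trylock has succeeded. */
static int wake_fixed(struct obj *o, int trylock_ok)
{
	if (!trylock_ok)
		return 1;	/* nothing acquired, nothing to drop */
	get_obj(o);
	/* ... use the object, then drop the reference ... */
	put_obj(o);
	return 0;
}

int main(void)
{
	struct obj o = { .refcount = 1 };

	wake_buggy(&o, 0);
	printf("buggy early return leaves refcount = %d\n", o.refcount);	/* 2 */

	o.refcount = 1;
	wake_fixed(&o, 0);
	printf("fixed early return leaves refcount = %d\n", o.refcount);	/* 1 */
	return 0;
}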
