Skip to content

Commit 9d235ac

Browse files
committed
Merge branch 'ucount-fixes-for-v5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace
Pull ucounts fixes from Eric Biederman: "There has been one very hard to track down bug in the ucount code that we have been tracking since roughly v5.14 was released. Alex managed to find a reliable reproducer a few days ago and then I was able to instrument the code and figure out what the issue was. It turns out the sigqueue_alloc single atomic operation optimization did not play nicely with ucounts multiple level rlimits. It turned out that either sigqueue_alloc or sigqueue_free could be operating on multiple levels and trigger the conditions for the optimization on more than one level at the same time. To deal with that situation I have introduced inc_rlimit_get_ucounts and dec_rlimit_put_ucounts that just focuses on the optimization and the rlimit and ucount changes. While looking into the big bug I found I couple of other little issues so I am including those fixes here as well. When I have time I would very much like to dig into process ownership of the shared signal queue and see if we could pick a single owner for the entire queue so that all of the rlimits can count to that owner. That should entirely remove the need to call get_ucounts and put_ucounts in sigqueue_alloc and sigqueue_free. It is difficult because Linux unlike POSIX supports setuid that works on a single thread" * 'ucount-fixes-for-v5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace: ucounts: Move get_ucounts from cred_alloc_blank to key_change_session_keyring ucounts: Proper error handling in set_cred_ucounts ucounts: Pair inc_rlimit_ucounts with dec_rlimit_ucoutns in commit_creds ucounts: Fix signal ucount refcounting
2 parents 6c2c712 + 5ebcbe3 commit 9d235ac

File tree

5 files changed

+69
-24
lines changed

5 files changed

+69
-24
lines changed

include/linux/user_namespace.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,8 @@ static inline long get_ucounts_value(struct ucounts *ucounts, enum ucount_type t
127127

128128
long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
129129
bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
130+
long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type);
131+
void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum ucount_type type);
130132
bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max);
131133

132134
static inline void set_rlimit_ucount_max(struct user_namespace *ns,

kernel/cred.c

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -225,8 +225,6 @@ struct cred *cred_alloc_blank(void)
225225
#ifdef CONFIG_DEBUG_CREDENTIALS
226226
new->magic = CRED_MAGIC;
227227
#endif
228-
new->ucounts = get_ucounts(&init_ucounts);
229-
230228
if (security_cred_alloc_blank(new, GFP_KERNEL_ACCOUNT) < 0)
231229
goto error;
232230

@@ -501,7 +499,7 @@ int commit_creds(struct cred *new)
501499
inc_rlimit_ucounts(new->ucounts, UCOUNT_RLIMIT_NPROC, 1);
502500
rcu_assign_pointer(task->real_cred, new);
503501
rcu_assign_pointer(task->cred, new);
504-
if (new->user != old->user)
502+
if (new->user != old->user || new->user_ns != old->user_ns)
505503
dec_rlimit_ucounts(old->ucounts, UCOUNT_RLIMIT_NPROC, 1);
506504
alter_cred_subscribers(old, -2);
507505

@@ -669,7 +667,7 @@ int set_cred_ucounts(struct cred *new)
669667
{
670668
struct task_struct *task = current;
671669
const struct cred *old = task->real_cred;
672-
struct ucounts *old_ucounts = new->ucounts;
670+
struct ucounts *new_ucounts, *old_ucounts = new->ucounts;
673671

674672
if (new->user == old->user && new->user_ns == old->user_ns)
675673
return 0;
@@ -681,9 +679,10 @@ int set_cred_ucounts(struct cred *new)
681679
if (old_ucounts && old_ucounts->ns == new->user_ns && uid_eq(old_ucounts->uid, new->euid))
682680
return 0;
683681

684-
if (!(new->ucounts = alloc_ucounts(new->user_ns, new->euid)))
682+
if (!(new_ucounts = alloc_ucounts(new->user_ns, new->euid)))
685683
return -EAGAIN;
686684

685+
new->ucounts = new_ucounts;
687686
if (old_ucounts)
688687
put_ucounts(old_ucounts);
689688

kernel/signal.c

Lines changed: 6 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -426,22 +426,10 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags,
426426
*/
427427
rcu_read_lock();
428428
ucounts = task_ucounts(t);
429-
sigpending = inc_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1);
430-
switch (sigpending) {
431-
case 1:
432-
if (likely(get_ucounts(ucounts)))
433-
break;
434-
fallthrough;
435-
case LONG_MAX:
436-
/*
437-
* we need to decrease the ucount in the userns tree on any
438-
* failure to avoid counts leaking.
439-
*/
440-
dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1);
441-
rcu_read_unlock();
442-
return NULL;
443-
}
429+
sigpending = inc_rlimit_get_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING);
444430
rcu_read_unlock();
431+
if (!sigpending)
432+
return NULL;
445433

446434
if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) {
447435
q = kmem_cache_alloc(sigqueue_cachep, gfp_flags);
@@ -450,8 +438,7 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags,
450438
}
451439

452440
if (unlikely(q == NULL)) {
453-
if (dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1))
454-
put_ucounts(ucounts);
441+
dec_rlimit_put_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING);
455442
} else {
456443
INIT_LIST_HEAD(&q->list);
457444
q->flags = sigqueue_flags;
@@ -464,8 +451,8 @@ static void __sigqueue_free(struct sigqueue *q)
464451
{
465452
if (q->flags & SIGQUEUE_PREALLOC)
466453
return;
467-
if (q->ucounts && dec_rlimit_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING, 1)) {
468-
put_ucounts(q->ucounts);
454+
if (q->ucounts) {
455+
dec_rlimit_put_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING);
469456
q->ucounts = NULL;
470457
}
471458
kmem_cache_free(sigqueue_cachep, q);

kernel/ucount.c

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,55 @@ bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v)
284284
return (new == 0);
285285
}
286286

287+
static void do_dec_rlimit_put_ucounts(struct ucounts *ucounts,
288+
struct ucounts *last, enum ucount_type type)
289+
{
290+
struct ucounts *iter, *next;
291+
for (iter = ucounts; iter != last; iter = next) {
292+
long dec = atomic_long_add_return(-1, &iter->ucount[type]);
293+
WARN_ON_ONCE(dec < 0);
294+
next = iter->ns->ucounts;
295+
if (dec == 0)
296+
put_ucounts(iter);
297+
}
298+
}
299+
300+
void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum ucount_type type)
301+
{
302+
do_dec_rlimit_put_ucounts(ucounts, NULL, type);
303+
}
304+
305+
long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type)
306+
{
307+
/* Caller must hold a reference to ucounts */
308+
struct ucounts *iter;
309+
long dec, ret = 0;
310+
311+
for (iter = ucounts; iter; iter = iter->ns->ucounts) {
312+
long max = READ_ONCE(iter->ns->ucount_max[type]);
313+
long new = atomic_long_add_return(1, &iter->ucount[type]);
314+
if (new < 0 || new > max)
315+
goto unwind;
316+
if (iter == ucounts)
317+
ret = new;
318+
/*
319+
* Grab an extra ucount reference for the caller when
320+
* the rlimit count was previously 0.
321+
*/
322+
if (new != 1)
323+
continue;
324+
if (!get_ucounts(iter))
325+
goto dec_unwind;
326+
}
327+
return ret;
328+
dec_unwind:
329+
dec = atomic_long_add_return(-1, &iter->ucount[type]);
330+
WARN_ON_ONCE(dec < 0);
331+
unwind:
332+
do_dec_rlimit_put_ucounts(ucounts, iter, type);
333+
return 0;
334+
}
335+
287336
bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max)
288337
{
289338
struct ucounts *iter;

security/keys/process_keys.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -918,6 +918,13 @@ void key_change_session_keyring(struct callback_head *twork)
918918
return;
919919
}
920920

921+
/* If get_ucounts fails more bits are needed in the refcount */
922+
if (unlikely(!get_ucounts(old->ucounts))) {
923+
WARN_ONCE(1, "In %s get_ucounts failed\n", __func__);
924+
put_cred(new);
925+
return;
926+
}
927+
921928
new-> uid = old-> uid;
922929
new-> euid = old-> euid;
923930
new-> suid = old-> suid;
@@ -927,6 +934,7 @@ void key_change_session_keyring(struct callback_head *twork)
927934
new-> sgid = old-> sgid;
928935
new->fsgid = old->fsgid;
929936
new->user = get_uid(old->user);
937+
new->ucounts = old->ucounts;
930938
new->user_ns = get_user_ns(old->user_ns);
931939
new->group_info = get_group_info(old->group_info);
932940

0 commit comments

Comments
 (0)