Skip to content

Commit eea9673

Browse files
committed
exec: Add exec_update_mutex to replace cred_guard_mutex
The cred_guard_mutex is problematic as it is held over possibly indefinite waits for userspace. The possible indefinite waits for userspace that I have identified are: The cred_guard_mutex is held in PTRACE_EVENT_EXIT waiting for the tracer. The cred_guard_mutex is held over "put_user(0, tsk->clear_child_tid)" in exit_mm(). The cred_guard_mutex is held over "get_user(futex_offset, ...") in exit_robust_list. The cred_guard_mutex held over copy_strings. The functions get_user and put_user can trigger a page fault which can potentially wait indefinitely in the case of userfaultfd or if userspace implements part of the page fault path. In any of those cases the userspace process that the kernel is waiting for might make a different system call that winds up taking the cred_guard_mutex and result in deadlock. Holding a mutex over any of those possibly indefinite waits for userspace does not appear necessary. Add exec_update_mutex that will just cover updating the process during exec where the permissions and the objects pointed to by the task struct may be out of sync. The plan is to switch the users of cred_guard_mutex to exec_update_mutex one by one. This lets us move forward while still being careful and not introducing any regressions. Link: https://lore.kernel.org/lkml/[email protected]/ Link: https://lore.kernel.org/lkml/AM6PR03MB5170B06F3A2B75EFB98D071AE4E60@AM6PR03MB5170.eurprd03.prod.outlook.com/ Link: https://lore.kernel.org/linux-fsdevel/[email protected]/ Link: https://lore.kernel.org/lkml/[email protected]/ Link: https://lore.kernel.org/lkml/[email protected]/ Ref: 45c1a15 ("Add PTRACE_O_TRACEVFORKDONE and PTRACE_O_TRACEEXIT facilities.") Ref: 456f17c ("[PATCH] user-vm-unlock-2.5.31-A2") Reviewed-by: Kirill Tkhai <[email protected]> Signed-off-by: "Eric W. Biederman" <[email protected]> Signed-off-by: Bernd Edlinger <[email protected]> Signed-off-by: Eric W. Biederman <[email protected]>
1 parent ccf0fa6 commit eea9673

File tree

5 files changed

+36
-5
lines changed

5 files changed

+36
-5
lines changed

fs/exec.c

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1010,16 +1010,26 @@ ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len)
10101010
}
10111011
EXPORT_SYMBOL(read_code);
10121012

1013+
/*
1014+
* Maps the mm_struct mm into the current task struct.
1015+
* On success, this function returns with the mutex
1016+
* exec_update_mutex locked.
1017+
*/
10131018
static int exec_mmap(struct mm_struct *mm)
10141019
{
10151020
struct task_struct *tsk;
10161021
struct mm_struct *old_mm, *active_mm;
1022+
int ret;
10171023

10181024
/* Notify parent that we're no longer interested in the old VM */
10191025
tsk = current;
10201026
old_mm = current->mm;
10211027
exec_mm_release(tsk, old_mm);
10221028

1029+
ret = mutex_lock_killable(&tsk->signal->exec_update_mutex);
1030+
if (ret)
1031+
return ret;
1032+
10231033
if (old_mm) {
10241034
sync_mm_rss(old_mm);
10251035
/*
@@ -1031,9 +1041,11 @@ static int exec_mmap(struct mm_struct *mm)
10311041
down_read(&old_mm->mmap_sem);
10321042
if (unlikely(old_mm->core_state)) {
10331043
up_read(&old_mm->mmap_sem);
1044+
mutex_unlock(&tsk->signal->exec_update_mutex);
10341045
return -EINTR;
10351046
}
10361047
}
1048+
10371049
task_lock(tsk);
10381050
active_mm = tsk->active_mm;
10391051
membarrier_exec_mmap(mm);
@@ -1288,11 +1300,12 @@ int flush_old_exec(struct linux_binprm * bprm)
12881300
goto out;
12891301

12901302
/*
1291-
* After clearing bprm->mm (to mark that current is using the
1292-
* prepared mm now), we have nothing left of the original
1303+
* After setting bprm->called_exec_mmap (to mark that current is
1304+
* using the prepared mm now), we have nothing left of the original
12931305
* process. If anything from here on returns an error, the check
12941306
* in search_binary_handler() will SEGV current.
12951307
*/
1308+
bprm->called_exec_mmap = 1;
12961309
bprm->mm = NULL;
12971310

12981311
#ifdef CONFIG_POSIX_TIMERS
@@ -1438,6 +1451,8 @@ static void free_bprm(struct linux_binprm *bprm)
14381451
{
14391452
free_arg_pages(bprm);
14401453
if (bprm->cred) {
1454+
if (bprm->called_exec_mmap)
1455+
mutex_unlock(&current->signal->exec_update_mutex);
14411456
mutex_unlock(&current->signal->cred_guard_mutex);
14421457
abort_creds(bprm->cred);
14431458
}
@@ -1487,6 +1502,7 @@ void install_exec_creds(struct linux_binprm *bprm)
14871502
* credentials; any time after this it may be unlocked.
14881503
*/
14891504
security_bprm_committed_creds(bprm);
1505+
mutex_unlock(&current->signal->exec_update_mutex);
14901506
mutex_unlock(&current->signal->cred_guard_mutex);
14911507
}
14921508
EXPORT_SYMBOL(install_exec_creds);
@@ -1678,7 +1694,7 @@ int search_binary_handler(struct linux_binprm *bprm)
16781694

16791695
read_lock(&binfmt_lock);
16801696
put_binfmt(fmt);
1681-
if (retval < 0 && !bprm->mm) {
1697+
if (retval < 0 && bprm->called_exec_mmap) {
16821698
/* we got to flush_old_exec() and failed after it */
16831699
read_unlock(&binfmt_lock);
16841700
force_sigsegv(SIGSEGV);

include/linux/binfmts.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,13 @@ struct linux_binprm {
4444
* exec has happened. Used to sanitize execution environment
4545
* and to set AT_SECURE auxv for glibc.
4646
*/
47-
secureexec:1;
47+
secureexec:1,
48+
/*
49+
* Set by flush_old_exec, when exec_mmap has been called.
50+
* This is past the point of no return, when the
51+
* exec_update_mutex has been taken.
52+
*/
53+
called_exec_mmap:1;
4854
#ifdef __alpha__
4955
unsigned int taso:1;
5056
#endif

include/linux/sched/signal.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,14 @@ struct signal_struct {
224224

225225
struct mutex cred_guard_mutex; /* guard against foreign influences on
226226
* credential calculations
227-
* (notably. ptrace) */
227+
* (notably. ptrace)
228+
* Deprecated do not use in new code.
229+
* Use exec_update_mutex instead.
230+
*/
231+
struct mutex exec_update_mutex; /* Held while task_struct is being
232+
* updated during exec, and may have
233+
* inconsistent permissions.
234+
*/
228235
} __randomize_layout;
229236

230237
/*

init/init_task.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ static struct signal_struct init_signals = {
2626
.multiprocess = HLIST_HEAD_INIT,
2727
.rlim = INIT_RLIMITS,
2828
.cred_guard_mutex = __MUTEX_INITIALIZER(init_signals.cred_guard_mutex),
29+
.exec_update_mutex = __MUTEX_INITIALIZER(init_signals.exec_update_mutex),
2930
#ifdef CONFIG_POSIX_TIMERS
3031
.posix_timers = LIST_HEAD_INIT(init_signals.posix_timers),
3132
.cputimer = {

kernel/fork.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1594,6 +1594,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
15941594
sig->oom_score_adj_min = current->signal->oom_score_adj_min;
15951595

15961596
mutex_init(&sig->cred_guard_mutex);
1597+
mutex_init(&sig->exec_update_mutex);
15971598

15981599
return 0;
15991600
}

0 commit comments

Comments
 (0)