Skip to content

Commit 4b871ce

Browse files
committed
Merged 'Infrastructure to allow fixing exec deadlocks' from Bernd Edlinger
This is an infrastructure change that makes way for fixing this issue. Each patch was already posted previously so this is just a cleanup of the original mailing list thread(s) which got out of control by now. Everything started here: https://lore.kernel.org/lkml/AM6PR03MB5170B06F3A2B75EFB98D071AE4E60@AM6PR03MB5170.eurprd03.prod.outlook.com/ I added reviewed-by tags from the mailing list threads, except when withdrawn. It took a lot longer than expected to collect everything from the mailinglist threads, since several commit messages have been infected with typos, and they got fixed without a new patch version. - Correct the point of no return. - Add two new mutexes to replace cred_guard_mutex. - Fix each use of cred_guard_mutex. - Update documentation. - Add a test case. -- EWB Removed the last 2 patches they need more work Bernd Edlinger (9): exec: Fix a deadlock in strace selftests/ptrace: add test cases for dead-locks mm: docs: Fix a comment in process_vm_rw_core kernel: doc: remove outdated comment cred.c kernel/kcmp.c: Use new infrastructure to fix deadlocks in execve proc: Use new infrastructure to fix deadlocks in execve proc: io_accounting: Use new infrastructure to fix deadlocks in execve perf: Use new infrastructure to fix deadlocks in execve pidfd: Use new infrastructure to fix deadlocks in execve Eric W. Biederman (5): exec: Only compute current once in flush_old_exec exec: Factor unshare_sighand out of de_thread and call it separately exec: Move cleanup of posix timers on exec out of de_thread exec: Move exec_mmap right after de_thread in flush_old_exec exec: Add exec_update_mutex to replace cred_guard_mutex fs/exec.c | 78 +++++++++++++++++++--------- fs/proc/base.c | 10 ++-- include/linux/binfmts.h | 8 ++- include/linux/sched/signal.h | 9 +++- init/init_task.c | 1 + kernel/cred.c | 2 - kernel/events/core.c | 12 ++--- kernel/fork.c | 5 +- kernel/kcmp.c | 8 +-- kernel/pid.c | 4 +- mm/process_vm_access.c | 2 +- tools/testing/selftests/ptrace/Makefile | 4 +- tools/testing/selftests/ptrace/vmaccess.c | 86 +++++++++++++++++++++++++++++++ 13 files changed, 179 insertions(+), 50 deletions(-) Signed-off-by: Bernd Edlinger <[email protected]> Signed-off-by: "Eric W. Biederman" <[email protected]>
2 parents a0d4a14 + 501f932 commit 4b871ce

File tree

13 files changed

+179
-50
lines changed

13 files changed

+179
-50
lines changed

fs/exec.c

Lines changed: 54 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1010,16 +1010,26 @@ ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len)
10101010
}
10111011
EXPORT_SYMBOL(read_code);
10121012

1013+
/*
1014+
* Maps the mm_struct mm into the current task struct.
1015+
* On success, this function returns with the mutex
1016+
* exec_update_mutex locked.
1017+
*/
10131018
static int exec_mmap(struct mm_struct *mm)
10141019
{
10151020
struct task_struct *tsk;
10161021
struct mm_struct *old_mm, *active_mm;
1022+
int ret;
10171023

10181024
/* Notify parent that we're no longer interested in the old VM */
10191025
tsk = current;
10201026
old_mm = current->mm;
10211027
exec_mm_release(tsk, old_mm);
10221028

1029+
ret = mutex_lock_killable(&tsk->signal->exec_update_mutex);
1030+
if (ret)
1031+
return ret;
1032+
10231033
if (old_mm) {
10241034
sync_mm_rss(old_mm);
10251035
/*
@@ -1031,9 +1041,11 @@ static int exec_mmap(struct mm_struct *mm)
10311041
down_read(&old_mm->mmap_sem);
10321042
if (unlikely(old_mm->core_state)) {
10331043
up_read(&old_mm->mmap_sem);
1044+
mutex_unlock(&tsk->signal->exec_update_mutex);
10341045
return -EINTR;
10351046
}
10361047
}
1048+
10371049
task_lock(tsk);
10381050
active_mm = tsk->active_mm;
10391051
membarrier_exec_mmap(mm);
@@ -1189,10 +1201,22 @@ static int de_thread(struct task_struct *tsk)
11891201
/* we have changed execution domain */
11901202
tsk->exit_signal = SIGCHLD;
11911203

1192-
#ifdef CONFIG_POSIX_TIMERS
1193-
exit_itimers(sig);
1194-
flush_itimer_signals();
1195-
#endif
1204+
BUG_ON(!thread_group_leader(tsk));
1205+
return 0;
1206+
1207+
killed:
1208+
/* protects against exit_notify() and __exit_signal() */
1209+
read_lock(&tasklist_lock);
1210+
sig->group_exit_task = NULL;
1211+
sig->notify_count = 0;
1212+
read_unlock(&tasklist_lock);
1213+
return -EAGAIN;
1214+
}
1215+
1216+
1217+
static int unshare_sighand(struct task_struct *me)
1218+
{
1219+
struct sighand_struct *oldsighand = me->sighand;
11961220

11971221
if (refcount_read(&oldsighand->count) != 1) {
11981222
struct sighand_struct *newsighand;
@@ -1210,23 +1234,13 @@ static int de_thread(struct task_struct *tsk)
12101234

12111235
write_lock_irq(&tasklist_lock);
12121236
spin_lock(&oldsighand->siglock);
1213-
rcu_assign_pointer(tsk->sighand, newsighand);
1237+
rcu_assign_pointer(me->sighand, newsighand);
12141238
spin_unlock(&oldsighand->siglock);
12151239
write_unlock_irq(&tasklist_lock);
12161240

12171241
__cleanup_sighand(oldsighand);
12181242
}
1219-
1220-
BUG_ON(!thread_group_leader(tsk));
12211243
return 0;
1222-
1223-
killed:
1224-
/* protects against exit_notify() and __exit_signal() */
1225-
read_lock(&tasklist_lock);
1226-
sig->group_exit_task = NULL;
1227-
sig->notify_count = 0;
1228-
read_unlock(&tasklist_lock);
1229-
return -EAGAIN;
12301244
}
12311245

12321246
char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk)
@@ -1260,13 +1274,13 @@ void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec)
12601274
*/
12611275
int flush_old_exec(struct linux_binprm * bprm)
12621276
{
1277+
struct task_struct *me = current;
12631278
int retval;
12641279

12651280
/*
1266-
* Make sure we have a private signal table and that
1267-
* we are unassociated from the previous thread group.
1281+
* Make this the only thread in the thread group.
12681282
*/
1269-
retval = de_thread(current);
1283+
retval = de_thread(me);
12701284
if (retval)
12711285
goto out;
12721286

@@ -1286,26 +1300,39 @@ int flush_old_exec(struct linux_binprm * bprm)
12861300
goto out;
12871301

12881302
/*
1289-
* After clearing bprm->mm (to mark that current is using the
1290-
* prepared mm now), we have nothing left of the original
1303+
* After setting bprm->called_exec_mmap (to mark that current is
1304+
* using the prepared mm now), we have nothing left of the original
12911305
* process. If anything from here on returns an error, the check
12921306
* in search_binary_handler() will SEGV current.
12931307
*/
1308+
bprm->called_exec_mmap = 1;
12941309
bprm->mm = NULL;
12951310

1311+
#ifdef CONFIG_POSIX_TIMERS
1312+
exit_itimers(me->signal);
1313+
flush_itimer_signals();
1314+
#endif
1315+
1316+
/*
1317+
* Make the signal table private.
1318+
*/
1319+
retval = unshare_sighand(me);
1320+
if (retval)
1321+
goto out;
1322+
12961323
set_fs(USER_DS);
1297-
current->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD |
1324+
me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD |
12981325
PF_NOFREEZE | PF_NO_SETAFFINITY);
12991326
flush_thread();
1300-
current->personality &= ~bprm->per_clear;
1327+
me->personality &= ~bprm->per_clear;
13011328

13021329
/*
13031330
* We have to apply CLOEXEC before we change whether the process is
13041331
* dumpable (in setup_new_exec) to avoid a race with a process in userspace
13051332
* trying to access the should-be-closed file descriptors of a process
13061333
* undergoing exec(2).
13071334
*/
1308-
do_close_on_exec(current->files);
1335+
do_close_on_exec(me->files);
13091336
return 0;
13101337

13111338
out:
@@ -1424,6 +1451,8 @@ static void free_bprm(struct linux_binprm *bprm)
14241451
{
14251452
free_arg_pages(bprm);
14261453
if (bprm->cred) {
1454+
if (bprm->called_exec_mmap)
1455+
mutex_unlock(&current->signal->exec_update_mutex);
14271456
mutex_unlock(&current->signal->cred_guard_mutex);
14281457
abort_creds(bprm->cred);
14291458
}
@@ -1473,6 +1502,7 @@ void install_exec_creds(struct linux_binprm *bprm)
14731502
* credentials; any time after this it may be unlocked.
14741503
*/
14751504
security_bprm_committed_creds(bprm);
1505+
mutex_unlock(&current->signal->exec_update_mutex);
14761506
mutex_unlock(&current->signal->cred_guard_mutex);
14771507
}
14781508
EXPORT_SYMBOL(install_exec_creds);
@@ -1664,7 +1694,7 @@ int search_binary_handler(struct linux_binprm *bprm)
16641694

16651695
read_lock(&binfmt_lock);
16661696
put_binfmt(fmt);
1667-
if (retval < 0 && !bprm->mm) {
1697+
if (retval < 0 && bprm->called_exec_mmap) {
16681698
/* we got to flush_old_exec() and failed after it */
16691699
read_unlock(&binfmt_lock);
16701700
force_sigsegv(SIGSEGV);

fs/proc/base.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -405,19 +405,19 @@ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
405405

406406
static int lock_trace(struct task_struct *task)
407407
{
408-
int err = mutex_lock_killable(&task->signal->cred_guard_mutex);
408+
int err = mutex_lock_killable(&task->signal->exec_update_mutex);
409409
if (err)
410410
return err;
411411
if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) {
412-
mutex_unlock(&task->signal->cred_guard_mutex);
412+
mutex_unlock(&task->signal->exec_update_mutex);
413413
return -EPERM;
414414
}
415415
return 0;
416416
}
417417

418418
static void unlock_trace(struct task_struct *task)
419419
{
420-
mutex_unlock(&task->signal->cred_guard_mutex);
420+
mutex_unlock(&task->signal->exec_update_mutex);
421421
}
422422

423423
#ifdef CONFIG_STACKTRACE
@@ -2883,7 +2883,7 @@ static int do_io_accounting(struct task_struct *task, struct seq_file *m, int wh
28832883
unsigned long flags;
28842884
int result;
28852885

2886-
result = mutex_lock_killable(&task->signal->cred_guard_mutex);
2886+
result = mutex_lock_killable(&task->signal->exec_update_mutex);
28872887
if (result)
28882888
return result;
28892889

@@ -2919,7 +2919,7 @@ static int do_io_accounting(struct task_struct *task, struct seq_file *m, int wh
29192919
result = 0;
29202920

29212921
out_unlock:
2922-
mutex_unlock(&task->signal->cred_guard_mutex);
2922+
mutex_unlock(&task->signal->exec_update_mutex);
29232923
return result;
29242924
}
29252925

include/linux/binfmts.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,13 @@ struct linux_binprm {
4444
* exec has happened. Used to sanitize execution environment
4545
* and to set AT_SECURE auxv for glibc.
4646
*/
47-
secureexec:1;
47+
secureexec:1,
48+
/*
49+
* Set by flush_old_exec, when exec_mmap has been called.
50+
* This is past the point of no return, when the
51+
* exec_update_mutex has been taken.
52+
*/
53+
called_exec_mmap:1;
4854
#ifdef __alpha__
4955
unsigned int taso:1;
5056
#endif

include/linux/sched/signal.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,14 @@ struct signal_struct {
224224

225225
struct mutex cred_guard_mutex; /* guard against foreign influences on
226226
* credential calculations
227-
* (notably. ptrace) */
227+
* (notably. ptrace)
228+
* Deprecated do not use in new code.
229+
* Use exec_update_mutex instead.
230+
*/
231+
struct mutex exec_update_mutex; /* Held while task_struct is being
232+
* updated during exec, and may have
233+
* inconsistent permissions.
234+
*/
228235
} __randomize_layout;
229236

230237
/*

init/init_task.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ static struct signal_struct init_signals = {
2626
.multiprocess = HLIST_HEAD_INIT,
2727
.rlim = INIT_RLIMITS,
2828
.cred_guard_mutex = __MUTEX_INITIALIZER(init_signals.cred_guard_mutex),
29+
.exec_update_mutex = __MUTEX_INITIALIZER(init_signals.exec_update_mutex),
2930
#ifdef CONFIG_POSIX_TIMERS
3031
.posix_timers = LIST_HEAD_INIT(init_signals.posix_timers),
3132
.cputimer = {

kernel/cred.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -675,8 +675,6 @@ void __init cred_init(void)
675675
* The caller may change these controls afterwards if desired.
676676
*
677677
* Returns the new credentials or NULL if out of memory.
678-
*
679-
* Does not take, and does not return holding current->cred_replace_mutex.
680678
*/
681679
struct cred *prepare_kernel_cred(struct task_struct *daemon)
682680
{

kernel/events/core.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1249,7 +1249,7 @@ static void put_ctx(struct perf_event_context *ctx)
12491249
* function.
12501250
*
12511251
* Lock order:
1252-
* cred_guard_mutex
1252+
* exec_update_mutex
12531253
* task_struct::perf_event_mutex
12541254
* perf_event_context::mutex
12551255
* perf_event::child_mutex;
@@ -11263,14 +11263,14 @@ SYSCALL_DEFINE5(perf_event_open,
1126311263
}
1126411264

1126511265
if (task) {
11266-
err = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
11266+
err = mutex_lock_interruptible(&task->signal->exec_update_mutex);
1126711267
if (err)
1126811268
goto err_task;
1126911269

1127011270
/*
1127111271
* Reuse ptrace permission checks for now.
1127211272
*
11273-
* We must hold cred_guard_mutex across this and any potential
11273+
* We must hold exec_update_mutex across this and any potential
1127411274
* perf_install_in_context() call for this new event to
1127511275
* serialize against exec() altering our credentials (and the
1127611276
* perf_event_exit_task() that could imply).
@@ -11559,7 +11559,7 @@ SYSCALL_DEFINE5(perf_event_open,
1155911559
mutex_unlock(&ctx->mutex);
1156011560

1156111561
if (task) {
11562-
mutex_unlock(&task->signal->cred_guard_mutex);
11562+
mutex_unlock(&task->signal->exec_update_mutex);
1156311563
put_task_struct(task);
1156411564
}
1156511565

@@ -11595,7 +11595,7 @@ SYSCALL_DEFINE5(perf_event_open,
1159511595
free_event(event);
1159611596
err_cred:
1159711597
if (task)
11598-
mutex_unlock(&task->signal->cred_guard_mutex);
11598+
mutex_unlock(&task->signal->exec_update_mutex);
1159911599
err_task:
1160011600
if (task)
1160111601
put_task_struct(task);
@@ -11900,7 +11900,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
1190011900
/*
1190111901
* When a child task exits, feed back event values to parent events.
1190211902
*
11903-
* Can be called with cred_guard_mutex held when called from
11903+
* Can be called with exec_update_mutex held when called from
1190411904
* install_exec_creds().
1190511905
*/
1190611906
void perf_event_exit_task(struct task_struct *child)

kernel/fork.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1224,7 +1224,7 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
12241224
struct mm_struct *mm;
12251225
int err;
12261226

1227-
err = mutex_lock_killable(&task->signal->cred_guard_mutex);
1227+
err = mutex_lock_killable(&task->signal->exec_update_mutex);
12281228
if (err)
12291229
return ERR_PTR(err);
12301230

@@ -1234,7 +1234,7 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
12341234
mmput(mm);
12351235
mm = ERR_PTR(-EACCES);
12361236
}
1237-
mutex_unlock(&task->signal->cred_guard_mutex);
1237+
mutex_unlock(&task->signal->exec_update_mutex);
12381238

12391239
return mm;
12401240
}
@@ -1594,6 +1594,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
15941594
sig->oom_score_adj_min = current->signal->oom_score_adj_min;
15951595

15961596
mutex_init(&sig->cred_guard_mutex);
1597+
mutex_init(&sig->exec_update_mutex);
15971598

15981599
return 0;
15991600
}

kernel/kcmp.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -173,8 +173,8 @@ SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
173173
/*
174174
* One should have enough rights to inspect task details.
175175
*/
176-
ret = kcmp_lock(&task1->signal->cred_guard_mutex,
177-
&task2->signal->cred_guard_mutex);
176+
ret = kcmp_lock(&task1->signal->exec_update_mutex,
177+
&task2->signal->exec_update_mutex);
178178
if (ret)
179179
goto err;
180180
if (!ptrace_may_access(task1, PTRACE_MODE_READ_REALCREDS) ||
@@ -229,8 +229,8 @@ SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
229229
}
230230

231231
err_unlock:
232-
kcmp_unlock(&task1->signal->cred_guard_mutex,
233-
&task2->signal->cred_guard_mutex);
232+
kcmp_unlock(&task1->signal->exec_update_mutex,
233+
&task2->signal->exec_update_mutex);
234234
err:
235235
put_task_struct(task1);
236236
put_task_struct(task2);

kernel/pid.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -577,7 +577,7 @@ static struct file *__pidfd_fget(struct task_struct *task, int fd)
577577
struct file *file;
578578
int ret;
579579

580-
ret = mutex_lock_killable(&task->signal->cred_guard_mutex);
580+
ret = mutex_lock_killable(&task->signal->exec_update_mutex);
581581
if (ret)
582582
return ERR_PTR(ret);
583583

@@ -586,7 +586,7 @@ static struct file *__pidfd_fget(struct task_struct *task, int fd)
586586
else
587587
file = ERR_PTR(-EPERM);
588588

589-
mutex_unlock(&task->signal->cred_guard_mutex);
589+
mutex_unlock(&task->signal->exec_update_mutex);
590590

591591
return file ?: ERR_PTR(-EBADF);
592592
}

0 commit comments

Comments
 (0)