Skip to content

Commit 0258b5f

Browse files
committed
coredump: Limit coredumps to a single thread group
Today when a signal is delivered with a handler of SIG_DFL whose default behavior is to generate a core dump not only that process but every process that shares the mm is killed. In the case of vfork this looks like a real world problem. Consider the following well defined sequence. if (vfork() == 0) { execve(...); _exit(EXIT_FAILURE); } If a signal that generates a core dump is received after vfork but before the execve changes the mm the process that called vfork will also be killed (as the mm is shared). Similarly if the execve fails after the point of no return the kernel delivers SIGSEGV which will kill both the exec'ing process and because the mm is shared the process that called vfork as well. As far as I can tell this behavior is a violation of people's reasonable expectations, POSIX, and is unnecessarily fragile when the system is low on memory. Solve this by making a userspace visible change to only kill a single process/thread group. This is possible because Jann Horn recently modified[1] the coredump code so that the mm can safely be modified while the coredump is happening. With LinuxThreads long gone I don't expect anyone to have a notice this behavior change in practice. To accomplish this move the core_state pointer from mm_struct to signal_struct, which allows different thread groups to coredump simultatenously. In zap_threads remove the work to kill anything except for the current thread group. v2: Remove core_state from the VM_BUG_ON_MM print to fix compile failure when CONFIG_DEBUG_VM is enabled. Reported-by: Stephen Rothwell <[email protected]> [1] a07279c ("binfmt_elf, binfmt_elf_fdpic: use a VMA list snapshot") Fixes: d89f384 ("[PATCH] thread-aware coredumps, 2.5.43-C3") History-tree: git://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git Link: https://lkml.kernel.org/r/87y27mvnke.fsf@disp2133 Link: https://lkml.kernel.org/r/[email protected] Reviewed-by: Kees Cook <[email protected]> Signed-off-by: "Eric W. Biederman" <[email protected]>
1 parent 9230738 commit 0258b5f

File tree

9 files changed

+34
-106
lines changed

9 files changed

+34
-106
lines changed

fs/binfmt_elf.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1834,7 +1834,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
18341834
/*
18351835
* Allocate a structure for each thread.
18361836
*/
1837-
for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1837+
for (ct = &dump_task->signal->core_state->dumper; ct; ct = ct->next) {
18381838
t = kzalloc(offsetof(struct elf_thread_core_info,
18391839
notes[info->thread_notes]),
18401840
GFP_KERNEL);
@@ -2024,7 +2024,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
20242024
if (!elf_note_info_init(info))
20252025
return 0;
20262026

2027-
for (ct = current->mm->core_state->dumper.next;
2027+
for (ct = current->signal->core_state->dumper.next;
20282028
ct; ct = ct->next) {
20292029
ets = kzalloc(sizeof(*ets), GFP_KERNEL);
20302030
if (!ets)

fs/binfmt_elf_fdpic.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1494,7 +1494,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
14941494
if (dump_vma_snapshot(cprm, &vma_count, &vma_meta, &vma_data_size))
14951495
goto end_coredump;
14961496

1497-
for (ct = current->mm->core_state->dumper.next;
1497+
for (ct = current->signal->core_state->dumper.next;
14981498
ct; ct = ct->next) {
14991499
tmp = elf_dump_thread_status(cprm->siginfo->si_signo,
15001500
ct->task, &thread_status_size);

fs/coredump.c

Lines changed: 9 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -369,99 +369,34 @@ static int zap_process(struct task_struct *start, int exit_code, int flags)
369369
return nr;
370370
}
371371

372-
static int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
372+
static int zap_threads(struct task_struct *tsk,
373373
struct core_state *core_state, int exit_code)
374374
{
375-
struct task_struct *g, *p;
376-
unsigned long flags;
377375
int nr = -EAGAIN;
378376

379377
spin_lock_irq(&tsk->sighand->siglock);
380378
if (!signal_group_exit(tsk->signal)) {
381-
mm->core_state = core_state;
379+
tsk->signal->core_state = core_state;
382380
tsk->signal->group_exit_task = tsk;
383381
nr = zap_process(tsk, exit_code, 0);
384382
clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
383+
tsk->flags |= PF_DUMPCORE;
384+
atomic_set(&core_state->nr_threads, nr);
385385
}
386386
spin_unlock_irq(&tsk->sighand->siglock);
387-
if (unlikely(nr < 0))
388-
return nr;
389-
390-
tsk->flags |= PF_DUMPCORE;
391-
if (atomic_read(&mm->mm_users) == nr + 1)
392-
goto done;
393-
/*
394-
* We should find and kill all tasks which use this mm, and we should
395-
* count them correctly into ->nr_threads. We don't take tasklist
396-
* lock, but this is safe wrt:
397-
*
398-
* fork:
399-
* None of sub-threads can fork after zap_process(leader). All
400-
* processes which were created before this point should be
401-
* visible to zap_threads() because copy_process() adds the new
402-
* process to the tail of init_task.tasks list, and lock/unlock
403-
* of ->siglock provides a memory barrier.
404-
*
405-
* do_exit:
406-
* The caller holds mm->mmap_lock. This means that the task which
407-
* uses this mm can't pass coredump_task_exit(), so it can't exit
408-
* or clear its ->mm.
409-
*
410-
* de_thread:
411-
* It does list_replace_rcu(&leader->tasks, &current->tasks),
412-
* we must see either old or new leader, this does not matter.
413-
* However, it can change p->sighand, so lock_task_sighand(p)
414-
* must be used. Since p->mm != NULL and we hold ->mmap_lock
415-
* it can't fail.
416-
*
417-
* Note also that "g" can be the old leader with ->mm == NULL
418-
* and already unhashed and thus removed from ->thread_group.
419-
* This is OK, __unhash_process()->list_del_rcu() does not
420-
* clear the ->next pointer, we will find the new leader via
421-
* next_thread().
422-
*/
423-
rcu_read_lock();
424-
for_each_process(g) {
425-
if (g == tsk->group_leader)
426-
continue;
427-
if (g->flags & PF_KTHREAD)
428-
continue;
429-
430-
for_each_thread(g, p) {
431-
if (unlikely(!p->mm))
432-
continue;
433-
if (unlikely(p->mm == mm)) {
434-
lock_task_sighand(p, &flags);
435-
nr += zap_process(p, exit_code,
436-
SIGNAL_GROUP_EXIT);
437-
unlock_task_sighand(p, &flags);
438-
}
439-
break;
440-
}
441-
}
442-
rcu_read_unlock();
443-
done:
444-
atomic_set(&core_state->nr_threads, nr);
445387
return nr;
446388
}
447389

448390
static int coredump_wait(int exit_code, struct core_state *core_state)
449391
{
450392
struct task_struct *tsk = current;
451-
struct mm_struct *mm = tsk->mm;
452393
int core_waiters = -EBUSY;
453394

454395
init_completion(&core_state->startup);
455396
core_state->dumper.task = tsk;
456397
core_state->dumper.next = NULL;
457398

458-
if (mmap_write_lock_killable(mm))
459-
return -EINTR;
460-
461-
if (!mm->core_state)
462-
core_waiters = zap_threads(tsk, mm, core_state, exit_code);
463-
mmap_write_unlock(mm);
464-
399+
core_waiters = zap_threads(tsk, core_state, exit_code);
465400
if (core_waiters > 0) {
466401
struct core_thread *ptr;
467402

@@ -483,7 +418,7 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
483418
return core_waiters;
484419
}
485420

486-
static void coredump_finish(struct mm_struct *mm, bool core_dumped)
421+
static void coredump_finish(bool core_dumped)
487422
{
488423
struct core_thread *curr, *next;
489424
struct task_struct *task;
@@ -493,9 +428,10 @@ static void coredump_finish(struct mm_struct *mm, bool core_dumped)
493428
current->signal->group_exit_code |= 0x80;
494429
current->signal->group_exit_task = NULL;
495430
current->signal->flags = SIGNAL_GROUP_EXIT;
431+
next = current->signal->core_state->dumper.next;
432+
current->signal->core_state = NULL;
496433
spin_unlock_irq(&current->sighand->siglock);
497434

498-
next = mm->core_state->dumper.next;
499435
while ((curr = next) != NULL) {
500436
next = curr->next;
501437
task = curr->task;
@@ -507,8 +443,6 @@ static void coredump_finish(struct mm_struct *mm, bool core_dumped)
507443
curr->task = NULL;
508444
wake_up_process(task);
509445
}
510-
511-
mm->core_state = NULL;
512446
}
513447

514448
static bool dump_interrupted(void)
@@ -839,7 +773,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
839773
fail_unlock:
840774
kfree(argv);
841775
kfree(cn.corename);
842-
coredump_finish(mm, core_dumped);
776+
coredump_finish(core_dumped);
843777
revert_creds(old_cred);
844778
fail_creds:
845779
put_cred(cred);

fs/proc/array.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -408,9 +408,9 @@ static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
408408
cpumask_pr_args(&task->cpus_mask));
409409
}
410410

411-
static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm)
411+
static inline void task_core_dumping(struct seq_file *m, struct task_struct *task)
412412
{
413-
seq_put_decimal_ull(m, "CoreDumping:\t", !!mm->core_state);
413+
seq_put_decimal_ull(m, "CoreDumping:\t", !!task->signal->core_state);
414414
seq_putc(m, '\n');
415415
}
416416

@@ -436,7 +436,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
436436

437437
if (mm) {
438438
task_mem(m, mm);
439-
task_core_dumping(m, mm);
439+
task_core_dumping(m, task);
440440
task_thp_status(m, mm);
441441
mmput(mm);
442442
}

include/linux/mm_types.h

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -387,17 +387,6 @@ struct vm_area_struct {
387387
struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
388388
} __randomize_layout;
389389

390-
struct core_thread {
391-
struct task_struct *task;
392-
struct core_thread *next;
393-
};
394-
395-
struct core_state {
396-
atomic_t nr_threads;
397-
struct core_thread dumper;
398-
struct completion startup;
399-
};
400-
401390
struct kioctx_table;
402391
struct mm_struct {
403392
struct {
@@ -518,8 +507,6 @@ struct mm_struct {
518507

519508
unsigned long flags; /* Must use atomic bitops to access */
520509

521-
struct core_state *core_state; /* coredumping support */
522-
523510
#ifdef CONFIG_AIO
524511
spinlock_t ioctx_lock;
525512
struct kioctx_table __rcu *ioctx_table;

include/linux/sched/signal.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,17 @@ struct multiprocess_signals {
7272
struct hlist_node node;
7373
};
7474

75+
struct core_thread {
76+
struct task_struct *task;
77+
struct core_thread *next;
78+
};
79+
80+
struct core_state {
81+
atomic_t nr_threads;
82+
struct core_thread dumper;
83+
struct completion startup;
84+
};
85+
7586
/*
7687
* NOTE! "signal_struct" does not have its own
7788
* locking, because a shared signal_struct always
@@ -110,6 +121,8 @@ struct signal_struct {
110121
int group_stop_count;
111122
unsigned int flags; /* see SIGNAL_* flags below */
112123

124+
struct core_state *core_state; /* coredumping support */
125+
113126
/*
114127
* PR_SET_CHILD_SUBREAPER marks a process, like a service
115128
* manager, to re-parent orphan (double-forking) child processes

kernel/exit.c

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -342,23 +342,18 @@ kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent)
342342
static void coredump_task_exit(struct task_struct *tsk)
343343
{
344344
struct core_state *core_state;
345-
struct mm_struct *mm;
346-
347-
mm = tsk->mm;
348-
if (!mm)
349-
return;
350345

351346
/*
352347
* Serialize with any possible pending coredump.
353-
* We must hold mmap_lock around checking core_state
348+
* We must hold siglock around checking core_state
354349
* and setting PF_POSTCOREDUMP. The core-inducing thread
355350
* will increment ->nr_threads for each thread in the
356351
* group without PF_POSTCOREDUMP set.
357352
*/
358-
mmap_read_lock(mm);
353+
spin_lock_irq(&tsk->sighand->siglock);
359354
tsk->flags |= PF_POSTCOREDUMP;
360-
core_state = mm->core_state;
361-
mmap_read_unlock(mm);
355+
core_state = tsk->signal->core_state;
356+
spin_unlock_irq(&tsk->sighand->siglock);
362357
if (core_state) {
363358
struct core_thread self;
364359

kernel/fork.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1044,7 +1044,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
10441044
seqcount_init(&mm->write_protect_seq);
10451045
mmap_init_lock(mm);
10461046
INIT_LIST_HEAD(&mm->mmlist);
1047-
mm->core_state = NULL;
10481047
mm_pgtables_bytes_init(mm);
10491048
mm->map_count = 0;
10501049
mm->locked_vm = 0;

mm/debug.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@ void dump_mm(const struct mm_struct *mm)
214214
"start_code %lx end_code %lx start_data %lx end_data %lx\n"
215215
"start_brk %lx brk %lx start_stack %lx\n"
216216
"arg_start %lx arg_end %lx env_start %lx env_end %lx\n"
217-
"binfmt %px flags %lx core_state %px\n"
217+
"binfmt %px flags %lx\n"
218218
#ifdef CONFIG_AIO
219219
"ioctx_table %px\n"
220220
#endif
@@ -246,7 +246,7 @@ void dump_mm(const struct mm_struct *mm)
246246
mm->start_code, mm->end_code, mm->start_data, mm->end_data,
247247
mm->start_brk, mm->brk, mm->start_stack,
248248
mm->arg_start, mm->arg_end, mm->env_start, mm->env_end,
249-
mm->binfmt, mm->flags, mm->core_state,
249+
mm->binfmt, mm->flags,
250250
#ifdef CONFIG_AIO
251251
mm->ioctx_table,
252252
#endif

0 commit comments

Comments
 (0)