Skip to content

Commit 3f66f86

Browse files
committed
per signal_struct coredumps
Current coredumps are mixed up with the exit code, the signal handling code and with the ptrace code in was they are much more complicated than necessary and difficult to follow. This series of changes starts with ptrace_stop and cleans it up, making it easier to follow what is happening in ptrace_stop. Then cleans up the exec interactions with coredumps. Then cleans up the coredump interactions with exit. Then the coredump interactions with the signal handling code is clean up. The first and last changes are bug fixes for minor bugs. I believe the fact that vfork followed by execve can kill the process the called vfork if exec fails is sufficient justification to change the userspace visible behavior. In previous conversations it was suggested that some of these cleanups did not stand on their own. I think I have managed to organize things so all of their patches stand on their own. Which means that if for some reason the last change needs to be reverted we can still keep the gains from the other changes. Eric W. Biederman (6): signal: Remove the bogus sigkill_pending in ptrace_stop ptrace: Remove the unnecessary arguments from arch_ptrace_stop exec: Check for a pending fatal signal instead of core_state exit: Factor coredump_exit_mm out of exit_mm coredump: Don't perform any cleanups before dumping core coredump: Limit coredumps to a single thread group arch/ia64/include/asm/ptrace.h | 4 +- arch/sparc/include/asm/ptrace.h | 8 ++-- fs/binfmt_elf.c | 4 +- fs/binfmt_elf_fdpic.c | 2 +- fs/coredump.c | 88 ++++++----------------------------------- fs/exec.c | 14 +++---- fs/proc/array.c | 6 +-- include/linux/mm_types.h | 13 ------ include/linux/ptrace.h | 22 +++++------ include/linux/sched.h | 1 + include/linux/sched/signal.h | 13 ++++++ kernel/exit.c | 76 +++++++++++++++++++---------------- kernel/fork.c | 4 +- kernel/signal.c | 49 ++++------------------- mm/debug.c | 4 +- mm/oom_kill.c | 6 +-- 16 files changed, 106 insertions(+), 208 deletions(-) Link: https://lkml.kernel.org/r/87v92qx2c6.fsf@disp2133 Signed-off-by: "Eric W. Biederman" <[email protected]>
2 parents 6880fa6 + 0258b5f commit 3f66f86

File tree

16 files changed

+106
-208
lines changed

16 files changed

+106
-208
lines changed

arch/ia64/include/asm/ptrace.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,9 +129,9 @@ static inline long regs_return_value(struct pt_regs *regs)
129129
extern void ia64_decrement_ip (struct pt_regs *pt);
130130

131131
extern void ia64_ptrace_stop(void);
132-
#define arch_ptrace_stop(code, info) \
132+
#define arch_ptrace_stop() \
133133
ia64_ptrace_stop()
134-
#define arch_ptrace_stop_needed(code, info) \
134+
#define arch_ptrace_stop_needed() \
135135
(!test_thread_flag(TIF_RESTORE_RSE))
136136

137137
extern void ptrace_attach_sync_user_rbs (struct task_struct *);

arch/sparc/include/asm/ptrace.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,12 @@ static inline bool pt_regs_clear_syscall(struct pt_regs *regs)
2626
return (regs->tstate &= ~TSTATE_SYSCALL);
2727
}
2828

29-
#define arch_ptrace_stop_needed(exit_code, info) \
29+
#define arch_ptrace_stop_needed() \
3030
({ flush_user_windows(); \
3131
get_thread_wsaved() != 0; \
3232
})
3333

34-
#define arch_ptrace_stop(exit_code, info) \
34+
#define arch_ptrace_stop() \
3535
synchronize_user_stack()
3636

3737
#define current_pt_regs() \
@@ -129,12 +129,12 @@ static inline bool pt_regs_clear_syscall(struct pt_regs *regs)
129129
return (regs->psr &= ~PSR_SYSCALL);
130130
}
131131

132-
#define arch_ptrace_stop_needed(exit_code, info) \
132+
#define arch_ptrace_stop_needed() \
133133
({ flush_user_windows(); \
134134
current_thread_info()->w_saved != 0; \
135135
})
136136

137-
#define arch_ptrace_stop(exit_code, info) \
137+
#define arch_ptrace_stop() \
138138
synchronize_user_stack()
139139

140140
#define current_pt_regs() \

fs/binfmt_elf.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1834,7 +1834,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
18341834
/*
18351835
* Allocate a structure for each thread.
18361836
*/
1837-
for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1837+
for (ct = &dump_task->signal->core_state->dumper; ct; ct = ct->next) {
18381838
t = kzalloc(offsetof(struct elf_thread_core_info,
18391839
notes[info->thread_notes]),
18401840
GFP_KERNEL);
@@ -2024,7 +2024,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
20242024
if (!elf_note_info_init(info))
20252025
return 0;
20262026

2027-
for (ct = current->mm->core_state->dumper.next;
2027+
for (ct = current->signal->core_state->dumper.next;
20282028
ct; ct = ct->next) {
20292029
ets = kzalloc(sizeof(*ets), GFP_KERNEL);
20302030
if (!ets)

fs/binfmt_elf_fdpic.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1494,7 +1494,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
14941494
if (dump_vma_snapshot(cprm, &vma_count, &vma_meta, &vma_data_size))
14951495
goto end_coredump;
14961496

1497-
for (ct = current->mm->core_state->dumper.next;
1497+
for (ct = current->signal->core_state->dumper.next;
14981498
ct; ct = ct->next) {
14991499
tmp = elf_dump_thread_status(cprm->siginfo->si_signo,
15001500
ct->task, &thread_status_size);

fs/coredump.c

Lines changed: 11 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,7 @@ static int zap_process(struct task_struct *start, int exit_code, int flags)
359359

360360
for_each_thread(start, t) {
361361
task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
362-
if (t != current && t->mm) {
362+
if (t != current && !(t->flags & PF_POSTCOREDUMP)) {
363363
sigaddset(&t->pending.signal, SIGKILL);
364364
signal_wake_up(t, 1);
365365
nr++;
@@ -369,99 +369,34 @@ static int zap_process(struct task_struct *start, int exit_code, int flags)
369369
return nr;
370370
}
371371

372-
static int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
372+
static int zap_threads(struct task_struct *tsk,
373373
struct core_state *core_state, int exit_code)
374374
{
375-
struct task_struct *g, *p;
376-
unsigned long flags;
377375
int nr = -EAGAIN;
378376

379377
spin_lock_irq(&tsk->sighand->siglock);
380378
if (!signal_group_exit(tsk->signal)) {
381-
mm->core_state = core_state;
379+
tsk->signal->core_state = core_state;
382380
tsk->signal->group_exit_task = tsk;
383381
nr = zap_process(tsk, exit_code, 0);
384382
clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
383+
tsk->flags |= PF_DUMPCORE;
384+
atomic_set(&core_state->nr_threads, nr);
385385
}
386386
spin_unlock_irq(&tsk->sighand->siglock);
387-
if (unlikely(nr < 0))
388-
return nr;
389-
390-
tsk->flags |= PF_DUMPCORE;
391-
if (atomic_read(&mm->mm_users) == nr + 1)
392-
goto done;
393-
/*
394-
* We should find and kill all tasks which use this mm, and we should
395-
* count them correctly into ->nr_threads. We don't take tasklist
396-
* lock, but this is safe wrt:
397-
*
398-
* fork:
399-
* None of sub-threads can fork after zap_process(leader). All
400-
* processes which were created before this point should be
401-
* visible to zap_threads() because copy_process() adds the new
402-
* process to the tail of init_task.tasks list, and lock/unlock
403-
* of ->siglock provides a memory barrier.
404-
*
405-
* do_exit:
406-
* The caller holds mm->mmap_lock. This means that the task which
407-
* uses this mm can't pass exit_mm(), so it can't exit or clear
408-
* its ->mm.
409-
*
410-
* de_thread:
411-
* It does list_replace_rcu(&leader->tasks, &current->tasks),
412-
* we must see either old or new leader, this does not matter.
413-
* However, it can change p->sighand, so lock_task_sighand(p)
414-
* must be used. Since p->mm != NULL and we hold ->mmap_lock
415-
* it can't fail.
416-
*
417-
* Note also that "g" can be the old leader with ->mm == NULL
418-
* and already unhashed and thus removed from ->thread_group.
419-
* This is OK, __unhash_process()->list_del_rcu() does not
420-
* clear the ->next pointer, we will find the new leader via
421-
* next_thread().
422-
*/
423-
rcu_read_lock();
424-
for_each_process(g) {
425-
if (g == tsk->group_leader)
426-
continue;
427-
if (g->flags & PF_KTHREAD)
428-
continue;
429-
430-
for_each_thread(g, p) {
431-
if (unlikely(!p->mm))
432-
continue;
433-
if (unlikely(p->mm == mm)) {
434-
lock_task_sighand(p, &flags);
435-
nr += zap_process(p, exit_code,
436-
SIGNAL_GROUP_EXIT);
437-
unlock_task_sighand(p, &flags);
438-
}
439-
break;
440-
}
441-
}
442-
rcu_read_unlock();
443-
done:
444-
atomic_set(&core_state->nr_threads, nr);
445387
return nr;
446388
}
447389

448390
static int coredump_wait(int exit_code, struct core_state *core_state)
449391
{
450392
struct task_struct *tsk = current;
451-
struct mm_struct *mm = tsk->mm;
452393
int core_waiters = -EBUSY;
453394

454395
init_completion(&core_state->startup);
455396
core_state->dumper.task = tsk;
456397
core_state->dumper.next = NULL;
457398

458-
if (mmap_write_lock_killable(mm))
459-
return -EINTR;
460-
461-
if (!mm->core_state)
462-
core_waiters = zap_threads(tsk, mm, core_state, exit_code);
463-
mmap_write_unlock(mm);
464-
399+
core_waiters = zap_threads(tsk, core_state, exit_code);
465400
if (core_waiters > 0) {
466401
struct core_thread *ptr;
467402

@@ -483,7 +418,7 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
483418
return core_waiters;
484419
}
485420

486-
static void coredump_finish(struct mm_struct *mm, bool core_dumped)
421+
static void coredump_finish(bool core_dumped)
487422
{
488423
struct core_thread *curr, *next;
489424
struct task_struct *task;
@@ -493,22 +428,21 @@ static void coredump_finish(struct mm_struct *mm, bool core_dumped)
493428
current->signal->group_exit_code |= 0x80;
494429
current->signal->group_exit_task = NULL;
495430
current->signal->flags = SIGNAL_GROUP_EXIT;
431+
next = current->signal->core_state->dumper.next;
432+
current->signal->core_state = NULL;
496433
spin_unlock_irq(&current->sighand->siglock);
497434

498-
next = mm->core_state->dumper.next;
499435
while ((curr = next) != NULL) {
500436
next = curr->next;
501437
task = curr->task;
502438
/*
503-
* see exit_mm(), curr->task must not see
439+
* see coredump_task_exit(), curr->task must not see
504440
* ->task == NULL before we read ->next.
505441
*/
506442
smp_mb();
507443
curr->task = NULL;
508444
wake_up_process(task);
509445
}
510-
511-
mm->core_state = NULL;
512446
}
513447

514448
static bool dump_interrupted(void)
@@ -839,7 +773,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
839773
fail_unlock:
840774
kfree(argv);
841775
kfree(cn.corename);
842-
coredump_finish(mm, core_dumped);
776+
coredump_finish(core_dumped);
843777
revert_creds(old_cred);
844778
fail_creds:
845779
put_cred(cred);

fs/exec.c

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -987,16 +987,14 @@ static int exec_mmap(struct mm_struct *mm)
987987

988988
if (old_mm) {
989989
/*
990-
* Make sure that if there is a core dump in progress
991-
* for the old mm, we get out and die instead of going
992-
* through with the exec. We must hold mmap_lock around
993-
* checking core_state and changing tsk->mm.
990+
* If there is a pending fatal signal perhaps a signal
991+
* whose default action is to create a coredump get
992+
* out and die instead of going through with the exec.
994993
*/
995-
mmap_read_lock(old_mm);
996-
if (unlikely(old_mm->core_state)) {
997-
mmap_read_unlock(old_mm);
994+
ret = mmap_read_lock_killable(old_mm);
995+
if (ret) {
998996
up_write(&tsk->signal->exec_update_lock);
999-
return -EINTR;
997+
return ret;
1000998
}
1001999
}
10021000

fs/proc/array.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -408,9 +408,9 @@ static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
408408
cpumask_pr_args(&task->cpus_mask));
409409
}
410410

411-
static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm)
411+
static inline void task_core_dumping(struct seq_file *m, struct task_struct *task)
412412
{
413-
seq_put_decimal_ull(m, "CoreDumping:\t", !!mm->core_state);
413+
seq_put_decimal_ull(m, "CoreDumping:\t", !!task->signal->core_state);
414414
seq_putc(m, '\n');
415415
}
416416

@@ -436,7 +436,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
436436

437437
if (mm) {
438438
task_mem(m, mm);
439-
task_core_dumping(m, mm);
439+
task_core_dumping(m, task);
440440
task_thp_status(m, mm);
441441
mmput(mm);
442442
}

include/linux/mm_types.h

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -387,17 +387,6 @@ struct vm_area_struct {
387387
struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
388388
} __randomize_layout;
389389

390-
struct core_thread {
391-
struct task_struct *task;
392-
struct core_thread *next;
393-
};
394-
395-
struct core_state {
396-
atomic_t nr_threads;
397-
struct core_thread dumper;
398-
struct completion startup;
399-
};
400-
401390
struct kioctx_table;
402391
struct mm_struct {
403392
struct {
@@ -518,8 +507,6 @@ struct mm_struct {
518507

519508
unsigned long flags; /* Must use atomic bitops to access */
520509

521-
struct core_state *core_state; /* coredumping support */
522-
523510
#ifdef CONFIG_AIO
524511
spinlock_t ioctx_lock;
525512
struct kioctx_table __rcu *ioctx_table;

include/linux/ptrace.h

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -362,29 +362,25 @@ static inline void user_single_step_report(struct pt_regs *regs)
362362
#ifndef arch_ptrace_stop_needed
363363
/**
364364
* arch_ptrace_stop_needed - Decide whether arch_ptrace_stop() should be called
365-
* @code: current->exit_code value ptrace will stop with
366-
* @info: siginfo_t pointer (or %NULL) for signal ptrace will stop with
367365
*
368366
* This is called with the siglock held, to decide whether or not it's
369-
* necessary to release the siglock and call arch_ptrace_stop() with the
370-
* same @code and @info arguments. It can be defined to a constant if
371-
* arch_ptrace_stop() is never required, or always is. On machines where
372-
* this makes sense, it should be defined to a quick test to optimize out
373-
* calling arch_ptrace_stop() when it would be superfluous. For example,
374-
* if the thread has not been back to user mode since the last stop, the
375-
* thread state might indicate that nothing needs to be done.
367+
* necessary to release the siglock and call arch_ptrace_stop(). It can be
368+
* defined to a constant if arch_ptrace_stop() is never required, or always
369+
* is. On machines where this makes sense, it should be defined to a quick
370+
* test to optimize out calling arch_ptrace_stop() when it would be
371+
* superfluous. For example, if the thread has not been back to user mode
372+
* since the last stop, the thread state might indicate that nothing needs
373+
* to be done.
376374
*
377375
* This is guaranteed to be invoked once before a task stops for ptrace and
378376
* may include arch-specific operations necessary prior to a ptrace stop.
379377
*/
380-
#define arch_ptrace_stop_needed(code, info) (0)
378+
#define arch_ptrace_stop_needed() (0)
381379
#endif
382380

383381
#ifndef arch_ptrace_stop
384382
/**
385383
* arch_ptrace_stop - Do machine-specific work before stopping for ptrace
386-
* @code: current->exit_code value ptrace will stop with
387-
* @info: siginfo_t pointer (or %NULL) for signal ptrace will stop with
388384
*
389385
* This is called with no locks held when arch_ptrace_stop_needed() has
390386
* just returned nonzero. It is allowed to block, e.g. for user memory
@@ -394,7 +390,7 @@ static inline void user_single_step_report(struct pt_regs *regs)
394390
* we only do it when the arch requires it for this particular stop, as
395391
* indicated by arch_ptrace_stop_needed().
396392
*/
397-
#define arch_ptrace_stop(code, info) do { } while (0)
393+
#define arch_ptrace_stop() do { } while (0)
398394
#endif
399395

400396
#ifndef current_pt_regs

include/linux/sched.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1664,6 +1664,7 @@ extern struct pid *cad_pid;
16641664
#define PF_VCPU 0x00000001 /* I'm a virtual CPU */
16651665
#define PF_IDLE 0x00000002 /* I am an IDLE thread */
16661666
#define PF_EXITING 0x00000004 /* Getting shut down */
1667+
#define PF_POSTCOREDUMP 0x00000008 /* Coredumps should ignore this task */
16671668
#define PF_IO_WORKER 0x00000010 /* Task is an IO worker */
16681669
#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */
16691670
#define PF_FORKNOEXEC 0x00000040 /* Forked but didn't exec */

0 commit comments

Comments
 (0)