Skip to content

Commit 4dd6566

Browse files
committed
Merge patch series "coredump: hand a pidfd to the usermode coredump helper"
Christian Brauner <[email protected]> says: Give userspace a way to instruct the kernel to install a pidfd for the crashing process into the process started as a usermode helper. There's still tricky race-windows that cannot be easily or sometimes not closed at all by userspace. There's various ways like looking at the start time of a process to make sure that the usermode helper process is started after the crashing process but it's all very very brittle and fraught with peril. The crashed-but-not-reaped process can be killed by userspace before coredump processing programs like systemd-coredump have had time to manually open a PIDFD from the PID the kernel provides them, which means they can be tricked into reading from an arbitrary process, and they run with full privileges as they are usermode helper processes. Even if that specific race-window wouldn't exist it's still the safest and cleanest way to let the kernel provide the pidfd directly instead of requiring userspace to do it manually. In parallel with this commit we already have systemd adding support for this in [1]. When the usermode helper process is forked we install a pidfd file descriptor three into the usermode helper's file descriptor table so it's available to the exec'd program. Since usermode helpers are either children of the system_unbound_wq workqueue or kthreadd we know that the file descriptor table is empty and can thus always use three as the file descriptor number. Note, that we'll install a pidfd for the thread-group leader even if a subthread is calling do_coredump(). We know that task linkage hasn't been removed yet and even if this @current isn't the actual thread-group leader we know that the thread-group leader cannot be reaped until @current has exited. [1]: systemd/systemd#37125 * patches from https://lore.kernel.org/[email protected]: coredump: hand a pidfd to the usermode coredump helper coredump: fix error handling for replace_fd() pidfs: move O_RDWR into pidfs_alloc_file() Link: https://lore.kernel.org/[email protected] Signed-off-by: Christian Brauner <[email protected]>
2 parents e1b477c + b5325b2 commit 4dd6566

File tree

3 files changed

+61
-6
lines changed

3 files changed

+61
-6
lines changed

fs/coredump.c

Lines changed: 59 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@
4343
#include <linux/timekeeping.h>
4444
#include <linux/sysctl.h>
4545
#include <linux/elf.h>
46+
#include <linux/pidfs.h>
47+
#include <uapi/linux/pidfd.h>
4648

4749
#include <linux/uaccess.h>
4850
#include <asm/mmu_context.h>
@@ -60,6 +62,12 @@ static void free_vma_snapshot(struct coredump_params *cprm);
6062
#define CORE_FILE_NOTE_SIZE_DEFAULT (4*1024*1024)
6163
/* Define a reasonable max cap */
6264
#define CORE_FILE_NOTE_SIZE_MAX (16*1024*1024)
65+
/*
66+
* File descriptor number for the pidfd for the thread-group leader of
67+
* the coredumping task installed into the usermode helper's file
68+
* descriptor table.
69+
*/
70+
#define COREDUMP_PIDFD_NUMBER 3
6371

6472
static int core_uses_pid;
6573
static unsigned int core_pipe_limit;
@@ -339,6 +347,27 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm,
339347
case 'C':
340348
err = cn_printf(cn, "%d", cprm->cpu);
341349
break;
350+
/* pidfd number */
351+
case 'F': {
352+
/*
353+
* Installing a pidfd only makes sense if
354+
* we actually spawn a usermode helper.
355+
*/
356+
if (!ispipe)
357+
break;
358+
359+
/*
360+
* Note that we'll install a pidfd for the
361+
* thread-group leader. We know that task
362+
* linkage hasn't been removed yet and even if
363+
* this @current isn't the actual thread-group
364+
* leader we know that the thread-group leader
365+
* cannot be reaped until @current has exited.
366+
*/
367+
cprm->pid = task_tgid(current);
368+
err = cn_printf(cn, "%d", COREDUMP_PIDFD_NUMBER);
369+
break;
370+
}
342371
default:
343372
break;
344373
}
@@ -493,7 +522,7 @@ static void wait_for_dump_helpers(struct file *file)
493522
}
494523

495524
/*
496-
* umh_pipe_setup
525+
* umh_coredump_setup
497526
* helper function to customize the process used
498527
* to collect the core in userspace. Specifically
499528
* it sets up a pipe and installs it as fd 0 (stdin)
@@ -503,22 +532,46 @@ static void wait_for_dump_helpers(struct file *file)
503532
* is a special value that we use to trap recursive
504533
* core dumps
505534
*/
506-
static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
535+
static int umh_coredump_setup(struct subprocess_info *info, struct cred *new)
507536
{
508537
struct file *files[2];
509538
struct coredump_params *cp = (struct coredump_params *)info->data;
510-
int err = create_pipe_files(files, 0);
539+
int err;
540+
541+
if (cp->pid) {
542+
struct file *pidfs_file __free(fput) = NULL;
543+
544+
pidfs_file = pidfs_alloc_file(cp->pid, 0);
545+
if (IS_ERR(pidfs_file))
546+
return PTR_ERR(pidfs_file);
547+
548+
/*
549+
* Usermode helpers are childen of either
550+
* system_unbound_wq or of kthreadd. So we know that
551+
* we're starting off with a clean file descriptor
552+
* table. So we should always be able to use
553+
* COREDUMP_PIDFD_NUMBER as our file descriptor value.
554+
*/
555+
err = replace_fd(COREDUMP_PIDFD_NUMBER, pidfs_file, 0);
556+
if (err < 0)
557+
return err;
558+
}
559+
560+
err = create_pipe_files(files, 0);
511561
if (err)
512562
return err;
513563

514564
cp->file = files[1];
515565

516566
err = replace_fd(0, files[0], 0);
517567
fput(files[0]);
568+
if (err < 0)
569+
return err;
570+
518571
/* and disallow core files too */
519572
current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
520573

521-
return err;
574+
return 0;
522575
}
523576

524577
void do_coredump(const kernel_siginfo_t *siginfo)
@@ -593,7 +646,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
593646
}
594647

595648
if (cprm.limit == 1) {
596-
/* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
649+
/* See umh_coredump_setup() which sets RLIMIT_CORE = 1.
597650
*
598651
* Normally core limits are irrelevant to pipes, since
599652
* we're not writing to the file system, but we use
@@ -632,7 +685,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
632685
retval = -ENOMEM;
633686
sub_info = call_usermodehelper_setup(helper_argv[0],
634687
helper_argv, NULL, GFP_KERNEL,
635-
umh_pipe_setup, NULL, &cprm);
688+
umh_coredump_setup, NULL, &cprm);
636689
if (sub_info)
637690
retval = call_usermodehelper_exec(sub_info,
638691
UMH_WAIT_EXEC);

fs/pidfs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -892,6 +892,7 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags)
892892
return ERR_PTR(-ESRCH);
893893

894894
flags &= ~PIDFD_STALE;
895+
flags |= O_RDWR;
895896
pidfd_file = dentry_open(&path, flags, current_cred());
896897
/* Raise PIDFD_THREAD explicitly as do_dentry_open() strips it. */
897898
if (!IS_ERR(pidfd_file))

include/linux/coredump.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ struct coredump_params {
2828
int vma_count;
2929
size_t vma_data_size;
3030
struct core_vma_metadata *vma_meta;
31+
struct pid *pid;
3132
};
3233

3334
extern unsigned int core_file_note_size_limit;

0 commit comments

Comments
 (0)