Skip to content

Commit c8e7ce2

Browse files
committed
Merge patch series "coredump: allow for flexible coredump handling"
Christian Brauner <[email protected]> says: In addition to the extensive selftests, I've already written a simple (non-production-ready) Rust coredump server for this in userspace: https://github.com/brauner/dumdum.git Extend the coredump socket to allow the coredump server to tell the kernel how to process individual coredumps. This allows for fine-grained coredump management. Userspace can decide to just let the kernel write out the coredump, or generate the coredump itself, or just reject it. When the crashing task connects to the coredump socket the kernel will send a struct coredump_req to the coredump server. The kernel will set the size member of struct coredump_req, telling the coredump server how much data can be read. The coredump server uses MSG_PEEK to peek the size of struct coredump_req. If the kernel uses a newer struct coredump_req the coredump server just reads the size it knows and discards any remaining bytes in the buffer. If the kernel uses an older struct coredump_req the coredump server just reads the size the kernel knows. The returned struct coredump_req will inform the coredump server what features the kernel supports. The coredump_req->mask member is set to the currently known features. The coredump server may only use features whose bits were raised by the kernel in coredump_req->mask. In response to a coredump_req from the kernel the coredump server sends a struct coredump_ack to the kernel. The kernel informs the coredump server what version of struct coredump_ack it supports by setting struct coredump_req->size_ack to the size it knows about. The coredump server may only send as many bytes as coredump_req->size_ack indicates (a smaller size is fine of course). The coredump server must set coredump_ack->size accordingly. The coredump server sets the features it wants to use in struct coredump_ack->mask. Only bits returned in struct coredump_req->mask may be used. 
In case an invalid struct coredump_ack is sent to the kernel, a non-zero u32 integer is sent indicating the reason for the failure. If it was successful, a zero u32 integer is sent. In the initial version the following features are supported in coredump_{req,ack}->mask: * COREDUMP_KERNEL The kernel will write the coredump data to the socket. * COREDUMP_USERSPACE The kernel will not write coredump data but will indicate to the parent that a coredump has been generated. This is used when userspace generates its own coredumps. * COREDUMP_REJECT The kernel will skip generating a coredump for this task. * COREDUMP_WAIT The kernel will prevent the task from exiting until the coredump server has shut down the socket connection. The flexible coredump socket can be enabled by using the "@@" prefix instead of the single "@" prefix for the regular coredump socket: @@/run/systemd/coredump.socket will enable flexible coredump handling. Current kernels already enforce that "@" must be followed by "/" and will reject anything else. So extending this is backward and forward compatible. Link: https://lore.kernel.org/20250603-work-coredump-socket-protocol-v2-0-05a5f0c18ecc@kernel.org * patches from https://lore.kernel.org/20250603-work-coredump-socket-protocol-v2-0-05a5f0c18ecc@kernel.org: selftests/coredump: add coredump server selftests tools: add coredump.h header selftests/coredump: cleanup coredump tests selftests/coredump: fix build coredump: allow for flexible coredump handling Signed-off-by: Christian Brauner <[email protected]>
2 parents 19272b3 + 59cd658 commit c8e7ce2

File tree

6 files changed

+1811
-274
lines changed

6 files changed

+1811
-274
lines changed

fs/coredump.c

Lines changed: 168 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
#include <net/sock.h>
5252
#include <uapi/linux/pidfd.h>
5353
#include <uapi/linux/un.h>
54+
#include <uapi/linux/coredump.h>
5455

5556
#include <linux/uaccess.h>
5657
#include <asm/mmu_context.h>
@@ -83,15 +84,17 @@ static int core_name_size = CORENAME_MAX_SIZE;
8384
unsigned int core_file_note_size_limit = CORE_FILE_NOTE_SIZE_DEFAULT;
8485

8586
/*
 * Destination type recorded in struct core_name::core_type.
 *
 * COREDUMP_SOCK_REQ is selected by format_corename() when the character
 * after the socket prefix is a second '@' (the "@@" pattern form);
 * coredump_request() only negotiates with the coredump server for that type.
 */
enum coredump_type_t {
86-
COREDUMP_FILE = 1,
87-
COREDUMP_PIPE = 2,
88-
COREDUMP_SOCK = 3,
87+
COREDUMP_FILE = 1,
88+
COREDUMP_PIPE = 2,
89+
COREDUMP_SOCK = 3,
90+
COREDUMP_SOCK_REQ = 4,
8991
};
9092

9193
/*
 * Per-dump state built while expanding the core pattern.
 *
 * @mask holds the COREDUMP_* feature bits that apply to this dump.
 * format_corename() initialises it to COREDUMP_KERNEL, adding COREDUMP_WAIT
 * when core_pipe_limit is non-zero; coredump_request() overwrites it with
 * the mask acknowledged by the coredump server.
 */
struct core_name {
9294
char *corename;
9395
int used, size;
9496
enum coredump_type_t core_type;
97+
u64 mask;
9598
};
9699

97100
static int expand_corename(struct core_name *cn, int size)
@@ -235,6 +238,9 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm,
235238
int pid_in_pattern = 0;
236239
int err = 0;
237240

241+
cn->mask = COREDUMP_KERNEL;
242+
if (core_pipe_limit)
243+
cn->mask |= COREDUMP_WAIT;
238244
cn->used = 0;
239245
cn->corename = NULL;
240246
if (*pat_ptr == '|')
@@ -264,6 +270,13 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm,
264270
pat_ptr++;
265271
if (!(*pat_ptr))
266272
return -ENOMEM;
273+
if (*pat_ptr == '@') {
274+
pat_ptr++;
275+
if (!(*pat_ptr))
276+
return -ENOMEM;
277+
278+
cn->core_type = COREDUMP_SOCK_REQ;
279+
}
267280

268281
err = cn_printf(cn, "%s", pat_ptr);
269282
if (err)
@@ -632,6 +645,135 @@ static int umh_coredump_setup(struct subprocess_info *info, struct cred *new)
632645
return 0;
633646
}
634647

648+
#ifdef CONFIG_UNIX
649+
/*
 * coredump_sock_recv - read part or all of a coredump_ack from the socket
 * @file:  file backing the coredump socket
 * @ack:   destination buffer
 * @size:  number of bytes to receive into @ack
 * @flags: flags handed to kernel_recvmsg() (the caller in this file passes
 *         MSG_WAITALL, optionally with MSG_PEEK)
 *
 * Returns true only when kernel_recvmsg() reports exactly @size bytes;
 * a short read or an error yields false.
 */
static inline bool coredump_sock_recv(struct file *file, struct coredump_ack *ack, size_t size, int flags)
650+
{
651+
struct msghdr msg = {};
652+
struct kvec iov = { .iov_base = ack, .iov_len = size };
653+
ssize_t ret;
654+
655+
/* Clear the bytes about to be filled so stale data never survives a failed read. */
memset(ack, 0, size);
656+
ret = kernel_recvmsg(sock_from_file(file), &msg, &iov, 1, size, flags);
657+
return ret == size;
658+
}
659+
660+
/*
 * coredump_sock_send - send a struct coredump_req to the coredump server
 * @file: file backing the coredump socket
 * @req:  request to transmit; sizeof(*req) bytes are sent
 *
 * The message is sent with MSG_NOSIGNAL. Returns true only when
 * kernel_sendmsg() reports that the whole structure was sent.
 */
static inline bool coredump_sock_send(struct file *file, struct coredump_req *req)
661+
{
662+
struct msghdr msg = { .msg_flags = MSG_NOSIGNAL };
663+
struct kvec iov = { .iov_base = req, .iov_len = sizeof(*req) };
664+
ssize_t ret;
665+
666+
ret = kernel_sendmsg(sock_from_file(file), &msg, &iov, 1, sizeof(*req));
667+
return ret == sizeof(*req);
668+
}
669+
670+
/* The marker goes over the wire as-is, so its size must match a __u32. */
static_assert(sizeof(enum coredump_mark) == sizeof(__u32));
671+
672+
/*
 * coredump_sock_mark - send a status marker to the coredump server
 * @file: file backing the coredump socket
 * @mark: marker value to send
 *
 * Sends sizeof(mark) bytes with MSG_NOSIGNAL. Returns true only when
 * kernel_sendmsg() reports that the full marker was sent.
 */
static inline bool coredump_sock_mark(struct file *file, enum coredump_mark mark)
673+
{
674+
struct msghdr msg = { .msg_flags = MSG_NOSIGNAL };
675+
struct kvec iov = { .iov_base = &mark, .iov_len = sizeof(mark) };
676+
ssize_t ret;
677+
678+
ret = kernel_sendmsg(sock_from_file(file), &msg, &iov, 1, sizeof(mark));
679+
return ret == sizeof(mark);
680+
}
681+
682+
/*
 * coredump_sock_wait - wait for the coredump server to finish
 * @file: file backing the coredump socket
 *
 * Issues a one-byte read on @file. A return of 0 is treated as the normal
 * case and is silent; a positive return (data arrived) or a negative return
 * (read error) is logged through coredump_report_failure().
 */
static inline void coredump_sock_wait(struct file *file)
683+
{
684+
ssize_t n;
685+
686+
/*
687+
* We use a simple read to wait for the coredump processing to
688+
* finish. Either the socket is closed or we get sent unexpected
689+
* data. In both cases, we're done.
690+
*/
691+
n = __kernel_read(file, &(char){ 0 }, 1, NULL);
692+
if (n > 0)
693+
coredump_report_failure("Coredump socket had unexpected data");
694+
else if (n < 0)
695+
coredump_report_failure("Coredump socket failed");
696+
}
697+
698+
/*
 * coredump_sock_shutdown - shut down the write side of the coredump socket
 * @file: coredump output file; may be something other than a socket
 *
 * Does nothing when sock_from_file() finds no socket behind @file, so it can
 * be called for every coredump type.
 */
static inline void coredump_sock_shutdown(struct file *file)
699+
{
700+
struct socket *socket;
701+
702+
socket = sock_from_file(file);
703+
if (!socket)
704+
return;
705+
706+
/* Let userspace know we're done processing the coredump. */
707+
kernel_sock_shutdown(socket, SHUT_WR);
708+
}
709+
710+
static bool coredump_request(struct core_name *cn, struct coredump_params *cprm)
711+
{
712+
struct coredump_req req = {
713+
.size = sizeof(struct coredump_req),
714+
.mask = COREDUMP_KERNEL | COREDUMP_USERSPACE |
715+
COREDUMP_REJECT | COREDUMP_WAIT,
716+
.size_ack = sizeof(struct coredump_ack),
717+
};
718+
struct coredump_ack ack = {};
719+
ssize_t usize;
720+
721+
if (cn->core_type != COREDUMP_SOCK_REQ)
722+
return true;
723+
724+
/* Let userspace know what we support. */
725+
if (!coredump_sock_send(cprm->file, &req))
726+
return false;
727+
728+
/* Peek the size of the coredump_ack. */
729+
if (!coredump_sock_recv(cprm->file, &ack, sizeof(ack.size),
730+
MSG_PEEK | MSG_WAITALL))
731+
return false;
732+
733+
/* Refuse unknown coredump_ack sizes. */
734+
usize = ack.size;
735+
if (usize < COREDUMP_ACK_SIZE_VER0) {
736+
coredump_sock_mark(cprm->file, COREDUMP_MARK_MINSIZE);
737+
return false;
738+
}
739+
740+
if (usize > sizeof(ack)) {
741+
coredump_sock_mark(cprm->file, COREDUMP_MARK_MAXSIZE);
742+
return false;
743+
}
744+
745+
/* Now retrieve the coredump_ack. */
746+
if (!coredump_sock_recv(cprm->file, &ack, usize, MSG_WAITALL))
747+
return false;
748+
if (ack.size != usize)
749+
return false;
750+
751+
/* Refuse unknown coredump_ack flags. */
752+
if (ack.mask & ~req.mask) {
753+
coredump_sock_mark(cprm->file, COREDUMP_MARK_UNSUPPORTED);
754+
return false;
755+
}
756+
757+
/* Refuse mutually exclusive options. */
758+
if (hweight64(ack.mask & (COREDUMP_USERSPACE | COREDUMP_KERNEL |
759+
COREDUMP_REJECT)) != 1) {
760+
coredump_sock_mark(cprm->file, COREDUMP_MARK_CONFLICTING);
761+
return false;
762+
}
763+
764+
if (ack.spare) {
765+
coredump_sock_mark(cprm->file, COREDUMP_MARK_UNSUPPORTED);
766+
return false;
767+
}
768+
769+
cn->mask = ack.mask;
770+
return coredump_sock_mark(cprm->file, COREDUMP_MARK_REQACK);
771+
}
772+
#else
773+
/* Built without CONFIG_UNIX: the socket helpers collapse to no-ops. */
static inline void coredump_sock_wait(struct file *file) { }
774+
static inline void coredump_sock_shutdown(struct file *file) { }
775+
#endif
776+
635777
void do_coredump(const kernel_siginfo_t *siginfo)
636778
{
637779
struct core_state core_state;
@@ -850,6 +992,8 @@ void do_coredump(const kernel_siginfo_t *siginfo)
850992
}
851993
break;
852994
}
995+
case COREDUMP_SOCK_REQ:
996+
fallthrough;
853997
case COREDUMP_SOCK: {
854998
#ifdef CONFIG_UNIX
855999
struct file *file __free(fput) = NULL;
@@ -918,6 +1062,9 @@ void do_coredump(const kernel_siginfo_t *siginfo)
9181062

9191063
cprm.limit = RLIM_INFINITY;
9201064
cprm.file = no_free_ptr(file);
1065+
1066+
if (!coredump_request(&cn, &cprm))
1067+
goto close_fail;
9211068
#else
9221069
coredump_report_failure("Core dump socket support %s disabled", cn.corename);
9231070
goto close_fail;
@@ -929,12 +1076,17 @@ void do_coredump(const kernel_siginfo_t *siginfo)
9291076
goto close_fail;
9301077
}
9311078

1079+
/* Don't even generate the coredump. */
1080+
if (cn.mask & COREDUMP_REJECT)
1081+
goto close_fail;
1082+
9321083
/* get us an unshared descriptor table; almost always a no-op */
9331084
/* The cell spufs coredump code reads the file descriptor tables */
9341085
retval = unshare_files();
9351086
if (retval)
9361087
goto close_fail;
937-
if (!dump_interrupted()) {
1088+
1089+
if ((cn.mask & COREDUMP_KERNEL) && !dump_interrupted()) {
9381090
/*
9391091
* umh disabled with CONFIG_STATIC_USERMODEHELPER_PATH="" would
9401092
* have this set to NULL.
@@ -962,38 +1114,27 @@ void do_coredump(const kernel_siginfo_t *siginfo)
9621114
free_vma_snapshot(&cprm);
9631115
}
9641116

965-
#ifdef CONFIG_UNIX
966-
/* Let userspace know we're done processing the coredump. */
967-
if (sock_from_file(cprm.file))
968-
kernel_sock_shutdown(sock_from_file(cprm.file), SHUT_WR);
969-
#endif
1117+
coredump_sock_shutdown(cprm.file);
1118+
1119+
/* Let the parent know that a coredump was generated. */
1120+
if (cn.mask & COREDUMP_USERSPACE)
1121+
core_dumped = true;
9701122

9711123
/*
9721124
* When core_pipe_limit is set we wait for the coredump server
9731125
* or usermodehelper to finish before exiting so it can e.g.,
9741126
* inspect /proc/<pid>.
9751127
*/
976-
if (core_pipe_limit) {
1128+
if (cn.mask & COREDUMP_WAIT) {
9771129
switch (cn.core_type) {
9781130
case COREDUMP_PIPE:
9791131
wait_for_dump_helpers(cprm.file);
9801132
break;
981-
#ifdef CONFIG_UNIX
982-
case COREDUMP_SOCK: {
983-
ssize_t n;
984-
985-
/*
986-
* We use a simple read to wait for the coredump
987-
* processing to finish. Either the socket is
988-
* closed or we get sent unexpected data. In
989-
* both cases, we're done.
990-
*/
991-
n = __kernel_read(cprm.file, &(char){ 0 }, 1, NULL);
992-
if (n != 0)
993-
coredump_report_failure("Unexpected data on coredump socket");
1133+
case COREDUMP_SOCK_REQ:
1134+
fallthrough;
1135+
case COREDUMP_SOCK:
1136+
coredump_sock_wait(cprm.file);
9941137
break;
995-
}
996-
#endif
9971138
default:
9981139
break;
9991140
}
@@ -1249,8 +1390,8 @@ static inline bool check_coredump_socket(void)
12491390
if (current->nsproxy->mnt_ns != init_task.nsproxy->mnt_ns)
12501391
return false;
12511392

1252-
/* Must be an absolute path. */
1253-
if (*(core_pattern + 1) != '/')
1393+
/* Must be an absolute path or the socket request. */
1394+
if (*(core_pattern + 1) != '/' && *(core_pattern + 1) != '@')
12541395
return false;
12551396

12561397
return true;

include/uapi/linux/coredump.h

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2+
3+
#ifndef _UAPI_LINUX_COREDUMP_H
4+
#define _UAPI_LINUX_COREDUMP_H
5+
6+
#include <linux/types.h>
7+
8+
/**
9+
* coredump_{req,ack} flags
10+
* @COREDUMP_KERNEL: kernel writes coredump
11+
* @COREDUMP_USERSPACE: userspace writes coredump
12+
* @COREDUMP_REJECT: don't generate coredump
13+
* @COREDUMP_WAIT: wait for coredump server
*
* A coredump_ack must set exactly one of COREDUMP_KERNEL,
* COREDUMP_USERSPACE and COREDUMP_REJECT; any other combination is
* answered with COREDUMP_MARK_CONFLICTING. COREDUMP_WAIT may be combined
* with any of them.
14+
*/
15+
enum {
16+
COREDUMP_KERNEL = (1ULL << 0),
17+
COREDUMP_USERSPACE = (1ULL << 1),
18+
COREDUMP_REJECT = (1ULL << 2),
19+
COREDUMP_WAIT = (1ULL << 3),
20+
};
21+
22+
/**
23+
* struct coredump_req - message kernel sends to userspace
24+
* @size: size of struct coredump_req
25+
* @size_ack: known size of struct coredump_ack on this kernel
26+
* @mask: supported features
27+
*
28+
* When a coredump happens the kernel will connect to the coredump
29+
* socket and send a coredump request to the coredump server. The @size
30+
* member is set to the size of struct coredump_req and provides a hint
31+
* to userspace how much data can be read. Userspace may use MSG_PEEK to
32+
* peek the size of struct coredump_req and then choose to consume it in
33+
* one go. Userspace may also simply read a COREDUMP_REQ_SIZE_VER0
34+
* request. If the size the kernel sends is larger userspace simply
35+
* discards any remaining data.
36+
*
37+
* The coredump_req->mask member is set to the currently known features.
38+
* Userspace may only set coredump_ack->mask to the bits raised by the
39+
* kernel in coredump_req->mask.
40+
*
41+
* The coredump_req->size_ack member is set by the kernel to the size of
42+
* struct coredump_ack the kernel knows. Userspace may only send up to
43+
* coredump_req->size_ack bytes to the kernel and must set
44+
* coredump_ack->size accordingly.
45+
*/
46+
struct coredump_req {
47+
__u32 size;
48+
__u32 size_ack;
49+
__u64 mask;
50+
};
51+
52+
enum {
53+
COREDUMP_REQ_SIZE_VER0 = 16U, /* size of first published struct */
54+
};
55+
56+
/**
57+
* struct coredump_ack - message userspace sends to kernel
58+
* @size: size of the struct
59+
* @spare: unused, must be zero
60+
* @mask: features kernel is supposed to use
61+
*
62+
* The @size member must be set to the size of struct coredump_ack. It
63+
* may never exceed what the kernel returned in coredump_req->size_ack
64+
* but it may of course be smaller (>= COREDUMP_ACK_SIZE_VER0 and <=
65+
* coredump_req->size_ack).
66+
*
67+
* The @mask member must be set to the features the coredump server
68+
* wants the kernel to use. Only bits the kernel returned in
69+
* coredump_req->mask may be set.
*
* The kernel refuses an ack whose @spare member is non-zero with
* COREDUMP_MARK_UNSUPPORTED.
70+
*/
71+
struct coredump_ack {
72+
__u32 size;
73+
__u32 spare;
74+
__u64 mask;
75+
};
76+
77+
enum {
78+
COREDUMP_ACK_SIZE_VER0 = 16U, /* size of first published struct */
79+
};
80+
81+
/**
82+
* enum coredump_mark - Markers for the coredump socket
83+
*
84+
* The kernel will place a single 32-bit marker on the coredump socket. The
85+
* markers notify userspace whether the coredump ack succeeded or
86+
* failed.
87+
*
88+
* @COREDUMP_MARK_MINSIZE: the provided coredump_ack size was too small
89+
* @COREDUMP_MARK_MAXSIZE: the provided coredump_ack size was too big
90+
* @COREDUMP_MARK_UNSUPPORTED: the provided coredump_ack mask was invalid
*                             or the spare field was not zero
91+
* @COREDUMP_MARK_CONFLICTING: the provided coredump_ack mask has conflicting options
92+
* @COREDUMP_MARK_REQACK: the coredump request and ack was successful
93+
* @__COREDUMP_MARK_MAX: the maximum coredump mark value
94+
*/
95+
enum coredump_mark {
96+
COREDUMP_MARK_REQACK = 0U,
97+
COREDUMP_MARK_MINSIZE = 1U,
98+
COREDUMP_MARK_MAXSIZE = 2U,
99+
COREDUMP_MARK_UNSUPPORTED = 3U,
100+
COREDUMP_MARK_CONFLICTING = 4U,
101+
__COREDUMP_MARK_MAX = (1U << 31),
102+
};
103+
104+
#endif /* _UAPI_LINUX_COREDUMP_H */

0 commit comments

Comments
 (0)