Skip to content

Commit 4da9af0

Browse files
committed
Merge tag 'threads-v5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux
Pull pidfd updates from Christian Brauner: "This introduces a new extension to the pidfd_open() syscall. Users can now raise the new PIDFD_NONBLOCK flag to support non-blocking pidfd file descriptors. This has been requested for use in async process management libraries such as async-pidfd in Rust. Ever since the introduction of pidfds and more advanced async I/O, various programming languages such as Rust have grown support for async event libraries. These libraries are created to help build epoll-based event loops around file descriptors. A common pattern is to automatically set O_NONBLOCK on all file descriptors they manage. For such libraries the EAGAIN error code is treated specially. When a function is called that returns EAGAIN, the function isn't called again until the event loop indicates that the file descriptor is ready. Supporting EAGAIN when waiting on pidfds makes such libraries just work with little effort. This introduces a new flag PIDFD_NONBLOCK that is equivalent to O_NONBLOCK. This follows the same patterns we have for other (anon inode) file descriptors such as EFD_NONBLOCK, IN_NONBLOCK, SFD_NONBLOCK, TFD_NONBLOCK and the same for close-on-exec flags. Passing a non-blocking pidfd to waitid() currently has no effect, i.e. is not supported. There are users who would like to use waitid() on pidfds that are O_NONBLOCK, mix them with pidfds that are blocking, and pass both kinds to waitid(). The expected behavior is to have waitid() return -EAGAIN for non-blocking pidfds and to block for blocking pidfds without needing to perform any additional checks for flags set on the pidfd before passing it to waitid(). Non-blocking pidfds will return EAGAIN from waitid() when no child process is ready yet. Returning -EAGAIN for non-blocking pidfds makes it easier for event loops that handle EAGAIN specially. It also makes the API more consistent and uniform. 
In essence, waitid() is treated like a read on a non-blocking pidfd or a recvmsg() on a non-blocking socket. With the addition of support for non-blocking pidfds we support the same functionality that sockets do. For sockets, recvmsg() supports MSG_DONTWAIT; for pidfds, waitid() supports WNOHANG. Both flags are per-call options. In contrast, non-blocking pidfds and non-blocking sockets are a setting on an open file description affecting all threads in the calling process as well as other processes that hold file descriptors referring to the same open file description. Both behaviors, per call and per open file description, have genuine use-cases. The interaction with the WNOHANG flag is documented as follows: - If a non-blocking pidfd is passed and WNOHANG is not raised, we simply raise the WNOHANG flag internally. When do_wait() returns indicating that there are eligible child processes but none have exited yet, we set EAGAIN. If no child process exists, we continue returning ECHILD. - If a non-blocking pidfd is passed and WNOHANG is raised, waitid() will continue returning 0, i.e. it will not set EAGAIN. This ensures backwards compatibility with applications passing WNOHANG explicitly with pidfds." * tag 'threads-v5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux: tests: remove O_NONBLOCK before waiting for WSTOPPED tests: add waitid() tests for non-blocking pidfds tests: port pidfd_wait to kselftest harness pidfd: support PIDFD_NONBLOCK in pidfd_open() exit: support non-blocking pidfds
2 parents 612e7a4 + 01361b6 commit 4da9af0

File tree

5 files changed

+164
-183
lines changed

5 files changed

+164
-183
lines changed

include/uapi/linux/pidfd.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2+
3+
#ifndef _UAPI_LINUX_PIDFD_H
4+
#define _UAPI_LINUX_PIDFD_H
5+
6+
#include <linux/types.h>
7+
#include <linux/fcntl.h>
8+
9+
/* Flags for pidfd_open(). */
10+
#define PIDFD_NONBLOCK O_NONBLOCK
11+
12+
#endif /* _UAPI_LINUX_PIDFD_H */

kernel/exit.c

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1474,7 +1474,7 @@ static long do_wait(struct wait_opts *wo)
14741474
return retval;
14751475
}
14761476

1477-
static struct pid *pidfd_get_pid(unsigned int fd)
1477+
static struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags)
14781478
{
14791479
struct fd f;
14801480
struct pid *pid;
@@ -1484,8 +1484,10 @@ static struct pid *pidfd_get_pid(unsigned int fd)
14841484
return ERR_PTR(-EBADF);
14851485

14861486
pid = pidfd_pid(f.file);
1487-
if (!IS_ERR(pid))
1487+
if (!IS_ERR(pid)) {
14881488
get_pid(pid);
1489+
*flags = f.file->f_flags;
1490+
}
14891491

14901492
fdput(f);
14911493
return pid;
@@ -1498,6 +1500,7 @@ static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
14981500
struct pid *pid = NULL;
14991501
enum pid_type type;
15001502
long ret;
1503+
unsigned int f_flags = 0;
15011504

15021505
if (options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED|
15031506
__WNOTHREAD|__WCLONE|__WALL))
@@ -1531,9 +1534,10 @@ static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
15311534
if (upid < 0)
15321535
return -EINVAL;
15331536

1534-
pid = pidfd_get_pid(upid);
1537+
pid = pidfd_get_pid(upid, &f_flags);
15351538
if (IS_ERR(pid))
15361539
return PTR_ERR(pid);
1540+
15371541
break;
15381542
default:
15391543
return -EINVAL;
@@ -1544,7 +1548,12 @@ static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
15441548
wo.wo_flags = options;
15451549
wo.wo_info = infop;
15461550
wo.wo_rusage = ru;
1551+
if (f_flags & O_NONBLOCK)
1552+
wo.wo_flags |= WNOHANG;
1553+
15471554
ret = do_wait(&wo);
1555+
if (!ret && !(options & WNOHANG) && (f_flags & O_NONBLOCK))
1556+
ret = -EAGAIN;
15481557

15491558
put_pid(pid);
15501559
return ret;

kernel/pid.c

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
#include <linux/sched/task.h>
4444
#include <linux/idr.h>
4545
#include <net/sock.h>
46+
#include <uapi/linux/pidfd.h>
4647

4748
struct pid init_struct_pid = {
4849
.count = REFCOUNT_INIT(1),
@@ -522,7 +523,8 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
522523
/**
523524
* pidfd_create() - Create a new pid file descriptor.
524525
*
525-
* @pid: struct pid that the pidfd will reference
526+
* @pid: struct pid that the pidfd will reference
527+
* @flags: flags to pass
526528
*
527529
* This creates a new pid file descriptor with the O_CLOEXEC flag set.
528530
*
@@ -532,12 +534,12 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
532534
* Return: On success, a cloexec pidfd is returned.
533535
* On error, a negative errno number will be returned.
534536
*/
535-
static int pidfd_create(struct pid *pid)
537+
static int pidfd_create(struct pid *pid, unsigned int flags)
536538
{
537539
int fd;
538540

539541
fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid),
540-
O_RDWR | O_CLOEXEC);
542+
flags | O_RDWR | O_CLOEXEC);
541543
if (fd < 0)
542544
put_pid(pid);
543545

@@ -565,7 +567,7 @@ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags)
565567
int fd;
566568
struct pid *p;
567569

568-
if (flags)
570+
if (flags & ~PIDFD_NONBLOCK)
569571
return -EINVAL;
570572

571573
if (pid <= 0)
@@ -576,7 +578,7 @@ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags)
576578
return -ESRCH;
577579

578580
if (pid_has_task(p, PIDTYPE_TGID))
579-
fd = pidfd_create(p);
581+
fd = pidfd_create(p, flags);
580582
else
581583
fd = -EINVAL;
582584

tools/testing/selftests/pidfd/pidfd.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@
4646
#define __NR_pidfd_getfd -1
4747
#endif
4848

49+
#ifndef PIDFD_NONBLOCK
50+
#define PIDFD_NONBLOCK O_NONBLOCK
51+
#endif
52+
4953
/*
5054
* The kernel reserves 300 pids via RESERVED_PIDS in kernel/pid.c
5155
* That means, when it wraps around any pid < 300 will be skipped.

0 commit comments

Comments
 (0)