Skip to content

Commit 616ea5c

Browse files
committed
Merge tag 'seccomp-v5.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux
Pull seccomp updates from Kees Cook:

 - Add "atomic addfd + send reply" mode to SECCOMP_USER_NOTIF to better handle EINTR races visible to seccomp monitors. (Rodrigo Campos, Sargun Dhillon)

 - Improve seccomp selftests for readability in CI systems. (Kees Cook)

* tag 'seccomp-v5.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux:
  selftests/seccomp: Avoid using "sysctl" for report
  selftests/seccomp: Flush benchmark output
  selftests/seccomp: More closely track fds being assigned
  selftests/seccomp: Add test for atomic addfd+send
  seccomp: Support atomic "addfd + send reply"
2 parents 233a806 + 9a03abc commit 616ea5c

File tree

5 files changed

+113
-12
lines changed

5 files changed

+113
-12
lines changed

Documentation/userspace-api/seccomp_filter.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,18 @@ and ``ioctl(SECCOMP_IOCTL_NOTIF_SEND)`` a response, indicating what should be
259259
returned to userspace. The ``id`` member of ``struct seccomp_notif_resp`` should
260260
be the same ``id`` as in ``struct seccomp_notif``.
261261

262+
Userspace can also add file descriptors to the notifying process via
263+
``ioctl(SECCOMP_IOCTL_NOTIF_ADDFD)``. The ``id`` member of
264+
``struct seccomp_notif_addfd`` should be the same ``id`` as in
265+
``struct seccomp_notif``. The ``newfd_flags`` flag may be used to set flags
266+
like O_CLOEXEC on the file descriptor in the notifying process. If the supervisor
267+
wants to inject the file descriptor with a specific number, the
268+
``SECCOMP_ADDFD_FLAG_SETFD`` flag can be used, and set the ``newfd`` member to
269+
the specific number to use. If that file descriptor is already open in the
270+
notifying process it will be replaced. The supervisor can also add an FD, and
271+
respond atomically by using the ``SECCOMP_ADDFD_FLAG_SEND`` flag and the return
272+
value will be the injected file descriptor number.
273+
262274
It is worth noting that ``struct seccomp_data`` contains the values of register
263275
arguments to the syscall, but does not contain pointers to memory. The task's
264276
memory is accessible to suitably privileged traces via ``ptrace()`` or

include/uapi/linux/seccomp.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ struct seccomp_notif_resp {
115115

116116
/* valid flags for seccomp_notif_addfd */
117117
#define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */
118+
#define SECCOMP_ADDFD_FLAG_SEND (1UL << 1) /* Addfd and return it, atomically */
118119

119120
/**
120121
* struct seccomp_notif_addfd

kernel/seccomp.c

Lines changed: 45 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ struct seccomp_knotif {
107107
* installing process should allocate the fd as normal.
108108
* @flags: The flags for the new file descriptor. At the moment, only O_CLOEXEC
109109
* is allowed.
110+
* @ioctl_flags: The flags used for the seccomp_addfd ioctl.
110111
* @ret: The return value of the installing process. It is set to the fd num
111112
* upon success (>= 0).
112113
* @completion: Indicates that the installing process has completed fd
@@ -118,6 +119,7 @@ struct seccomp_kaddfd {
118119
struct file *file;
119120
int fd;
120121
unsigned int flags;
122+
__u32 ioctl_flags;
121123

122124
union {
123125
bool setfd;
@@ -1065,18 +1067,37 @@ static u64 seccomp_next_notify_id(struct seccomp_filter *filter)
10651067
return filter->notif->next_id++;
10661068
}
10671069

1068-
static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd)
1070+
static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd, struct seccomp_knotif *n)
10691071
{
1072+
int fd;
1073+
10701074
/*
10711075
* Remove the notification, and reset the list pointers, indicating
10721076
* that it has been handled.
10731077
*/
10741078
list_del_init(&addfd->list);
10751079
if (!addfd->setfd)
1076-
addfd->ret = receive_fd(addfd->file, addfd->flags);
1080+
fd = receive_fd(addfd->file, addfd->flags);
10771081
else
1078-
addfd->ret = receive_fd_replace(addfd->fd, addfd->file,
1079-
addfd->flags);
1082+
fd = receive_fd_replace(addfd->fd, addfd->file, addfd->flags);
1083+
addfd->ret = fd;
1084+
1085+
if (addfd->ioctl_flags & SECCOMP_ADDFD_FLAG_SEND) {
1086+
/* If we fail, reset and return an error to the notifier */
1087+
if (fd < 0) {
1088+
n->state = SECCOMP_NOTIFY_SENT;
1089+
} else {
1090+
/* Return the FD we just added */
1091+
n->flags = 0;
1092+
n->error = 0;
1093+
n->val = fd;
1094+
}
1095+
}
1096+
1097+
/*
1098+
* Mark the notification as completed. From this point, addfd mem
1099+
* might be invalidated and we can't safely read it anymore.
1100+
*/
10801101
complete(&addfd->completion);
10811102
}
10821103

@@ -1120,7 +1141,7 @@ static int seccomp_do_user_notification(int this_syscall,
11201141
struct seccomp_kaddfd, list);
11211142
/* Check if we were woken up by a addfd message */
11221143
if (addfd)
1123-
seccomp_handle_addfd(addfd);
1144+
seccomp_handle_addfd(addfd, &n);
11241145

11251146
} while (n.state != SECCOMP_NOTIFY_REPLIED);
11261147

@@ -1581,7 +1602,7 @@ static long seccomp_notify_addfd(struct seccomp_filter *filter,
15811602
if (addfd.newfd_flags & ~O_CLOEXEC)
15821603
return -EINVAL;
15831604

1584-
if (addfd.flags & ~SECCOMP_ADDFD_FLAG_SETFD)
1605+
if (addfd.flags & ~(SECCOMP_ADDFD_FLAG_SETFD | SECCOMP_ADDFD_FLAG_SEND))
15851606
return -EINVAL;
15861607

15871608
if (addfd.newfd && !(addfd.flags & SECCOMP_ADDFD_FLAG_SETFD))
@@ -1591,6 +1612,7 @@ static long seccomp_notify_addfd(struct seccomp_filter *filter,
15911612
if (!kaddfd.file)
15921613
return -EBADF;
15931614

1615+
kaddfd.ioctl_flags = addfd.flags;
15941616
kaddfd.flags = addfd.newfd_flags;
15951617
kaddfd.setfd = addfd.flags & SECCOMP_ADDFD_FLAG_SETFD;
15961618
kaddfd.fd = addfd.newfd;
@@ -1616,6 +1638,23 @@ static long seccomp_notify_addfd(struct seccomp_filter *filter,
16161638
goto out_unlock;
16171639
}
16181640

1641+
if (addfd.flags & SECCOMP_ADDFD_FLAG_SEND) {
1642+
/*
1643+
* Disallow queuing an atomic addfd + send reply while there are
1644+
* some addfd requests still to process.
1645+
*
1646+
* There is no clear reason to support it, and disallowing it lets us keep
1647+
* the loop on the other side straight-forward.
1648+
*/
1649+
if (!list_empty(&knotif->addfd)) {
1650+
ret = -EBUSY;
1651+
goto out_unlock;
1652+
}
1653+
1654+
/* Allow exactly one reply */
1655+
knotif->state = SECCOMP_NOTIFY_REPLIED;
1656+
}
1657+
16191658
list_add(&kaddfd.list, &knotif->addfd);
16201659
complete(&knotif->ready);
16211660
mutex_unlock(&filter->notify_lock);

tools/testing/selftests/seccomp/seccomp_benchmark.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,9 +143,15 @@ int main(int argc, char *argv[])
143143
unsigned long long native, filter1, filter2, bitmap1, bitmap2;
144144
unsigned long long entry, per_filter1, per_filter2;
145145

146+
setbuf(stdout, NULL);
147+
148+
printf("Running on:\n");
149+
system("uname -a");
150+
146151
printf("Current BPF sysctl settings:\n");
147-
system("sysctl net.core.bpf_jit_enable");
148-
system("sysctl net.core.bpf_jit_harden");
152+
/* Avoid using "sysctl" which may not be installed. */
153+
system("grep -H . /proc/sys/net/core/bpf_jit_enable");
154+
system("grep -H . /proc/sys/net/core/bpf_jit_harden");
149155

150156
if (argc > 1)
151157
samples = strtoull(argv[1], NULL, 0);

tools/testing/selftests/seccomp/seccomp_bpf.c

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,10 @@ struct seccomp_notif_addfd {
235235
};
236236
#endif
237237

238+
#ifndef SECCOMP_ADDFD_FLAG_SEND
239+
#define SECCOMP_ADDFD_FLAG_SEND (1UL << 1) /* Addfd and return it, atomically */
240+
#endif
241+
238242
struct seccomp_notif_addfd_small {
239243
__u64 id;
240244
char weird[4];
@@ -3959,7 +3963,7 @@ TEST(user_notification_addfd)
39593963
{
39603964
pid_t pid;
39613965
long ret;
3962-
int status, listener, memfd, fd;
3966+
int status, listener, memfd, fd, nextfd;
39633967
struct seccomp_notif_addfd addfd = {};
39643968
struct seccomp_notif_addfd_small small = {};
39653969
struct seccomp_notif_addfd_big big = {};
@@ -3968,25 +3972,34 @@ TEST(user_notification_addfd)
39683972
/* 100 ms */
39693973
struct timespec delay = { .tv_nsec = 100000000 };
39703974

3975+
/* There may be arbitrary already-open fds at test start. */
39713976
memfd = memfd_create("test", 0);
39723977
ASSERT_GE(memfd, 0);
3978+
nextfd = memfd + 1;
39733979

39743980
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
39753981
ASSERT_EQ(0, ret) {
39763982
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
39773983
}
39783984

3985+
/* fd: 4 */
39793986
/* Check that the basic notification machinery works */
39803987
listener = user_notif_syscall(__NR_getppid,
39813988
SECCOMP_FILTER_FLAG_NEW_LISTENER);
3982-
ASSERT_GE(listener, 0);
3989+
ASSERT_EQ(listener, nextfd++);
39833990

39843991
pid = fork();
39853992
ASSERT_GE(pid, 0);
39863993

39873994
if (pid == 0) {
3995+
/* fds will be added and this value is expected */
39883996
if (syscall(__NR_getppid) != USER_NOTIF_MAGIC)
39893997
exit(1);
3998+
3999+
/* Atomic addfd+send is received here. Check it is a valid fd */
4000+
if (fcntl(syscall(__NR_getppid), F_GETFD) == -1)
4001+
exit(1);
4002+
39904003
exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
39914004
}
39924005

@@ -4028,14 +4041,14 @@ TEST(user_notification_addfd)
40284041

40294042
/* Verify we can set an arbitrary remote fd */
40304043
fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
4031-
EXPECT_GE(fd, 0);
4044+
EXPECT_EQ(fd, nextfd++);
40324045
EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
40334046

40344047
/* Verify we can set an arbitrary remote fd with large size */
40354048
memset(&big, 0x0, sizeof(big));
40364049
big.addfd = addfd;
40374050
fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big);
4038-
EXPECT_GE(fd, 0);
4051+
EXPECT_EQ(fd, nextfd++);
40394052

40404053
/* Verify we can set a specific remote fd */
40414054
addfd.newfd = 42;
@@ -4065,6 +4078,32 @@ TEST(user_notification_addfd)
40654078
ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
40664079
ASSERT_EQ(addfd.id, req.id);
40674080

4081+
/* Verify we can do an atomic addfd and send */
4082+
addfd.newfd = 0;
4083+
addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
4084+
fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
4085+
/*
4086+
* Child has earlier "low" fds and now 42, so we expect the next
4087+
* lowest available fd to be assigned here.
4088+
*/
4089+
EXPECT_EQ(fd, nextfd++);
4090+
EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
4091+
4092+
/*
4093+
* This sets the ID of the ADD FD to the last request plus 1. The
4094+
* notification ID increments 1 per notification.
4095+
*/
4096+
addfd.id = req.id + 1;
4097+
4098+
/* This spins until the underlying notification is generated */
4099+
while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 &&
4100+
errno != -EINPROGRESS)
4101+
nanosleep(&delay, NULL);
4102+
4103+
memset(&req, 0, sizeof(req));
4104+
ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
4105+
ASSERT_EQ(addfd.id, req.id);
4106+
40684107
resp.id = req.id;
40694108
resp.error = 0;
40704109
resp.val = USER_NOTIF_MAGIC;
@@ -4125,6 +4164,10 @@ TEST(user_notification_addfd_rlimit)
41254164
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
41264165
EXPECT_EQ(errno, EMFILE);
41274166

4167+
addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
4168+
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
4169+
EXPECT_EQ(errno, EMFILE);
4170+
41284171
addfd.newfd = 100;
41294172
addfd.flags = SECCOMP_ADDFD_FLAG_SETFD;
41304173
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);

0 commit comments

Comments
 (0)