Skip to content

Commit f06b71f

Browse files
committed
Make the user mode driver code a better citizen
This is the third round of my changeset to split the user mode driver code from the user mode helper code, and to make the code use common facilities to get things done instead of recreating them just for the user mode driver code. I have split the changes into small enough pieces so they should be easily readable and testable. The changes lean into the preexisting interfaces in the kernel and remove special cases for user mode driver code in favor of solutions that don't need special cases. This results in smaller code with fewer bugs. At a practical level this removes the maintenance burden of the user mode drivers from the user mode helper code and from exec as the special cases are removed. Similarly the LSM interaction bugs are fixed by not having unnecessary special cases for user mode drivers. I have tested thes changes by booting with the code compiled in and by killing "bpfilter_umh" and "running iptables -vnL" to restart the userspace driver, also by running "while true; do iptables -L;rmmod bpfilter; done" to verify the module load and unload work properly. I have compiled tested each change with and without CONFIG_BPFILTER enabled. From v2 to v3 I have made two siginficant changes. - I factored thread_group_exit out of pidfd_poll to allow the test to be used by the bpfilter code. - I renamed umd.c and umd.h to usermode_driver.c and usermode_driver.h respectively. I made a few very small changes from v1 to v2: - Updated the function name in a comment when the function is renamed - Moved some more code so that the the !CONFIG_BPFILTER case continues to compile when I moved the code into umd.c - A fix for the module loading case to really flush the file descriptor. - Removed split_argv entirely from fork_usermode_driver. There was nothing to split so it was just confusing. Please let me know if you see any bugs. Once the code review is finished I plan to place the code in a non-rebasing branch so I can pull it into my tree and so it can also be pulled into the bpf-next tree. v1: https://lkml.kernel.org/r/[email protected] v2: https://lkml.kernel.org/r/[email protected] Eric W. Biederman (16): umh: Capture the pid in umh_pipe_setup umh: Move setting PF_UMH into umh_pipe_setup umh: Rename the user mode driver helpers for clarity umh: Remove call_usermodehelper_setup_file. umh: Separate the user mode driver and the user mode helper support umd: For clarity rename umh_info umd_info umd: Rename umd_info.cmdline umd_info.driver_name umd: Transform fork_usermode_blob into fork_usermode_driver umh: Stop calling do_execve_file exec: Remove do_execve_file bpfilter: Move bpfilter_umh back into init data umd: Track user space drivers with struct pid exit: Factor thread_group_exited out of pidfd_poll bpfilter: Take advantage of the facilities of struct pid umd: Remove exit_umh umd: Stop using split_argv Link: https://lkml.kernel.org/r/[email protected] Signed-off-by: "Eric W. Biederman" <[email protected]>
2 parents b3a9e3b + 33c3260 commit f06b71f

File tree

15 files changed

+275
-260
lines changed

15 files changed

+275
-260
lines changed

fs/exec.c

Lines changed: 9 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1818,13 +1818,14 @@ static int exec_binprm(struct linux_binprm *bprm)
18181818
/*
18191819
* sys_execve() executes a new program.
18201820
*/
1821-
static int __do_execve_file(int fd, struct filename *filename,
1822-
struct user_arg_ptr argv,
1823-
struct user_arg_ptr envp,
1824-
int flags, struct file *file)
1821+
static int do_execveat_common(int fd, struct filename *filename,
1822+
struct user_arg_ptr argv,
1823+
struct user_arg_ptr envp,
1824+
int flags)
18251825
{
18261826
char *pathbuf = NULL;
18271827
struct linux_binprm *bprm;
1828+
struct file *file;
18281829
struct files_struct *displaced;
18291830
int retval;
18301831

@@ -1863,18 +1864,15 @@ static int __do_execve_file(int fd, struct filename *filename,
18631864
check_unsafe_exec(bprm);
18641865
current->in_execve = 1;
18651866

1866-
if (!file)
1867-
file = do_open_execat(fd, filename, flags);
1867+
file = do_open_execat(fd, filename, flags);
18681868
retval = PTR_ERR(file);
18691869
if (IS_ERR(file))
18701870
goto out_unmark;
18711871

18721872
sched_exec();
18731873

18741874
bprm->file = file;
1875-
if (!filename) {
1876-
bprm->filename = "none";
1877-
} else if (fd == AT_FDCWD || filename->name[0] == '/') {
1875+
if (fd == AT_FDCWD || filename->name[0] == '/') {
18781876
bprm->filename = filename->name;
18791877
} else {
18801878
if (filename->name[0] == '\0')
@@ -1935,8 +1933,7 @@ static int __do_execve_file(int fd, struct filename *filename,
19351933
task_numa_free(current, false);
19361934
free_bprm(bprm);
19371935
kfree(pathbuf);
1938-
if (filename)
1939-
putname(filename);
1936+
putname(filename);
19401937
if (displaced)
19411938
put_files_struct(displaced);
19421939
return retval;
@@ -1967,27 +1964,10 @@ static int __do_execve_file(int fd, struct filename *filename,
19671964
if (displaced)
19681965
reset_files_struct(displaced);
19691966
out_ret:
1970-
if (filename)
1971-
putname(filename);
1967+
putname(filename);
19721968
return retval;
19731969
}
19741970

1975-
static int do_execveat_common(int fd, struct filename *filename,
1976-
struct user_arg_ptr argv,
1977-
struct user_arg_ptr envp,
1978-
int flags)
1979-
{
1980-
return __do_execve_file(fd, filename, argv, envp, flags, NULL);
1981-
}
1982-
1983-
int do_execve_file(struct file *file, void *__argv, void *__envp)
1984-
{
1985-
struct user_arg_ptr argv = { .ptr.native = __argv };
1986-
struct user_arg_ptr envp = { .ptr.native = __envp };
1987-
1988-
return __do_execve_file(AT_FDCWD, NULL, argv, envp, 0, file);
1989-
}
1990-
19911971
int do_execve(struct filename *filename,
19921972
const char __user *const __user *__argv,
19931973
const char __user *const __user *__envp)

include/linux/binfmts.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,5 @@ extern int do_execveat(int, struct filename *,
141141
const char __user * const __user *,
142142
const char __user * const __user *,
143143
int);
144-
int do_execve_file(struct file *file, void *__argv, void *__envp);
145144

146145
#endif /* _LINUX_BINFMTS_H */

include/linux/bpfilter.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,22 +3,23 @@
33
#define _LINUX_BPFILTER_H
44

55
#include <uapi/linux/bpfilter.h>
6-
#include <linux/umh.h>
6+
#include <linux/usermode_driver.h>
77

88
struct sock;
99
int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval,
1010
unsigned int optlen);
1111
int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval,
1212
int __user *optlen);
13+
void bpfilter_umh_cleanup(struct umd_info *info);
14+
1315
struct bpfilter_umh_ops {
14-
struct umh_info info;
16+
struct umd_info info;
1517
/* since ip_getsockopt() can run in parallel, serialize access to umh */
1618
struct mutex lock;
1719
int (*sockopt)(struct sock *sk, int optname,
1820
char __user *optval,
1921
unsigned int optlen, bool is_set);
2022
int (*start)(void);
21-
bool stop;
2223
};
2324
extern struct bpfilter_umh_ops bpfilter_ops;
2425
#endif

include/linux/sched.h

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1511,7 +1511,6 @@ extern struct pid *cad_pid;
15111511
#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
15121512
#define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */
15131513
#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
1514-
#define PF_UMH 0x02000000 /* I'm an Usermodehelper process */
15151514
#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */
15161515
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
15171516
#define PF_MEMALLOC_NOCMA 0x10000000 /* All allocation request will have _GFP_MOVABLE cleared */
@@ -2020,14 +2019,6 @@ static inline void rseq_execve(struct task_struct *t)
20202019

20212020
#endif
20222021

2023-
void __exit_umh(struct task_struct *tsk);
2024-
2025-
static inline void exit_umh(struct task_struct *tsk)
2026-
{
2027-
if (unlikely(tsk->flags & PF_UMH))
2028-
__exit_umh(tsk);
2029-
}
2030-
20312022
#ifdef CONFIG_DEBUG_RSEQ
20322023

20332024
void rseq_syscall(struct pt_regs *regs);

include/linux/sched/signal.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -674,6 +674,8 @@ static inline int thread_group_empty(struct task_struct *p)
674674
#define delay_group_leader(p) \
675675
(thread_group_leader(p) && !thread_group_empty(p))
676676

677+
extern bool thread_group_exited(struct pid *pid);
678+
677679
extern struct sighand_struct *__lock_task_sighand(struct task_struct *task,
678680
unsigned long *flags);
679681

include/linux/umh.h

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,8 @@ struct subprocess_info {
2222
const char *path;
2323
char **argv;
2424
char **envp;
25-
struct file *file;
2625
int wait;
2726
int retval;
28-
pid_t pid;
2927
int (*init)(struct subprocess_info *info, struct cred *new);
3028
void (*cleanup)(struct subprocess_info *info);
3129
void *data;
@@ -40,19 +38,6 @@ call_usermodehelper_setup(const char *path, char **argv, char **envp,
4038
int (*init)(struct subprocess_info *info, struct cred *new),
4139
void (*cleanup)(struct subprocess_info *), void *data);
4240

43-
struct subprocess_info *call_usermodehelper_setup_file(struct file *file,
44-
int (*init)(struct subprocess_info *info, struct cred *new),
45-
void (*cleanup)(struct subprocess_info *), void *data);
46-
struct umh_info {
47-
const char *cmdline;
48-
struct file *pipe_to_umh;
49-
struct file *pipe_from_umh;
50-
struct list_head list;
51-
void (*cleanup)(struct umh_info *info);
52-
pid_t pid;
53-
};
54-
int fork_usermode_blob(void *data, size_t len, struct umh_info *info);
55-
5641
extern int
5742
call_usermodehelper_exec(struct subprocess_info *info, int wait);
5843

include/linux/usermode_driver.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#ifndef __LINUX_USERMODE_DRIVER_H__
2+
#define __LINUX_USERMODE_DRIVER_H__
3+
4+
#include <linux/umh.h>
5+
#include <linux/path.h>
6+
7+
struct umd_info {
8+
const char *driver_name;
9+
struct file *pipe_to_umh;
10+
struct file *pipe_from_umh;
11+
struct path wd;
12+
struct pid *tgid;
13+
};
14+
int umd_load_blob(struct umd_info *info, const void *data, size_t len);
15+
int umd_unload_blob(struct umd_info *info);
16+
int fork_usermode_driver(struct umd_info *info);
17+
18+
#endif /* __LINUX_USERMODE_DRIVER_H__ */

kernel/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ obj-y = fork.o exec_domain.o panic.o \
1212
notifier.o ksysfs.o cred.o reboot.o \
1313
async.o range.o smpboot.o ucount.o
1414

15+
obj-$(CONFIG_BPFILTER) += usermode_driver.o
1516
obj-$(CONFIG_MODULES) += kmod.o
1617
obj-$(CONFIG_MULTIUSER) += groups.o
1718

kernel/exit.c

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -804,7 +804,6 @@ void __noreturn do_exit(long code)
804804
exit_task_namespaces(tsk);
805805
exit_task_work(tsk);
806806
exit_thread(tsk);
807-
exit_umh(tsk);
808807

809808
/*
810809
* Flush inherited counters to the parent - before the parent
@@ -1711,6 +1710,30 @@ COMPAT_SYSCALL_DEFINE5(waitid,
17111710
}
17121711
#endif
17131712

1713+
/**
1714+
* thread_group_exited - check that a thread group has exited
1715+
* @pid: tgid of thread group to be checked.
1716+
*
1717+
* Test if the thread group represented by tgid has exited (all
1718+
* threads are zombies, dead or completely gone).
1719+
*
1720+
* Return: true if the thread group has exited. false otherwise.
1721+
*/
1722+
bool thread_group_exited(struct pid *pid)
1723+
{
1724+
struct task_struct *task;
1725+
bool exited;
1726+
1727+
rcu_read_lock();
1728+
task = pid_task(pid, PIDTYPE_PID);
1729+
exited = !task ||
1730+
(READ_ONCE(task->exit_state) && thread_group_empty(task));
1731+
rcu_read_unlock();
1732+
1733+
return exited;
1734+
}
1735+
EXPORT_SYMBOL(thread_group_exited);
1736+
17141737
__weak void abort(void)
17151738
{
17161739
BUG();

kernel/fork.c

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1787,22 +1787,18 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)
17871787
*/
17881788
static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
17891789
{
1790-
struct task_struct *task;
17911790
struct pid *pid = file->private_data;
17921791
__poll_t poll_flags = 0;
17931792

17941793
poll_wait(file, &pid->wait_pidfd, pts);
17951794

1796-
rcu_read_lock();
1797-
task = pid_task(pid, PIDTYPE_PID);
17981795
/*
17991796
* Inform pollers only when the whole thread group exits.
18001797
* If the thread group leader exits before all other threads in the
18011798
* group, then poll(2) should block, similar to the wait(2) family.
18021799
*/
1803-
if (!task || (task->exit_state && thread_group_empty(task)))
1800+
if (thread_group_exited(pid))
18041801
poll_flags = EPOLLIN | EPOLLRDNORM;
1805-
rcu_read_unlock();
18061802

18071803
return poll_flags;
18081804
}

0 commit comments

Comments
 (0)