
Commit 343f4c4

kthread: Don't allocate kthread_struct for init and umh
If kthread_is_per_cpu runs concurrently with free_kthread_struct, the kthread_struct that was just freed may be read from.

This bug was introduced by commit 40966e3 ("kthread: Ensure struct kthread is present for all kthreads"), when kthread_struct started to be allocated for all tasks that have PF_KTHREAD set. This in turn required the kthread_struct to be freed in kernel_execve and violated the assumption that kthread_struct will have the same lifetime as the task.

Looking a bit deeper, this only applies to callers of kernel_execve, which is just the init process and the user mode helper processes.

These processes really don't want to be kernel threads, but are for historical reasons: mostly that copy_thread does not know how to hand a kernel mode function to a process unless PF_KTHREAD or PF_IO_WORKER is set.

Solve this by not allocating kthread_struct for the init process and the user mode helper processes. This is done by adding a kthread member to struct kernel_clone_args, setting kthread in fork_idle and kernel_thread, adding user_mode_thread that works like kernel_thread except it does not set kthread, and in fork only allocating the kthread_struct if .kthread is set.

I have looked at kernel/kthread.c, and since commit 40966e3 ("kthread: Ensure struct kthread is present for all kthreads") no assumptions have been added that to_kthread or __to_kthread will not return NULL.

There are a few callers of to_kthread or __to_kthread that assume a non-NULL struct kthread pointer will be returned. These functions are kthread_data(), kthread_parkme(), kthread_exit(), kthread(), kthread_park(), kthread_unpark(), and kthread_stop(). All of those functions can reasonably be expected to be called only when it is known that a task is a kthread, so that assumption seems reasonable.

Cc: [email protected]
Fixes: 40966e3 ("kthread: Ensure struct kthread is present for all kthreads")
Reported-by: Максим Кутявин <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: "Eric W. Biederman" <[email protected]>
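
The audit above relies on callers handling a NULL return from to_kthread/__to_kthread for tasks that have no kthread_struct, which after this change includes init and the user mode helpers. A minimal sketch of that NULL-tolerant pattern (illustrative only; the helper name is hypothetical and this is not code from the commit):

	/* Sketch: a caller that may see non-kthread tasks treats NULL as "not a kthread". */
	static bool task_is_percpu_kthread(struct task_struct *p)
	{
		struct kthread *kthread = __to_kthread(p);

		/* init and the user mode helpers now land here: no kthread_struct */
		if (!kthread)
			return false;

		return test_bit(KTHREAD_IS_PER_CPU, &kthread->flags);
	}
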
1 parent: 3123109

5 files changed, 30 insertions(+), 8 deletions(-)


fs/exec.c

Lines changed: 4 additions & 2 deletions
@@ -1308,8 +1308,6 @@ int begin_new_exec(struct linux_binprm * bprm)
 	if (retval)
 		goto out_unlock;
 
-	if (me->flags & PF_KTHREAD)
-		free_kthread_struct(me);
 	me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD |
 					PF_NOFREEZE | PF_NO_SETAFFINITY);
 	flush_thread();
@@ -1955,6 +1953,10 @@ int kernel_execve(const char *kernel_filename,
 	int fd = AT_FDCWD;
 	int retval;
 
+	if (WARN_ON_ONCE((current->flags & PF_KTHREAD) &&
+			 (current->worker_private)))
+		return -EINVAL;
+
 	filename = getname_kernel(kernel_filename);
 	if (IS_ERR(filename))
 		return PTR_ERR(filename);

include/linux/sched/task.h

Lines changed: 2 additions & 0 deletions
@@ -32,6 +32,7 @@ struct kernel_clone_args {
 	size_t set_tid_size;
 	int cgroup;
 	int io_thread;
+	int kthread;
 	struct cgroup *cgrp;
 	struct css_set *cset;
 };
@@ -89,6 +90,7 @@ struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node);
 struct task_struct *fork_idle(int);
 struct mm_struct *copy_init_mm(void);
 extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
+extern pid_t user_mode_thread(int (*fn)(void *), void *arg, unsigned long flags);
 extern long kernel_wait4(pid_t, int __user *, int, struct rusage *);
 int kernel_wait(pid_t pid, int *stat);
 

init/main.c

Lines changed: 1 addition & 1 deletion
@@ -688,7 +688,7 @@ noinline void __ref rest_init(void)
 	 * the init task will end up wanting to create kthreads, which, if
 	 * we schedule it before we create kthreadd, will OOPS.
 	 */
-	pid = kernel_thread(kernel_init, NULL, CLONE_FS);
+	pid = user_mode_thread(kernel_init, NULL, CLONE_FS);
 	/*
 	 * Pin init on the boot CPU. Task migration is not properly working
 	 * until sched_init_smp() has been run. It will set the allowed

kernel/fork.c

Lines changed: 20 additions & 2 deletions
@@ -2157,7 +2157,7 @@ static __latent_entropy struct task_struct *copy_process(
 	p->io_context = NULL;
 	audit_set_context(p, NULL);
 	cgroup_fork(p);
-	if (p->flags & PF_KTHREAD) {
+	if (args->kthread) {
 		if (!set_kthread_struct(p))
 			goto bad_fork_cleanup_delayacct;
 	}
@@ -2548,7 +2548,8 @@ struct task_struct * __init fork_idle(int cpu)
 {
 	struct task_struct *task;
 	struct kernel_clone_args args = {
-		.flags = CLONE_VM,
+		.flags		= CLONE_VM,
+		.kthread	= 1,
 	};
 
 	task = copy_process(&init_struct_pid, 0, cpu_to_node(cpu), &args);
@@ -2679,6 +2680,23 @@ pid_t kernel_clone(struct kernel_clone_args *args)
  * Create a kernel thread.
  */
 pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
+{
+	struct kernel_clone_args args = {
+		.flags		= ((lower_32_bits(flags) | CLONE_VM |
+				    CLONE_UNTRACED) & ~CSIGNAL),
+		.exit_signal	= (lower_32_bits(flags) & CSIGNAL),
+		.stack		= (unsigned long)fn,
+		.stack_size	= (unsigned long)arg,
+		.kthread	= 1,
+	};
+
+	return kernel_clone(&args);
+}
+
+/*
+ * Create a user mode thread.
+ */
+pid_t user_mode_thread(int (*fn)(void *), void *arg, unsigned long flags)
 {
 	struct kernel_clone_args args = {
 		.flags		= ((lower_32_bits(flags) | CLONE_VM |

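The kernel/fork.c hunk above ends in unchanged context, so the remainder of user_mode_thread() is not shown. Per the commit message it behaves like kernel_thread() except that it does not set .kthread; a sketch of the presumed complete function, under that assumption:

	pid_t user_mode_thread(int (*fn)(void *), void *arg, unsigned long flags)
	{
		struct kernel_clone_args args = {
			.flags		= ((lower_32_bits(flags) | CLONE_VM |
					    CLONE_UNTRACED) & ~CSIGNAL),
			.exit_signal	= (lower_32_bits(flags) & CSIGNAL),
			.stack		= (unsigned long)fn,
			.stack_size	= (unsigned long)arg,
			/* no .kthread = 1: the child gets no kthread_struct and is not a kernel thread */
		};

		return kernel_clone(&args);
	}
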
kernel/umh.c

Lines changed: 3 additions & 3 deletions
@@ -132,7 +132,7 @@ static void call_usermodehelper_exec_sync(struct subprocess_info *sub_info)
 
 	/* If SIGCLD is ignored do_wait won't populate the status. */
 	kernel_sigaction(SIGCHLD, SIG_DFL);
-	pid = kernel_thread(call_usermodehelper_exec_async, sub_info, SIGCHLD);
+	pid = user_mode_thread(call_usermodehelper_exec_async, sub_info, SIGCHLD);
 	if (pid < 0)
 		sub_info->retval = pid;
 	else
@@ -171,8 +171,8 @@ static void call_usermodehelper_exec_work(struct work_struct *work)
 	 * want to pollute current->children, and we need a parent
 	 * that always ignores SIGCHLD to ensure auto-reaping.
 	 */
-	pid = kernel_thread(call_usermodehelper_exec_async, sub_info,
-			    CLONE_PARENT | SIGCHLD);
+	pid = user_mode_thread(call_usermodehelper_exec_async, sub_info,
+			       CLONE_PARENT | SIGCHLD);
 	if (pid < 0) {
 		sub_info->retval = pid;
 		umh_complete(sub_info);
