Skip to content

Commit f9010db

Browse files
mikechristietorvalds
authored andcommitted
fork, vhost: Use CLONE_THREAD to fix freezer/ps regression
When switching from kthreads to vhost_tasks two bugs were added: 1. The vhost worker tasks's now show up as processes so scripts doing ps or ps a would not incorrectly detect the vhost task as another process. 2. kthreads disabled freeze by setting PF_NOFREEZE, but vhost tasks's didn't disable or add support for them. To fix both bugs, this switches the vhost task to be thread in the process that does the VHOST_SET_OWNER ioctl, and has vhost_worker call get_signal to support SIGKILL/SIGSTOP and freeze signals. Note that SIGKILL/STOP support is required because CLONE_THREAD requires CLONE_SIGHAND which requires those 2 signals to be supported. This is a modified version of the patch written by Mike Christie <[email protected]> which was a modified version of patch originally written by Linus. Much of what depended upon PF_IO_WORKER now depends on PF_USER_WORKER. Including ignoring signals, setting up the register state, and having get_signal return instead of calling do_group_exit. Tidied up the vhost_task abstraction so that the definition of vhost_task only needs to be visible inside of vhost_task.c. Making it easier to review the code and tell what needs to be done where. As part of this the main loop has been moved from vhost_worker into vhost_task_fn. vhost_worker now returns true if work was done. The main loop has been updated to call get_signal which handles SIGSTOP, freezing, and collects the message that tells the thread to exit as part of process exit. This collection clears __fatal_signal_pending. This collection is not guaranteed to clear signal_pending() so clear that explicitly so the schedule() sleeps. For now the vhost thread continues to exist and run work until the last file descriptor is closed and the release function is called as part of freeing struct file. To avoid hangs in the coredump rendezvous and when killing threads in a multi-threaded exec. The coredump code and de_thread have been modified to ignore vhost threads. Remvoing the special case for exec appears to require teaching vhost_dev_flush how to directly complete transactions in case the vhost thread is no longer running. Removing the special case for coredump rendezvous requires either the above fix needed for exec or moving the coredump rendezvous into get_signal. Fixes: 6e890c5 ("vhost: use vhost_tasks for worker threads") Signed-off-by: Eric W. Biederman <[email protected]> Co-developed-by: Mike Christie <[email protected]> Signed-off-by: Mike Christie <[email protected]> Acked-by: Michael S. Tsirkin <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent 1874a42 commit f9010db

File tree

11 files changed

+89
-77
lines changed

11 files changed

+89
-77
lines changed

arch/x86/include/asm/fpu/sched.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ extern void fpu_flush_thread(void);
3939
static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu)
4040
{
4141
if (cpu_feature_enabled(X86_FEATURE_FPU) &&
42-
!(current->flags & (PF_KTHREAD | PF_IO_WORKER))) {
42+
!(current->flags & (PF_KTHREAD | PF_USER_WORKER))) {
4343
save_fpregs_to_fpstate(old_fpu);
4444
/*
4545
* The save operation preserved register state, so the

arch/x86/kernel/fpu/context.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ static inline void fpregs_restore_userregs(void)
5757
struct fpu *fpu = &current->thread.fpu;
5858
int cpu = smp_processor_id();
5959

60-
if (WARN_ON_ONCE(current->flags & (PF_KTHREAD | PF_IO_WORKER)))
60+
if (WARN_ON_ONCE(current->flags & (PF_KTHREAD | PF_USER_WORKER)))
6161
return;
6262

6363
if (!fpregs_state_valid(fpu, cpu)) {

arch/x86/kernel/fpu/core.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -426,7 +426,7 @@ void kernel_fpu_begin_mask(unsigned int kfpu_mask)
426426

427427
this_cpu_write(in_kernel_fpu, true);
428428

429-
if (!(current->flags & (PF_KTHREAD | PF_IO_WORKER)) &&
429+
if (!(current->flags & (PF_KTHREAD | PF_USER_WORKER)) &&
430430
!test_thread_flag(TIF_NEED_FPU_LOAD)) {
431431
set_thread_flag(TIF_NEED_FPU_LOAD);
432432
save_fpregs_to_fpstate(&current->thread.fpu);

drivers/vhost/vhost.c

Lines changed: 5 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
256256
* test_and_set_bit() implies a memory barrier.
257257
*/
258258
llist_add(&work->node, &dev->worker->work_list);
259-
wake_up_process(dev->worker->vtsk->task);
259+
vhost_task_wake(dev->worker->vtsk);
260260
}
261261
}
262262
EXPORT_SYMBOL_GPL(vhost_work_queue);
@@ -333,39 +333,27 @@ static void vhost_vq_reset(struct vhost_dev *dev,
333333
__vhost_vq_meta_reset(vq);
334334
}
335335

336-
static int vhost_worker(void *data)
336+
static bool vhost_worker(void *data)
337337
{
338338
struct vhost_worker *worker = data;
339339
struct vhost_work *work, *work_next;
340340
struct llist_node *node;
341341

342-
for (;;) {
343-
/* mb paired w/ kthread_stop */
344-
set_current_state(TASK_INTERRUPTIBLE);
345-
346-
if (vhost_task_should_stop(worker->vtsk)) {
347-
__set_current_state(TASK_RUNNING);
348-
break;
349-
}
350-
351-
node = llist_del_all(&worker->work_list);
352-
if (!node)
353-
schedule();
354-
342+
node = llist_del_all(&worker->work_list);
343+
if (node) {
355344
node = llist_reverse_order(node);
356345
/* make sure flag is seen after deletion */
357346
smp_wmb();
358347
llist_for_each_entry_safe(work, work_next, node, node) {
359348
clear_bit(VHOST_WORK_QUEUED, &work->flags);
360-
__set_current_state(TASK_RUNNING);
361349
kcov_remote_start_common(worker->kcov_handle);
362350
work->fn(work);
363351
kcov_remote_stop();
364352
cond_resched();
365353
}
366354
}
367355

368-
return 0;
356+
return !!node;
369357
}
370358

371359
static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)

fs/coredump.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,9 @@ static int zap_process(struct task_struct *start, int exit_code)
371371
if (t != current && !(t->flags & PF_POSTCOREDUMP)) {
372372
sigaddset(&t->pending.signal, SIGKILL);
373373
signal_wake_up(t, 1);
374-
nr++;
374+
/* The vhost_worker does not particpate in coredumps */
375+
if ((t->flags & (PF_USER_WORKER | PF_IO_WORKER)) != PF_USER_WORKER)
376+
nr++;
375377
}
376378
}
377379

include/linux/sched/task.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ struct kernel_clone_args {
2929
u32 io_thread:1;
3030
u32 user_worker:1;
3131
u32 no_files:1;
32-
u32 ignore_signals:1;
3332
unsigned long stack;
3433
unsigned long stack_size;
3534
unsigned long tls;

include/linux/sched/vhost_task.h

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,22 +2,13 @@
22
#ifndef _LINUX_VHOST_TASK_H
33
#define _LINUX_VHOST_TASK_H
44

5-
#include <linux/completion.h>
65

7-
struct task_struct;
6+
struct vhost_task;
87

9-
struct vhost_task {
10-
int (*fn)(void *data);
11-
void *data;
12-
struct completion exited;
13-
unsigned long flags;
14-
struct task_struct *task;
15-
};
16-
17-
struct vhost_task *vhost_task_create(int (*fn)(void *), void *arg,
8+
struct vhost_task *vhost_task_create(bool (*fn)(void *), void *arg,
189
const char *name);
1910
void vhost_task_start(struct vhost_task *vtsk);
2011
void vhost_task_stop(struct vhost_task *vtsk);
21-
bool vhost_task_should_stop(struct vhost_task *vtsk);
12+
void vhost_task_wake(struct vhost_task *vtsk);
2213

2314
#endif

kernel/exit.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,10 @@ static void coredump_task_exit(struct task_struct *tsk)
411411
tsk->flags |= PF_POSTCOREDUMP;
412412
core_state = tsk->signal->core_state;
413413
spin_unlock_irq(&tsk->sighand->siglock);
414-
if (core_state) {
414+
415+
/* The vhost_worker does not particpate in coredumps */
416+
if (core_state &&
417+
((tsk->flags & (PF_IO_WORKER | PF_USER_WORKER)) != PF_USER_WORKER)) {
415418
struct core_thread self;
416419

417420
self.task = current;

kernel/fork.c

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2336,16 +2336,16 @@ __latent_entropy struct task_struct *copy_process(
23362336
p->flags &= ~PF_KTHREAD;
23372337
if (args->kthread)
23382338
p->flags |= PF_KTHREAD;
2339-
if (args->user_worker)
2340-
p->flags |= PF_USER_WORKER;
2341-
if (args->io_thread) {
2339+
if (args->user_worker) {
23422340
/*
2343-
* Mark us an IO worker, and block any signal that isn't
2341+
* Mark us a user worker, and block any signal that isn't
23442342
* fatal or STOP
23452343
*/
2346-
p->flags |= PF_IO_WORKER;
2344+
p->flags |= PF_USER_WORKER;
23472345
siginitsetinv(&p->blocked, sigmask(SIGKILL)|sigmask(SIGSTOP));
23482346
}
2347+
if (args->io_thread)
2348+
p->flags |= PF_IO_WORKER;
23492349

23502350
if (args->name)
23512351
strscpy_pad(p->comm, args->name, sizeof(p->comm));
@@ -2517,9 +2517,6 @@ __latent_entropy struct task_struct *copy_process(
25172517
if (retval)
25182518
goto bad_fork_cleanup_io;
25192519

2520-
if (args->ignore_signals)
2521-
ignore_signals(p);
2522-
25232520
stackleak_task_init(p);
25242521

25252522
if (pid != &init_struct_pid) {

kernel/signal.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1368,7 +1368,9 @@ int zap_other_threads(struct task_struct *p)
13681368

13691369
while_each_thread(p, t) {
13701370
task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
1371-
count++;
1371+
/* Don't require de_thread to wait for the vhost_worker */
1372+
if ((t->flags & (PF_IO_WORKER | PF_USER_WORKER)) != PF_USER_WORKER)
1373+
count++;
13721374

13731375
/* Don't bother with already dead threads */
13741376
if (t->exit_state)
@@ -2861,11 +2863,11 @@ bool get_signal(struct ksignal *ksig)
28612863
}
28622864

28632865
/*
2864-
* PF_IO_WORKER threads will catch and exit on fatal signals
2866+
* PF_USER_WORKER threads will catch and exit on fatal signals
28652867
* themselves. They have cleanup that must be performed, so
28662868
* we cannot call do_exit() on their behalf.
28672869
*/
2868-
if (current->flags & PF_IO_WORKER)
2870+
if (current->flags & PF_USER_WORKER)
28692871
goto out;
28702872

28712873
/*

0 commit comments

Comments
 (0)