
Commit 4d13f43

Author: Frederic Weisbecker
kthread: Implement preferred affinity
Affining kthreads follows one of four existing patterns:

1) Per-CPU kthreads must stay affine to a single CPU and never execute relevant code on any other CPU. This is currently handled by smpboot code, which takes care of CPU-hotplug operations.

2) Kthreads that _have_ to be affine to a specific set of CPUs and can't run anywhere else. The affinity is set through kthread_bind_mask() and the subsystem takes care by itself of handling CPU-hotplug operations.

3) Kthreads that prefer to be affine to a specific NUMA node. That preferred affinity is applied by default when an actual node ID is passed on kthread creation, provided the kthread is not per-CPU and no call to kthread_bind_mask() has been issued before the first wake-up.

4) Similar to the previous point, except the kthread has a preferred affinity different from a node. It is set manually like for any other task, and CPU-hotplug is supposed to be handled by the relevant subsystem so that the task is properly re-affined whenever a given CPU from the preferred affinity comes up. Care must also be taken so that the preferred affinity doesn't cross housekeeping cpumask boundaries.

Provide a function to handle the last use case, mostly reusing the current node default affinity infrastructure. kthread_affine_preferred() is introduced, to be used just like kthread_bind_mask(), right after kthread creation and before the first wake-up. The kthread is then affined right away to the cpumask passed through the API if it has online housekeeping CPUs. Otherwise it will be affined to all online housekeeping CPUs as a last resort.

As with node affinity, it is aware of CPU hotplug events such that:

* When a housekeeping CPU goes up that is part of the preferred affinity of a given kthread, the related task is re-affined to that preferred affinity if it was previously running on the default last-resort online housekeeping set.

* When a housekeeping CPU goes down while it was part of the preferred affinity of a kthread, the running task is migrated (or the sleeping task is woken up) automatically by the scheduler to other housekeepers within the preferred affinity or, as a last resort, to all housekeepers from other nodes.

Acked-by: Vlastimil Babka <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
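For illustration only, a minimal usage sketch of the new API from a hypothetical caller (not part of this commit): the thread function, thread name and the dev_local_cpus mask are made-up placeholders. The sequence mirrors kthread_bind_mask(): kthread_affine_preferred() is called after kthread_create() and before the first wake_up_process().

#include <linux/kthread.h>
#include <linux/cpumask.h>
#include <linux/sched.h>
#include <linux/err.h>

/* Hypothetical thread function, only here to make the sketch complete. */
static int my_threadfn(void *data)
{
	while (!kthread_should_stop())
		schedule_timeout_interruptible(HZ);
	return 0;
}

/* Hypothetical helper: dev_local_cpus is an assumed subsystem-provided mask. */
static int start_my_kthread(const struct cpumask *dev_local_cpus)
{
	struct task_struct *task;

	task = kthread_create(my_threadfn, NULL, "my_kthread");
	if (IS_ERR(task))
		return PTR_ERR(task);

	/* Like kthread_bind_mask(): after creation, before the first wake-up. */
	kthread_affine_preferred(task, dev_local_cpus);

	wake_up_process(task);
	return 0;
}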
1 parent c6a566f commit 4d13f43

2 files changed: +62 additions, -7 deletions

include/linux/kthread.h

Lines changed: 1 addition & 0 deletions
@@ -85,6 +85,7 @@ kthread_run_on_cpu(int (*threadfn)(void *data), void *data,
 void free_kthread_struct(struct task_struct *k);
 void kthread_bind(struct task_struct *k, unsigned int cpu);
 void kthread_bind_mask(struct task_struct *k, const struct cpumask *mask);
+int kthread_affine_preferred(struct task_struct *p, const struct cpumask *mask);
 int kthread_stop(struct task_struct *k);
 int kthread_stop_put(struct task_struct *k);
 bool kthread_should_stop(void);

kernel/kthread.c

Lines changed: 61 additions & 7 deletions
@@ -70,6 +70,7 @@ struct kthread {
 	char *full_name;
 	struct task_struct *task;
 	struct list_head hotplug_node;
+	struct cpumask *preferred_affinity;
 };
 
 enum KTHREAD_BITS {
@@ -327,6 +328,11 @@ void __noreturn kthread_exit(long result)
 		mutex_lock(&kthreads_hotplug_lock);
 		list_del(&kthread->hotplug_node);
 		mutex_unlock(&kthreads_hotplug_lock);
+
+		if (kthread->preferred_affinity) {
+			kfree(kthread->preferred_affinity);
+			kthread->preferred_affinity = NULL;
+		}
 	}
 	do_exit(0);
 }
@@ -355,9 +361,17 @@ EXPORT_SYMBOL(kthread_complete_and_exit);
 
 static void kthread_fetch_affinity(struct kthread *kthread, struct cpumask *cpumask)
 {
-	cpumask_and(cpumask, cpumask_of_node(kthread->node),
-		    housekeeping_cpumask(HK_TYPE_KTHREAD));
+	const struct cpumask *pref;
 
+	if (kthread->preferred_affinity) {
+		pref = kthread->preferred_affinity;
+	} else {
+		if (WARN_ON_ONCE(kthread->node == NUMA_NO_NODE))
+			return;
+		pref = cpumask_of_node(kthread->node);
+	}
+
+	cpumask_and(cpumask, pref, housekeeping_cpumask(HK_TYPE_KTHREAD));
 	if (cpumask_empty(cpumask))
 		cpumask_copy(cpumask, housekeeping_cpumask(HK_TYPE_KTHREAD));
 }
@@ -440,7 +454,7 @@ static int kthread(void *_create)
 
 	self->started = 1;
 
-	if (!(current->flags & PF_NO_SETAFFINITY))
+	if (!(current->flags & PF_NO_SETAFFINITY) && !self->preferred_affinity)
 		kthread_affine_node();
 
 	ret = -EINTR;
@@ -839,12 +853,53 @@ int kthreadd(void *unused)
 	return 0;
 }
 
+int kthread_affine_preferred(struct task_struct *p, const struct cpumask *mask)
+{
+	struct kthread *kthread = to_kthread(p);
+	cpumask_var_t affinity;
+	unsigned long flags;
+	int ret;
+
+	if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE) || kthread->started) {
+		WARN_ON(1);
+		return -EINVAL;
+	}
+
+	WARN_ON_ONCE(kthread->preferred_affinity);
+
+	if (!zalloc_cpumask_var(&affinity, GFP_KERNEL))
+		return -ENOMEM;
+
+	kthread->preferred_affinity = kzalloc(sizeof(struct cpumask), GFP_KERNEL);
+	if (!kthread->preferred_affinity) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	mutex_lock(&kthreads_hotplug_lock);
+	cpumask_copy(kthread->preferred_affinity, mask);
+	WARN_ON_ONCE(!list_empty(&kthread->hotplug_node));
+	list_add_tail(&kthread->hotplug_node, &kthreads_hotplug);
+	kthread_fetch_affinity(kthread, affinity);
+
+	/* It's safe because the task is inactive. */
+	raw_spin_lock_irqsave(&p->pi_lock, flags);
+	do_set_cpus_allowed(p, affinity);
+	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+
+	mutex_unlock(&kthreads_hotplug_lock);
+out:
+	free_cpumask_var(affinity);
+
+	return 0;
+}
+
 /*
  * Re-affine kthreads according to their preferences
  * and the newly online CPU. The CPU down part is handled
  * by select_fallback_rq() which default re-affines to
- * housekeepers in case the preferred affinity doesn't
- * apply anymore.
+ * housekeepers from other nodes in case the preferred
+ * affinity doesn't apply anymore.
  */
 static int kthreads_online_cpu(unsigned int cpu)
 {
@@ -864,8 +919,7 @@ static int kthreads_online_cpu(unsigned int cpu)
 
 	list_for_each_entry(k, &kthreads_hotplug, hotplug_node) {
 		if (WARN_ON_ONCE((k->task->flags & PF_NO_SETAFFINITY) ||
-				 kthread_is_per_cpu(k->task) ||
-				 k->node == NUMA_NO_NODE)) {
+				 kthread_is_per_cpu(k->task))) {
 			ret = -EINVAL;
 			continue;
 		}
