Commit 07ec77a

willdeacon authored and Peter Zijlstra committed
sched: Allow task CPU affinity to be restricted on asymmetric systems
Asymmetric systems may not offer the same level of userspace ISA support across all CPUs, meaning that some applications cannot be executed by some CPUs. As a concrete example, upcoming arm64 big.LITTLE designs do not feature support for 32-bit applications on both clusters.

Although userspace can carefully manage the affinity masks for such tasks, one place where it is particularly problematic is execve() because the CPU on which the execve() is occurring may be incompatible with the new application image. In such a situation, it is desirable to restrict the affinity mask of the task and ensure that the new image is entered on a compatible CPU. From userspace's point of view, this looks the same as if the incompatible CPUs have been hotplugged off in the task's affinity mask. Similarly, if a subsequent execve() reverts to a compatible image, then the old affinity is restored if it is still valid.

In preparation for restricting the affinity mask for compat tasks on arm64 systems without uniform support for 32-bit applications, introduce {force,relax}_compatible_cpus_allowed_ptr(), which respectively restrict and restore the affinity mask for a task based on the compatible CPUs.

Signed-off-by: Will Deacon <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Valentin Schneider <[email protected]>
Reviewed-by: Quentin Perret <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
1 parent db3b02a commit 07ec77a
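To see how the new interface is meant to be consumed, here is a hedged sketch of an exec-time fixup (not part of this commit): the hook name and the image_runs_on_all_cpus() predicate are illustrative assumptions standing in for the architecture wiring that this commit only prepares for.

/*
 * Illustrative sketch only: the hook name and the predicate below are
 * assumptions for the example; this commit merely introduces the two
 * scheduler helpers it calls.
 */
#include <linux/sched.h>

static void example_exec_affinity_fixup(struct task_struct *p,
					bool image_runs_on_all_cpus)
{
	if (!image_runs_on_all_cpus) {
		/*
		 * The incoming image can only run on a subset of CPUs:
		 * clamp the affinity to task_cpu_possible_mask(p) and stash
		 * the user-requested mask in p->user_cpus_ptr.
		 */
		force_compatible_cpus_allowed_ptr(p);
	} else {
		/*
		 * A compatible image is being entered: try to restore the
		 * affinity that was in place before any earlier restriction.
		 */
		relax_compatible_cpus_allowed_ptr(p);
	}
}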

File tree

3 files changed: +183, -18 lines changed
  include/linux/sched.h
  kernel/sched/core.c
  kernel/sched/sched.h

include/linux/sched.h

Lines changed: 2 additions & 0 deletions
@@ -1709,6 +1709,8 @@ extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new
 extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask);
 extern int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node);
 extern void release_user_cpus_ptr(struct task_struct *p);
+extern void force_compatible_cpus_allowed_ptr(struct task_struct *p);
+extern void relax_compatible_cpus_allowed_ptr(struct task_struct *p);
 #else
 static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 {

kernel/sched/core.c

Lines changed: 180 additions & 18 deletions
@@ -2494,10 +2494,18 @@ int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src,
 	return 0;
 }
 
+static inline struct cpumask *clear_user_cpus_ptr(struct task_struct *p)
+{
+	struct cpumask *user_mask = NULL;
+
+	swap(p->user_cpus_ptr, user_mask);
+
+	return user_mask;
+}
+
 void release_user_cpus_ptr(struct task_struct *p)
 {
-	kfree(p->user_cpus_ptr);
-	p->user_cpus_ptr = NULL;
+	kfree(clear_user_cpus_ptr(p));
 }
 
 /*
@@ -2717,27 +2725,23 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag
 }
 
 /*
- * Change a given task's CPU affinity. Migrate the thread to a
- * proper CPU and schedule it away if the CPU it's executing on
- * is removed from the allowed bitmask.
- *
- * NOTE: the caller must have a valid reference to the task, the
- * task must not exit() & deallocate itself prematurely. The
- * call is not atomic; no spinlocks may be held.
+ * Called with both p->pi_lock and rq->lock held; drops both before returning.
  */
-static int __set_cpus_allowed_ptr(struct task_struct *p,
-				  const struct cpumask *new_mask,
-				  u32 flags)
+static int __set_cpus_allowed_ptr_locked(struct task_struct *p,
+					 const struct cpumask *new_mask,
+					 u32 flags,
+					 struct rq *rq,
+					 struct rq_flags *rf)
+	__releases(rq->lock)
+	__releases(p->pi_lock)
 {
 	const struct cpumask *cpu_allowed_mask = task_cpu_possible_mask(p);
 	const struct cpumask *cpu_valid_mask = cpu_active_mask;
 	bool kthread = p->flags & PF_KTHREAD;
+	struct cpumask *user_mask = NULL;
 	unsigned int dest_cpu;
-	struct rq_flags rf;
-	struct rq *rq;
 	int ret = 0;
 
-	rq = task_rq_lock(p, &rf);
 	update_rq_clock(rq);
 
 	if (kthread || is_migration_disabled(p)) {
@@ -2793,20 +2797,178 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
 
 	__do_set_cpus_allowed(p, new_mask, flags);
 
-	return affine_move_task(rq, p, &rf, dest_cpu, flags);
+	if (flags & SCA_USER)
+		user_mask = clear_user_cpus_ptr(p);
+
+	ret = affine_move_task(rq, p, rf, dest_cpu, flags);
+
+	kfree(user_mask);
+
+	return ret;
 
 out:
-	task_rq_unlock(rq, p, &rf);
+	task_rq_unlock(rq, p, rf);
 
 	return ret;
 }
 
+/*
+ * Change a given task's CPU affinity. Migrate the thread to a
+ * proper CPU and schedule it away if the CPU it's executing on
+ * is removed from the allowed bitmask.
+ *
+ * NOTE: the caller must have a valid reference to the task, the
+ * task must not exit() & deallocate itself prematurely. The
+ * call is not atomic; no spinlocks may be held.
+ */
+static int __set_cpus_allowed_ptr(struct task_struct *p,
+				  const struct cpumask *new_mask, u32 flags)
+{
+	struct rq_flags rf;
+	struct rq *rq;
+
+	rq = task_rq_lock(p, &rf);
+	return __set_cpus_allowed_ptr_locked(p, new_mask, flags, rq, &rf);
+}
+
 int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 {
 	return __set_cpus_allowed_ptr(p, new_mask, 0);
 }
 EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
 
+/*
+ * Change a given task's CPU affinity to the intersection of its current
+ * affinity mask and @subset_mask, writing the resulting mask to @new_mask
+ * and pointing @p->user_cpus_ptr to a copy of the old mask.
+ * If the resulting mask is empty, leave the affinity unchanged and return
+ * -EINVAL.
+ */
+static int restrict_cpus_allowed_ptr(struct task_struct *p,
+				     struct cpumask *new_mask,
+				     const struct cpumask *subset_mask)
+{
+	struct cpumask *user_mask = NULL;
+	struct rq_flags rf;
+	struct rq *rq;
+	int err;
+
+	if (!p->user_cpus_ptr) {
+		user_mask = kmalloc(cpumask_size(), GFP_KERNEL);
+		if (!user_mask)
+			return -ENOMEM;
+	}
+
+	rq = task_rq_lock(p, &rf);
+
+	/*
+	 * Forcefully restricting the affinity of a deadline task is
+	 * likely to cause problems, so fail and noisily override the
+	 * mask entirely.
+	 */
+	if (task_has_dl_policy(p) && dl_bandwidth_enabled()) {
+		err = -EPERM;
+		goto err_unlock;
+	}
+
+	if (!cpumask_and(new_mask, &p->cpus_mask, subset_mask)) {
+		err = -EINVAL;
+		goto err_unlock;
+	}
+
+	/*
+	 * We're about to butcher the task affinity, so keep track of what
+	 * the user asked for in case we're able to restore it later on.
+	 */
+	if (user_mask) {
+		cpumask_copy(user_mask, p->cpus_ptr);
+		p->user_cpus_ptr = user_mask;
+	}
+
+	return __set_cpus_allowed_ptr_locked(p, new_mask, 0, rq, &rf);
+
+err_unlock:
+	task_rq_unlock(rq, p, &rf);
+	kfree(user_mask);
+	return err;
+}
+
+/*
+ * Restrict the CPU affinity of task @p so that it is a subset of
+ * task_cpu_possible_mask() and point @p->user_cpu_ptr to a copy of the
+ * old affinity mask. If the resulting mask is empty, we warn and walk
+ * up the cpuset hierarchy until we find a suitable mask.
+ */
+void force_compatible_cpus_allowed_ptr(struct task_struct *p)
+{
+	cpumask_var_t new_mask;
+	const struct cpumask *override_mask = task_cpu_possible_mask(p);
+
+	alloc_cpumask_var(&new_mask, GFP_KERNEL);
+
+	/*
+	 * __migrate_task() can fail silently in the face of concurrent
+	 * offlining of the chosen destination CPU, so take the hotplug
+	 * lock to ensure that the migration succeeds.
+	 */
+	cpus_read_lock();
+	if (!cpumask_available(new_mask))
+		goto out_set_mask;
+
+	if (!restrict_cpus_allowed_ptr(p, new_mask, override_mask))
+		goto out_free_mask;
+
+	/*
+	 * We failed to find a valid subset of the affinity mask for the
+	 * task, so override it based on its cpuset hierarchy.
+	 */
+	cpuset_cpus_allowed(p, new_mask);
+	override_mask = new_mask;
+
+out_set_mask:
+	if (printk_ratelimit()) {
+		printk_deferred("Overriding affinity for process %d (%s) to CPUs %*pbl\n",
+				task_pid_nr(p), p->comm,
+				cpumask_pr_args(override_mask));
+	}
+
+	WARN_ON(set_cpus_allowed_ptr(p, override_mask));
+out_free_mask:
+	cpus_read_unlock();
+	free_cpumask_var(new_mask);
+}
+
+static int
+__sched_setaffinity(struct task_struct *p, const struct cpumask *mask);
+
+/*
+ * Restore the affinity of a task @p which was previously restricted by a
+ * call to force_compatible_cpus_allowed_ptr(). This will clear (and free)
+ * @p->user_cpus_ptr.
+ *
+ * It is the caller's responsibility to serialise this with any calls to
+ * force_compatible_cpus_allowed_ptr(@p).
+ */
+void relax_compatible_cpus_allowed_ptr(struct task_struct *p)
+{
+	struct cpumask *user_mask = p->user_cpus_ptr;
+	unsigned long flags;
+
+	/*
+	 * Try to restore the old affinity mask. If this fails, then
+	 * we free the mask explicitly to avoid it being inherited across
+	 * a subsequent fork().
+	 */
+	if (!user_mask || !__sched_setaffinity(p, user_mask))
+		return;
+
+	raw_spin_lock_irqsave(&p->pi_lock, flags);
+	user_mask = clear_user_cpus_ptr(p);
+	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+
+	kfree(user_mask);
+}
+
 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 {
 #ifdef CONFIG_SCHED_DEBUG
@@ -7629,7 +7791,7 @@ __sched_setaffinity(struct task_struct *p, const struct cpumask *mask)
 	}
 #endif
 again:
-	retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK);
+	retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK | SCA_USER);
 	if (retval)
 		goto out_free_new_mask;
 
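One consequence of the SCA_USER flag in the __sched_setaffinity() hunk above is that an explicit affinity request from userspace also drops any mask previously saved by force_compatible_cpus_allowed_ptr(), so a later relax_compatible_cpus_allowed_ptr() finds p->user_cpus_ptr cleared and leaves the user's explicit choice alone. A hedged userspace illustration (not part of this commit):

/*
 * Illustrative userspace snippet (not part of this commit): an explicit
 * sched_setaffinity() request. With the SCA_USER change above, such a
 * request also discards any affinity mask the kernel saved earlier via
 * force_compatible_cpus_allowed_ptr().
 */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(0, &set);	/* ask to run only on CPU 0 */

	if (sched_setaffinity(0, sizeof(set), &set)) {
		perror("sched_setaffinity");
		return 1;
	}
	return 0;
}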

kernel/sched/sched.h

Lines changed: 1 addition & 0 deletions
@@ -2244,6 +2244,7 @@ extern struct task_struct *pick_next_task_idle(struct rq *rq);
 #define SCA_CHECK		0x01
 #define SCA_MIGRATE_DISABLE	0x02
 #define SCA_MIGRATE_ENABLE	0x04
+#define SCA_USER		0x08
 
 #ifdef CONFIG_SMP
 
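From userspace, the effect described in the commit message can be observed with an ordinary affinity query. The sketch below (not part of this commit) simply prints the calling task's allowed CPUs; run from a 32-bit image on an affected big.LITTLE system it would be expected to report only the 32-bit-capable cluster, as if the other CPUs had been hotplugged off.

/*
 * Illustrative userspace check (not part of this commit): print the CPUs
 * currently allowed for the calling task via sched_getaffinity().
 */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	cpu_set_t set;

	if (sched_getaffinity(0, sizeof(set), &set)) {
		perror("sched_getaffinity");
		return 1;
	}

	printf("allowed CPUs:");
	for (int cpu = 0; cpu < CPU_SETSIZE; cpu++) {
		if (CPU_ISSET(cpu, &set))
			printf(" %d", cpu);
	}
	printf("\n");
	return 0;
}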