Skip to content

Commit 5d3c0db

Browse files
committed
Merge tag 'sched-core-2021-08-30' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar: - The biggest change in this cycle is scheduler support for asymmetric scheduling affinity, to support the execution of legacy 32-bit tasks on AArch32 systems that also have 64-bit-only CPUs. Architectures can fill in this functionality by defining their own task_cpu_possible_mask(p). When this is done, the scheduler will make sure the task will only be scheduled on CPUs that support it. (The actual arm64 specific changes are not part of this tree.) For other architectures there will be no change in functionality. - Add cgroup SCHED_IDLE support - Increase node-distance flexibility & delay determining it until a CPU is brought online. (This enables platforms where node distance isn't final until the CPU is online.) - Deadline scheduler enhancements & fixes - Misc fixes & cleanups. * tag 'sched-core-2021-08-30' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (27 commits) eventfd: Make signal recursion protection a task bit sched/fair: Mark tg_is_idle() an inline in the !CONFIG_FAIR_GROUP_SCHED case sched: Introduce dl_task_check_affinity() to check proposed affinity sched: Allow task CPU affinity to be restricted on asymmetric systems sched: Split the guts of sched_setaffinity() into a helper function sched: Introduce task_struct::user_cpus_ptr to track requested affinity sched: Reject CPU affinity changes based on task_cpu_possible_mask() cpuset: Cleanup cpuset_cpus_allowed_fallback() use in select_fallback_rq() cpuset: Honour task_cpu_possible_mask() in guarantee_online_cpus() cpuset: Don't use the cpu_possible_mask as a last resort for cgroup v1 sched: Introduce task_cpu_possible_mask() to limit fallback rq selection sched: Cgroup SCHED_IDLE support sched/topology: Skip updating masks for non-online nodes sched: Replace deprecated CPU-hotplug functions. 
sched: Skip priority checks with SCHED_FLAG_KEEP_PARAMS sched: Fix UCLAMP_FLAG_IDLE setting sched/deadline: Fix missing clock update in migrate_task_rq_dl() sched/fair: Avoid a second scan of target in select_idle_cpu sched/fair: Use prev instead of new target as recent_used_cpu sched: Don't report SCHED_FLAG_SUGOV in sched_getattr() ...
2 parents 230bda0 + b542e38 commit 5d3c0db

File tree

17 files changed

+729
-185
lines changed

17 files changed

+729
-185
lines changed

fs/aio.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1695,7 +1695,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
16951695
list_del(&iocb->ki_list);
16961696
iocb->ki_res.res = mangle_poll(mask);
16971697
req->done = true;
1698-
if (iocb->ki_eventfd && eventfd_signal_count()) {
1698+
if (iocb->ki_eventfd && eventfd_signal_allowed()) {
16991699
iocb = NULL;
17001700
INIT_WORK(&req->work, aio_poll_put_work);
17011701
schedule_work(&req->work);

fs/eventfd.c

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,6 @@
2525
#include <linux/idr.h>
2626
#include <linux/uio.h>
2727

28-
DEFINE_PER_CPU(int, eventfd_wake_count);
29-
3028
static DEFINE_IDA(eventfd_ida);
3129

3230
struct eventfd_ctx {
@@ -67,21 +65,21 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
6765
* Deadlock or stack overflow issues can happen if we recurse here
6866
* through waitqueue wakeup handlers. If the caller uses potentially
6967
* nested waitqueues with custom wakeup handlers, then it should
70-
* check eventfd_signal_count() before calling this function. If
71-
* it returns true, the eventfd_signal() call should be deferred to a
68+
* check eventfd_signal_allowed() before calling this function. If
69+
* it returns false, the eventfd_signal() call should be deferred to a
7270
* safe context.
7371
*/
74-
if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count)))
72+
if (WARN_ON_ONCE(current->in_eventfd_signal))
7573
return 0;
7674

7775
spin_lock_irqsave(&ctx->wqh.lock, flags);
78-
this_cpu_inc(eventfd_wake_count);
76+
current->in_eventfd_signal = 1;
7977
if (ULLONG_MAX - ctx->count < n)
8078
n = ULLONG_MAX - ctx->count;
8179
ctx->count += n;
8280
if (waitqueue_active(&ctx->wqh))
8381
wake_up_locked_poll(&ctx->wqh, EPOLLIN);
84-
this_cpu_dec(eventfd_wake_count);
82+
current->in_eventfd_signal = 0;
8583
spin_unlock_irqrestore(&ctx->wqh.lock, flags);
8684

8785
return n;

include/linux/cpuset.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <linux/cpumask.h>
1616
#include <linux/nodemask.h>
1717
#include <linux/mm.h>
18+
#include <linux/mmu_context.h>
1819
#include <linux/jump_label.h>
1920

2021
#ifdef CONFIG_CPUSETS
@@ -58,7 +59,7 @@ extern void cpuset_wait_for_hotplug(void);
5859
extern void cpuset_read_lock(void);
5960
extern void cpuset_read_unlock(void);
6061
extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
61-
extern void cpuset_cpus_allowed_fallback(struct task_struct *p);
62+
extern bool cpuset_cpus_allowed_fallback(struct task_struct *p);
6263
extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
6364
#define cpuset_current_mems_allowed (current->mems_allowed)
6465
void cpuset_init_current_mems_allowed(void);
@@ -184,11 +185,12 @@ static inline void cpuset_read_unlock(void) { }
184185
static inline void cpuset_cpus_allowed(struct task_struct *p,
185186
struct cpumask *mask)
186187
{
187-
cpumask_copy(mask, cpu_possible_mask);
188+
cpumask_copy(mask, task_cpu_possible_mask(p));
188189
}
189190

190-
static inline void cpuset_cpus_allowed_fallback(struct task_struct *p)
191+
static inline bool cpuset_cpus_allowed_fallback(struct task_struct *p)
191192
{
193+
return false;
192194
}
193195

194196
static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)

include/linux/eventfd.h

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include <linux/err.h>
1515
#include <linux/percpu-defs.h>
1616
#include <linux/percpu.h>
17+
#include <linux/sched.h>
1718

1819
/*
1920
* CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining
@@ -43,11 +44,9 @@ int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *w
4344
__u64 *cnt);
4445
void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt);
4546

46-
DECLARE_PER_CPU(int, eventfd_wake_count);
47-
48-
static inline bool eventfd_signal_count(void)
47+
static inline bool eventfd_signal_allowed(void)
4948
{
50-
return this_cpu_read(eventfd_wake_count);
49+
return !current->in_eventfd_signal;
5150
}
5251

5352
#else /* CONFIG_EVENTFD */
@@ -78,9 +77,9 @@ static inline int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx,
7877
return -ENOSYS;
7978
}
8079

81-
static inline bool eventfd_signal_count(void)
80+
static inline bool eventfd_signal_allowed(void)
8281
{
83-
return false;
82+
return true;
8483
}
8584

8685
static inline void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)

include/linux/mmu_context.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,18 @@
1414
static inline void leave_mm(int cpu) { }
1515
#endif
1616

17+
/*
18+
* CPUs that are capable of running user task @p. Must contain at least one
19+
* active CPU. It is assumed that the kernel can run on all CPUs, so calling
20+
* this for a kernel thread is pointless.
21+
*
22+
* By default, we assume a sane, homogeneous system.
23+
*/
24+
#ifndef task_cpu_possible_mask
25+
# define task_cpu_possible_mask(p) cpu_possible_mask
26+
# define task_cpu_possible(cpu, p) true
27+
#else
28+
# define task_cpu_possible(cpu, p) cpumask_test_cpu((cpu), task_cpu_possible_mask(p))
29+
#endif
30+
1731
#endif

include/linux/sched.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -748,6 +748,7 @@ struct task_struct {
748748
unsigned int policy;
749749
int nr_cpus_allowed;
750750
const cpumask_t *cpus_ptr;
751+
cpumask_t *user_cpus_ptr;
751752
cpumask_t cpus_mask;
752753
void *migration_pending;
753754
#ifdef CONFIG_SMP
@@ -863,6 +864,10 @@ struct task_struct {
863864
/* Used by page_owner=on to detect recursion in page tracking. */
864865
unsigned in_page_owner:1;
865866
#endif
867+
#ifdef CONFIG_EVENTFD
868+
/* Recursion prevention for eventfd_signal() */
869+
unsigned in_eventfd_signal:1;
870+
#endif
866871

867872
unsigned long atomic_flags; /* Flags requiring atomic access. */
868873

@@ -1705,6 +1710,11 @@ extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_
17051710
#ifdef CONFIG_SMP
17061711
extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask);
17071712
extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask);
1713+
extern int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node);
1714+
extern void release_user_cpus_ptr(struct task_struct *p);
1715+
extern int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask);
1716+
extern void force_compatible_cpus_allowed_ptr(struct task_struct *p);
1717+
extern void relax_compatible_cpus_allowed_ptr(struct task_struct *p);
17081718
#else
17091719
static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
17101720
{
@@ -1715,6 +1725,21 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpuma
17151725
return -EINVAL;
17161726
return 0;
17171727
}
1728+
static inline int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node)
1729+
{
1730+
if (src->user_cpus_ptr)
1731+
return -EINVAL;
1732+
return 0;
1733+
}
1734+
static inline void release_user_cpus_ptr(struct task_struct *p)
1735+
{
1736+
WARN_ON(p->user_cpus_ptr);
1737+
}
1738+
1739+
static inline int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask)
1740+
{
1741+
return 0;
1742+
}
17181743
#endif
17191744

17201745
extern int yield_to(struct task_struct *p, bool preempt);

include/linux/sched/sysctl.h

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -28,30 +28,12 @@ enum { sysctl_hung_task_timeout_secs = 0 };
2828

2929
extern unsigned int sysctl_sched_child_runs_first;
3030

31-
extern unsigned int sysctl_sched_latency;
32-
extern unsigned int sysctl_sched_min_granularity;
33-
extern unsigned int sysctl_sched_wakeup_granularity;
34-
3531
enum sched_tunable_scaling {
3632
SCHED_TUNABLESCALING_NONE,
3733
SCHED_TUNABLESCALING_LOG,
3834
SCHED_TUNABLESCALING_LINEAR,
3935
SCHED_TUNABLESCALING_END,
4036
};
41-
extern unsigned int sysctl_sched_tunable_scaling;
42-
43-
extern unsigned int sysctl_numa_balancing_scan_delay;
44-
extern unsigned int sysctl_numa_balancing_scan_period_min;
45-
extern unsigned int sysctl_numa_balancing_scan_period_max;
46-
extern unsigned int sysctl_numa_balancing_scan_size;
47-
48-
#ifdef CONFIG_SCHED_DEBUG
49-
extern __read_mostly unsigned int sysctl_sched_migration_cost;
50-
extern __read_mostly unsigned int sysctl_sched_nr_migrate;
51-
52-
extern int sysctl_resched_latency_warn_ms;
53-
extern int sysctl_resched_latency_warn_once;
54-
#endif
5537

5638
/*
5739
* control realtime throttling:

include/linux/wait.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ struct task_struct;
5656

5757
#define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \
5858
.lock = __SPIN_LOCK_UNLOCKED(name.lock), \
59-
.head = { &(name).head, &(name).head } }
59+
.head = LIST_HEAD_INIT(name.head) }
6060

6161
#define DECLARE_WAIT_QUEUE_HEAD(name) \
6262
struct wait_queue_head name = __WAIT_QUEUE_HEAD_INITIALIZER(name)

init/init_task.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ struct task_struct init_task
8080
.normal_prio = MAX_PRIO - 20,
8181
.policy = SCHED_NORMAL,
8282
.cpus_ptr = &init_task.cpus_mask,
83+
.user_cpus_ptr = NULL,
8384
.cpus_mask = CPU_MASK_ALL,
8485
.nr_cpus_allowed= NR_CPUS,
8586
.mm = NULL,

kernel/cgroup/cpuset.c

Lines changed: 39 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -372,18 +372,29 @@ static inline bool is_in_v2_mode(void)
372372
}
373373

374374
/*
375-
* Return in pmask the portion of a cpusets's cpus_allowed that
376-
* are online. If none are online, walk up the cpuset hierarchy
377-
* until we find one that does have some online cpus.
375+
* Return in pmask the portion of a task's cpusets's cpus_allowed that
376+
* are online and are capable of running the task. If none are found,
377+
* walk up the cpuset hierarchy until we find one that does have some
378+
* appropriate cpus.
378379
*
379380
* One way or another, we guarantee to return some non-empty subset
380381
* of cpu_online_mask.
381382
*
382383
* Call with callback_lock or cpuset_mutex held.
383384
*/
384-
static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
385+
static void guarantee_online_cpus(struct task_struct *tsk,
386+
struct cpumask *pmask)
385387
{
386-
while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask)) {
388+
const struct cpumask *possible_mask = task_cpu_possible_mask(tsk);
389+
struct cpuset *cs;
390+
391+
if (WARN_ON(!cpumask_and(pmask, possible_mask, cpu_online_mask)))
392+
cpumask_copy(pmask, cpu_online_mask);
393+
394+
rcu_read_lock();
395+
cs = task_cs(tsk);
396+
397+
while (!cpumask_intersects(cs->effective_cpus, pmask)) {
387398
cs = parent_cs(cs);
388399
if (unlikely(!cs)) {
389400
/*
@@ -393,11 +404,13 @@ static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
393404
* cpuset's effective_cpus is on its way to be
394405
* identical to cpu_online_mask.
395406
*/
396-
cpumask_copy(pmask, cpu_online_mask);
397-
return;
407+
goto out_unlock;
398408
}
399409
}
400-
cpumask_and(pmask, cs->effective_cpus, cpu_online_mask);
410+
cpumask_and(pmask, pmask, cs->effective_cpus);
411+
412+
out_unlock:
413+
rcu_read_unlock();
401414
}
402415

403416
/*
@@ -2199,15 +2212,13 @@ static void cpuset_attach(struct cgroup_taskset *tset)
21992212

22002213
percpu_down_write(&cpuset_rwsem);
22012214

2202-
/* prepare for attach */
2203-
if (cs == &top_cpuset)
2204-
cpumask_copy(cpus_attach, cpu_possible_mask);
2205-
else
2206-
guarantee_online_cpus(cs, cpus_attach);
2207-
22082215
guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
22092216

22102217
cgroup_taskset_for_each(task, css, tset) {
2218+
if (cs != &top_cpuset)
2219+
guarantee_online_cpus(task, cpus_attach);
2220+
else
2221+
cpumask_copy(cpus_attach, task_cpu_possible_mask(task));
22112222
/*
22122223
* can_attach beforehand should guarantee that this doesn't
22132224
* fail. TODO: have a better way to handle failure here
@@ -3302,9 +3313,7 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
33023313
unsigned long flags;
33033314

33043315
spin_lock_irqsave(&callback_lock, flags);
3305-
rcu_read_lock();
3306-
guarantee_online_cpus(task_cs(tsk), pmask);
3307-
rcu_read_unlock();
3316+
guarantee_online_cpus(tsk, pmask);
33083317
spin_unlock_irqrestore(&callback_lock, flags);
33093318
}
33103319

@@ -3318,13 +3327,22 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
33183327
* which will not contain a sane cpumask during cases such as cpu hotplugging.
33193328
* This is the absolute last resort for the scheduler and it is only used if
33203329
* _every_ other avenue has been traveled.
3330+
*
3331+
* Returns true if the affinity of @tsk was changed, false otherwise.
33213332
**/
33223333

3323-
void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
3334+
bool cpuset_cpus_allowed_fallback(struct task_struct *tsk)
33243335
{
3336+
const struct cpumask *possible_mask = task_cpu_possible_mask(tsk);
3337+
const struct cpumask *cs_mask;
3338+
bool changed = false;
3339+
33253340
rcu_read_lock();
3326-
do_set_cpus_allowed(tsk, is_in_v2_mode() ?
3327-
task_cs(tsk)->cpus_allowed : cpu_possible_mask);
3341+
cs_mask = task_cs(tsk)->cpus_allowed;
3342+
if (is_in_v2_mode() && cpumask_subset(cs_mask, possible_mask)) {
3343+
do_set_cpus_allowed(tsk, cs_mask);
3344+
changed = true;
3345+
}
33283346
rcu_read_unlock();
33293347

33303348
/*
@@ -3344,6 +3362,7 @@ void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
33443362
* select_fallback_rq() will fix things ups and set cpu_possible_mask
33453363
* if required.
33463364
*/
3365+
return changed;
33473366
}
33483367

33493368
void __init cpuset_init_current_mems_allowed(void)

0 commit comments

Comments
 (0)