
Commit 3144c83

Author: Peter Zijlstra (committed)

Merge branch 'tip/sched/urgent'

Sync with sched/urgent to avoid conflicts.

Signed-off-by: Peter Zijlstra <[email protected]>

2 parents 7266f0a + cd9626e — commit 3144c83

16 files changed (+138, -72 lines changed)

include/linux/sched.h

Lines changed: 5 additions & 0 deletions
@@ -2133,6 +2133,11 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
 
 #endif /* CONFIG_SMP */
 
+static inline bool task_is_runnable(struct task_struct *p)
+{
+	return p->on_rq && !p->se.sched_delayed;
+}
+
 extern bool sched_task_on_rq(struct task_struct *p);
 extern unsigned long get_wchan(struct task_struct *p);
 extern struct task_struct *cpu_curr_snapshot(int cpu);
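Note: the new task_is_runnable() helper distinguishes "on the runqueue" from "actually runnable": a task with p->se.sched_delayed set is still ->on_rq but will be dequeued the moment it is picked again. The stand-alone sketch below (user-space C; mock_task and the sample states are hypothetical, not kernel types) illustrates why callers that previously tested ->on_rq directly now need the helper.

/* Minimal user-space sketch of the task_is_runnable() idea.
 * mock_task and the sample states are hypothetical, not kernel code. */
#include <stdbool.h>
#include <stdio.h>

struct mock_task {
	int on_rq;          /* task is (still) on a runqueue        */
	bool sched_delayed; /* dequeue was deferred (DELAY_DEQUEUE) */
};

static bool task_is_runnable(const struct mock_task *p)
{
	return p->on_rq && !p->sched_delayed;
}

int main(void)
{
	struct mock_task running = { .on_rq = 1, .sched_delayed = false };
	struct mock_task delayed = { .on_rq = 1, .sched_delayed = true  };

	/* The delayed task is still on_rq, but consumers such as the perf
	 * and freezer checks touched by this commit must treat it as not
	 * runnable. */
	printf("running: on_rq=%d runnable=%d\n", running.on_rq, task_is_runnable(&running));
	printf("delayed: on_rq=%d runnable=%d\n", delayed.on_rq, task_is_runnable(&delayed));
	return 0;
}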

include/linux/task_work.h

Lines changed: 4 additions & 1 deletion
@@ -14,11 +14,14 @@ init_task_work(struct callback_head *twork, task_work_func_t func)
 }
 
 enum task_work_notify_mode {
-	TWA_NONE,
+	TWA_NONE = 0,
 	TWA_RESUME,
 	TWA_SIGNAL,
 	TWA_SIGNAL_NO_IPI,
 	TWA_NMI_CURRENT,
+
+	TWA_FLAGS = 0xff00,
+	TWAF_NO_ALLOC = 0x0100,
 };
 
 static inline bool task_work_pending(struct task_struct *task)
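Note: the enum now doubles as a small bitfield: the low bits keep the notify mode while the TWA_FLAGS mask carves out high bits for modifiers such as TWAF_NO_ALLOC. A hedged user-space sketch of that encoding follows (the split_notify() helper is hypothetical, shown only to illustrate how mode and flags can be separated):

/* Sketch of decomposing a mode-plus-flags value.
 * Constants mirror the enum above; split_notify() is hypothetical. */
#include <stdio.h>

enum task_work_notify_mode {
	TWA_NONE = 0,
	TWA_RESUME,
	TWA_SIGNAL,
	TWA_SIGNAL_NO_IPI,
	TWA_NMI_CURRENT,

	TWA_FLAGS = 0xff00,
	TWAF_NO_ALLOC = 0x0100,
};

static void split_notify(int value, int *mode, int *flags)
{
	*flags = value & TWA_FLAGS;  /* modifier bits, e.g. TWAF_NO_ALLOC */
	*mode  = value & ~TWA_FLAGS; /* plain notify mode, e.g. TWA_RESUME */
}

int main(void)
{
	int mode, flags;

	split_notify(TWA_RESUME | TWAF_NO_ALLOC, &mode, &flags);
	printf("mode=%d no_alloc=%d\n", mode, !!(flags & TWAF_NO_ALLOC));
	return 0;
}

This is the combination task_tick_mm_cid() passes in the kernel/sched/core.c hunk below, so that task_work_add() avoids page allocation while the rq lock is held.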

kernel/events/core.c

Lines changed: 1 addition & 1 deletion
@@ -9251,7 +9251,7 @@ static void perf_event_switch(struct task_struct *task,
 		},
 	};
 
-	if (!sched_in && task->on_rq) {
+	if (!sched_in && task_is_runnable(task)) {
 		switch_event.event_id.header.misc |=
 				PERF_RECORD_MISC_SWITCH_OUT_PREEMPT;
 	}

kernel/freezer.c

Lines changed: 6 additions & 1 deletion
@@ -109,7 +109,12 @@ static int __set_task_frozen(struct task_struct *p, void *arg)
 {
 	unsigned int state = READ_ONCE(p->__state);
 
-	if (p->on_rq)
+	/*
+	 * Allow freezing the sched_delayed tasks; they will not execute until
+	 * ttwu() fixes them up, so it is safe to swap their state now, instead
+	 * of waiting for them to get fully dequeued.
+	 */
+	if (task_is_runnable(p))
 		return 0;
 
 	if (p != current && task_curr(p))

kernel/rcu/tasks.h

Lines changed: 9 additions & 0 deletions
@@ -985,6 +985,15 @@ static bool rcu_tasks_is_holdout(struct task_struct *t)
 	if (!READ_ONCE(t->on_rq))
 		return false;
 
+	/*
+	 * t->on_rq && !t->se.sched_delayed *could* be considered sleeping but
+	 * since it is a spurious state (it will transition into the
+	 * traditional blocked state or get woken up without outside
+	 * dependencies), not considering it such should only affect timing.
+	 *
+	 * Be conservative for now and not include it.
+	 */
+
 	/*
 	 * Idle tasks (or idle injection) within the idle loop are RCU-tasks
 	 * quiescent states. But CPU boot code performed by the idle task

kernel/sched/core.c

Lines changed: 39 additions & 22 deletions
@@ -548,6 +548,11 @@ sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags) { }
  * ON_RQ_MIGRATING state is used for migration without holding both
  * rq->locks. It indicates task_cpu() is not stable, see task_rq_lock().
  *
+ * Additionally it is possible to be ->on_rq but still be considered not
+ * runnable when p->se.sched_delayed is true. These tasks are on the runqueue
+ * but will be dequeued as soon as they get picked again. See the
+ * task_is_runnable() helper.
+ *
  * p->on_cpu <- { 0, 1 }:
  *
  * is set by prepare_task() and cleared by finish_task() such that it will be
@@ -2012,18 +2017,18 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
 	if (!(flags & ENQUEUE_NOCLOCK))
 		update_rq_clock(rq);
 
-	if (!(flags & ENQUEUE_RESTORE)) {
-		sched_info_enqueue(rq, p);
-		psi_enqueue(p, (flags & ENQUEUE_WAKEUP) && !(flags & ENQUEUE_MIGRATED));
-	}
-
 	p->sched_class->enqueue_task(rq, p, flags);
 	/*
 	 * Must be after ->enqueue_task() because ENQUEUE_DELAYED can clear
 	 * ->sched_delayed.
 	 */
 	uclamp_rq_inc(rq, p);
 
+	if (!(flags & ENQUEUE_RESTORE)) {
+		sched_info_enqueue(rq, p);
+		psi_enqueue(p, flags & ENQUEUE_MIGRATED);
+	}
+
 	if (sched_core_enabled(rq))
 		sched_core_enqueue(rq, p);
 }
@@ -2041,7 +2046,7 @@ inline bool dequeue_task(struct rq *rq, struct task_struct *p, int flags)
 
 	if (!(flags & DEQUEUE_SAVE)) {
 		sched_info_dequeue(rq, p);
-		psi_dequeue(p, flags & DEQUEUE_SLEEP);
+		psi_dequeue(p, !(flags & DEQUEUE_SLEEP));
 	}
 
 	/*
@@ -4317,9 +4322,10 @@ static bool __task_needs_rq_lock(struct task_struct *p)
  * @arg: Argument to function.
 *
 * Fix the task in it's current state by avoiding wakeups and or rq operations
- * and call @func(@arg) on it. This function can use ->on_rq and task_curr()
- * to work out what the state is, if required. Given that @func can be invoked
- * with a runqueue lock held, it had better be quite lightweight.
+ * and call @func(@arg) on it. This function can use task_is_runnable() and
+ * task_curr() to work out what the state is, if required. Given that @func
+ * can be invoked with a runqueue lock held, it had better be quite
+ * lightweight.
 *
 * Returns:
 * Whatever @func returns
@@ -6537,6 +6543,7 @@ static void __sched notrace __schedule(int sched_mode)
	 * as a preemption by schedule_debug() and RCU.
	 */
	bool preempt = sched_mode > SM_NONE;
+	bool block = false;
	unsigned long *switch_count;
	unsigned long prev_state;
	struct rq_flags rf;
@@ -6622,6 +6629,7 @@ static void __sched notrace __schedule(int sched_mode)
			 * After this, schedule() must not care about p->state any more.
			 */
			block_task(rq, prev, flags);
+			block = true;
		}
		switch_count = &prev->nvcsw;
	}
@@ -6667,7 +6675,7 @@ static void __sched notrace __schedule(int sched_mode)
 
		migrate_disable_switch(rq, prev);
		psi_account_irqtime(rq, prev, next);
-		psi_sched_switch(prev, next, !task_on_rq_queued(prev));
+		psi_sched_switch(prev, next, block);
 
		trace_sched_switch(preempt, prev, next, prev_state);
 
@@ -7010,20 +7018,20 @@ int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flag
 }
 EXPORT_SYMBOL(default_wake_function);
 
-void __setscheduler_prio(struct task_struct *p, int prio)
+const struct sched_class *__setscheduler_class(struct task_struct *p, int prio)
 {
 	if (dl_prio(prio))
-		p->sched_class = &dl_sched_class;
-	else if (rt_prio(prio))
-		p->sched_class = &rt_sched_class;
+		return &dl_sched_class;
+
+	if (rt_prio(prio))
+		return &rt_sched_class;
+
 #ifdef CONFIG_SCHED_CLASS_EXT
-	else if (task_should_scx(p))
-		p->sched_class = &ext_sched_class;
+	if (task_should_scx(p))
+		return &ext_sched_class;
 #endif
-	else
-		p->sched_class = &fair_sched_class;
 
-	p->prio = prio;
+	return &fair_sched_class;
 }
 
 #ifdef CONFIG_RT_MUTEXES
@@ -7069,7 +7077,7 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
 {
	int prio, oldprio, queued, running, queue_flag =
		DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
-	const struct sched_class *prev_class;
+	const struct sched_class *prev_class, *next_class;
	struct rq_flags rf;
	struct rq *rq;
 
@@ -7127,6 +7135,11 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
		queue_flag &= ~DEQUEUE_MOVE;
 
	prev_class = p->sched_class;
+	next_class = __setscheduler_class(p, prio);
+
+	if (prev_class != next_class && p->se.sched_delayed)
+		dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
+
	queued = task_on_rq_queued(p);
	running = task_current(rq, p);
	if (queued)
@@ -7164,7 +7177,9 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
		p->rt.timeout = 0;
	}
 
-	__setscheduler_prio(p, prio);
+	p->sched_class = next_class;
+	p->prio = prio;
+
	check_class_changing(rq, p, prev_class);
 
	if (queued)
@@ -10458,7 +10473,9 @@ void task_tick_mm_cid(struct rq *rq, struct task_struct *curr)
		return;
	if (time_before(now, READ_ONCE(curr->mm->mm_cid_next_scan)))
		return;
-	task_work_add(curr, work, TWA_RESUME);
+
+	/* No page allocation under rq lock */
+	task_work_add(curr, work, TWA_RESUME | TWAF_NO_ALLOC);
 }
 
 void sched_mm_cid_exit_signals(struct task_struct *t)
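Note: __setscheduler_prio() used to pick the scheduling class and write p->sched_class and p->prio itself; __setscheduler_class() now only computes and returns the class, so callers such as rt_mutex_setprio() can react to the pending class change first (here, dequeueing a sched_delayed task) and apply class and priority afterwards. A stand-alone sketch of that split follows; mock_sched_class, mock_task, setscheduler_class() and the priority thresholds are hypothetical stand-ins, not the kernel structures.

/* User-space sketch of the "compute class, apply later" split. */
#include <stdio.h>

struct mock_sched_class { const char *name; };

static const struct mock_sched_class dl_class   = { "deadline" };
static const struct mock_sched_class rt_class   = { "realtime" };
static const struct mock_sched_class fair_class = { "fair" };

struct mock_task {
	const struct mock_sched_class *sched_class;
	int prio;
	int sched_delayed;
};

/* Pure function: pick the class for a priority without touching the task. */
static const struct mock_sched_class *setscheduler_class(int prio)
{
	if (prio < 0)
		return &dl_class;   /* illustrative threshold for deadline */
	if (prio < 100)
		return &rt_class;   /* illustrative threshold for realtime */
	return &fair_class;
}

int main(void)
{
	struct mock_task p = { .sched_class = &fair_class, .prio = 120, .sched_delayed = 1 };
	const struct mock_sched_class *next_class = setscheduler_class(10);

	/* The caller can react to the pending class change first ... */
	if (p.sched_class != next_class && p.sched_delayed)
		printf("finish the delayed dequeue before switching class\n");

	/* ... and only then apply class and priority, mirroring rt_mutex_setprio(). */
	p.sched_class = next_class;
	p.prio = 10;
	printf("now in %s class at prio %d\n", p.sched_class->name, p.prio);
	return 0;
}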

kernel/sched/deadline.c

Lines changed: 1 addition & 1 deletion
@@ -2385,7 +2385,7 @@ static void set_next_task_dl(struct rq *rq, struct task_struct *p, bool first)
 
	deadline_queue_push_tasks(rq);
 
-	if (hrtick_enabled(rq))
+	if (hrtick_enabled_dl(rq))
		start_hrtick_dl(rq, &p->dl);
 }
 

kernel/sched/ext.c

Lines changed: 2 additions & 2 deletions
@@ -4471,7 +4471,7 @@ static void scx_ops_disable_workfn(struct kthread_work *work)
		sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);
 
		p->scx.slice = min_t(u64, p->scx.slice, SCX_SLICE_DFL);
-		__setscheduler_prio(p, p->prio);
+		p->sched_class = __setscheduler_class(p, p->prio);
		check_class_changing(task_rq(p), p, old_class);
 
		sched_enq_and_set_task(&ctx);
@@ -5186,7 +5186,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
 
		sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);
 
-		__setscheduler_prio(p, p->prio);
+		p->sched_class = __setscheduler_class(p, p->prio);
		check_class_changing(task_rq(p), p, old_class);
 
		sched_enq_and_set_task(&ctx);

kernel/sched/fair.c

Lines changed: 7 additions & 20 deletions
@@ -1247,7 +1247,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
 
	account_cfs_rq_runtime(cfs_rq, delta_exec);
 
-	if (rq->nr_running == 1)
+	if (cfs_rq->nr_running == 1)
		return;
 
	if (resched || did_preempt_short(cfs_rq, curr)) {
@@ -6052,10 +6052,13 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
	for_each_sched_entity(se) {
		struct cfs_rq *qcfs_rq = cfs_rq_of(se);
 
-		if (se->on_rq) {
-			SCHED_WARN_ON(se->sched_delayed);
+		/* Handle any unfinished DELAY_DEQUEUE business first. */
+		if (se->sched_delayed) {
+			int flags = DEQUEUE_SLEEP | DEQUEUE_DELAYED;
+
+			dequeue_entity(qcfs_rq, se, flags);
+		} else if (se->on_rq)
			break;
-		}
		enqueue_entity(qcfs_rq, se, ENQUEUE_WAKEUP);
 
		if (cfs_rq_is_idle(group_cfs_rq(se)))
@@ -13168,22 +13171,6 @@ static void attach_task_cfs_rq(struct task_struct *p)
 static void switched_from_fair(struct rq *rq, struct task_struct *p)
 {
	detach_task_cfs_rq(p);
-	/*
-	 * Since this is called after changing class, this is a little weird
-	 * and we cannot use DEQUEUE_DELAYED.
-	 */
-	if (p->se.sched_delayed) {
-		/* First, dequeue it from its new class' structures */
-		dequeue_task(rq, p, DEQUEUE_NOCLOCK | DEQUEUE_SLEEP);
-		/*
-		 * Now, clean up the fair_sched_class side of things
-		 * related to sched_delayed being true and that wasn't done
-		 * due to the generic dequeue not using DEQUEUE_DELAYED.
-		 */
-		finish_delayed_dequeue_entity(&p->se);
-		p->se.rel_deadline = 0;
-		__block_task(rq, p);
-	}
 }
 
 static void switched_to_fair(struct rq *rq, struct task_struct *p)
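Note: the unthrottle hunk replaces the old assumption that an on_rq entity is never delayed (the removed SCHED_WARN_ON) with explicit handling: finish a deferred dequeue first, then re-enqueue; only skip entities that are genuinely queued. A stand-alone user-space sketch of that control flow follows; mock_entity and the stub queue operations are hypothetical, not the kernel's dequeue_entity()/enqueue_entity().

/* Sketch of "finish the deferred dequeue before re-enqueueing". */
#include <stdbool.h>
#include <stdio.h>

struct mock_entity {
	bool on_rq;
	bool sched_delayed;
};

static void dequeue_one(struct mock_entity *se)
{
	se->on_rq = false;
	se->sched_delayed = false;
	printf("finished delayed dequeue\n");
}

static void enqueue_one(struct mock_entity *se)
{
	se->on_rq = true;
	printf("enqueued\n");
}

static void unthrottle_one(struct mock_entity *se)
{
	/* Handle any unfinished delayed-dequeue business first. */
	if (se->sched_delayed)
		dequeue_one(se);
	else if (se->on_rq)
		return;		/* already queued, nothing to do */

	enqueue_one(se);
}

int main(void)
{
	struct mock_entity delayed = { .on_rq = true,  .sched_delayed = true  };
	struct mock_entity queued  = { .on_rq = true,  .sched_delayed = false };
	struct mock_entity idle    = { .on_rq = false, .sched_delayed = false };

	unthrottle_one(&delayed); /* dequeue, then enqueue fresh */
	unthrottle_one(&queued);  /* untouched: already runnable */
	unthrottle_one(&idle);    /* plain enqueue               */
	return 0;
}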

kernel/sched/sched.h

Lines changed: 1 addition & 1 deletion
@@ -3797,7 +3797,7 @@ static inline int rt_effective_prio(struct task_struct *p, int prio)
 
 extern int __sched_setscheduler(struct task_struct *p, const struct sched_attr *attr, bool user, bool pi);
 extern int __sched_setaffinity(struct task_struct *p, struct affinity_context *ctx);
-extern void __setscheduler_prio(struct task_struct *p, int prio);
+extern const struct sched_class *__setscheduler_class(struct task_struct *p, int prio);
 extern void set_load_weight(struct task_struct *p, bool update_load);
 extern void enqueue_task(struct rq *rq, struct task_struct *p, int flags);
 extern bool dequeue_task(struct rq *rq, struct task_struct *p, int flags);
