Skip to content

Commit 98442f0

Browse files
author
Peter Zijlstra
committed
sched: Fix delayed_dequeue vs switched_from_fair()
Commit 2e0199d ("sched/fair: Prepare exit/cleanup paths for delayed_dequeue") and its follow-up fixes try to deal with a rather unfortunate situation where a task is enqueued in a new class, even though it shouldn't have been. Mostly because the existing ->switched_to/from() hooks are in the wrong place for this case. This all led to Paul being able to trigger failures at something like once per 10k CPU hours of RCU torture. For now, do the ugly thing and move the code to the right place by ignoring the switch hooks. Note: Clean up the whole sched_class::switch*_{to,from}() thing. Fixes: 2e0199d ("sched/fair: Prepare exit/cleanup paths for delayed_dequeue") Reported-by: Paul E. McKenney <[email protected]> Signed-off-by: Peter Zijlstra (Intel) <[email protected]> Link: https://lkml.kernel.org/r/[email protected]
1 parent 73ab05a commit 98442f0

File tree

5 files changed

+30
-34
lines changed

5 files changed

+30
-34
lines changed

kernel/sched/core.c

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7010,20 +7010,20 @@ int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flag
70107010
}
70117011
EXPORT_SYMBOL(default_wake_function);
70127012

7013-
void __setscheduler_prio(struct task_struct *p, int prio)
7013+
const struct sched_class *__setscheduler_class(struct task_struct *p, int prio)
70147014
{
70157015
if (dl_prio(prio))
7016-
p->sched_class = &dl_sched_class;
7017-
else if (rt_prio(prio))
7018-
p->sched_class = &rt_sched_class;
7016+
return &dl_sched_class;
7017+
7018+
if (rt_prio(prio))
7019+
return &rt_sched_class;
7020+
70197021
#ifdef CONFIG_SCHED_CLASS_EXT
7020-
else if (task_should_scx(p))
7021-
p->sched_class = &ext_sched_class;
7022+
if (task_should_scx(p))
7023+
return &ext_sched_class;
70227024
#endif
7023-
else
7024-
p->sched_class = &fair_sched_class;
70257025

7026-
p->prio = prio;
7026+
return &fair_sched_class;
70277027
}
70287028

70297029
#ifdef CONFIG_RT_MUTEXES
@@ -7069,7 +7069,7 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
70697069
{
70707070
int prio, oldprio, queued, running, queue_flag =
70717071
DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
7072-
const struct sched_class *prev_class;
7072+
const struct sched_class *prev_class, *next_class;
70737073
struct rq_flags rf;
70747074
struct rq *rq;
70757075

@@ -7127,6 +7127,11 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
71277127
queue_flag &= ~DEQUEUE_MOVE;
71287128

71297129
prev_class = p->sched_class;
7130+
next_class = __setscheduler_class(p, prio);
7131+
7132+
if (prev_class != next_class && p->se.sched_delayed)
7133+
dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
7134+
71307135
queued = task_on_rq_queued(p);
71317136
running = task_current(rq, p);
71327137
if (queued)
@@ -7164,7 +7169,9 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
71647169
p->rt.timeout = 0;
71657170
}
71667171

7167-
__setscheduler_prio(p, prio);
7172+
p->sched_class = next_class;
7173+
p->prio = prio;
7174+
71687175
check_class_changing(rq, p, prev_class);
71697176

71707177
if (queued)

kernel/sched/ext.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4471,7 +4471,7 @@ static void scx_ops_disable_workfn(struct kthread_work *work)
44714471
sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);
44724472

44734473
p->scx.slice = min_t(u64, p->scx.slice, SCX_SLICE_DFL);
4474-
__setscheduler_prio(p, p->prio);
4474+
p->sched_class = __setscheduler_class(p, p->prio);
44754475
check_class_changing(task_rq(p), p, old_class);
44764476

44774477
sched_enq_and_set_task(&ctx);
@@ -5186,7 +5186,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
51865186

51875187
sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);
51885188

5189-
__setscheduler_prio(p, p->prio);
5189+
p->sched_class = __setscheduler_class(p, p->prio);
51905190
check_class_changing(task_rq(p), p, old_class);
51915191

51925192
sched_enq_and_set_task(&ctx);

kernel/sched/fair.c

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -13177,22 +13177,6 @@ static void attach_task_cfs_rq(struct task_struct *p)
1317713177
static void switched_from_fair(struct rq *rq, struct task_struct *p)
1317813178
{
1317913179
detach_task_cfs_rq(p);
13180-
/*
13181-
* Since this is called after changing class, this is a little weird
13182-
* and we cannot use DEQUEUE_DELAYED.
13183-
*/
13184-
if (p->se.sched_delayed) {
13185-
/* First, dequeue it from its new class' structures */
13186-
dequeue_task(rq, p, DEQUEUE_NOCLOCK | DEQUEUE_SLEEP);
13187-
/*
13188-
* Now, clean up the fair_sched_class side of things
13189-
* related to sched_delayed being true and that wasn't done
13190-
* due to the generic dequeue not using DEQUEUE_DELAYED.
13191-
*/
13192-
finish_delayed_dequeue_entity(&p->se);
13193-
p->se.rel_deadline = 0;
13194-
__block_task(rq, p);
13195-
}
1319613180
}
1319713181

1319813182
static void switched_to_fair(struct rq *rq, struct task_struct *p)

kernel/sched/sched.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3797,7 +3797,7 @@ static inline int rt_effective_prio(struct task_struct *p, int prio)
37973797

37983798
extern int __sched_setscheduler(struct task_struct *p, const struct sched_attr *attr, bool user, bool pi);
37993799
extern int __sched_setaffinity(struct task_struct *p, struct affinity_context *ctx);
3800-
extern void __setscheduler_prio(struct task_struct *p, int prio);
3800+
extern const struct sched_class *__setscheduler_class(struct task_struct *p, int prio);
38013801
extern void set_load_weight(struct task_struct *p, bool update_load);
38023802
extern void enqueue_task(struct rq *rq, struct task_struct *p, int flags);
38033803
extern bool dequeue_task(struct rq *rq, struct task_struct *p, int flags);

kernel/sched/syscalls.c

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -529,7 +529,7 @@ int __sched_setscheduler(struct task_struct *p,
529529
{
530530
int oldpolicy = -1, policy = attr->sched_policy;
531531
int retval, oldprio, newprio, queued, running;
532-
const struct sched_class *prev_class;
532+
const struct sched_class *prev_class, *next_class;
533533
struct balance_callback *head;
534534
struct rq_flags rf;
535535
int reset_on_fork;
@@ -706,18 +706,23 @@ int __sched_setscheduler(struct task_struct *p,
706706
queue_flags &= ~DEQUEUE_MOVE;
707707
}
708708

709+
prev_class = p->sched_class;
710+
next_class = __setscheduler_class(p, newprio);
711+
712+
if (prev_class != next_class && p->se.sched_delayed)
713+
dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
714+
709715
queued = task_on_rq_queued(p);
710716
running = task_current(rq, p);
711717
if (queued)
712718
dequeue_task(rq, p, queue_flags);
713719
if (running)
714720
put_prev_task(rq, p);
715721

716-
prev_class = p->sched_class;
717-
718722
if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) {
719723
__setscheduler_params(p, attr);
720-
__setscheduler_prio(p, newprio);
724+
p->sched_class = next_class;
725+
p->prio = newprio;
721726
}
722727
__setscheduler_uclamp(p, attr);
723728
check_class_changing(rq, p, prev_class);

0 commit comments

Comments
 (0)