Commit 82e9d04

Author: Peter Zijlstra
sched/fair: Avoid re-setting virtual deadline on 'migrations'
During OSPM24 Youssef noted that migrations are re-setting the virtual deadline. Notably everything that does a dequeue-enqueue, like setting nice, changing preferred numa-node, and a myriad of other random crap, will cause this to happen. This shouldn't be.

Preserve the relative virtual deadline across such dequeue/enqueue cycles.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Valentin Schneider <[email protected]>
Tested-by: Valentin Schneider <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
1 parent fc1892b commit 82e9d04

3 files changed, +26 −7 lines
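The mechanism itself is compact. Below is a rough user-space sketch of the two halves the patch adds: save the deadline relative to vruntime on a non-sleep dequeue, then re-anchor it to the new vruntime when the entity is placed again. The struct and helper names are illustrative stand-ins, not the kernel's code.

/* Minimal user-space sketch of the PLACE_REL_DEADLINE idea. */
#include <assert.h>
#include <stdio.h>

struct entity {
	unsigned long long vruntime;
	unsigned long long deadline;
	unsigned char rel_deadline;
};

/* Dequeue half: a dequeue that is not a sleep keeps the deadline,
 * stored relative to vruntime. */
static void dequeue_save_deadline(struct entity *se)
{
	se->deadline -= se->vruntime;
	se->rel_deadline = 1;
}

/* Enqueue half: re-anchor the saved relative deadline to the new
 * vruntime instead of computing a fresh one. */
static void place_restore_deadline(struct entity *se, unsigned long long new_vruntime)
{
	se->vruntime = new_vruntime;
	if (se->rel_deadline) {
		se->deadline += se->vruntime;
		se->rel_deadline = 0;
	}
}

int main(void)
{
	struct entity se = { .vruntime = 1000000ULL, .deadline = 1003000ULL, .rel_deadline = 0 };
	unsigned long long owed = se.deadline - se.vruntime;

	dequeue_save_deadline(&se);            /* e.g. the task's nice value is changed */
	place_restore_deadline(&se, 5000000ULL); /* re-enqueued later with a new vruntime */

	/* The virtual time still owed until the deadline is unchanged. */
	assert(se.deadline - se.vruntime == owed);
	printf("remaining virtual time to deadline: %llu\n", se.deadline - se.vruntime);
	return 0;
}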

include/linux/sched.h

Lines changed: 4 additions & 2 deletions
@@ -544,8 +544,10 @@ struct sched_entity {
 	u64			min_vruntime;
 
 	struct list_head	group_node;
-	unsigned int		on_rq;
-	unsigned int		sched_delayed;
+	unsigned char		on_rq;
+	unsigned char		sched_delayed;
+	unsigned char		rel_deadline;
+				/* hole */
 
 	u64			exec_start;
 	u64			sum_exec_runtime;
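Shrinking on_rq and sched_delayed from unsigned int to unsigned char lets the new rel_deadline flag fit in the same 8 bytes in front of the 8-byte-aligned u64 members, so struct sched_entity does not grow; the /* hole */ comment documents the padding that remains. A stand-alone sketch of that layout effect (mock structs with made-up names, not the kernel's):

#include <stdio.h>
#include <stddef.h>

struct before {
	unsigned int on_rq;
	unsigned int sched_delayed;
	unsigned long long exec_start;	/* stands in for the following u64 */
};

struct after {
	unsigned char on_rq;
	unsigned char sched_delayed;
	unsigned char rel_deadline;
	/* padding ("hole") up to the 8-byte alignment of exec_start */
	unsigned long long exec_start;
};

int main(void)
{
	/* Both flag areas occupy 8 bytes in front of exec_start, so the
	 * extra flag fits without growing the structure. */
	printf("before: offsetof(exec_start) = %zu, sizeof = %zu\n",
	       offsetof(struct before, exec_start), sizeof(struct before));
	printf("after:  offsetof(exec_start) = %zu, sizeof = %zu\n",
	       offsetof(struct after, exec_start), sizeof(struct after));
	return 0;
}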

kernel/sched/fair.c

Lines changed: 18 additions & 5 deletions
@@ -5270,6 +5270,12 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 
 	se->vruntime = vruntime - lag;
 
+	if (sched_feat(PLACE_REL_DEADLINE) && se->rel_deadline) {
+		se->deadline += se->vruntime;
+		se->rel_deadline = 0;
+		return;
+	}
+
 	/*
 	 * When joining the competition; the existing tasks will be,
 	 * on average, halfway through their slice, as such start tasks
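A short numeric trace of this block, with made-up values: suppose the entity was dequeued (not to sleep) at vruntime 2,000,000 with deadline 2,004,000. dequeue_entity() stored deadline = 4,000 and set rel_deadline. When the entity is placed again and the code above the hunk computes a new vruntime of, say, 9,000,000, this block restores deadline = 9,004,000, clears rel_deadline and returns early, so the entity keeps the 4,000 of virtual time it still had rather than being handed a fresh slice further down in place_entity().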
@@ -5382,23 +5388,24 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
 static bool
 dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
+	bool sleep = flags & DEQUEUE_SLEEP;
+
 	update_curr(cfs_rq);
 
 	if (flags & DEQUEUE_DELAYED) {
 		SCHED_WARN_ON(!se->sched_delayed);
 	} else {
-		bool sleep = flags & DEQUEUE_SLEEP;
-
+		bool delay = sleep;
 		/*
 		 * DELAY_DEQUEUE relies on spurious wakeups, special task
 		 * states must not suffer spurious wakeups, excempt them.
 		 */
 		if (flags & DEQUEUE_SPECIAL)
-			sleep = false;
+			delay = false;
 
-		SCHED_WARN_ON(sleep && se->sched_delayed);
+		SCHED_WARN_ON(delay && se->sched_delayed);
 
-		if (sched_feat(DELAY_DEQUEUE) && sleep &&
+		if (sched_feat(DELAY_DEQUEUE) && delay &&
 		    !entity_eligible(cfs_rq, se)) {
 			if (cfs_rq->next == se)
 				cfs_rq->next = NULL;
@@ -5429,6 +5436,11 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	clear_buddies(cfs_rq, se);
 
 	update_entity_lag(cfs_rq, se);
+	if (sched_feat(PLACE_REL_DEADLINE) && !sleep) {
+		se->deadline -= se->vruntime;
+		se->rel_deadline = 1;
+	}
+
 	if (se != cfs_rq->curr)
 		__dequeue_entity(cfs_rq, se);
 	se->on_rq = 0;
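Note that sleep is now read once at the top of dequeue_entity() precisely so this later block can use it: the relative deadline is only preserved when the dequeue is not a sleep, i.e. the dequeue half of a dequeue/enqueue cycle, while a task that actually blocks is placed as before on wakeup. As a rough summary (illustrative comment, not kernel code):

/*
 * dequeue_task() without DEQUEUE_SLEEP (setting nice, changing the preferred
 * NUMA node, and similar dequeue/enqueue cycles)
 *         -> deadline stored relative to vruntime, rel_deadline = 1
 *
 * dequeue_task() with DEQUEUE_SLEEP (the task blocks)
 *         -> rel_deadline left untouched; the wakeup placement works as before
 */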
@@ -12992,6 +13004,7 @@ static void switched_from_fair(struct rq *rq, struct task_struct *p)
 	if (p->se.sched_delayed) {
 		dequeue_task(rq, p, DEQUEUE_NOCLOCK | DEQUEUE_SLEEP);
 		p->se.sched_delayed = 0;
+		p->se.rel_deadline = 0;
 		if (sched_feat(DELAY_ZERO) && p->se.vlag > 0)
 			p->se.vlag = 0;
 	}

kernel/sched/features.h

Lines changed: 4 additions & 0 deletions
@@ -9,6 +9,10 @@ SCHED_FEAT(PLACE_LAG, true)
  * Give new tasks half a slice to ease into the competition.
  */
 SCHED_FEAT(PLACE_DEADLINE_INITIAL, true)
+/*
+ * Preserve relative virtual deadline on 'migration'.
+ */
+SCHED_FEAT(PLACE_REL_DEADLINE, true)
 /*
  * Inhibit (wakeup) preemption until the current task has either matched the
  * 0-lag point or until is has exhausted it's slice.
