Skip to content

Commit af0c8b2

Browse files
author
Peter Zijlstra
committed
sched: Split scheduler and execution contexts
Let's define the "scheduling context" as all the scheduler state in task_struct for the task chosen to run, which we'll call the donor task, and the "execution context" as all state required to actually run the task. Currently both are intertwined in task_struct. We want to logically split these such that we can use the scheduling context of the donor task selected to be scheduled, but use the execution context of a different task to actually be run. To this purpose, introduce the rq->donor field to point to the task_struct chosen from the runqueue by the scheduler, which will be used for scheduler state, and preserve rq->curr to indicate the execution context of the task that will actually be run. This patch introduces the donor field as a union with curr, so it doesn't cause the contexts to be split yet, but adds the logic to handle everything separately. [add additional comments and update more sched_class code to use rq::proxy] [jstultz: Rebased and resolved minor collisions, reworked to use accessors, tweaked update_curr_common to use rq_proxy fixing rt scheduling issues] Signed-off-by: Peter Zijlstra (Intel) <[email protected]> Signed-off-by: Juri Lelli <[email protected]> Signed-off-by: Connor O'Brien <[email protected]> Signed-off-by: John Stultz <[email protected]> Signed-off-by: Peter Zijlstra (Intel) <[email protected]> Reviewed-by: Metin Kaya <[email protected]> Tested-by: K Prateek Nayak <[email protected]> Tested-by: Metin Kaya <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent 7b3d61f commit af0c8b2

File tree

7 files changed

+114
-80
lines changed

7 files changed

+114
-80
lines changed

kernel/sched/core.c

Lines changed: 28 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -832,7 +832,7 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
832832

833833
rq_lock(rq, &rf);
834834
update_rq_clock(rq);
835-
rq->curr->sched_class->task_tick(rq, rq->curr, 1);
835+
rq->donor->sched_class->task_tick(rq, rq->curr, 1);
836836
rq_unlock(rq, &rf);
837837

838838
return HRTIMER_NORESTART;
@@ -2135,16 +2135,18 @@ void check_class_changed(struct rq *rq, struct task_struct *p,
21352135

21362136
void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags)
21372137
{
2138-
if (p->sched_class == rq->curr->sched_class)
2139-
rq->curr->sched_class->wakeup_preempt(rq, p, flags);
2140-
else if (sched_class_above(p->sched_class, rq->curr->sched_class))
2138+
struct task_struct *donor = rq->donor;
2139+
2140+
if (p->sched_class == donor->sched_class)
2141+
donor->sched_class->wakeup_preempt(rq, p, flags);
2142+
else if (sched_class_above(p->sched_class, donor->sched_class))
21412143
resched_curr(rq);
21422144

21432145
/*
21442146
* A queue event has occurred, and we're going to schedule. In
21452147
* this case, we can save a useless back to back clock update.
21462148
*/
2147-
if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr))
2149+
if (task_on_rq_queued(donor) && test_tsk_need_resched(rq->curr))
21482150
rq_clock_skip_update(rq);
21492151
}
21502152

@@ -2680,7 +2682,7 @@ __do_set_cpus_allowed(struct task_struct *p, struct affinity_context *ctx)
26802682
lockdep_assert_held(&p->pi_lock);
26812683

26822684
queued = task_on_rq_queued(p);
2683-
running = task_current(rq, p);
2685+
running = task_current_donor(rq, p);
26842686

26852687
if (queued) {
26862688
/*
@@ -5507,7 +5509,7 @@ unsigned long long task_sched_runtime(struct task_struct *p)
55075509
* project cycles that may never be accounted to this
55085510
* thread, breaking clock_gettime().
55095511
*/
5510-
if (task_current(rq, p) && task_on_rq_queued(p)) {
5512+
if (task_current_donor(rq, p) && task_on_rq_queued(p)) {
55115513
prefetch_curr_exec_start(p);
55125514
update_rq_clock(rq);
55135515
p->sched_class->update_curr(rq);
@@ -5575,7 +5577,8 @@ void sched_tick(void)
55755577
{
55765578
int cpu = smp_processor_id();
55775579
struct rq *rq = cpu_rq(cpu);
5578-
struct task_struct *curr;
5580+
/* accounting goes to the donor task */
5581+
struct task_struct *donor;
55795582
struct rq_flags rf;
55805583
unsigned long hw_pressure;
55815584
u64 resched_latency;
@@ -5586,19 +5589,19 @@ void sched_tick(void)
55865589
sched_clock_tick();
55875590

55885591
rq_lock(rq, &rf);
5592+
donor = rq->donor;
55895593

5590-
curr = rq->curr;
5591-
psi_account_irqtime(rq, curr, NULL);
5594+
psi_account_irqtime(rq, donor, NULL);
55925595

55935596
update_rq_clock(rq);
55945597
hw_pressure = arch_scale_hw_pressure(cpu_of(rq));
55955598
update_hw_load_avg(rq_clock_task(rq), rq, hw_pressure);
5596-
curr->sched_class->task_tick(rq, curr, 0);
5599+
donor->sched_class->task_tick(rq, donor, 0);
55975600
if (sched_feat(LATENCY_WARN))
55985601
resched_latency = cpu_resched_latency(rq);
55995602
calc_global_load_tick(rq);
56005603
sched_core_tick(rq);
5601-
task_tick_mm_cid(rq, curr);
5604+
task_tick_mm_cid(rq, donor);
56025605
scx_tick(rq);
56035606

56045607
rq_unlock(rq, &rf);
@@ -5608,8 +5611,8 @@ void sched_tick(void)
56085611

56095612
perf_event_task_tick();
56105613

5611-
if (curr->flags & PF_WQ_WORKER)
5612-
wq_worker_tick(curr);
5614+
if (donor->flags & PF_WQ_WORKER)
5615+
wq_worker_tick(donor);
56135616

56145617
#ifdef CONFIG_SMP
56155618
if (!scx_switched_all()) {
@@ -5676,6 +5679,12 @@ static void sched_tick_remote(struct work_struct *work)
56765679
struct task_struct *curr = rq->curr;
56775680

56785681
if (cpu_online(cpu)) {
5682+
/*
5683+
* Since this is a remote tick for full dynticks mode,
5684+
* we are always sure that there is no proxy (only a
5685+
* single task is running).
5686+
*/
5687+
SCHED_WARN_ON(rq->curr != rq->donor);
56795688
update_rq_clock(rq);
56805689

56815690
if (!is_idle_task(curr)) {
@@ -6642,6 +6651,7 @@ static void __sched notrace __schedule(int sched_mode)
66426651
}
66436652

66446653
next = pick_next_task(rq, prev, &rf);
6654+
rq_set_donor(rq, next);
66456655
picked:
66466656
clear_tsk_need_resched(prev);
66476657
clear_preempt_need_resched();
@@ -7148,7 +7158,7 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
71487158
dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
71497159

71507160
queued = task_on_rq_queued(p);
7151-
running = task_current(rq, p);
7161+
running = task_current_donor(rq, p);
71527162
if (queued)
71537163
dequeue_task(rq, p, queue_flag);
71547164
if (running)
@@ -7718,6 +7728,7 @@ void __init init_idle(struct task_struct *idle, int cpu)
77187728
rcu_read_unlock();
77197729

77207730
rq->idle = idle;
7731+
rq_set_donor(rq, idle);
77217732
rcu_assign_pointer(rq->curr, idle);
77227733
idle->on_rq = TASK_ON_RQ_QUEUED;
77237734
#ifdef CONFIG_SMP
@@ -7807,7 +7818,7 @@ void sched_setnuma(struct task_struct *p, int nid)
78077818

78087819
rq = task_rq_lock(p, &rf);
78097820
queued = task_on_rq_queued(p);
7810-
running = task_current(rq, p);
7821+
running = task_current_donor(rq, p);
78117822

78127823
if (queued)
78137824
dequeue_task(rq, p, DEQUEUE_SAVE);
@@ -8957,7 +8968,7 @@ void sched_move_task(struct task_struct *tsk)
89578968

89588969
update_rq_clock(rq);
89598970

8960-
running = task_current(rq, tsk);
8971+
running = task_current_donor(rq, tsk);
89618972
queued = task_on_rq_queued(tsk);
89628973

89638974
if (queued)

kernel/sched/deadline.c

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1339,7 +1339,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
13391339
#endif
13401340

13411341
enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
1342-
if (dl_task(rq->curr))
1342+
if (dl_task(rq->donor))
13431343
wakeup_preempt_dl(rq, p, 0);
13441344
else
13451345
resched_curr(rq);
@@ -1736,11 +1736,11 @@ int dl_server_apply_params(struct sched_dl_entity *dl_se, u64 runtime, u64 perio
17361736
*/
17371737
static void update_curr_dl(struct rq *rq)
17381738
{
1739-
struct task_struct *curr = rq->curr;
1740-
struct sched_dl_entity *dl_se = &curr->dl;
1739+
struct task_struct *donor = rq->donor;
1740+
struct sched_dl_entity *dl_se = &donor->dl;
17411741
s64 delta_exec;
17421742

1743-
if (!dl_task(curr) || !on_dl_rq(dl_se))
1743+
if (!dl_task(donor) || !on_dl_rq(dl_se))
17441744
return;
17451745

17461746
/*
@@ -2213,7 +2213,7 @@ static int find_later_rq(struct task_struct *task);
22132213
static int
22142214
select_task_rq_dl(struct task_struct *p, int cpu, int flags)
22152215
{
2216-
struct task_struct *curr;
2216+
struct task_struct *curr, *donor;
22172217
bool select_rq;
22182218
struct rq *rq;
22192219

@@ -2224,6 +2224,7 @@ select_task_rq_dl(struct task_struct *p, int cpu, int flags)
22242224

22252225
rcu_read_lock();
22262226
curr = READ_ONCE(rq->curr); /* unlocked access */
2227+
donor = READ_ONCE(rq->donor);
22272228

22282229
/*
22292230
* If we are dealing with a -deadline task, we must
@@ -2234,9 +2235,9 @@ select_task_rq_dl(struct task_struct *p, int cpu, int flags)
22342235
* other hand, if it has a shorter deadline, we
22352236
* try to make it stay here, it might be important.
22362237
*/
2237-
select_rq = unlikely(dl_task(curr)) &&
2238+
select_rq = unlikely(dl_task(donor)) &&
22382239
(curr->nr_cpus_allowed < 2 ||
2239-
!dl_entity_preempt(&p->dl, &curr->dl)) &&
2240+
!dl_entity_preempt(&p->dl, &donor->dl)) &&
22402241
p->nr_cpus_allowed > 1;
22412242

22422243
/*
@@ -2299,7 +2300,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
22992300
* let's hope p can move out.
23002301
*/
23012302
if (rq->curr->nr_cpus_allowed == 1 ||
2302-
!cpudl_find(&rq->rd->cpudl, rq->curr, NULL))
2303+
!cpudl_find(&rq->rd->cpudl, rq->donor, NULL))
23032304
return;
23042305

23052306
/*
@@ -2338,7 +2339,7 @@ static int balance_dl(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
23382339
static void wakeup_preempt_dl(struct rq *rq, struct task_struct *p,
23392340
int flags)
23402341
{
2341-
if (dl_entity_preempt(&p->dl, &rq->curr->dl)) {
2342+
if (dl_entity_preempt(&p->dl, &rq->donor->dl)) {
23422343
resched_curr(rq);
23432344
return;
23442345
}
@@ -2348,7 +2349,7 @@ static void wakeup_preempt_dl(struct rq *rq, struct task_struct *p,
23482349
* In the unlikely case current and p have the same deadline
23492350
* let us try to decide what's the best thing to do...
23502351
*/
2351-
if ((p->dl.deadline == rq->curr->dl.deadline) &&
2352+
if ((p->dl.deadline == rq->donor->dl.deadline) &&
23522353
!test_tsk_need_resched(rq->curr))
23532354
check_preempt_equal_dl(rq, p);
23542355
#endif /* CONFIG_SMP */
@@ -2380,7 +2381,7 @@ static void set_next_task_dl(struct rq *rq, struct task_struct *p, bool first)
23802381
if (!first)
23812382
return;
23822383

2383-
if (rq->curr->sched_class != &dl_sched_class)
2384+
if (rq->donor->sched_class != &dl_sched_class)
23842385
update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 0);
23852386

23862387
deadline_queue_push_tasks(rq);
@@ -2699,8 +2700,8 @@ static int push_dl_task(struct rq *rq)
26992700
* can move away, it makes sense to just reschedule
27002701
* without going further in pushing next_task.
27012702
*/
2702-
if (dl_task(rq->curr) &&
2703-
dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) &&
2703+
if (dl_task(rq->donor) &&
2704+
dl_time_before(next_task->dl.deadline, rq->donor->dl.deadline) &&
27042705
rq->curr->nr_cpus_allowed > 1) {
27052706
resched_curr(rq);
27062707
return 0;
@@ -2823,7 +2824,7 @@ static void pull_dl_task(struct rq *this_rq)
28232824
* deadline than the current task of its runqueue.
28242825
*/
28252826
if (dl_time_before(p->dl.deadline,
2826-
src_rq->curr->dl.deadline))
2827+
src_rq->donor->dl.deadline))
28272828
goto skip;
28282829

28292830
if (is_migration_disabled(p)) {
@@ -2862,9 +2863,9 @@ static void task_woken_dl(struct rq *rq, struct task_struct *p)
28622863
if (!task_on_cpu(rq, p) &&
28632864
!test_tsk_need_resched(rq->curr) &&
28642865
p->nr_cpus_allowed > 1 &&
2865-
dl_task(rq->curr) &&
2866+
dl_task(rq->donor) &&
28662867
(rq->curr->nr_cpus_allowed < 2 ||
2867-
!dl_entity_preempt(&p->dl, &rq->curr->dl))) {
2868+
!dl_entity_preempt(&p->dl, &rq->donor->dl))) {
28682869
push_dl_tasks(rq);
28692870
}
28702871
}
@@ -3039,12 +3040,12 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
30393040
return;
30403041
}
30413042

3042-
if (rq->curr != p) {
3043+
if (rq->donor != p) {
30433044
#ifdef CONFIG_SMP
30443045
if (p->nr_cpus_allowed > 1 && rq->dl.overloaded)
30453046
deadline_queue_push_tasks(rq);
30463047
#endif
3047-
if (dl_task(rq->curr))
3048+
if (dl_task(rq->donor))
30483049
wakeup_preempt_dl(rq, p, 0);
30493050
else
30503051
resched_curr(rq);
@@ -3073,7 +3074,7 @@ static void prio_changed_dl(struct rq *rq, struct task_struct *p,
30733074
if (!rq->dl.overloaded)
30743075
deadline_queue_pull_task(rq);
30753076

3076-
if (task_current(rq, p)) {
3077+
if (task_current_donor(rq, p)) {
30773078
/*
30783079
* If we now have a earlier deadline task than p,
30793080
* then reschedule, provided p is still on this

kernel/sched/fair.c

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1200,12 +1200,12 @@ static inline bool do_preempt_short(struct cfs_rq *cfs_rq,
12001200
*/
12011201
s64 update_curr_common(struct rq *rq)
12021202
{
1203-
struct task_struct *curr = rq->curr;
1203+
struct task_struct *donor = rq->donor;
12041204
s64 delta_exec;
12051205

1206-
delta_exec = update_curr_se(rq, &curr->se);
1206+
delta_exec = update_curr_se(rq, &donor->se);
12071207
if (likely(delta_exec > 0))
1208-
update_curr_task(curr, delta_exec);
1208+
update_curr_task(donor, delta_exec);
12091209

12101210
return delta_exec;
12111211
}
@@ -1258,7 +1258,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
12581258

12591259
static void update_curr_fair(struct rq *rq)
12601260
{
1261-
update_curr(cfs_rq_of(&rq->curr->se));
1261+
update_curr(cfs_rq_of(&rq->donor->se));
12621262
}
12631263

12641264
static inline void
@@ -6815,7 +6815,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
68156815
s64 delta = slice - ran;
68166816

68176817
if (delta < 0) {
6818-
if (task_current(rq, p))
6818+
if (task_current_donor(rq, p))
68196819
resched_curr(rq);
68206820
return;
68216821
}
@@ -6830,12 +6830,12 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
68306830
*/
68316831
static void hrtick_update(struct rq *rq)
68326832
{
6833-
struct task_struct *curr = rq->curr;
6833+
struct task_struct *donor = rq->donor;
68346834

6835-
if (!hrtick_enabled_fair(rq) || curr->sched_class != &fair_sched_class)
6835+
if (!hrtick_enabled_fair(rq) || donor->sched_class != &fair_sched_class)
68366836
return;
68376837

6838-
hrtick_start_fair(rq, curr);
6838+
hrtick_start_fair(rq, donor);
68396839
}
68406840
#else /* !CONFIG_SCHED_HRTICK */
68416841
static inline void
@@ -8750,9 +8750,9 @@ static void set_next_buddy(struct sched_entity *se)
87508750
*/
87518751
static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int wake_flags)
87528752
{
8753-
struct task_struct *curr = rq->curr;
8754-
struct sched_entity *se = &curr->se, *pse = &p->se;
8755-
struct cfs_rq *cfs_rq = task_cfs_rq(curr);
8753+
struct task_struct *donor = rq->donor;
8754+
struct sched_entity *se = &donor->se, *pse = &p->se;
8755+
struct cfs_rq *cfs_rq = task_cfs_rq(donor);
87568756
int cse_is_idle, pse_is_idle;
87578757

87588758
if (unlikely(se == pse))
@@ -8781,7 +8781,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
87818781
* prevents us from potentially nominating it as a false LAST_BUDDY
87828782
* below.
87838783
*/
8784-
if (test_tsk_need_resched(curr))
8784+
if (test_tsk_need_resched(rq->curr))
87858785
return;
87868786

87878787
if (!sched_feat(WAKEUP_PREEMPTION))
@@ -13080,7 +13080,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
1308013080
* our priority decreased, or if we are not currently running on
1308113081
* this runqueue and our priority is higher than the current's
1308213082
*/
13083-
if (task_current(rq, p)) {
13083+
if (task_current_donor(rq, p)) {
1308413084
if (p->prio > oldprio)
1308513085
resched_curr(rq);
1308613086
} else
@@ -13187,7 +13187,7 @@ static void switched_to_fair(struct rq *rq, struct task_struct *p)
1318713187
* kick off the schedule if running, otherwise just see
1318813188
* if we can still preempt the current task.
1318913189
*/
13190-
if (task_current(rq, p))
13190+
if (task_current_donor(rq, p))
1319113191
resched_curr(rq);
1319213192
else
1319313193
wakeup_preempt(rq, p, 0);

kernel/sched/pelt.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -476,7 +476,7 @@ int update_irq_load_avg(struct rq *rq, u64 running)
476476
bool update_other_load_avgs(struct rq *rq)
477477
{
478478
u64 now = rq_clock_pelt(rq);
479-
const struct sched_class *curr_class = rq->curr->sched_class;
479+
const struct sched_class *curr_class = rq->donor->sched_class;
480480
unsigned long hw_pressure = arch_scale_hw_pressure(cpu_of(rq));
481481

482482
lockdep_assert_rq_held(rq);

0 commit comments

Comments (0)