Skip to content

Commit 753e283

Browse files
committed
sched_ext: Unify regular and core-sched pick task paths
Because the BPF scheduler's dispatch path is invoked from balance(), sched_ext needs to invoke balance_one() on all sibling rq's before picking the next task for core-sched. Before the recent pick_next_task() updates, sched_ext couldn't share pick task between regular and core-sched paths because pick_next_task() depended on put_prev_task() being called on the current task. Tasks currently running on sibling rq's can't be put when one rq is trying to pick the next task, so pick_task_scx() had to have a separate mechanism to pick between a sibling rq's current task and the first task in its local DSQ. However, with the preceding updates, pick_next_task_scx() no longer depends on the current task being put and can compare the current task and the next in line statelessly, and the pick task logic should be shareable between regular and core-sched paths. Unify regular and core-sched pick task paths: - There's no reason to distinguish local and sibling picks anymore. @Local is removed from balance_one(). - pick_next_task_scx() is turned into pick_task_scx() by dropping the put_prev_set_next_task() call. - The old pick_task_scx() is dropped. Signed-off-by: Tejun Heo <[email protected]>
1 parent 8b1451f commit 753e283

File tree

1 file changed

+11
-67
lines changed

1 file changed

+11
-67
lines changed

kernel/sched/ext.c

Lines changed: 11 additions & 67 deletions
Original file line number | Diff line number | Diff line change
@@ -2500,7 +2500,7 @@ static void flush_dispatch_buf(struct rq *rq)
25002500
dspc->cursor = 0;
25012501
}
25022502

2503-
static int balance_one(struct rq *rq, struct task_struct *prev, bool local)
2503+
static int balance_one(struct rq *rq, struct task_struct *prev)
25042504
{
25052505
struct scx_dsp_ctx *dspc = this_cpu_ptr(scx_dsp_ctx);
25062506
bool prev_on_scx = prev->sched_class == &ext_sched_class;
@@ -2529,22 +2529,16 @@ static int balance_one(struct rq *rq, struct task_struct *prev, bool local)
25292529
/*
25302530
* If @prev is runnable & has slice left, it has priority and
25312531
* fetching more just increases latency for the fetched tasks.
2532-
* Tell pick_next_task_scx() to keep running @prev. If the BPF
2532+
* Tell pick_task_scx() to keep running @prev. If the BPF
25332533
* scheduler wants to handle this explicitly, it should
25342534
* implement ->cpu_release().
25352535
*
25362536
* See scx_ops_disable_workfn() for the explanation on the
25372537
* bypassing test.
2538-
*
2539-
* When balancing a remote CPU for core-sched, there won't be a
2540-
* following put_prev_task_scx() call and we don't own
2541-
* %SCX_RQ_BAL_KEEP. Instead, pick_task_scx() will test the same
2542-
* conditions later and pick @rq->curr accordingly.
25432538
*/
25442539
if ((prev->scx.flags & SCX_TASK_QUEUED) &&
25452540
prev->scx.slice && !scx_ops_bypassing()) {
2546-
if (local)
2547-
rq->scx.flags |= SCX_RQ_BAL_KEEP;
2541+
rq->scx.flags |= SCX_RQ_BAL_KEEP;
25482542
goto has_tasks;
25492543
}
25502544
}
@@ -2603,8 +2597,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev, bool local)
26032597
*/
26042598
if ((prev->scx.flags & SCX_TASK_QUEUED) &&
26052599
(!static_branch_unlikely(&scx_ops_enq_last) || scx_ops_bypassing())) {
2606-
if (local)
2607-
rq->scx.flags |= SCX_RQ_BAL_KEEP;
2600+
rq->scx.flags |= SCX_RQ_BAL_KEEP;
26082601
goto has_tasks;
26092602
}
26102603
rq->scx.flags &= ~SCX_RQ_IN_BALANCE;
@@ -2622,13 +2615,13 @@ static int balance_scx(struct rq *rq, struct task_struct *prev,
26222615

26232616
rq_unpin_lock(rq, rf);
26242617

2625-
ret = balance_one(rq, prev, true);
2618+
ret = balance_one(rq, prev);
26262619

26272620
#ifdef CONFIG_SCHED_SMT
26282621
/*
26292622
* When core-sched is enabled, this ops.balance() call will be followed
2630-
* by put_prev_scx() and pick_task_scx() on this CPU and pick_task_scx()
2631-
* on the SMT siblings. Balance the siblings too.
2623+
* by pick_task_scx() on this CPU and the SMT siblings. Balance the
2624+
* siblings too.
26322625
*/
26332626
if (sched_core_enabled(rq)) {
26342627
const struct cpumask *smt_mask = cpu_smt_mask(cpu_of(rq));
@@ -2640,7 +2633,7 @@ static int balance_scx(struct rq *rq, struct task_struct *prev,
26402633

26412634
WARN_ON_ONCE(__rq_lockp(rq) != __rq_lockp(srq));
26422635
update_rq_clock(srq);
2643-
balance_one(srq, sprev, false);
2636+
balance_one(srq, sprev);
26442637
}
26452638
}
26462639
#endif
@@ -2760,9 +2753,9 @@ static struct task_struct *first_local_task(struct rq *rq)
27602753
struct task_struct, scx.dsq_list.node);
27612754
}
27622755

2763-
static struct task_struct *pick_next_task_scx(struct rq *rq,
2764-
struct task_struct *prev)
2756+
static struct task_struct *pick_task_scx(struct rq *rq)
27652757
{
2758+
struct task_struct *prev = rq->curr;
27662759
struct task_struct *p;
27672760

27682761
/*
@@ -2790,8 +2783,6 @@ static struct task_struct *pick_next_task_scx(struct rq *rq,
27902783
}
27912784
}
27922785

2793-
put_prev_set_next_task(rq, prev, p);
2794-
27952786
return p;
27962787
}
27972788

@@ -2828,49 +2819,6 @@ bool scx_prio_less(const struct task_struct *a, const struct task_struct *b,
28282819
else
28292820
return time_after64(a->scx.core_sched_at, b->scx.core_sched_at);
28302821
}
2831-
2832-
/**
2833-
* pick_task_scx - Pick a candidate task for core-sched
2834-
* @rq: rq to pick the candidate task from
2835-
*
2836-
* Core-sched calls this function on each SMT sibling to determine the next
2837-
* tasks to run on the SMT siblings. balance_one() has been called on all
2838-
* siblings and put_prev_task_scx() has been called only for the current CPU.
2839-
*
2840-
* As put_prev_task_scx() hasn't been called on remote CPUs, we can't just look
2841-
* at the first task in the local dsq. @rq->curr has to be considered explicitly
2842-
* to mimic %SCX_RQ_BAL_KEEP.
2843-
*/
2844-
static struct task_struct *pick_task_scx(struct rq *rq)
2845-
{
2846-
struct task_struct *curr = rq->curr;
2847-
struct task_struct *first = first_local_task(rq);
2848-
2849-
if (curr->scx.flags & SCX_TASK_QUEUED) {
2850-
/* is curr the only runnable task? */
2851-
if (!first)
2852-
return curr;
2853-
2854-
/*
2855-
* Does curr trump first? We can always go by core_sched_at for
2856-
* this comparison as it represents global FIFO ordering when
2857-
* the default core-sched ordering is used and local-DSQ FIFO
2858-
* ordering otherwise.
2859-
*
2860-
* We can have a task with an earlier timestamp on the DSQ. For
2861-
* example, when a current task is preempted by a sibling
2862-
* picking a different cookie, the task would be requeued at the
2863-
* head of the local DSQ with an earlier timestamp than the
2864-
* core-sched picked next task. Besides, the BPF scheduler may
2865-
* dispatch any tasks to the local DSQ anytime.
2866-
*/
2867-
if (curr->scx.slice && time_before64(curr->scx.core_sched_at,
2868-
first->scx.core_sched_at))
2869-
return curr;
2870-
}
2871-
2872-
return first; /* this may be %NULL */
2873-
}
28742822
#endif /* CONFIG_SCHED_CORE */
28752823

28762824
static enum scx_cpu_preempt_reason
@@ -3638,7 +3586,7 @@ DEFINE_SCHED_CLASS(ext) = {
36383586
.wakeup_preempt = wakeup_preempt_scx,
36393587

36403588
.balance = balance_scx,
3641-
.pick_next_task = pick_next_task_scx,
3589+
.pick_task = pick_task_scx,
36423590

36433591
.put_prev_task = put_prev_task_scx,
36443592
.set_next_task = set_next_task_scx,
@@ -3654,10 +3602,6 @@ DEFINE_SCHED_CLASS(ext) = {
36543602
.rq_offline = rq_offline_scx,
36553603
#endif
36563604

3657-
#ifdef CONFIG_SCHED_CORE
3658-
.pick_task = pick_task_scx,
3659-
#endif
3660-
36613605
.task_tick = task_tick_scx,
36623606

36633607
.switching_to = switching_to_scx,

0 commit comments

Comments
 (0)