Commit 1a61510

Authored by hnaz (Johannes Weiner), committed by Peter Zijlstra
sched: psi: pass enqueue/dequeue flags to psi callbacks directly
What psi needs to do on each enqueue and dequeue has gotten more subtle, and the generic sched code trying to distill this into a bool for the callbacks is awkward. Pass the flags directly and let psi parse them. For that to work, the #include "stats.h" (which has the psi callback implementations) needs to be below the flag definitions in "sched.h". Move that section further down, next to some of the other accounting stuff.

This also puts the ENQUEUE_SAVE/RESTORE branch behind the psi jump label, slightly reducing overhead when PSI=y but runtime disabled.

Suggested-by: Peter Zijlstra <[email protected]>
Signed-off-by: Johannes Weiner <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
1 parent: 23f1178

3 files changed: +53 -44 lines
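Before the per-file diffs, here is a minimal standalone sketch of the interface change the changelog describes. This is plain userspace C, not kernel code: the ENQUEUE_* values mirror the kernel's definitions, but the function bodies and main() are invented for illustration.

/* sketch.c -- illustrative only, not kernel code */
#include <stdbool.h>
#include <stdio.h>

#define ENQUEUE_RESTORE         0x02    /* mirror kernel flag values */
#define ENQUEUE_MIGRATED        0x40

/*
 * Old interface: the call site pre-digests the flag word into one
 * bool, so the callback cannot tell a RESTORE requeue from a plain
 * enqueue -- that distinction has to live at every call site.
 */
static void psi_enqueue_old(bool migrate)
{
        printf("old: migrate=%d\n", migrate);
}

/*
 * New interface: the callback receives the raw flags and parses
 * them itself, keeping the psi-specific subtlety in one place.
 */
static void psi_enqueue_new(int flags)
{
        if (flags & ENQUEUE_RESTORE)
                return;         /* same runqueue, nothing changed */
        printf("new: migrated=%d\n", !!(flags & ENQUEUE_MIGRATED));
}

int main(void)
{
        int flags = ENQUEUE_MIGRATED;

        psi_enqueue_old(flags & ENQUEUE_MIGRATED);      /* caller decides */
        psi_enqueue_new(flags);                         /* callee decides */
        return 0;
}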

kernel/sched/core.c

Lines changed: 6 additions & 6 deletions
@@ -2024,10 +2024,10 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
 	 */
 	uclamp_rq_inc(rq, p);
 
-	if (!(flags & ENQUEUE_RESTORE)) {
+	psi_enqueue(p, flags);
+
+	if (!(flags & ENQUEUE_RESTORE))
 		sched_info_enqueue(rq, p);
-		psi_enqueue(p, flags & ENQUEUE_MIGRATED);
-	}
 
 	if (sched_core_enabled(rq))
 		sched_core_enqueue(rq, p);
@@ -2044,10 +2044,10 @@ inline bool dequeue_task(struct rq *rq, struct task_struct *p, int flags)
 	if (!(flags & DEQUEUE_NOCLOCK))
 		update_rq_clock(rq);
 
-	if (!(flags & DEQUEUE_SAVE)) {
+	if (!(flags & DEQUEUE_SAVE))
 		sched_info_dequeue(rq, p);
-		psi_dequeue(p, !(flags & DEQUEUE_SLEEP));
-	}
+
+	psi_dequeue(p, flags);
 
 	/*
 	 * Must be before ->dequeue_task() because ->dequeue_task() can 'fail'

kernel/sched/sched.h

Lines changed: 28 additions & 28 deletions
@@ -2093,34 +2093,6 @@ static inline const struct cpumask *task_user_cpus(struct task_struct *p)
 
 #endif /* CONFIG_SMP */
 
-#include "stats.h"
-
-#if defined(CONFIG_SCHED_CORE) && defined(CONFIG_SCHEDSTATS)
-
-extern void __sched_core_account_forceidle(struct rq *rq);
-
-static inline void sched_core_account_forceidle(struct rq *rq)
-{
-	if (schedstat_enabled())
-		__sched_core_account_forceidle(rq);
-}
-
-extern void __sched_core_tick(struct rq *rq);
-
-static inline void sched_core_tick(struct rq *rq)
-{
-	if (sched_core_enabled(rq) && schedstat_enabled())
-		__sched_core_tick(rq);
-}
-
-#else /* !(CONFIG_SCHED_CORE && CONFIG_SCHEDSTATS): */
-
-static inline void sched_core_account_forceidle(struct rq *rq) { }
-
-static inline void sched_core_tick(struct rq *rq) { }
-
-#endif /* !(CONFIG_SCHED_CORE && CONFIG_SCHEDSTATS) */
-
 #ifdef CONFIG_CGROUP_SCHED
 
 /*
@@ -3191,6 +3163,34 @@ extern void nohz_run_idle_balance(int cpu);
 static inline void nohz_run_idle_balance(int cpu) { }
 #endif
 
+#include "stats.h"
+
+#if defined(CONFIG_SCHED_CORE) && defined(CONFIG_SCHEDSTATS)
+
+extern void __sched_core_account_forceidle(struct rq *rq);
+
+static inline void sched_core_account_forceidle(struct rq *rq)
+{
+	if (schedstat_enabled())
+		__sched_core_account_forceidle(rq);
+}
+
+extern void __sched_core_tick(struct rq *rq);
+
+static inline void sched_core_tick(struct rq *rq)
+{
+	if (sched_core_enabled(rq) && schedstat_enabled())
+		__sched_core_tick(rq);
+}
+
+#else /* !(CONFIG_SCHED_CORE && CONFIG_SCHEDSTATS): */
+
+static inline void sched_core_account_forceidle(struct rq *rq) { }
+
+static inline void sched_core_tick(struct rq *rq) { }
+
+#endif /* !(CONFIG_SCHED_CORE && CONFIG_SCHEDSTATS) */
+
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
 
 struct irqtime {
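The include move above exists because C compiles static inline bodies at the point of inclusion: the psi callbacks in stats.h now test the ENQUEUE_*/DEQUEUE_* macros, so sched.h must define those macros before pulling stats.h in. A toy single-file version of the same constraint, with hypothetical names that are not the kernel headers:

/* toy.c -- hypothetical illustration, not kernel code */

#define ENQUEUE_RESTORE 0x02    /* must be visible first... */

/*
 * ...because this body (think: psi_enqueue() in stats.h) is parsed
 * right here where it appears. Swap the two and the build fails
 * with an undeclared-identifier error at the point of use.
 */
static inline int is_restore(int flags)
{
        return flags & ENQUEUE_RESTORE;
}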

kernel/sched/stats.h

Lines changed: 19 additions & 10 deletions
@@ -127,21 +127,25 @@ static inline void psi_account_irqtime(struct rq *rq, struct task_struct *curr,
  * go through migration requeues. In this case, *sleeping* states need
  * to be transferred.
  */
-static inline void psi_enqueue(struct task_struct *p, bool migrate)
+static inline void psi_enqueue(struct task_struct *p, int flags)
 {
 	int clear = 0, set = 0;
 
 	if (static_branch_likely(&psi_disabled))
 		return;
 
+	/* Same runqueue, nothing changed for psi */
+	if (flags & ENQUEUE_RESTORE)
+		return;
+
 	if (p->se.sched_delayed) {
 		/* CPU migration of "sleeping" task */
-		SCHED_WARN_ON(!migrate);
+		SCHED_WARN_ON(!(flags & ENQUEUE_MIGRATED));
 		if (p->in_memstall)
 			set |= TSK_MEMSTALL;
 		if (p->in_iowait)
 			set |= TSK_IOWAIT;
-	} else if (migrate) {
+	} else if (flags & ENQUEUE_MIGRATED) {
 		/* CPU migration of runnable task */
 		set = TSK_RUNNING;
 		if (p->in_memstall)
@@ -158,24 +162,29 @@ static inline void psi_enqueue(struct task_struct *p, bool migrate)
 	psi_task_change(p, clear, set);
 }
 
-static inline void psi_dequeue(struct task_struct *p, bool migrate)
+static inline void psi_dequeue(struct task_struct *p, int flags)
 {
 	if (static_branch_likely(&psi_disabled))
 		return;
 
-	/*
-	 * When migrating a task to another CPU, clear all psi
-	 * state. The enqueue callback above will work it out.
-	 */
-	if (migrate)
-		psi_task_change(p, p->psi_flags, 0);
+	/* Same runqueue, nothing changed for psi */
+	if (flags & DEQUEUE_SAVE)
+		return;
 
 	/*
 	 * A voluntary sleep is a dequeue followed by a task switch. To
 	 * avoid walking all ancestors twice, psi_task_switch() handles
 	 * TSK_RUNNING and TSK_IOWAIT for us when it moves TSK_ONCPU.
 	 * Do nothing here.
	 */
+	if (flags & DEQUEUE_SLEEP)
+		return;
+
+	/*
+	 * When migrating a task to another CPU, clear all psi
+	 * state. The enqueue callback above will work it out.
+	 */
+	psi_task_change(p, p->psi_flags, 0);
 }
 
 static inline void psi_ttwu_dequeue(struct task_struct *p)
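Finally, a minimal sketch of the changelog's overhead point: because the RESTORE/SAVE tests now sit inside the psi callbacks, they land behind the early psi_disabled return. This is plain C with a global bool standing in for the kernel's static key; the real code uses static_branch_likely(), which patches the disabled path out of the instruction stream entirely.

#include <stdbool.h>

#define ENQUEUE_RESTORE 0x02            /* mirrors the kernel flag value */

static bool psi_disabled = true;        /* stand-in for the static key  */

static void psi_enqueue_sketch(int flags)
{
        /*
         * Checked first: when psi is runtime-disabled we bail before
         * ever testing ENQUEUE_RESTORE. In the kernel this check is a
         * jump label, so the disabled case costs almost nothing;
         * previously the RESTORE test lived at the call site, outside
         * this guard, and was paid on every enqueue.
         */
        if (psi_disabled)
                return;

        if (flags & ENQUEUE_RESTORE)
                return;                 /* same runqueue, nothing changed */

        /* ... account pressure-state changes here ... */
}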
