Skip to content

Commit 68d7a19

Browse files
deggeman authored and Peter Zijlstra committed
sched/fair: Fix util_est UTIL_AVG_UNCHANGED handling
The util_est internal UTIL_AVG_UNCHANGED flag which is used to prevent unnecessary util_est updates uses the LSB of util_est.enqueued. It is exposed via _task_util_est() (and task_util_est()). Commit 92a801e ("sched/fair: Mask UTIL_AVG_UNCHANGED usages") mentions that the LSB is lost for util_est resolution but find_energy_efficient_cpu() checks if task_util_est() returns 0 to return prev_cpu early. _task_util_est() returns the max value of util_est.ewma and util_est.enqueued OR'ed with UTIL_AVG_UNCHANGED. So task_util_est() returning the max of task_util() and _task_util_est() will never return 0 under the default SCHED_FEAT(UTIL_EST, true). To fix this use the MSB of util_est.enqueued instead and keep the flag util_est internal, i.e. don't export it via _task_util_est(). The maximal possible util_avg value for a task is 1024, so the MSB of 'unsigned int util_est.enqueued' isn't used to store a util value. As a caveat, the code behind the util_est_se trace point has to filter out UTIL_AVG_UNCHANGED to see the real util_est.enqueued value, which should be easy to do. This also fixes an issue reported by Xuewen Yan that util_est_update() only used UTIL_AVG_UNCHANGED for the subtrahend of the equation: last_enqueued_diff = ue.enqueued - (task_util() | UTIL_AVG_UNCHANGED) Fixes: b89997a ("sched/pelt: Fix task util_est update filtering") Signed-off-by: Dietmar Eggemann <[email protected]> Signed-off-by: Peter Zijlstra (Intel) <[email protected]> Reviewed-by: Xuewen Yan <[email protected]> Reviewed-by: Vincent Donnefort <[email protected]> Reviewed-by: Vincent Guittot <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent fcf6631 commit 68d7a19

File tree

4 files changed

+14
-13
lines changed

4 files changed

+14
-13
lines changed

include/linux/sched.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,11 +350,19 @@ struct load_weight {
350350
* Only for tasks we track a moving average of the past instantaneous
351351
* estimated utilization. This allows to absorb sporadic drops in utilization
352352
* of an otherwise almost periodic task.
353+
*
354+
* The UTIL_AVG_UNCHANGED flag is used to synchronize util_est with util_avg
355+
* updates. When a task is dequeued, its util_est should not be updated if its
356+
* util_avg has not been updated in the meantime.
357+
* This information is mapped into the MSB bit of util_est.enqueued at dequeue
358+
* time. Since max value of util_est.enqueued for a task is 1024 (PELT util_avg
359+
* for a task) it is safe to use MSB.
353360
*/
354361
struct util_est {
355362
unsigned int enqueued;
356363
unsigned int ewma;
357364
#define UTIL_EST_WEIGHT_SHIFT 2
365+
#define UTIL_AVG_UNCHANGED 0x80000000
358366
} __attribute__((__aligned__(sizeof(u64))));
359367

360368
/*

kernel/sched/debug.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -885,6 +885,7 @@ static const struct seq_operations sched_debug_sops = {
885885
#define __PS(S, F) SEQ_printf(m, "%-45s:%21Ld\n", S, (long long)(F))
886886
#define __P(F) __PS(#F, F)
887887
#define P(F) __PS(#F, p->F)
888+
#define PM(F, M) __PS(#F, p->F & (M))
888889
#define __PSN(S, F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", S, SPLIT_NS((long long)(F)))
889890
#define __PN(F) __PSN(#F, F)
890891
#define PN(F) __PSN(#F, p->F)
@@ -1011,7 +1012,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
10111012
P(se.avg.util_avg);
10121013
P(se.avg.last_update_time);
10131014
P(se.avg.util_est.ewma);
1014-
P(se.avg.util_est.enqueued);
1015+
PM(se.avg.util_est.enqueued, ~UTIL_AVG_UNCHANGED);
10151016
#endif
10161017
#ifdef CONFIG_UCLAMP_TASK
10171018
__PS("uclamp.min", p->uclamp_req[UCLAMP_MIN].value);

kernel/sched/fair.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3907,7 +3907,7 @@ static inline unsigned long _task_util_est(struct task_struct *p)
39073907
{
39083908
struct util_est ue = READ_ONCE(p->se.avg.util_est);
39093909

3910-
return (max(ue.ewma, ue.enqueued) | UTIL_AVG_UNCHANGED);
3910+
return max(ue.ewma, (ue.enqueued & ~UTIL_AVG_UNCHANGED));
39113911
}
39123912

39133913
static inline unsigned long task_util_est(struct task_struct *p)
@@ -4007,7 +4007,7 @@ static inline void util_est_update(struct cfs_rq *cfs_rq,
40074007
* Reset EWMA on utilization increases, the moving average is used only
40084008
* to smooth utilization decreases.
40094009
*/
4010-
ue.enqueued = (task_util(p) | UTIL_AVG_UNCHANGED);
4010+
ue.enqueued = task_util(p);
40114011
if (sched_feat(UTIL_EST_FASTUP)) {
40124012
if (ue.ewma < ue.enqueued) {
40134013
ue.ewma = ue.enqueued;
@@ -4056,6 +4056,7 @@ static inline void util_est_update(struct cfs_rq *cfs_rq,
40564056
ue.ewma += last_ewma_diff;
40574057
ue.ewma >>= UTIL_EST_WEIGHT_SHIFT;
40584058
done:
4059+
ue.enqueued |= UTIL_AVG_UNCHANGED;
40594060
WRITE_ONCE(p->se.avg.util_est, ue);
40604061

40614062
trace_sched_util_est_se_tp(&p->se);

kernel/sched/pelt.h

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -42,23 +42,14 @@ static inline u32 get_pelt_divider(struct sched_avg *avg)
4242
return LOAD_AVG_MAX - 1024 + avg->period_contrib;
4343
}
4444

45-
/*
46-
* When a task is dequeued, its estimated utilization should not be update if
47-
* its util_avg has not been updated at least once.
48-
* This flag is used to synchronize util_avg updates with util_est updates.
49-
* We map this information into the LSB bit of the utilization saved at
50-
* dequeue time (i.e. util_est.dequeued).
51-
*/
52-
#define UTIL_AVG_UNCHANGED 0x1
53-
5445
static inline void cfs_se_util_change(struct sched_avg *avg)
5546
{
5647
unsigned int enqueued;
5748

5849
if (!sched_feat(UTIL_EST))
5950
return;
6051

61-
/* Avoid store if the flag has been already set */
52+
/* Avoid store if the flag has been already reset */
6253
enqueued = avg->util_est.enqueued;
6354
if (!(enqueued & UTIL_AVG_UNCHANGED))
6455
return;

0 commit comments

Comments
 (0)