Skip to content

Commit 4e3c7d3

Browse files
deggeman authored and Peter Zijlstra committed
sched/fair: Refactor cpu_util_without()
Except the 'task has no contribution or is new' condition at the beginning of cpu_util_without(), which it shares with the load and runnable counterpart functions, a cpu_util_next(..., dst_cpu = -1) call can replace the rest of it. The UTIL_EST specific check that task util_est has to be subtracted from the CPU one in case of an enqueued (or current (to cater for the wakeup - lb race)) task has to be moved to cpu_util_next(). This was initially introduced by commit c469933 ("sched/fair: Fix cpu_util_wake() for 'execl' type workloads"). UnixBench's `execl` throughput tests were run on the dual socket 40 CPUs Intel E5-2690 v2 to make sure it doesn't regress again. Signed-off-by: Dietmar Eggemann <[email protected]> Signed-off-by: Peter Zijlstra (Intel) <[email protected]> Reviewed-by: Vincent Guittot <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent a658353 commit 4e3c7d3

File tree

1 file changed

+57
-100
lines changed

1 file changed

+57
-100
lines changed

kernel/sched/fair.c

Lines changed: 57 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -6544,132 +6544,89 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
65446544
}
65456545

65466546
/*
6547-
* cpu_util_without: compute cpu utilization without any contributions from *p
6548-
* @cpu: the CPU which utilization is requested
6549-
* @p: the task which utilization should be discounted
6550-
*
6551-
* The utilization of a CPU is defined by the utilization of tasks currently
6552-
* enqueued on that CPU as well as tasks which are currently sleeping after an
6553-
* execution on that CPU.
6554-
*
6555-
* This method returns the utilization of the specified CPU by discounting the
6556-
* utilization of the specified task, whenever the task is currently
6557-
* contributing to the CPU utilization.
6558-
*/
6559-
static unsigned long cpu_util_without(int cpu, struct task_struct *p)
6560-
{
6561-
struct cfs_rq *cfs_rq;
6562-
unsigned int util;
6563-
6564-
/* Task has no contribution or is new */
6565-
if (cpu != task_cpu(p) || !READ_ONCE(p->se.avg.last_update_time))
6566-
return cpu_util_cfs(cpu);
6567-
6568-
cfs_rq = &cpu_rq(cpu)->cfs;
6569-
util = READ_ONCE(cfs_rq->avg.util_avg);
6570-
6571-
/* Discount task's util from CPU's util */
6572-
lsub_positive(&util, task_util(p));
6573-
6574-
/*
6575-
* Covered cases:
6576-
*
6577-
* a) if *p is the only task sleeping on this CPU, then:
6578-
* cpu_util (== task_util) > util_est (== 0)
6579-
* and thus we return:
6580-
* cpu_util_without = (cpu_util - task_util) = 0
6581-
*
6582-
* b) if other tasks are SLEEPING on this CPU, which is now exiting
6583-
* IDLE, then:
6584-
* cpu_util >= task_util
6585-
* cpu_util > util_est (== 0)
6586-
* and thus we discount *p's blocked utilization to return:
6587-
* cpu_util_without = (cpu_util - task_util) >= 0
6588-
*
6589-
* c) if other tasks are RUNNABLE on that CPU and
6590-
* util_est > cpu_util
6591-
* then we use util_est since it returns a more restrictive
6592-
* estimation of the spare capacity on that CPU, by just
6593-
* considering the expected utilization of tasks already
6594-
* runnable on that CPU.
6595-
*
6596-
* Cases a) and b) are covered by the above code, while case c) is
6597-
* covered by the following code when estimated utilization is
6598-
* enabled.
6599-
*/
6600-
if (sched_feat(UTIL_EST)) {
6601-
unsigned int estimated =
6602-
READ_ONCE(cfs_rq->avg.util_est.enqueued);
6603-
6604-
/*
6605-
* Despite the following checks we still have a small window
6606-
* for a possible race, when an execl's select_task_rq_fair()
6607-
* races with LB's detach_task():
6608-
*
6609-
* detach_task()
6610-
* p->on_rq = TASK_ON_RQ_MIGRATING;
6611-
* ---------------------------------- A
6612-
* deactivate_task() \
6613-
* dequeue_task() + RaceTime
6614-
* util_est_dequeue() /
6615-
* ---------------------------------- B
6616-
*
6617-
* The additional check on "current == p" it's required to
6618-
* properly fix the execl regression and it helps in further
6619-
* reducing the chances for the above race.
6620-
*/
6621-
if (unlikely(task_on_rq_queued(p) || current == p))
6622-
lsub_positive(&estimated, _task_util_est(p));
6623-
6624-
util = max(util, estimated);
6625-
}
6626-
6627-
/*
6628-
* Utilization (estimated) can exceed the CPU capacity, thus let's
6629-
* clamp to the maximum CPU capacity to ensure consistency with
6630-
* cpu_util.
6631-
*/
6632-
return min_t(unsigned long, util, capacity_orig_of(cpu));
6633-
}
6634-
6635-
/*
6636-
* Predicts what cpu_util(@cpu) would return if @p was migrated (and enqueued)
6637-
* to @dst_cpu.
6547+
* Predicts what cpu_util(@cpu) would return if @p was removed from @cpu
6548+
* (@dst_cpu = -1) or migrated to @dst_cpu.
66386549
*/
66396550
static unsigned long cpu_util_next(int cpu, struct task_struct *p, int dst_cpu)
66406551
{
66416552
struct cfs_rq *cfs_rq = &cpu_rq(cpu)->cfs;
6642-
unsigned long util_est, util = READ_ONCE(cfs_rq->avg.util_avg);
6553+
unsigned long util = READ_ONCE(cfs_rq->avg.util_avg);
66436554

66446555
/*
6645-
* If @p migrates from @cpu to another, remove its contribution. Or,
6646-
* if @p migrates from another CPU to @cpu, add its contribution. In
6647-
* the other cases, @cpu is not impacted by the migration, so the
6648-
* util_avg should already be correct.
6556+
* If @dst_cpu is -1 or @p migrates from @cpu to @dst_cpu remove its
6557+
* contribution. If @p migrates from another CPU to @cpu add its
6558+
* contribution. In all the other cases @cpu is not impacted by the
6559+
* migration so its util_avg is already correct.
66496560
*/
66506561
if (task_cpu(p) == cpu && dst_cpu != cpu)
66516562
lsub_positive(&util, task_util(p));
66526563
else if (task_cpu(p) != cpu && dst_cpu == cpu)
66536564
util += task_util(p);
66546565

66556566
if (sched_feat(UTIL_EST)) {
6567+
unsigned long util_est;
6568+
66566569
util_est = READ_ONCE(cfs_rq->avg.util_est.enqueued);
66576570

66586571
/*
6659-
* During wake-up, the task isn't enqueued yet and doesn't
6660-
* appear in the cfs_rq->avg.util_est.enqueued of any rq,
6661-
* so just add it (if needed) to "simulate" what will be
6662-
* cpu_util after the task has been enqueued.
6572+
* During wake-up @p isn't enqueued yet and doesn't contribute
6573+
* to any cpu_rq(cpu)->cfs.avg.util_est.enqueued.
6574+
* If @dst_cpu == @cpu add it to "simulate" cpu_util after @p
6575+
* has been enqueued.
6576+
*
6577+
* During exec (@dst_cpu = -1) @p is enqueued and does
6578+
* contribute to cpu_rq(cpu)->cfs.util_est.enqueued.
6579+
* Remove it to "simulate" cpu_util without @p's contribution.
6580+
*
6581+
* Despite the task_on_rq_queued(@p) check there is still a
6582+
* small window for a possible race when an exec
6583+
* select_task_rq_fair() races with LB's detach_task().
6584+
*
6585+
* detach_task()
6586+
* deactivate_task()
6587+
* p->on_rq = TASK_ON_RQ_MIGRATING;
6588+
* -------------------------------- A
6589+
* dequeue_task() \
6590+
* dequeue_task_fair() + Race Time
6591+
* util_est_dequeue() /
6592+
* -------------------------------- B
6593+
*
6594+
* The additional check "current == p" is required to further
6595+
* reduce the race window.
66636596
*/
66646597
if (dst_cpu == cpu)
66656598
util_est += _task_util_est(p);
6599+
else if (unlikely(task_on_rq_queued(p) || current == p))
6600+
lsub_positive(&util_est, _task_util_est(p));
66666601

66676602
util = max(util, util_est);
66686603
}
66696604

66706605
return min(util, capacity_orig_of(cpu));
66716606
}
66726607

6608+
/*
6609+
* cpu_util_without: compute cpu utilization without any contributions from *p
6610+
* @cpu: the CPU which utilization is requested
6611+
* @p: the task which utilization should be discounted
6612+
*
6613+
* The utilization of a CPU is defined by the utilization of tasks currently
6614+
* enqueued on that CPU as well as tasks which are currently sleeping after an
6615+
* execution on that CPU.
6616+
*
6617+
* This method returns the utilization of the specified CPU by discounting the
6618+
* utilization of the specified task, whenever the task is currently
6619+
* contributing to the CPU utilization.
6620+
*/
6621+
static unsigned long cpu_util_without(int cpu, struct task_struct *p)
6622+
{
6623+
/* Task has no contribution or is new */
6624+
if (cpu != task_cpu(p) || !READ_ONCE(p->se.avg.last_update_time))
6625+
return cpu_util_cfs(cpu);
6626+
6627+
return cpu_util_next(cpu, p, -1);
6628+
}
6629+
66736630
/*
66746631
* compute_energy(): Estimates the energy that @pd would consume if @p was
66756632
* migrated to @dst_cpu. compute_energy() predicts what will be the utilization

0 commit comments

Comments
 (0)