Skip to content

Commit aa4f74d

Browse files
johnstultz-work authored and Peter Zijlstra committed
sched: Fix runtime accounting w/ split exec & sched contexts
Without proxy-exec, we normally charge the "current" task for both its vruntime and its sum_exec_runtime.

With proxy-exec, however, we have two "current" contexts: the scheduler context and the execution context. We want to charge the execution context rq->curr (i.e. the proxy/lock holder) execution time to its sum_exec_runtime (so it's clear to userland that the rq->curr task *is* running), as well as to its thread group.

However, the rest of the time accounting (such as vruntime and cgroup accounting) we charge against the scheduler context (rq->donor) task, because it is from that task that the time is being "donated".

If the donor and curr tasks are the same, then the behavior is the same as without proxy-exec.

Signed-off-by: John Stultz <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Tested-by: K Prateek Nayak <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
1 parent 865d8cf commit aa4f74d

File tree

1 file changed

+28
-14
lines changed

1 file changed

+28
-14
lines changed

kernel/sched/fair.c

Lines changed: 28 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1152,30 +1152,40 @@ void post_init_entity_util_avg(struct task_struct *p)
11521152
sa->runnable_avg = sa->util_avg;
11531153
}
11541154

1155-
static s64 update_curr_se(struct rq *rq, struct sched_entity *curr)
1155+
static s64 update_se(struct rq *rq, struct sched_entity *se)
11561156
{
11571157
u64 now = rq_clock_task(rq);
11581158
s64 delta_exec;
11591159

1160-
delta_exec = now - curr->exec_start;
1160+
delta_exec = now - se->exec_start;
11611161
if (unlikely(delta_exec <= 0))
11621162
return delta_exec;
11631163

1164-
curr->exec_start = now;
1165-
curr->sum_exec_runtime += delta_exec;
1164+
se->exec_start = now;
1165+
if (entity_is_task(se)) {
1166+
struct task_struct *donor = task_of(se);
1167+
struct task_struct *running = rq->curr;
1168+
/*
1169+
* If se is a task, we account the time against the running
1170+
* task, as w/ proxy-exec they may not be the same.
1171+
*/
1172+
running->se.exec_start = now;
1173+
running->se.sum_exec_runtime += delta_exec;
11661174

1167-
if (entity_is_task(curr)) {
1168-
struct task_struct *p = task_of(curr);
1175+
trace_sched_stat_runtime(running, delta_exec);
1176+
account_group_exec_runtime(running, delta_exec);
11691177

1170-
trace_sched_stat_runtime(p, delta_exec);
1171-
account_group_exec_runtime(p, delta_exec);
1172-
cgroup_account_cputime(p, delta_exec);
1178+
/* cgroup time is always accounted against the donor */
1179+
cgroup_account_cputime(donor, delta_exec);
1180+
} else {
1181+
/* If not task, account the time against donor se */
1182+
se->sum_exec_runtime += delta_exec;
11731183
}
11741184

11751185
if (schedstat_enabled()) {
11761186
struct sched_statistics *stats;
11771187

1178-
stats = __schedstats_from_se(curr);
1188+
stats = __schedstats_from_se(se);
11791189
__schedstat_set(stats->exec_max,
11801190
max(delta_exec, stats->exec_max));
11811191
}
@@ -1188,16 +1198,20 @@ static s64 update_curr_se(struct rq *rq, struct sched_entity *curr)
11881198
*/
11891199
s64 update_curr_common(struct rq *rq)
11901200
{
1191-
struct task_struct *donor = rq->donor;
1192-
1193-
return update_curr_se(rq, &donor->se);
1201+
return update_se(rq, &rq->donor->se);
11941202
}
11951203

11961204
/*
11971205
* Update the current task's runtime statistics.
11981206
*/
11991207
static void update_curr(struct cfs_rq *cfs_rq)
12001208
{
1209+
/*
1210+
* Note: cfs_rq->curr corresponds to the task picked to
1211+
* run (ie: rq->donor.se) which due to proxy-exec may
1212+
* not necessarily be the actual task running
1213+
* (rq->curr.se). This is easy to confuse!
1214+
*/
12011215
struct sched_entity *curr = cfs_rq->curr;
12021216
struct rq *rq = rq_of(cfs_rq);
12031217
s64 delta_exec;
@@ -1206,7 +1220,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
12061220
if (unlikely(!curr))
12071221
return;
12081222

1209-
delta_exec = update_curr_se(rq, curr);
1223+
delta_exec = update_se(rq, curr);
12101224
if (unlikely(delta_exec <= 0))
12111225
return;
12121226

0 commit comments

Comments
 (0)