Skip to content

Commit 3653469

Browse files
committed
Merge tag 'sched_urgent_for_v6.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler fixes from Borislav Petkov:

 - Fix a performance regression when measuring the CPU time of a thread (clock_gettime(CLOCK_THREAD_CPUTIME_ID, ...)), caused by the addition of PSI IRQ time accounting in the hotpath

 - Fix a task_struct reference leak caused by failing to decrement the refcount when a task is enqueued before the timer that is supposed to drop the reference expires

 - Revert an attempt to expedite detaching of movable tasks, as finding those could become very costly. It turns out the original issue was never actually hit by anyone

* tag 'sched_urgent_for_v6.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched: Move psi_account_irqtime() out of update_rq_clock_task() hotpath
  sched/deadline: Fix task_struct reference leak
  Revert "sched/fair: Make sure to try to detach at least one movable task"
2 parents 35ce463 + ddae0ca commit 3653469

File tree

6 files changed

+39
-20
lines changed

6 files changed

+39
-20
lines changed

kernel/sched/core.c

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -723,7 +723,6 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
723723

724724
rq->prev_irq_time += irq_delta;
725725
delta -= irq_delta;
726-
psi_account_irqtime(rq->curr, irq_delta);
727726
delayacct_irq(rq->curr, irq_delta);
728727
#endif
729728
#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
@@ -5665,7 +5664,7 @@ void sched_tick(void)
56655664
{
56665665
int cpu = smp_processor_id();
56675666
struct rq *rq = cpu_rq(cpu);
5668-
struct task_struct *curr = rq->curr;
5667+
struct task_struct *curr;
56695668
struct rq_flags rf;
56705669
unsigned long hw_pressure;
56715670
u64 resched_latency;
@@ -5677,6 +5676,9 @@ void sched_tick(void)
56775676

56785677
rq_lock(rq, &rf);
56795678

5679+
curr = rq->curr;
5680+
psi_account_irqtime(rq, curr, NULL);
5681+
56805682
update_rq_clock(rq);
56815683
hw_pressure = arch_scale_hw_pressure(cpu_of(rq));
56825684
update_hw_load_avg(rq_clock_task(rq), rq, hw_pressure);
@@ -6737,6 +6739,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
67376739
++*switch_count;
67386740

67396741
migrate_disable_switch(rq, prev);
6742+
psi_account_irqtime(rq, prev, next);
67406743
psi_sched_switch(prev, next, !task_on_rq_queued(prev));
67416744

67426745
trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next, prev_state);

kernel/sched/deadline.c

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1804,8 +1804,13 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
18041804
* The replenish timer needs to be canceled. No
18051805
* problem if it fires concurrently: boosted threads
18061806
* are ignored in dl_task_timer().
1807+
*
1808+
* If the timer callback was running (hrtimer_try_to_cancel == -1),
1809+
* it will eventually call put_task_struct().
18071810
*/
1808-
hrtimer_try_to_cancel(&p->dl.dl_timer);
1811+
if (hrtimer_try_to_cancel(&p->dl.dl_timer) == 1 &&
1812+
!dl_server(&p->dl))
1813+
put_task_struct(p);
18091814
p->dl.dl_throttled = 0;
18101815
}
18111816
} else if (!dl_prio(p->normal_prio)) {

kernel/sched/fair.c

Lines changed: 3 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -9149,12 +9149,8 @@ static int detach_tasks(struct lb_env *env)
91499149
break;
91509150

91519151
env->loop++;
9152-
/*
9153-
* We've more or less seen every task there is, call it quits
9154-
* unless we haven't found any movable task yet.
9155-
*/
9156-
if (env->loop > env->loop_max &&
9157-
!(env->flags & LBF_ALL_PINNED))
9152+
/* We've more or less seen every task there is, call it quits */
9153+
if (env->loop > env->loop_max)
91589154
break;
91599155

91609156
/* take a breather every nr_migrate tasks */
@@ -11393,9 +11389,7 @@ static int sched_balance_rq(int this_cpu, struct rq *this_rq,
1139311389

1139411390
if (env.flags & LBF_NEED_BREAK) {
1139511391
env.flags &= ~LBF_NEED_BREAK;
11396-
/* Stop if we tried all running tasks */
11397-
if (env.loop < busiest->nr_running)
11398-
goto more_balance;
11392+
goto more_balance;
1139911393
}
1140011394

1140111395
/*

kernel/sched/psi.c

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -773,6 +773,7 @@ static void psi_group_change(struct psi_group *group, int cpu,
773773
enum psi_states s;
774774
u32 state_mask;
775775

776+
lockdep_assert_rq_held(cpu_rq(cpu));
776777
groupc = per_cpu_ptr(group->pcpu, cpu);
777778

778779
/*
@@ -991,22 +992,32 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
991992
}
992993

993994
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
994-
void psi_account_irqtime(struct task_struct *task, u32 delta)
995+
void psi_account_irqtime(struct rq *rq, struct task_struct *curr, struct task_struct *prev)
995996
{
996-
int cpu = task_cpu(task);
997+
int cpu = task_cpu(curr);
997998
struct psi_group *group;
998999
struct psi_group_cpu *groupc;
999-
u64 now;
1000+
u64 now, irq;
1001+
s64 delta;
10001002

10011003
if (static_branch_likely(&psi_disabled))
10021004
return;
10031005

1004-
if (!task->pid)
1006+
if (!curr->pid)
1007+
return;
1008+
1009+
lockdep_assert_rq_held(rq);
1010+
group = task_psi_group(curr);
1011+
if (prev && task_psi_group(prev) == group)
10051012
return;
10061013

10071014
now = cpu_clock(cpu);
1015+
irq = irq_time_read(cpu);
1016+
delta = (s64)(irq - rq->psi_irq_time);
1017+
if (delta < 0)
1018+
return;
1019+
rq->psi_irq_time = irq;
10081020

1009-
group = task_psi_group(task);
10101021
do {
10111022
if (!group->enabled)
10121023
continue;

kernel/sched/sched.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1126,6 +1126,7 @@ struct rq {
11261126

11271127
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
11281128
u64 prev_irq_time;
1129+
u64 psi_irq_time;
11291130
#endif
11301131
#ifdef CONFIG_PARAVIRT
11311132
u64 prev_steal_time;

kernel/sched/stats.h

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -110,8 +110,12 @@ __schedstats_from_se(struct sched_entity *se)
110110
void psi_task_change(struct task_struct *task, int clear, int set);
111111
void psi_task_switch(struct task_struct *prev, struct task_struct *next,
112112
bool sleep);
113-
void psi_account_irqtime(struct task_struct *task, u32 delta);
114-
113+
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
114+
void psi_account_irqtime(struct rq *rq, struct task_struct *curr, struct task_struct *prev);
115+
#else
116+
static inline void psi_account_irqtime(struct rq *rq, struct task_struct *curr,
117+
struct task_struct *prev) {}
118+
#endif /*CONFIG_IRQ_TIME_ACCOUNTING */
115119
/*
116120
* PSI tracks state that persists across sleeps, such as iowaits and
117121
* memory stalls. As a result, it has to distinguish between sleeps,
@@ -192,7 +196,8 @@ static inline void psi_ttwu_dequeue(struct task_struct *p) {}
192196
static inline void psi_sched_switch(struct task_struct *prev,
193197
struct task_struct *next,
194198
bool sleep) {}
195-
static inline void psi_account_irqtime(struct task_struct *task, u32 delta) {}
199+
static inline void psi_account_irqtime(struct rq *rq, struct task_struct *curr,
200+
struct task_struct *prev) {}
196201
#endif /* CONFIG_PSI */
197202

198203
#ifdef CONFIG_SCHED_INFO

0 commit comments

Comments (0)