Skip to content

Commit 71dbdde

Browse files
hnaz (Johannes Weiner) authored and Peter Zijlstra committed
sched/psi: Remove NR_ONCPU task accounting
We put all fields updated by the scheduler in the first cacheline of struct psi_group_cpu for performance. Since we want add another PSI_IRQ_FULL to track IRQ/SOFTIRQ pressure, we need to reclaim space first. This patch remove NR_ONCPU task accounting in struct psi_group_cpu, use one bit in state_mask to track instead. Signed-off-by: Johannes Weiner <[email protected]> Signed-off-by: Chengming Zhou <[email protected]> Signed-off-by: Peter Zijlstra (Intel) <[email protected]> Reviewed-by: Chengming Zhou <[email protected]> Tested-by: Chengming Zhou <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent 65176f5 commit 71dbdde

File tree

2 files changed

+37
-20
lines changed

2 files changed

+37
-20
lines changed

include/linux/psi_types.h

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,6 @@ enum psi_task_count {
1515
NR_IOWAIT,
1616
NR_MEMSTALL,
1717
NR_RUNNING,
18-
/*
19-
* This can't have values other than 0 or 1 and could be
20-
* implemented as a bit flag. But for now we still have room
21-
* in the first cacheline of psi_group_cpu, and this way we
22-
* don't have to special case any state tracking for it.
23-
*/
24-
NR_ONCPU,
2518
/*
2619
* For IO and CPU stalls the presence of running/oncpu tasks
2720
* in the domain means a partial rather than a full stall.
@@ -32,16 +25,18 @@ enum psi_task_count {
3225
* threads and memstall ones.
3326
*/
3427
NR_MEMSTALL_RUNNING,
35-
NR_PSI_TASK_COUNTS = 5,
28+
NR_PSI_TASK_COUNTS = 4,
3629
};
3730

3831
/* Task state bitmasks */
3932
#define TSK_IOWAIT (1 << NR_IOWAIT)
4033
#define TSK_MEMSTALL (1 << NR_MEMSTALL)
4134
#define TSK_RUNNING (1 << NR_RUNNING)
42-
#define TSK_ONCPU (1 << NR_ONCPU)
4335
#define TSK_MEMSTALL_RUNNING (1 << NR_MEMSTALL_RUNNING)
4436

37+
/* Only one task can be scheduled, no corresponding task count */
38+
#define TSK_ONCPU (1 << NR_PSI_TASK_COUNTS)
39+
4540
/* Resources that workloads could be stalled on */
4641
enum psi_res {
4742
PSI_IO,
@@ -68,6 +63,9 @@ enum psi_states {
6863
NR_PSI_STATES = 7,
6964
};
7065

66+
/* Use one bit in the state mask to track TSK_ONCPU */
67+
#define PSI_ONCPU (1 << NR_PSI_STATES)
68+
7169
enum psi_aggregators {
7270
PSI_AVGS = 0,
7371
PSI_POLL,

kernel/sched/psi.c

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ void __init psi_init(void)
212212
group_init(&psi_system);
213213
}
214214

215-
static bool test_state(unsigned int *tasks, enum psi_states state)
215+
static bool test_state(unsigned int *tasks, enum psi_states state, bool oncpu)
216216
{
217217
switch (state) {
218218
case PSI_IO_SOME:
@@ -225,9 +225,9 @@ static bool test_state(unsigned int *tasks, enum psi_states state)
225225
return unlikely(tasks[NR_MEMSTALL] &&
226226
tasks[NR_RUNNING] == tasks[NR_MEMSTALL_RUNNING]);
227227
case PSI_CPU_SOME:
228-
return unlikely(tasks[NR_RUNNING] > tasks[NR_ONCPU]);
228+
return unlikely(tasks[NR_RUNNING] > oncpu);
229229
case PSI_CPU_FULL:
230-
return unlikely(tasks[NR_RUNNING] && !tasks[NR_ONCPU]);
230+
return unlikely(tasks[NR_RUNNING] && !oncpu);
231231
case PSI_NONIDLE:
232232
return tasks[NR_IOWAIT] || tasks[NR_MEMSTALL] ||
233233
tasks[NR_RUNNING];
@@ -689,9 +689,9 @@ static void psi_group_change(struct psi_group *group, int cpu,
689689
bool wake_clock)
690690
{
691691
struct psi_group_cpu *groupc;
692-
u32 state_mask = 0;
693692
unsigned int t, m;
694693
enum psi_states s;
694+
u32 state_mask;
695695

696696
groupc = per_cpu_ptr(group->pcpu, cpu);
697697

@@ -707,17 +707,36 @@ static void psi_group_change(struct psi_group *group, int cpu,
707707

708708
record_times(groupc, now);
709709

710+
/*
711+
* Start with TSK_ONCPU, which doesn't have a corresponding
712+
* task count - it's just a boolean flag directly encoded in
713+
* the state mask. Clear, set, or carry the current state if
714+
* no changes are requested.
715+
*/
716+
if (unlikely(clear & TSK_ONCPU)) {
717+
state_mask = 0;
718+
clear &= ~TSK_ONCPU;
719+
} else if (unlikely(set & TSK_ONCPU)) {
720+
state_mask = PSI_ONCPU;
721+
set &= ~TSK_ONCPU;
722+
} else {
723+
state_mask = groupc->state_mask & PSI_ONCPU;
724+
}
725+
726+
/*
727+
* The rest of the state mask is calculated based on the task
728+
* counts. Update those first, then construct the mask.
729+
*/
710730
for (t = 0, m = clear; m; m &= ~(1 << t), t++) {
711731
if (!(m & (1 << t)))
712732
continue;
713733
if (groupc->tasks[t]) {
714734
groupc->tasks[t]--;
715735
} else if (!psi_bug) {
716-
printk_deferred(KERN_ERR "psi: task underflow! cpu=%d t=%d tasks=[%u %u %u %u %u] clear=%x set=%x\n",
736+
printk_deferred(KERN_ERR "psi: task underflow! cpu=%d t=%d tasks=[%u %u %u %u] clear=%x set=%x\n",
717737
cpu, t, groupc->tasks[0],
718738
groupc->tasks[1], groupc->tasks[2],
719-
groupc->tasks[3], groupc->tasks[4],
720-
clear, set);
739+
groupc->tasks[3], clear, set);
721740
psi_bug = 1;
722741
}
723742
}
@@ -726,9 +745,8 @@ static void psi_group_change(struct psi_group *group, int cpu,
726745
if (set & (1 << t))
727746
groupc->tasks[t]++;
728747

729-
/* Calculate state mask representing active states */
730748
for (s = 0; s < NR_PSI_STATES; s++) {
731-
if (test_state(groupc->tasks, s))
749+
if (test_state(groupc->tasks, s, state_mask & PSI_ONCPU))
732750
state_mask |= (1 << s);
733751
}
734752

@@ -740,7 +758,7 @@ static void psi_group_change(struct psi_group *group, int cpu,
740758
* task in a cgroup is in_memstall, the corresponding groupc
741759
* on that cpu is in PSI_MEM_FULL state.
742760
*/
743-
if (unlikely(groupc->tasks[NR_ONCPU] && cpu_curr(cpu)->in_memstall))
761+
if (unlikely((state_mask & PSI_ONCPU) && cpu_curr(cpu)->in_memstall))
744762
state_mask |= (1 << PSI_MEM_FULL);
745763

746764
groupc->state_mask = state_mask;
@@ -828,7 +846,8 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
828846
*/
829847
iter = NULL;
830848
while ((group = iterate_groups(next, &iter))) {
831-
if (per_cpu_ptr(group->pcpu, cpu)->tasks[NR_ONCPU]) {
849+
if (per_cpu_ptr(group->pcpu, cpu)->state_mask &
850+
PSI_ONCPU) {
832851
common = group;
833852
break;
834853
}

0 commit comments

Comments
 (0)