Skip to content

Commit eb82bac

Browse files
committed
powercap/drivers/dtpm: Scale the power with the load
Currently the power consumption is based on the current OPP power, assuming the entire performance domain is fully loaded. That gives a very rough power estimation, and we can do much better by using the load to scale the power consumption. Use the utilization to normalize and scale the power usage over the max possible power. Tested on a rock960 with 2 big CPUs, the power consumption estimation conforms to the expected one. Before this change: ~$ ~/dhrystone -t 1 -l 10000& ~$ cat /sys/devices/virtual/powercap/dtpm/dtpm:0/dtpm:0:1/constraint_0_max_power_uw 2260000 After this change: ~$ ~/dhrystone -t 1 -l 10000& ~$ cat /sys/devices/virtual/powercap/dtpm/dtpm:0/dtpm:0:1/constraint_0_max_power_uw 1130000 ~$ ~/dhrystone -t 2 -l 10000& ~$ cat /sys/devices/virtual/powercap/dtpm/dtpm:0/dtpm:0:1/constraint_0_max_power_uw 2260000 Signed-off-by: Daniel Lezcano <[email protected]> Reviewed-by: Lukasz Luba <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent d2cdc6a commit eb82bac

File tree

1 file changed

+39
-7
lines changed

1 file changed

+39
-7
lines changed

drivers/powercap/dtpm_cpu.c

Lines changed: 39 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -68,27 +68,59 @@ static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit)
6868
return power_limit;
6969
}
7070

71+
/*
 * scale_pd_power_uw - scale a power value by the load of a perf domain
 * @pd_mask: CPUs spanned by the performance domain
 * @power: power value to scale (the caller passes microwatts)
 *
 * Walks the online CPUs of @pd_mask, accumulating their utilization as
 * reported by sched_cpu_util(), and multiplies @power by the ratio
 * "summed utilization / CPU capacity". The ratio is computed in fixed
 * point with 10 fractional bits to stay in integer arithmetic.
 *
 * Return: the scaled power, or 0 when the capacity read is zero, which
 * is notably the case when no CPU of @pd_mask is online.
 */
static u64 scale_pd_power_uw(struct cpumask *pd_mask, u64 power)
{
	unsigned long capacity = 0;
	unsigned long util_total = 0;
	int cpu;

	/*
	 * All CPUs of a perf domain share the same capacity, so reading
	 * it once would be enough. It is nevertheless read on every
	 * iteration because a valid CPU number is only available inside
	 * the loop; the last value read is the one used below.
	 */
	for_each_cpu_and(cpu, pd_mask, cpu_online_mask) {
		capacity = arch_scale_cpu_capacity(cpu);
		util_total += sched_cpu_util(cpu, capacity);
	}

	/*
	 * With every CPU of the perf domain offline the loop body never
	 * runs and the capacity stays at zero: bail out rather than
	 * divide by zero.
	 */
	if (!capacity)
		return 0;

	return (power * ((util_total << 10) / capacity)) >> 10;
}
102+
71103
static u64 get_pd_power_uw(struct dtpm *dtpm)
72104
{
73105
struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
74106
struct em_perf_domain *pd;
75-
struct cpumask cpus;
107+
struct cpumask *pd_mask;
76108
unsigned long freq;
77-
int i, nr_cpus;
109+
int i;
78110

79111
pd = em_cpu_get(dtpm_cpu->cpu);
80-
freq = cpufreq_quick_get(dtpm_cpu->cpu);
81112

82-
cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus));
83-
nr_cpus = cpumask_weight(&cpus);
113+
pd_mask = em_span_cpus(pd);
114+
115+
freq = cpufreq_quick_get(dtpm_cpu->cpu);
84116

85117
for (i = 0; i < pd->nr_perf_states; i++) {
86118

87119
if (pd->table[i].frequency < freq)
88120
continue;
89121

90-
return pd->table[i].power *
91-
MICROWATT_PER_MILLIWATT * nr_cpus;
122+
return scale_pd_power_uw(pd_mask, pd->table[i].power *
123+
MICROWATT_PER_MILLIWATT);
92124
}
93125

94126
return 0;

0 commit comments

Comments
 (0)