 #include <linux/acpi.h>
 #include <linux/arch_topology.h>
 #include <linux/cacheinfo.h>
+#include <linux/cpufreq.h>
 #include <linux/init.h>
 #include <linux/percpu.h>
 
@@ -120,4 +121,183 @@ int __init parse_acpi_topology(void)
 }
 #endif
 
+#ifdef CONFIG_ARM64_AMU_EXTN
 
+#undef pr_fmt
+#define pr_fmt(fmt) "AMU: " fmt
+
+static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale);
+static DEFINE_PER_CPU(u64, arch_const_cycles_prev);
+static DEFINE_PER_CPU(u64, arch_core_cycles_prev);
+static cpumask_var_t amu_fie_cpus;
+
+/* Initialize counter reference per-cpu variables for the current CPU */
+void init_cpu_freq_invariance_counters(void)
+{
+        this_cpu_write(arch_core_cycles_prev,
+                       read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0));
+        this_cpu_write(arch_const_cycles_prev,
+                       read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0));
+}
+
+static int validate_cpu_freq_invariance_counters(int cpu)
+{
+        u64 max_freq_hz, ratio;
+
+        if (!cpu_has_amu_feat(cpu)) {
+                pr_debug("CPU%d: counters are not supported.\n", cpu);
+                return -EINVAL;
+        }
+
+        if (unlikely(!per_cpu(arch_const_cycles_prev, cpu) ||
+                     !per_cpu(arch_core_cycles_prev, cpu))) {
+                pr_debug("CPU%d: cycle counters are not enabled.\n", cpu);
+                return -EINVAL;
+        }
+
+        /* Convert maximum frequency from KHz to Hz and validate */
+        max_freq_hz = cpufreq_get_hw_max_freq(cpu) * 1000;
+        if (unlikely(!max_freq_hz)) {
+                pr_debug("CPU%d: invalid maximum frequency.\n", cpu);
+                return -EINVAL;
+        }
+
+        /*
+         * Pre-compute the fixed ratio between the frequency of the constant
+         * counter and the maximum frequency of the CPU.
+         *
+         *                            const_freq
+         * arch_max_freq_scale =   ---------------- * SCHED_CAPACITY_SCALE²
+         *                         cpuinfo_max_freq
+         *
+         * We use a factor of 2 * SCHED_CAPACITY_SHIFT -> SCHED_CAPACITY_SCALE²
+         * in order to ensure a good resolution for arch_max_freq_scale for
+         * very low arch timer frequencies (down to the KHz range which should
+         * be unlikely).
+         */
+        ratio = (u64)arch_timer_get_rate() << (2 * SCHED_CAPACITY_SHIFT);
+        ratio = div64_u64(ratio, max_freq_hz);
+        if (!ratio) {
+                WARN_ONCE(1, "System timer frequency too low.\n");
+                return -EINVAL;
+        }
+
+        per_cpu(arch_max_freq_scale, cpu) = (unsigned long)ratio;
+
+        return 0;
+}
+
+static inline bool
+enable_policy_freq_counters(int cpu, cpumask_var_t valid_cpus)
+{
+        struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+
+        if (!policy) {
+                pr_debug("CPU%d: No cpufreq policy found.\n", cpu);
+                return false;
+        }
+
+        if (cpumask_subset(policy->related_cpus, valid_cpus))
+                cpumask_or(amu_fie_cpus, policy->related_cpus,
+                           amu_fie_cpus);
+
+        cpufreq_cpu_put(policy);
+
+        return true;
+}
+
+static DEFINE_STATIC_KEY_FALSE(amu_fie_key);
+#define amu_freq_invariant() static_branch_unlikely(&amu_fie_key)
+
+static int __init init_amu_fie(void)
+{
+        cpumask_var_t valid_cpus;
+        bool have_policy = false;
+        int ret = 0;
+        int cpu;
+
+        if (!zalloc_cpumask_var(&valid_cpus, GFP_KERNEL))
+                return -ENOMEM;
+
+        if (!zalloc_cpumask_var(&amu_fie_cpus, GFP_KERNEL)) {
+                ret = -ENOMEM;
+                goto free_valid_mask;
+        }
+
+        for_each_present_cpu(cpu) {
+                if (validate_cpu_freq_invariance_counters(cpu))
+                        continue;
+                cpumask_set_cpu(cpu, valid_cpus);
+                have_policy |= enable_policy_freq_counters(cpu, valid_cpus);
+        }
+
+        /*
+         * If we are not restricted by cpufreq policies, we only enable
+         * the use of the AMU feature for FIE if all CPUs support AMU.
+         * Otherwise, enable_policy_freq_counters has already enabled
+         * policy cpus.
+         */
+        if (!have_policy && cpumask_equal(valid_cpus, cpu_present_mask))
+                cpumask_or(amu_fie_cpus, amu_fie_cpus, valid_cpus);
+
+        if (!cpumask_empty(amu_fie_cpus)) {
+                pr_info("CPUs[%*pbl]: counters will be used for FIE.",
+                        cpumask_pr_args(amu_fie_cpus));
+                static_branch_enable(&amu_fie_key);
+        }
+
+free_valid_mask:
+        free_cpumask_var(valid_cpus);
+
+        return ret;
+}
+late_initcall_sync(init_amu_fie);
+
+bool arch_freq_counters_available(struct cpumask *cpus)
+{
+        return amu_freq_invariant() &&
+               cpumask_subset(cpus, amu_fie_cpus);
+}
+
+void topology_scale_freq_tick(void)
+{
+        u64 prev_core_cnt, prev_const_cnt;
+        u64 core_cnt, const_cnt, scale;
+        int cpu = smp_processor_id();
+
+        if (!amu_freq_invariant())
+                return;
+
+        if (!cpumask_test_cpu(cpu, amu_fie_cpus))
+                return;
+
+        const_cnt = read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0);
+        core_cnt = read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0);
+        prev_const_cnt = this_cpu_read(arch_const_cycles_prev);
+        prev_core_cnt = this_cpu_read(arch_core_cycles_prev);
+
+        if (unlikely(core_cnt <= prev_core_cnt ||
+                     const_cnt <= prev_const_cnt))
+                goto store_and_exit;
+
+        /*
+         *          /\core    arch_max_freq_scale
+         * scale =  ------- * --------------------
+         *          /\const   SCHED_CAPACITY_SCALE
+         *
+         * See validate_cpu_freq_invariance_counters() for details on
+         * arch_max_freq_scale and the use of SCHED_CAPACITY_SHIFT.
+         */
+        scale = core_cnt - prev_core_cnt;
+        scale *= this_cpu_read(arch_max_freq_scale);
+        scale = div64_u64(scale >> SCHED_CAPACITY_SHIFT,
+                          const_cnt - prev_const_cnt);
+
+        scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE);
+        this_cpu_write(freq_scale, (unsigned long)scale);
+
+store_and_exit:
+        this_cpu_write(arch_core_cycles_prev, core_cnt);
+        this_cpu_write(arch_const_cycles_prev, const_cnt);
+}
+#endif /* CONFIG_ARM64_AMU_EXTN */
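
The two ASCII-art formulas in the hunk above describe a single fixed-point computation: validate_cpu_freq_invariance_counters() precomputes const_freq / cpuinfo_max_freq scaled by SCHED_CAPACITY_SCALE², and topology_scale_freq_tick() multiplies that ratio by the per-tick ratio of core to constant cycle deltas, dropping one SCHED_CAPACITY_SHIFT so the result lands in [0, SCHED_CAPACITY_SCALE]. The sketch below walks through that arithmetic in plain user-space C with made-up example figures (a 100 MHz constant counter, a 2 GHz cpuinfo_max_freq, and a 4 ms tick during which the core ran at 1 GHz); it only mirrors the math in the patch and replaces the kernel helpers (arch_timer_get_rate(), div64_u64(), the per-CPU accessors) with ordinary operations.

/*
 * User-space sketch of the fixed-point math used by the patch above.
 * All frequencies and counter deltas are made-up example values.
 */
#include <stdint.h>
#include <stdio.h>

#define SCHED_CAPACITY_SHIFT    10
#define SCHED_CAPACITY_SCALE    (1ULL << SCHED_CAPACITY_SHIFT)

int main(void)
{
        /* Assumed platform: 100 MHz constant counter, 2 GHz cpuinfo_max_freq. */
        uint64_t const_freq_hz = 100000000ULL;
        uint64_t max_freq_hz   = 2000000000ULL;

        /* Boot-time step: const_freq / cpuinfo_max_freq * SCHED_CAPACITY_SCALE^2 */
        uint64_t arch_max_freq_scale =
                (const_freq_hz << (2 * SCHED_CAPACITY_SHIFT)) / max_freq_hz;

        /*
         * Tick-time step: over a hypothetical 4 ms tick the constant counter
         * advances by 400000 cycles (100 MHz) while the core counter advances
         * by 4000000 cycles, i.e. the core ran at 1 GHz, half of its maximum.
         */
        uint64_t delta_const = 400000ULL;
        uint64_t delta_core  = 4000000ULL;

        uint64_t scale = delta_core * arch_max_freq_scale;
        scale = (scale >> SCHED_CAPACITY_SHIFT) / delta_const;
        if (scale > SCHED_CAPACITY_SCALE)
                scale = SCHED_CAPACITY_SCALE;

        printf("arch_max_freq_scale = %llu\n",
               (unsigned long long)arch_max_freq_scale);  /* 52428 */
        printf("freq_scale = %llu\n",
               (unsigned long long)scale);                /* 511, ~half capacity */
        return 0;
}

With these numbers the precomputed ratio is 52428 and the per-tick result is 511, roughly SCHED_CAPACITY_SCALE / 2, matching a core that spent the tick at half of its maximum frequency; the min_t() clamp in the patch caps the stored freq_scale at SCHED_CAPACITY_SCALE when the core momentarily runs above cpuinfo_max_freq.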