Commit 2652df3

Merge branch 'pm-cpufreq'
Additional cpufreq updates for 4.18-rc1: fixes and cleanups in the core
and drivers, and an intel_pstate extension to do iowait boosting on
systems with HWP, which improves performance quite a bit.

* pm-cpufreq:
  cpufreq: imx6q: check speed grades for i.MX6ULL
  cpufreq: governors: Fix long idle detection logic in load calculation
  cpufreq: intel_pstate: enable boost for Skylake Xeon
  cpufreq: intel_pstate: New sysfs entry to control HWP boost
  cpufreq: intel_pstate: HWP boost performance on IO wakeup
  cpufreq: intel_pstate: Add HWP boost utility and sched util hooks
  cpufreq: ti-cpufreq: Use devres managed API in probe()
  cpufreq: ti-cpufreq: Fix an incorrect error return value
  cpufreq: ACPI: make function acpi_cpufreq_fast_switch() static
  cpufreq: kryo: allow building as a loadable module
2 parents: b06c0b2 + 0aa9abd
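The user-visible piece of this set is the new hwp_dynamic_boost attribute that
the intel_pstate.c diff below adds under the intel_pstate kobject. A minimal
userspace sketch of toggling it, assuming the standard
/sys/devices/system/cpu/intel_pstate sysfs location; the program itself is
illustrative, not part of the commit:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        const char *path =
                "/sys/devices/system/cpu/intel_pstate/hwp_dynamic_boost";
        char buf[4] = { 0 };
        int fd = open(path, O_RDWR);

        if (fd < 0) {
                perror(path);   /* pre-4.18 kernel, or HWP not active */
                return 1;
        }
        if (write(fd, "1", 1) != 1)     /* enable dynamic boost */
                perror("write");
        if (pread(fd, buf, sizeof(buf) - 1, 0) > 0)
                printf("hwp_dynamic_boost = %s", buf);
        close(fd);
        return 0;
}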

File tree: 6 files changed, +209 / -24 lines changed

drivers/cpufreq/Kconfig.arm
Lines changed: 1 addition & 1 deletion

@@ -125,7 +125,7 @@ config ARM_OMAP2PLUS_CPUFREQ
         default ARCH_OMAP2PLUS
 
 config ARM_QCOM_CPUFREQ_KRYO
-        bool "Qualcomm Kryo based CPUFreq"
+        tristate "Qualcomm Kryo based CPUFreq"
         depends on ARM64
         depends on QCOM_QFPROM
         depends on QCOM_SMEM

drivers/cpufreq/acpi-cpufreq.c
Lines changed: 2 additions & 2 deletions

@@ -465,8 +465,8 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
         return result;
 }
 
-unsigned int acpi_cpufreq_fast_switch(struct cpufreq_policy *policy,
-                                      unsigned int target_freq)
+static unsigned int acpi_cpufreq_fast_switch(struct cpufreq_policy *policy,
+                                             unsigned int target_freq)
 {
         struct acpi_cpufreq_data *data = policy->driver_data;
         struct acpi_processor_performance *perf;

drivers/cpufreq/cpufreq_governor.c
Lines changed: 5 additions & 7 deletions

@@ -165,7 +165,7 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
                         * calls, so the previous load value can be used then.
                         */
                        load = j_cdbs->prev_load;
-               } else if (unlikely(time_elapsed > 2 * sampling_rate &&
+               } else if (unlikely((int)idle_time > 2 * sampling_rate &&
                                    j_cdbs->prev_load)) {
                        /*
                         * If the CPU had gone completely idle and a task has
@@ -185,10 +185,8 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
                         * clear prev_load to guarantee that the load will be
                         * computed again next time.
                         *
-                        * Detecting this situation is easy: the governor's
-                        * utilization update handler would not have run during
-                        * CPU-idle periods. Hence, an unusually large
-                        * 'time_elapsed' (as compared to the sampling rate)
+                        * Detecting this situation is easy: an unusually large
+                        * 'idle_time' (as compared to the sampling rate)
                         * indicates this scenario.
                         */
                        load = j_cdbs->prev_load;
@@ -217,8 +215,8 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
                        j_cdbs->prev_load = load;
                }
 
-               if (time_elapsed > 2 * sampling_rate) {
-                       unsigned int periods = time_elapsed / sampling_rate;
+               if (unlikely((int)idle_time > 2 * sampling_rate)) {
+                       unsigned int periods = idle_time / sampling_rate;
 
                        if (periods < idle_periods)
                                idle_periods = periods;
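The point of this change: long idle is now detected from idle_time rather than
time_elapsed, and the cast to (int) keeps a wrapped (negative) idle_time from
registering as a long idle stretch. A standalone sketch of that check; the
10 ms sampling period and the helper name are assumptions for illustration,
dbs_update() itself works on per-CPU accounting:

#include <stdio.h>

#define SAMPLING_RATE   10000   /* assumed sampling period, microseconds */

/* Hypothetical helper mirroring the (int)idle_time comparison above. */
static unsigned int idle_periods(int idle_time)
{
        /* A wrapped (negative) idle_time must not count as long idle. */
        if (idle_time > 2 * SAMPLING_RATE)
                return (unsigned int)idle_time / SAMPLING_RATE;
        return 0;
}

int main(void)
{
        printf("%u\n", idle_periods(35000));    /* long idle: 3 periods */
        printf("%u\n", idle_periods(-5000));    /* wrapped delta: 0 */
        return 0;
}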

drivers/cpufreq/imx6q-cpufreq.c
Lines changed: 23 additions & 6 deletions

@@ -266,6 +266,8 @@ static void imx6q_opp_check_speed_grading(struct device *dev)
 }
 
 #define OCOTP_CFG3_6UL_SPEED_696MHZ     0x2
+#define OCOTP_CFG3_6ULL_SPEED_792MHZ    0x2
+#define OCOTP_CFG3_6ULL_SPEED_900MHZ    0x3
 
 static void imx6ul_opp_check_speed_grading(struct device *dev)
 {
@@ -287,16 +289,30 @@ static void imx6ul_opp_check_speed_grading(struct device *dev)
         * Speed GRADING[1:0] defines the max speed of ARM:
         * 2b'00: Reserved;
         * 2b'01: 528000000Hz;
-        * 2b'10: 696000000Hz;
-        * 2b'11: Reserved;
+        * 2b'10: 696000000Hz on i.MX6UL, 792000000Hz on i.MX6ULL;
+        * 2b'11: 900000000Hz on i.MX6ULL only;
         * We need to set the max speed of ARM according to fuse map.
         */
        val = readl_relaxed(base + OCOTP_CFG3);
        val >>= OCOTP_CFG3_SPEED_SHIFT;
        val &= 0x3;
-       if (val != OCOTP_CFG3_6UL_SPEED_696MHZ)
-               if (dev_pm_opp_disable(dev, 696000000))
-                       dev_warn(dev, "failed to disable 696MHz OPP\n");
+
+       if (of_machine_is_compatible("fsl,imx6ul")) {
+               if (val != OCOTP_CFG3_6UL_SPEED_696MHZ)
+                       if (dev_pm_opp_disable(dev, 696000000))
+                               dev_warn(dev, "failed to disable 696MHz OPP\n");
+       }
+
+       if (of_machine_is_compatible("fsl,imx6ull")) {
+               if (val != OCOTP_CFG3_6ULL_SPEED_792MHZ)
+                       if (dev_pm_opp_disable(dev, 792000000))
+                               dev_warn(dev, "failed to disable 792MHz OPP\n");
+
+               if (val != OCOTP_CFG3_6ULL_SPEED_900MHZ)
+                       if (dev_pm_opp_disable(dev, 900000000))
+                               dev_warn(dev, "failed to disable 900MHz OPP\n");
+       }
+
        iounmap(base);
 put_node:
        of_node_put(np);
@@ -356,7 +372,8 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev)
                goto put_reg;
        }
 
-       if (of_machine_is_compatible("fsl,imx6ul"))
+       if (of_machine_is_compatible("fsl,imx6ul") ||
+           of_machine_is_compatible("fsl,imx6ull"))
                imx6ul_opp_check_speed_grading(cpu_dev);
        else
                imx6q_opp_check_speed_grading(cpu_dev);
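For reference, the fuse decode described by the comment in the hunk above, as
a standalone sketch. The shift mirrors the driver's OCOTP_CFG3_SPEED_SHIFT;
the helper and its zero-means-reserved return convention are hypothetical:

#include <stdbool.h>
#include <stdint.h>

#define OCOTP_CFG3_SPEED_SHIFT          16      /* assumed, as in the driver */
#define OCOTP_CFG3_6UL_SPEED_696MHZ     0x2
#define OCOTP_CFG3_6ULL_SPEED_900MHZ    0x3

/* Hypothetical helper: map GRADING[1:0] to the maximum ARM clock. */
static uint32_t grading_to_max_hz(uint32_t cfg3, bool is_imx6ull)
{
        switch ((cfg3 >> OCOTP_CFG3_SPEED_SHIFT) & 0x3) {
        case 0x1:
                return 528000000;
        case OCOTP_CFG3_6UL_SPEED_696MHZ:       /* 792 MHz grade on 6ULL */
                return is_imx6ull ? 792000000 : 696000000;
        case OCOTP_CFG3_6ULL_SPEED_900MHZ:
                return is_imx6ull ? 900000000 : 0;
        default:
                return 0;                       /* reserved */
        }
}

A part fused as 2b'10 thus keeps the 696 MHz OPP on i.MX6UL but the 792 MHz
OPP on i.MX6ULL, which is exactly why the probe path now dispatches both
machines to imx6ul_opp_check_speed_grading().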

drivers/cpufreq/intel_pstate.c
Lines changed: 176 additions & 3 deletions

@@ -221,6 +221,11 @@ struct global_params {
  *                     preference/bias
  * @epp_saved:         Saved EPP/EPB during system suspend or CPU offline
  *                     operation
+ * @hwp_req_cached:    Cached value of the last HWP Request MSR
+ * @hwp_cap_cached:    Cached value of the last HWP Capabilities MSR
+ * @last_io_update:    Last time when IO wake flag was set
+ * @sched_flags:       Store scheduler flags for possible cross CPU update
+ * @hwp_boost_min:     Last HWP boosted min performance
  *
  * This structure stores per CPU instance data for all CPUs.
  */
@@ -253,6 +258,11 @@ struct cpudata {
        s16 epp_policy;
        s16 epp_default;
        s16 epp_saved;
+       u64 hwp_req_cached;
+       u64 hwp_cap_cached;
+       u64 last_io_update;
+       unsigned int sched_flags;
+       u32 hwp_boost_min;
 };
 
 static struct cpudata **all_cpu_data;
@@ -285,6 +295,7 @@ static struct pstate_funcs pstate_funcs __read_mostly;
 
 static int hwp_active __read_mostly;
 static bool per_cpu_limits __read_mostly;
+static bool hwp_boost __read_mostly;
 
 static struct cpufreq_driver *intel_pstate_driver __read_mostly;
 
@@ -689,6 +700,7 @@ static void intel_pstate_get_hwp_max(unsigned int cpu, int *phy_max,
        u64 cap;
 
        rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
+       WRITE_ONCE(all_cpu_data[cpu]->hwp_cap_cached, cap);
        if (global.no_turbo)
                *current_max = HWP_GUARANTEED_PERF(cap);
        else
@@ -763,6 +775,7 @@ static void intel_pstate_hwp_set(unsigned int cpu)
                intel_pstate_set_epb(cpu, epp);
        }
 skip_epp:
+       WRITE_ONCE(cpu_data->hwp_req_cached, value);
        wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
 }
 
@@ -1020,6 +1033,30 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
        return count;
 }
 
+static ssize_t show_hwp_dynamic_boost(struct kobject *kobj,
+                               struct attribute *attr, char *buf)
+{
+       return sprintf(buf, "%u\n", hwp_boost);
+}
+
+static ssize_t store_hwp_dynamic_boost(struct kobject *a, struct attribute *b,
+                                      const char *buf, size_t count)
+{
+       unsigned int input;
+       int ret;
+
+       ret = kstrtouint(buf, 10, &input);
+       if (ret)
+               return ret;
+
+       mutex_lock(&intel_pstate_driver_lock);
+       hwp_boost = !!input;
+       intel_pstate_update_policies();
+       mutex_unlock(&intel_pstate_driver_lock);
+
+       return count;
+}
+
 show_one(max_perf_pct, max_perf_pct);
 show_one(min_perf_pct, min_perf_pct);
 
@@ -1029,6 +1066,7 @@ define_one_global_rw(max_perf_pct);
 define_one_global_rw(min_perf_pct);
 define_one_global_ro(turbo_pct);
 define_one_global_ro(num_pstates);
+define_one_global_rw(hwp_dynamic_boost);
 
 static struct attribute *intel_pstate_attributes[] = {
        &status.attr,
@@ -1069,6 +1107,11 @@ static void __init intel_pstate_sysfs_expose_params(void)
        rc = sysfs_create_file(intel_pstate_kobject, &min_perf_pct.attr);
        WARN_ON(rc);
 
+       if (hwp_active) {
+               rc = sysfs_create_file(intel_pstate_kobject,
+                                      &hwp_dynamic_boost.attr);
+               WARN_ON(rc);
+       }
 }
 /************************** sysfs end ************************/
 
@@ -1381,6 +1424,116 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
        intel_pstate_set_min_pstate(cpu);
 }
 
+/*
+ * A long hold time keeps the high perf limits in place for a long time,
+ * which negatively impacts perf/watt for some workloads, like specpower.
+ * 3ms is based on experiments on some workloads.
+ */
+static int hwp_boost_hold_time_ns = 3 * NSEC_PER_MSEC;
+
+static inline void intel_pstate_hwp_boost_up(struct cpudata *cpu)
+{
+       u64 hwp_req = READ_ONCE(cpu->hwp_req_cached);
+       u32 max_limit = (hwp_req & 0xff00) >> 8;
+       u32 min_limit = (hwp_req & 0xff);
+       u32 boost_level1;
+
+       /*
+        * Cases to consider (user changes via sysfs or boot time):
+        * If, P0 (Turbo max) = P1 (Guaranteed max) = min:
+        *      No boost, return.
+        * If, P0 (Turbo max) > P1 (Guaranteed max) = min:
+        *      Should result in one level boost only for P0.
+        * If, P0 (Turbo max) = P1 (Guaranteed max) > min:
+        *      Should result in two level boost:
+        *      (min + p1)/2 and P1.
+        * If, P0 (Turbo max) > P1 (Guaranteed max) > min:
+        *      Should result in three level boost:
+        *      (min + p1)/2, P1 and P0.
+        */
+
+       /* If max and min are equal or already at max, nothing to boost */
+       if (max_limit == min_limit || cpu->hwp_boost_min >= max_limit)
+               return;
+
+       if (!cpu->hwp_boost_min)
+               cpu->hwp_boost_min = min_limit;
+
+       /* level at the halfway mark between min and guaranteed */
+       boost_level1 = (HWP_GUARANTEED_PERF(cpu->hwp_cap_cached) + min_limit) >> 1;
+
+       if (cpu->hwp_boost_min < boost_level1)
+               cpu->hwp_boost_min = boost_level1;
+       else if (cpu->hwp_boost_min < HWP_GUARANTEED_PERF(cpu->hwp_cap_cached))
+               cpu->hwp_boost_min = HWP_GUARANTEED_PERF(cpu->hwp_cap_cached);
+       else if (cpu->hwp_boost_min == HWP_GUARANTEED_PERF(cpu->hwp_cap_cached) &&
+                max_limit != HWP_GUARANTEED_PERF(cpu->hwp_cap_cached))
+               cpu->hwp_boost_min = max_limit;
+       else
+               return;
+
+       hwp_req = (hwp_req & ~GENMASK_ULL(7, 0)) | cpu->hwp_boost_min;
+       wrmsrl(MSR_HWP_REQUEST, hwp_req);
+       cpu->last_update = cpu->sample.time;
+}
+
+static inline void intel_pstate_hwp_boost_down(struct cpudata *cpu)
+{
+       if (cpu->hwp_boost_min) {
+               bool expired;
+
+               /* Check if we are idle for hold time to boost down */
+               expired = time_after64(cpu->sample.time, cpu->last_update +
+                                      hwp_boost_hold_time_ns);
+               if (expired) {
+                       wrmsrl(MSR_HWP_REQUEST, cpu->hwp_req_cached);
+                       cpu->hwp_boost_min = 0;
+               }
+       }
+       cpu->last_update = cpu->sample.time;
+}
+
+static inline void intel_pstate_update_util_hwp_local(struct cpudata *cpu,
+                                                     u64 time)
+{
+       cpu->sample.time = time;
+
+       if (cpu->sched_flags & SCHED_CPUFREQ_IOWAIT) {
+               bool do_io = false;
+
+               cpu->sched_flags = 0;
+               /*
+                * Set the iowait_boost flag and update the time. Since the
+                * IO WAIT flag is set all the time, we can't just conclude
+                * from one occurrence that some IO-bound activity is
+                * scheduled on this CPU. If we receive at least two in two
+                * consecutive ticks, then we treat it as a boost candidate.
+                */
+               if (time_before64(time, cpu->last_io_update + 2 * TICK_NSEC))
+                       do_io = true;
+
+               cpu->last_io_update = time;
+
+               if (do_io)
+                       intel_pstate_hwp_boost_up(cpu);
+
+       } else {
+               intel_pstate_hwp_boost_down(cpu);
+       }
+}
+
+static inline void intel_pstate_update_util_hwp(struct update_util_data *data,
+                                               u64 time, unsigned int flags)
+{
+       struct cpudata *cpu = container_of(data, struct cpudata, update_util);
+
+       cpu->sched_flags |= flags;
+
+       if (smp_processor_id() == cpu->cpu)
+               intel_pstate_update_util_hwp_local(cpu, time);
+}
+
 static inline void intel_pstate_calc_avg_perf(struct cpudata *cpu)
 {
        struct sample *sample = &cpu->sample;
@@ -1641,6 +1794,12 @@ static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = {
        {}
 };
 
+static const struct x86_cpu_id intel_pstate_hwp_boost_ids[] = {
+       ICPU(INTEL_FAM6_SKYLAKE_X, core_funcs),
+       ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, core_funcs),
+       {}
+};
+
 static int intel_pstate_init_cpu(unsigned int cpunum)
 {
        struct cpudata *cpu;
@@ -1671,6 +1830,10 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
                        intel_pstate_disable_ee(cpunum);
 
                intel_pstate_hwp_enable(cpu);
+
+               id = x86_match_cpu(intel_pstate_hwp_boost_ids);
+               if (id)
+                       hwp_boost = true;
        }
 
        intel_pstate_get_cpu_pstates(cpu);
@@ -1684,7 +1847,7 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
 {
        struct cpudata *cpu = all_cpu_data[cpu_num];
 
-       if (hwp_active)
+       if (hwp_active && !hwp_boost)
                return;
 
        if (cpu->update_util_set)
@@ -1693,7 +1856,9 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
        /* Prevent intel_pstate_update_util() from using stale data. */
        cpu->sample.time = 0;
        cpufreq_add_update_util_hook(cpu_num, &cpu->update_util,
-                                    intel_pstate_update_util);
+                                    (hwp_active ?
+                                     intel_pstate_update_util_hwp :
+                                     intel_pstate_update_util));
        cpu->update_util_set = true;
 }
 
@@ -1805,8 +1970,16 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
                intel_pstate_set_update_util_hook(policy->cpu);
        }
 
-       if (hwp_active)
+       if (hwp_active) {
+               /*
+                * If hwp_boost was active before and it has been dynamically
+                * turned off, we need to clear the update util hook.
+                */
+               if (!hwp_boost)
+                       intel_pstate_clear_update_util_hook(policy->cpu);
                intel_pstate_hwp_set(policy->cpu);
+       }
 
        mutex_unlock(&intel_pstate_limits_lock);
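intel_pstate_hwp_boost_up() above walks the HWP request floor up a short
ladder on consecutive IO wakeups: halfway between min and guaranteed, then P1
(guaranteed), then P0 (turbo max). A standalone model of that ladder; the
performance ratio values are made up for illustration, the kernel operates on
the cached MSR fields instead:

#include <stdio.h>

/* Hypothetical helper: one boost step, mirroring the ladder above. */
static unsigned int next_boost(unsigned int cur, unsigned int min,
                               unsigned int guaranteed, unsigned int max)
{
        unsigned int level1 = (guaranteed + min) / 2;

        if (min == max || cur >= max)
                return cur;             /* nothing to boost */
        if (cur == 0)
                cur = min;
        if (cur < level1)
                return level1;          /* halfway between min and P1 */
        if (cur < guaranteed)
                return guaranteed;      /* P1 */
        if (cur == guaranteed && max != guaranteed)
                return max;             /* P0 */
        return cur;
}

int main(void)
{
        unsigned int cur = 0;

        for (int i = 0; i < 3; i++) {
                cur = next_boost(cur, 8, 24, 36);
                printf("boost step %d: floor=%u\n", i + 1, cur);
        }
        return 0;
}

With min=8, P1=24 and P0=36 the printed floors are 16, 24 and 36, matching the
three-level case enumerated in the function's comment block.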
