Skip to content

Commit e0efd5b

Browse files
spandruvada authored and rafaeljw committed
cpufreq: intel_pstate: Add HWP boost utility and sched util hooks
Add two utility functions to boost HWP up gradually and to boost it back down to the default cached HWP request values. Boost up: Boost up updates the HWP request minimum value in steps. This minimum value can reach up to the HWP request maximum value, depending on how frequently the boost up function is called. At most, boost up will take three steps to reach the maximum, depending on the current HWP request levels and HWP capabilities. For example, if the current settings are: If P0 (Turbo max) = P1 (Guaranteed max) = min: No boost at all. If P0 (Turbo max) > P1 (Guaranteed max) = min: Should result in one level boost only for P0. If P0 (Turbo max) = P1 (Guaranteed max) > min: Should result in two level boost: (min + p1)/2 and P1. If P0 (Turbo max) > P1 (Guaranteed max) > min: Should result in three level boost: (min + p1)/2, P1 and P0. We don't set any level between P0 and P1 as there is no guarantee that they will be honored. Boost down: After the system is idle for the hold time of 3ms, the HWP request is reset to the default value from HWP init or to the user-modified one set via sysfs. Caching of HWP Request and Capabilities: Store the HWP request value last set using MSR_HWP_REQUEST and the value read from MSR_HWP_CAPABILITIES. This avoids reading MSRs in the boost utility functions. The limits calculated by these boost utility functions are based on the latest HWP request value, which can be modified by the setpolicy() callback. So if user space modifies the minimum perf value, that will be accounted for every time the boost up function is called. There can be cases of contention with the user-modified minimum perf; in that case the user value takes precedence. For example, just before the HWP_REQUEST MSR is updated from the setpolicy() callback, the boost up function is called via the scheduler tick callback. Here the cached MSR value is already the latest and the limits are updated based on the latest user limits, but on return the MSR write callback called from the setpolicy() callback will update the HWP_REQUEST value.
This value will be used until the next time the boost up function is called. In addition, add a variable to control HWP dynamic boosting. When HWP dynamic boost is active, set the HWP-specific update util hook. The contents of the utility hooks will be filled in by subsequent patches. Reported-by: Mel Gorman <[email protected]> Tested-by: Giovanni Gherdovich <[email protected]> Signed-off-by: Srinivas Pandruvada <[email protected]> Signed-off-by: Rafael J. Wysocki <[email protected]>
1 parent d7231f9 commit e0efd5b

File tree

1 file changed

+97
-3
lines changed

1 file changed

+97
-3
lines changed

drivers/cpufreq/intel_pstate.c

Lines changed: 97 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,9 @@ struct global_params {
221221
* preference/bias
222222
* @epp_saved: Saved EPP/EPB during system suspend or CPU offline
223223
* operation
224+
* @hwp_req_cached: Cached value of the last HWP Request MSR
225+
* @hwp_cap_cached: Cached value of the last HWP Capabilities MSR
226+
* @hwp_boost_min: Last HWP boosted min performance
224227
*
225228
* This structure stores per CPU instance data for all CPUs.
226229
*/
@@ -253,6 +256,9 @@ struct cpudata {
253256
s16 epp_policy;
254257
s16 epp_default;
255258
s16 epp_saved;
259+
u64 hwp_req_cached;
260+
u64 hwp_cap_cached;
261+
u32 hwp_boost_min;
256262
};
257263

258264
static struct cpudata **all_cpu_data;
@@ -285,6 +291,7 @@ static struct pstate_funcs pstate_funcs __read_mostly;
285291

286292
static int hwp_active __read_mostly;
287293
static bool per_cpu_limits __read_mostly;
294+
static bool hwp_boost __read_mostly;
288295

289296
static struct cpufreq_driver *intel_pstate_driver __read_mostly;
290297

@@ -689,6 +696,7 @@ static void intel_pstate_get_hwp_max(unsigned int cpu, int *phy_max,
689696
u64 cap;
690697

691698
rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
699+
WRITE_ONCE(all_cpu_data[cpu]->hwp_cap_cached, cap);
692700
if (global.no_turbo)
693701
*current_max = HWP_GUARANTEED_PERF(cap);
694702
else
@@ -763,6 +771,7 @@ static void intel_pstate_hwp_set(unsigned int cpu)
763771
intel_pstate_set_epb(cpu, epp);
764772
}
765773
skip_epp:
774+
WRITE_ONCE(cpu_data->hwp_req_cached, value);
766775
wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
767776
}
768777

@@ -1381,6 +1390,81 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
13811390
intel_pstate_set_min_pstate(cpu);
13821391
}
13831392

1393+
/*
1394+
* Long hold time will keep high perf limits for long time,
1395+
* which negatively impacts perf/watt for some workloads,
1396+
* like specpower. 3ms is based on experiments on some
1397+
* workloads.
1398+
*/
1399+
static int hwp_boost_hold_time_ns = 3 * NSEC_PER_MSEC;
1400+
1401+
static inline void intel_pstate_hwp_boost_up(struct cpudata *cpu)
1402+
{
1403+
u64 hwp_req = READ_ONCE(cpu->hwp_req_cached);
1404+
u32 max_limit = (hwp_req & 0xff00) >> 8;
1405+
u32 min_limit = (hwp_req & 0xff);
1406+
u32 boost_level1;
1407+
1408+
/*
1409+
* Cases to consider (User changes via sysfs or boot time):
1410+
* If, P0 (Turbo max) = P1 (Guaranteed max) = min:
1411+
* No boost, return.
1412+
* If, P0 (Turbo max) > P1 (Guaranteed max) = min:
1413+
* Should result in one level boost only for P0.
1414+
* If, P0 (Turbo max) = P1 (Guaranteed max) > min:
1415+
* Should result in two level boost:
1416+
* (min + p1)/2 and P1.
1417+
* If, P0 (Turbo max) > P1 (Guaranteed max) > min:
1418+
* Should result in three level boost:
1419+
* (min + p1)/2, P1 and P0.
1420+
*/
1421+
1422+
/* If max and min are equal or already at max, nothing to boost */
1423+
if (max_limit == min_limit || cpu->hwp_boost_min >= max_limit)
1424+
return;
1425+
1426+
if (!cpu->hwp_boost_min)
1427+
cpu->hwp_boost_min = min_limit;
1428+
1429+
/* level at half way mark between min and guranteed */
1430+
boost_level1 = (HWP_GUARANTEED_PERF(cpu->hwp_cap_cached) + min_limit) >> 1;
1431+
1432+
if (cpu->hwp_boost_min < boost_level1)
1433+
cpu->hwp_boost_min = boost_level1;
1434+
else if (cpu->hwp_boost_min < HWP_GUARANTEED_PERF(cpu->hwp_cap_cached))
1435+
cpu->hwp_boost_min = HWP_GUARANTEED_PERF(cpu->hwp_cap_cached);
1436+
else if (cpu->hwp_boost_min == HWP_GUARANTEED_PERF(cpu->hwp_cap_cached) &&
1437+
max_limit != HWP_GUARANTEED_PERF(cpu->hwp_cap_cached))
1438+
cpu->hwp_boost_min = max_limit;
1439+
else
1440+
return;
1441+
1442+
hwp_req = (hwp_req & ~GENMASK_ULL(7, 0)) | cpu->hwp_boost_min;
1443+
wrmsrl(MSR_HWP_REQUEST, hwp_req);
1444+
cpu->last_update = cpu->sample.time;
1445+
}
1446+
1447+
static inline void intel_pstate_hwp_boost_down(struct cpudata *cpu)
1448+
{
1449+
if (cpu->hwp_boost_min) {
1450+
bool expired;
1451+
1452+
/* Check if we are idle for hold time to boost down */
1453+
expired = time_after64(cpu->sample.time, cpu->last_update +
1454+
hwp_boost_hold_time_ns);
1455+
if (expired) {
1456+
wrmsrl(MSR_HWP_REQUEST, cpu->hwp_req_cached);
1457+
cpu->hwp_boost_min = 0;
1458+
}
1459+
}
1460+
cpu->last_update = cpu->sample.time;
1461+
}
1462+
1463+
/*
 * Update-util hook registered instead of intel_pstate_update_util() when HWP
 * is active and hwp_boost is enabled.  It is an intentionally empty
 * placeholder at this point: @data, @time and @flags are unused and the
 * function has no effect.
 */
static inline void intel_pstate_update_util_hwp(struct update_util_data *data,
1464+
u64 time, unsigned int flags)
1465+
{
1466+
}
1467+
13841468
static inline void intel_pstate_calc_avg_perf(struct cpudata *cpu)
13851469
{
13861470
struct sample *sample = &cpu->sample;
@@ -1684,7 +1768,7 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
16841768
{
16851769
struct cpudata *cpu = all_cpu_data[cpu_num];
16861770

1687-
if (hwp_active)
1771+
if (hwp_active && !hwp_boost)
16881772
return;
16891773

16901774
if (cpu->update_util_set)
@@ -1693,7 +1777,9 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
16931777
/* Prevent intel_pstate_update_util() from using stale data. */
16941778
cpu->sample.time = 0;
16951779
cpufreq_add_update_util_hook(cpu_num, &cpu->update_util,
1696-
intel_pstate_update_util);
1780+
(hwp_active ?
1781+
intel_pstate_update_util_hwp :
1782+
intel_pstate_update_util));
16971783
cpu->update_util_set = true;
16981784
}
16991785

@@ -1805,8 +1891,16 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
18051891
intel_pstate_set_update_util_hook(policy->cpu);
18061892
}
18071893

1808-
if (hwp_active)
1894+
if (hwp_active) {
1895+
/*
1896+
* When hwp_boost was active before and dynamically it
1897+
* was turned off, in that case we need to clear the
1898+
* update util hook.
1899+
*/
1900+
if (!hwp_boost)
1901+
intel_pstate_clear_update_util_hook(policy->cpu);
18091902
intel_pstate_hwp_set(policy->cpu);
1903+
}
18101904

18111905
mutex_unlock(&intel_pstate_limits_lock);
18121906

0 commit comments

Comments
 (0)