
Commit 7b010f9

cpufreq: intel_pstate: EAS support for hybrid platforms
Modify intel_pstate to register EM perf domains for CPUs on hybrid platforms without SMT which causes EAS to be enabled on them when schedutil is used as the cpufreq governor (which requires intel_pstate to operate in the passive mode).

This change is targeting platforms (for example, Lunar Lake) where the "little" CPUs (E-cores) are always more energy-efficient than the "big" or "performance" CPUs (P-cores) when run at the same HWP performance level, so it is sufficient to tell EAS that E-cores are always preferred (so long as there is enough spare capacity on one of them to run the given task).

However, migrating tasks between CPUs of the same type too often is not desirable because it may hurt both performance and energy efficiency due to leaving warm caches behind. For this reason, register a separate perf domain for each CPU and choose the cost values for them so that the cost mostly depends on the CPU type, but there is also a small component of it depending on the performance level (utilization) which helps to balance the load between CPUs of the same type.

The cost component related to the CPU type is computed with the help of the observation that the IPC metric value for a given CPU is inversely proportional to its performance-to-frequency scaling factor and the cost of running code on it can be assumed to be roughly proportional to that IPC ratio (in principle, the higher the IPC ratio, the more resources are utilized when running at a given frequency, so the cost should be higher).

For all CPUs that are online at the system initialization time, EM perf domains are registered when the driver starts up, after asymmetric capacity support has been enabled. For the CPUs that become online later, EM perf domains are registered after setting the asymmetric capacity for them.

Signed-off-by: Rafael J. Wysocki <[email protected]>
Tested-by: Christian Loehle <[email protected]>
Reviewed-by: Dietmar Eggemann <[email protected]>
Link: https://patch.msgid.link/[email protected]
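To make the cost reasoning concrete, here is a minimal userspace sketch (not part of the patch) of the values hybrid_get_cost() would produce on a Lunar Lake-like system, assuming that P-cores use HYBRID_SCALING_FACTOR_LNL, that E-cores use the default core scaling, and that the four EM state "frequencies" come out as 2, 3, 4 and 5:

/*
 * Standalone illustration (userspace, not kernel code) of the per-state
 * cost values on a Lunar Lake-like system under the assumptions above.
 */
#include <stdio.h>

#define INTEL_PSTATE_CORE_SCALING	100000
#define HYBRID_SCALING_FACTOR_LNL	86957	/* assumed P-core scaling */

int main(void)
{
	unsigned long freqs[] = { 2, 3, 4, 5 };	/* one per utilization bin */

	for (int i = 0; i < 4; i++) {
		/* Type component: rough IPC ratio vs the least capable type. */
		unsigned long e_cost = 100UL * INTEL_PSTATE_CORE_SCALING /
				       INTEL_PSTATE_CORE_SCALING + freqs[i];
		unsigned long p_cost = 100UL * INTEL_PSTATE_CORE_SCALING /
				       HYBRID_SCALING_FACTOR_LNL + freqs[i];

		printf("bin %d: E-core cost %lu, P-core cost %lu\n",
		       i, e_cost, p_cost);
	}
	return 0;
}

Under these assumptions every E-core state (costs 102..105) is cheaper than every P-core state (costs 117..120), so EAS prefers an E-core whenever one has spare capacity, while the small freq term still orders CPUs of the same type by utilization bin.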


drivers/cpufreq/intel_pstate.c

Lines changed: 113 additions & 2 deletions
@@ -221,6 +221,7 @@ struct global_params {
  * @sched_flags:	Store scheduler flags for possible cross CPU update
  * @hwp_boost_min:	Last HWP boosted min performance
  * @suspended:		Whether or not the driver has been suspended.
+ * @pd_registered:	Set when a perf domain is registered for this CPU.
  * @hwp_notify_work:	workqueue for HWP notifications.
  *
  * This structure stores per CPU instance data for all CPUs.
@@ -260,6 +261,9 @@ struct cpudata {
 	unsigned int sched_flags;
 	u32 hwp_boost_min;
 	bool suspended;
+#ifdef CONFIG_ENERGY_MODEL
+	bool pd_registered;
+#endif
 	struct delayed_work hwp_notify_work;
 };
 
@@ -303,6 +307,7 @@ static bool hwp_is_hybrid;
 
 static struct cpufreq_driver *intel_pstate_driver __read_mostly;
 
+#define INTEL_PSTATE_CORE_SCALING	100000
 #define HYBRID_SCALING_FACTOR_ADL	78741
 #define HYBRID_SCALING_FACTOR_MTL	80000
 #define HYBRID_SCALING_FACTOR_LNL	86957
@@ -311,7 +316,7 @@ static int hybrid_scaling_factor;
 
 static inline int core_get_scaling(void)
 {
-	return 100000;
+	return INTEL_PSTATE_CORE_SCALING;
 }
 
 #ifdef CONFIG_ACPI
@@ -948,12 +953,105 @@ static struct cpudata *hybrid_max_perf_cpu __read_mostly;
  */
 static DEFINE_MUTEX(hybrid_capacity_lock);
 
+#ifdef CONFIG_ENERGY_MODEL
+#define HYBRID_EM_STATE_COUNT	4
+
+static int hybrid_active_power(struct device *dev, unsigned long *power,
+			       unsigned long *freq)
+{
+	/*
+	 * Create "utilization bins" of 0-40%, 40%-60%, 60%-80%, and 80%-100%
+	 * of the maximum capacity such that two CPUs of the same type will be
+	 * regarded as equally attractive if the utilization of each of them
+	 * falls into the same bin, which should prevent tasks from being
+	 * migrated between them too often.
+	 *
+	 * For this purpose, return the "frequency" of 2 for the first
+	 * performance level and otherwise leave the value set by the caller.
+	 */
+	if (!*freq)
+		*freq = 2;
+
+	/* No power information. */
+	*power = EM_MAX_POWER;
+
+	return 0;
+}
+
+static int hybrid_get_cost(struct device *dev, unsigned long freq,
+			   unsigned long *cost)
+{
+	struct pstate_data *pstate = &all_cpu_data[dev->id]->pstate;
+
+	/*
+	 * The smaller the perf-to-frequency scaling factor, the larger the IPC
+	 * ratio between the given CPU and the least capable CPU in the system.
+	 * Regard that IPC ratio as the primary cost component and assume that
+	 * the scaling factors for different CPU types will differ by at least
+	 * 5% and they will not be above INTEL_PSTATE_CORE_SCALING.
+	 *
+	 * Add the freq value to the cost, so that the cost of running on CPUs
+	 * of the same type in different "utilization bins" is different.
+	 */
+	*cost = div_u64(100ULL * INTEL_PSTATE_CORE_SCALING, pstate->scaling) + freq;
+
+	return 0;
+}
+
+static bool hybrid_register_perf_domain(unsigned int cpu)
+{
+	static const struct em_data_callback cb
+			= EM_ADV_DATA_CB(hybrid_active_power, hybrid_get_cost);
+	struct cpudata *cpudata = all_cpu_data[cpu];
+	struct device *cpu_dev;
+
+	/*
+	 * Registering EM perf domains without enabling asymmetric CPU capacity
+	 * support is not really useful and one domain should not be registered
+	 * more than once.
+	 */
+	if (!hybrid_max_perf_cpu || cpudata->pd_registered)
+		return false;
+
+	cpu_dev = get_cpu_device(cpu);
+	if (!cpu_dev)
+		return false;
+
+	if (em_dev_register_perf_domain(cpu_dev, HYBRID_EM_STATE_COUNT, &cb,
+					cpumask_of(cpu), false))
+		return false;
+
+	cpudata->pd_registered = true;
+
+	return true;
+}
+
+static void hybrid_register_all_perf_domains(void)
+{
+	unsigned int cpu;
+
+	for_each_online_cpu(cpu)
+		hybrid_register_perf_domain(cpu);
+}
+
+static void hybrid_update_perf_domain(struct cpudata *cpu)
+{
+	if (cpu->pd_registered)
+		em_adjust_cpu_capacity(cpu->cpu);
+}
+#else /* !CONFIG_ENERGY_MODEL */
+static inline bool hybrid_register_perf_domain(unsigned int cpu) { return false; }
+static inline void hybrid_register_all_perf_domains(void) {}
+static inline void hybrid_update_perf_domain(struct cpudata *cpu) {}
+#endif /* CONFIG_ENERGY_MODEL */
+
 static void hybrid_set_cpu_capacity(struct cpudata *cpu)
 {
 	arch_set_cpu_capacity(cpu->cpu, cpu->capacity_perf,
 			      hybrid_max_perf_cpu->capacity_perf,
 			      cpu->capacity_perf,
 			      cpu->pstate.max_pstate_physical);
+	hybrid_update_perf_domain(cpu);
 
 	topology_set_cpu_scale(cpu->cpu, arch_scale_cpu_capacity(cpu->cpu));
 
@@ -1044,6 +1142,11 @@ static void hybrid_refresh_cpu_capacity_scaling(void)
 	guard(mutex)(&hybrid_capacity_lock);
 
 	__hybrid_refresh_cpu_capacity_scaling();
+	/*
+	 * Perf domains are not registered before setting hybrid_max_perf_cpu,
+	 * so register them all after setting up CPU capacity scaling.
+	 */
+	hybrid_register_all_perf_domains();
 }
 
 static void hybrid_init_cpu_capacity_scaling(bool refresh)
@@ -1071,7 +1174,7 @@ static void hybrid_init_cpu_capacity_scaling(bool refresh)
 		hybrid_refresh_cpu_capacity_scaling();
 		/*
 		 * Disabling ITMT causes sched domains to be rebuilt to disable asym
-		 * packing and enable asym capacity.
+		 * packing and enable asym capacity and EAS.
 		 */
 		sched_clear_itmt_support();
 	}
@@ -1149,6 +1252,14 @@ static void hybrid_update_capacity(struct cpudata *cpu)
 	}
 
 	hybrid_set_cpu_capacity(cpu);
+	/*
+	 * If the CPU was offline to start with and it is going online for the
+	 * first time, a perf domain needs to be registered for it if hybrid
+	 * capacity scaling has been enabled already. In that case, sched
+	 * domains need to be rebuilt to take the new perf domain into account.
+	 */
+	if (hybrid_register_perf_domain(cpu->cpu))
+		em_rebuild_sched_domains();
 
 unlock:
 	mutex_unlock(&hybrid_capacity_lock);
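As a side note, the bin boundaries quoted in hybrid_active_power() follow from the four state "frequencies": the callback pins the first one at 2, and assuming the EM core ends up with consecutive values (3, 4 and 5) for the remaining states, each bin's upper edge is state_freq/max_freq of the maximum capacity. A small sketch of that arithmetic:

/*
 * Illustrative only: derives the 0-40%, 40%-60%, 60%-80% and 80%-100%
 * utilization bins from assumed EM state "frequencies" of 2, 3, 4, 5.
 */
#include <stdio.h>

int main(void)
{
	const unsigned long state_freq[] = { 2, 3, 4, 5 };
	const unsigned long max_freq = 5;
	unsigned long lower = 0;

	for (int i = 0; i < 4; i++) {
		unsigned long upper = 100 * state_freq[i] / max_freq;

		printf("bin %d: %lu%% - %lu%% of max capacity\n",
		       i, lower, upper);
		lower = upper;
	}
	return 0;
}

This prints bin edges at 40%, 60%, 80% and 100%, matching the comment in the patch; the first bin is deliberately wide so that lightly loaded CPUs of the same type look equally attractive.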
