@@ -221,6 +221,11 @@ struct global_params {
  *			preference/bias
  * @epp_saved:		Saved EPP/EPB during system suspend or CPU offline
  *			operation
+ * @hwp_req_cached:	Cached value of the last HWP Request MSR
+ * @hwp_cap_cached:	Cached value of the last HWP Capabilities MSR
+ * @last_io_update:	Last time when IO wake flag was set
+ * @sched_flags:	Store scheduler flags for possible cross CPU update
+ * @hwp_boost_min:	Last HWP boosted min performance
  *
  * This structure stores per CPU instance data for all CPUs.
  */
@@ -253,6 +258,11 @@ struct cpudata {
 	s16	epp_policy;
 	s16	epp_default;
 	s16	epp_saved;
+	u64	hwp_req_cached;
+	u64	hwp_cap_cached;
+	u64	last_io_update;
+	unsigned int sched_flags;
+	u32	hwp_boost_min;
 };
 
 static struct cpudata **all_cpu_data;
@@ -285,6 +295,7 @@ static struct pstate_funcs pstate_funcs __read_mostly;
 
 static int hwp_active __read_mostly;
 static bool per_cpu_limits __read_mostly;
+static bool hwp_boost __read_mostly;
 
 static struct cpufreq_driver *intel_pstate_driver __read_mostly;
 
@@ -689,6 +700,7 @@ static void intel_pstate_get_hwp_max(unsigned int cpu, int *phy_max,
 	u64 cap;
 
 	rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
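+	/* Cache HWP_CAPABILITIES so the boost path can read it without an MSR access. */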
+	WRITE_ONCE(all_cpu_data[cpu]->hwp_cap_cached, cap);
 	if (global.no_turbo)
 		*current_max = HWP_GUARANTEED_PERF(cap);
 	else
@@ -763,6 +775,7 @@ static void intel_pstate_hwp_set(unsigned int cpu)
 		intel_pstate_set_epb(cpu, epp);
 	}
 skip_epp:
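+	/* Keep a cached copy of HWP_REQUEST for the dynamic boost code to modify and restore. */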
+	WRITE_ONCE(cpu_data->hwp_req_cached, value);
 	wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
 }
 
@@ -1020,6 +1033,30 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
 	return count;
 }
 
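+/*
+ * Sysfs knob to enable/disable HWP dynamic boost at run time; exposed as
+ * "hwp_dynamic_boost" under the intel_pstate kobject when HWP is active.
+ */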
+static ssize_t show_hwp_dynamic_boost(struct kobject *kobj,
+				struct attribute *attr, char *buf)
+{
+	return sprintf(buf, "%u\n", hwp_boost);
+}
+
+static ssize_t store_hwp_dynamic_boost(struct kobject *a, struct attribute *b,
+				       const char *buf, size_t count)
+{
+	unsigned int input;
+	int ret;
+
+	ret = kstrtouint(buf, 10, &input);
+	if (ret)
+		return ret;
+
+	mutex_lock(&intel_pstate_driver_lock);
+	hwp_boost = !!input;
+	intel_pstate_update_policies();
+	mutex_unlock(&intel_pstate_driver_lock);
+
+	return count;
+}
+
 show_one(max_perf_pct, max_perf_pct);
 show_one(min_perf_pct, min_perf_pct);
 
@@ -1029,6 +1066,7 @@ define_one_global_rw(max_perf_pct);
 define_one_global_rw(min_perf_pct);
 define_one_global_ro(turbo_pct);
 define_one_global_ro(num_pstates);
+define_one_global_rw(hwp_dynamic_boost);
 
 static struct attribute *intel_pstate_attributes[] = {
 	&status.attr,
@@ -1069,6 +1107,11 @@ static void __init intel_pstate_sysfs_expose_params(void)
 	rc = sysfs_create_file(intel_pstate_kobject, &min_perf_pct.attr);
 	WARN_ON(rc);
 
+	if (hwp_active) {
+		rc = sysfs_create_file(intel_pstate_kobject,
+				       &hwp_dynamic_boost.attr);
+		WARN_ON(rc);
+	}
 }
 /************************** sysfs end ************************/
 
@@ -1381,6 +1424,116 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
 	intel_pstate_set_min_pstate(cpu);
 }
 
+/*
+ * A long hold time keeps the high performance limits active for a long
+ * time, which negatively impacts perf/watt for some workloads, like
+ * specpower. 3ms is based on experiments on some workloads.
+ */
+static int hwp_boost_hold_time_ns = 3 * NSEC_PER_MSEC;
+
+static inline void intel_pstate_hwp_boost_up(struct cpudata *cpu)
+{
+	u64 hwp_req = READ_ONCE(cpu->hwp_req_cached);
+	u32 max_limit = (hwp_req & 0xff00) >> 8;
+	u32 min_limit = (hwp_req & 0xff);
+	u32 boost_level1;
+
+	/*
+	 * Cases to consider (user changes via sysfs or boot time):
+	 * If, P0 (Turbo max) = P1 (Guaranteed max) = min:
+	 *	No boost, return.
+	 * If, P0 (Turbo max) > P1 (Guaranteed max) = min:
+	 *	Should result in one level boost only for P0.
+	 * If, P0 (Turbo max) = P1 (Guaranteed max) > min:
+	 *	Should result in two level boost:
+	 *		(min + p1)/2 and P1.
+	 * If, P0 (Turbo max) > P1 (Guaranteed max) > min:
+	 *	Should result in three level boost:
+	 *		(min + p1)/2, P1 and P0.
+	 */
+
+	/* If max and min are equal or already at max, nothing to boost */
+	if (max_limit == min_limit || cpu->hwp_boost_min >= max_limit)
+		return;
+
+	if (!cpu->hwp_boost_min)
+		cpu->hwp_boost_min = min_limit;
+
+	/* level at halfway mark between min and guaranteed */
+	boost_level1 = (HWP_GUARANTEED_PERF(cpu->hwp_cap_cached) + min_limit) >> 1;
+
+	if (cpu->hwp_boost_min < boost_level1)
+		cpu->hwp_boost_min = boost_level1;
+	else if (cpu->hwp_boost_min < HWP_GUARANTEED_PERF(cpu->hwp_cap_cached))
+		cpu->hwp_boost_min = HWP_GUARANTEED_PERF(cpu->hwp_cap_cached);
+	else if (cpu->hwp_boost_min == HWP_GUARANTEED_PERF(cpu->hwp_cap_cached) &&
+		 max_limit != HWP_GUARANTEED_PERF(cpu->hwp_cap_cached))
+		cpu->hwp_boost_min = max_limit;
+	else
+		return;
+
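+	/*
+	 * Only the minimum performance field (bits 7:0) of HWP_REQUEST is
+	 * raised here; the max limit and EPP fields are preserved.
+	 */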
+	hwp_req = (hwp_req & ~GENMASK_ULL(7, 0)) | cpu->hwp_boost_min;
+	wrmsrl(MSR_HWP_REQUEST, hwp_req);
+	cpu->last_update = cpu->sample.time;
+}
+
+static inline void intel_pstate_hwp_boost_down(struct cpudata *cpu)
+{
+	if (cpu->hwp_boost_min) {
+		bool expired;
+
+		/* Check if we are idle for hold time to boost down */
+		expired = time_after64(cpu->sample.time, cpu->last_update +
+				       hwp_boost_hold_time_ns);
+		if (expired) {
+			wrmsrl(MSR_HWP_REQUEST, cpu->hwp_req_cached);
+			cpu->hwp_boost_min = 0;
+		}
+	}
+	cpu->last_update = cpu->sample.time;
+}
+
+static inline void intel_pstate_update_util_hwp_local(struct cpudata *cpu,
+						       u64 time)
+{
+	cpu->sample.time = time;
+
+	if (cpu->sched_flags & SCHED_CPUFREQ_IOWAIT) {
+		bool do_io = false;
+
+		cpu->sched_flags = 0;
+		/*
+		 * Set iowait_boost flag and update time. Since the IO WAIT
+		 * flag is set all the time, we can't conclude that some IO
+		 * bound activity is scheduled on this CPU from just one
+		 * occurrence. If we receive at least two in two consecutive
+		 * ticks, then we treat the CPU as a boost candidate.
+		 */
+		if (time_before64(time, cpu->last_io_update + 2 * TICK_NSEC))
+			do_io = true;
+
+		cpu->last_io_update = time;
+
+		if (do_io)
+			intel_pstate_hwp_boost_up(cpu);
+
+	} else {
+		intel_pstate_hwp_boost_down(cpu);
+	}
+}
+
+static inline void intel_pstate_update_util_hwp(struct update_util_data *data,
+						u64 time, unsigned int flags)
+{
+	struct cpudata *cpu = container_of(data, struct cpudata, update_util);
+
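+	/*
+	 * The callback may run on a remote CPU; stash the scheduler flags
+	 * and only touch the HWP request MSR from the owning CPU.
+	 */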
+	cpu->sched_flags |= flags;
+
+	if (smp_processor_id() == cpu->cpu)
+		intel_pstate_update_util_hwp_local(cpu, time);
+}
+
 static inline void intel_pstate_calc_avg_perf(struct cpudata *cpu)
 {
 	struct sample *sample = &cpu->sample;
@@ -1641,6 +1794,12 @@ static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = {
 	{}
 };
 
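+/* CPU models on which HWP dynamic boost is enabled by default */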
+static const struct x86_cpu_id intel_pstate_hwp_boost_ids[] = {
+	ICPU(INTEL_FAM6_SKYLAKE_X, core_funcs),
+	ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, core_funcs),
+	{}
+};
+
 static int intel_pstate_init_cpu(unsigned int cpunum)
 {
 	struct cpudata *cpu;
@@ -1671,6 +1830,10 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
 			intel_pstate_disable_ee(cpunum);
 
 		intel_pstate_hwp_enable(cpu);
+
+		id = x86_match_cpu(intel_pstate_hwp_boost_ids);
+		if (id)
+			hwp_boost = true;
 	}
 
 	intel_pstate_get_cpu_pstates(cpu);
@@ -1684,7 +1847,7 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
 {
 	struct cpudata *cpu = all_cpu_data[cpu_num];
 
-	if (hwp_active)
+	if (hwp_active && !hwp_boost)
 		return;
 
 	if (cpu->update_util_set)
@@ -1693,7 +1856,9 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
 	/* Prevent intel_pstate_update_util() from using stale data. */
 	cpu->sample.time = 0;
 	cpufreq_add_update_util_hook(cpu_num, &cpu->update_util,
-				     intel_pstate_update_util);
+				     (hwp_active ?
+				      intel_pstate_update_util_hwp :
+				      intel_pstate_update_util));
 	cpu->update_util_set = true;
 }
 
@@ -1805,8 +1970,16 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 		intel_pstate_set_update_util_hook(policy->cpu);
 	}
 
-	if (hwp_active)
+	if (hwp_active) {
+		/*
+		 * If hwp_boost was active before and has been turned off
+		 * dynamically, the update util hook must be cleared here.
+		 */
+		if (!hwp_boost)
+			intel_pstate_clear_update_util_hook(policy->cpu);
 		intel_pstate_hwp_set(policy->cpu);
+	}
 
 	mutex_unlock(&intel_pstate_limits_lock);
 