Skip to content

Commit 7d4b19a

Browse files
committed
Merge Intel thermal control drivers changes for v6.2
- Add Raptor Lake-S support to the intel_tcc_cooling driver (Zhang Rui).
- Make the intel_tcc_cooling driver detect TCC locking (Zhang Rui).
- Address Coverity warning in intel_hfi_process_event() (Ricardo Neri).
- Prevent accidental clearing of HFI in the package thermal interrupt status (Srinivas Pandruvada).
- Protect the clearing of status bits in MSR_IA32_PACKAGE_THERM_STATUS and MSR_IA32_THERM_STATUS (Srinivas Pandruvada).
- Allow the HFI interrupt handler to ACK an event for the same timestamp (Srinivas Pandruvada).

* thermal-intel:
  thermal: intel: hfi: ACK HFI for the same timestamp
  thermal: intel: Protect clearing of thermal status bits
  thermal: intel: Prevent accidental clearing of HFI status
  thermal: intel: intel_tcc_cooling: Add TCC cooling support for RaptorLake-S
  thermal: intel: intel_tcc_cooling: Detect TCC lock bit
  thermal: intel: hfi: Improve the type of hfi_features::nr_table_pages
2 parents 4748f96 + c0e3acd commit 7d4b19a

File tree

5 files changed

+52
-31
lines changed

5 files changed

+52
-31
lines changed

drivers/thermal/intel/intel_hfi.c

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,7 @@
4242

4343
#include "../thermal_core.h"
4444
#include "intel_hfi.h"
45-
46-
#define THERM_STATUS_CLEAR_PKG_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | \
47-
BIT(9) | BIT(11) | BIT(26))
45+
#include "thermal_interrupt.h"
4846

4947
/* Hardware Feedback Interface MSR configuration bits */
5048
#define HW_FEEDBACK_PTR_VALID_BIT BIT(0)
@@ -137,7 +135,7 @@ struct hfi_instance {
137135
* Parameters and supported features that are common to all HFI instances
138136
*/
139137
struct hfi_features {
140-
unsigned int nr_table_pages;
138+
size_t nr_table_pages;
141139
unsigned int cpu_stride;
142140
unsigned int hdr_size;
143141
};
@@ -252,7 +250,7 @@ void intel_hfi_process_event(__u64 pkg_therm_status_msr_val)
252250
struct hfi_instance *hfi_instance;
253251
int cpu = smp_processor_id();
254252
struct hfi_cpu_info *info;
255-
u64 new_timestamp;
253+
u64 new_timestamp, msr, hfi;
256254

257255
if (!pkg_therm_status_msr_val)
258256
return;
@@ -281,9 +279,21 @@ void intel_hfi_process_event(__u64 pkg_therm_status_msr_val)
281279
if (!raw_spin_trylock(&hfi_instance->event_lock))
282280
return;
283281

284-
/* Skip duplicated updates. */
282+
rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr);
283+
hfi = msr & PACKAGE_THERM_STATUS_HFI_UPDATED;
284+
if (!hfi) {
285+
raw_spin_unlock(&hfi_instance->event_lock);
286+
return;
287+
}
288+
289+
/*
290+
* Ack duplicate update. Since there is an active HFI
291+
* status from HW, it must be a new event, not a case
292+
* where a lagging CPU entered the locked region.
293+
*/
285294
new_timestamp = *(u64 *)hfi_instance->hw_table;
286295
if (*hfi_instance->timestamp == new_timestamp) {
296+
thermal_clear_package_intr_status(PACKAGE_LEVEL, PACKAGE_THERM_STATUS_HFI_UPDATED);
287297
raw_spin_unlock(&hfi_instance->event_lock);
288298
return;
289299
}
@@ -297,16 +307,14 @@ void intel_hfi_process_event(__u64 pkg_therm_status_msr_val)
297307
memcpy(hfi_instance->local_table, hfi_instance->hw_table,
298308
hfi_features.nr_table_pages << PAGE_SHIFT);
299309

300-
raw_spin_unlock(&hfi_instance->table_lock);
301-
raw_spin_unlock(&hfi_instance->event_lock);
302-
303310
/*
304311
* Let hardware know that we are done reading the HFI table and it is
305312
* free to update it again.
306313
*/
307-
pkg_therm_status_msr_val &= THERM_STATUS_CLEAR_PKG_MASK &
308-
~PACKAGE_THERM_STATUS_HFI_UPDATED;
309-
wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS, pkg_therm_status_msr_val);
314+
thermal_clear_package_intr_status(PACKAGE_LEVEL, PACKAGE_THERM_STATUS_HFI_UPDATED);
315+
316+
raw_spin_unlock(&hfi_instance->table_lock);
317+
raw_spin_unlock(&hfi_instance->event_lock);
310318

311319
queue_delayed_work(hfi_updates_wq, &hfi_instance->update_work,
312320
HFI_UPDATE_INTERVAL);

drivers/thermal/intel/intel_tcc_cooling.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#define TCC_SHIFT 24
1515
#define TCC_MASK (0x3fULL<<24)
1616
#define TCC_PROGRAMMABLE BIT(30)
17+
#define TCC_LOCKED BIT(31)
1718

1819
static struct thermal_cooling_device *tcc_cdev;
1920

@@ -84,6 +85,7 @@ static const struct x86_cpu_id tcc_ids[] __initconst = {
8485
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, NULL),
8586
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, NULL),
8687
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, NULL),
88+
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, NULL),
8789
{}
8890
};
8991

@@ -108,6 +110,15 @@ static int __init tcc_cooling_init(void)
108110
if (!(val & TCC_PROGRAMMABLE))
109111
return -ENODEV;
110112

113+
err = rdmsrl_safe(MSR_IA32_TEMPERATURE_TARGET, &val);
114+
if (err)
115+
return err;
116+
117+
if (val & TCC_LOCKED) {
118+
pr_info("TCC Offset locked\n");
119+
return -ENODEV;
120+
}
121+
111122
pr_info("Programmable TCC Offset detected\n");
112123

113124
tcc_cdev =

drivers/thermal/intel/therm_throt.c

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -190,32 +190,33 @@ static const struct attribute_group thermal_attr_group = {
190190
};
191191
#endif /* CONFIG_SYSFS */
192192

193-
#define CORE_LEVEL 0
194-
#define PACKAGE_LEVEL 1
195-
196193
#define THERM_THROT_POLL_INTERVAL HZ
197194
#define THERM_STATUS_PROCHOT_LOG BIT(1)
198195

199196
#define THERM_STATUS_CLEAR_CORE_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11) | BIT(13) | BIT(15))
200-
#define THERM_STATUS_CLEAR_PKG_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11))
197+
#define THERM_STATUS_CLEAR_PKG_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11) | BIT(26))
201198

202-
static void clear_therm_status_log(int level)
199+
/*
200+
* Clear the bits in package thermal status register for bit = 1
201+
* in bitmask
202+
*/
203+
void thermal_clear_package_intr_status(int level, u64 bit_mask)
203204
{
205+
u64 msr_val;
204206
int msr;
205-
u64 mask, msr_val;
206207

207208
if (level == CORE_LEVEL) {
208209
msr = MSR_IA32_THERM_STATUS;
209-
mask = THERM_STATUS_CLEAR_CORE_MASK;
210+
msr_val = THERM_STATUS_CLEAR_CORE_MASK;
210211
} else {
211212
msr = MSR_IA32_PACKAGE_THERM_STATUS;
212-
mask = THERM_STATUS_CLEAR_PKG_MASK;
213+
msr_val = THERM_STATUS_CLEAR_PKG_MASK;
213214
}
214215

215-
rdmsrl(msr, msr_val);
216-
msr_val &= mask;
217-
wrmsrl(msr, msr_val & ~THERM_STATUS_PROCHOT_LOG);
216+
msr_val &= ~bit_mask;
217+
wrmsrl(msr, msr_val);
218218
}
219+
EXPORT_SYMBOL_GPL(thermal_clear_package_intr_status);
219220

220221
static void get_therm_status(int level, bool *proc_hot, u8 *temp)
221222
{
@@ -295,7 +296,7 @@ static void __maybe_unused throttle_active_work(struct work_struct *work)
295296
state->average = avg;
296297

297298
re_arm:
298-
clear_therm_status_log(state->level);
299+
thermal_clear_package_intr_status(state->level, THERM_STATUS_PROCHOT_LOG);
299300
schedule_delayed_work_on(this_cpu, &state->therm_work, THERM_THROT_POLL_INTERVAL);
300301
}
301302

drivers/thermal/intel/thermal_interrupt.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
#ifndef _INTEL_THERMAL_INTERRUPT_H
33
#define _INTEL_THERMAL_INTERRUPT_H
44

5+
#define CORE_LEVEL 0
6+
#define PACKAGE_LEVEL 1
7+
58
/* Interrupt Handler for package thermal thresholds */
69
extern int (*platform_thermal_package_notify)(__u64 msr_val);
710

@@ -15,4 +18,7 @@ extern bool (*platform_thermal_package_rate_control)(void);
1518
/* Handle HWP interrupt */
1619
extern void notify_hwp_interrupt(void);
1720

21+
/* Common function to clear Package thermal status register */
22+
extern void thermal_clear_package_intr_status(int level, u64 bit_mask);
23+
1824
#endif /* _INTEL_THERMAL_INTERRUPT_H */

drivers/thermal/intel/x86_pkg_temp_thermal.c

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,6 @@ static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work)
265265
struct thermal_zone_device *tzone = NULL;
266266
int cpu = smp_processor_id();
267267
struct zone_device *zonedev;
268-
u64 msr_val, wr_val;
269268

270269
mutex_lock(&thermal_zone_mutex);
271270
raw_spin_lock_irq(&pkg_temp_lock);
@@ -279,12 +278,8 @@ static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work)
279278
}
280279
zonedev->work_scheduled = false;
281280

282-
rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
283-
wr_val = msr_val & ~(THERM_LOG_THRESHOLD0 | THERM_LOG_THRESHOLD1);
284-
if (wr_val != msr_val) {
285-
wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS, wr_val);
286-
tzone = zonedev->tzone;
287-
}
281+
thermal_clear_package_intr_status(PACKAGE_LEVEL, THERM_LOG_THRESHOLD0 | THERM_LOG_THRESHOLD1);
282+
tzone = zonedev->tzone;
288283

289284
enable_pkg_thres_interrupt();
290285
raw_spin_unlock_irq(&pkg_temp_lock);

0 commit comments

Comments
 (0)