Skip to content

Commit fd8bdb2

Browse files
committed
hwmon: (k10temp) Display up to eight sets of CCD temperatures
In HWiNFO, we see support for Tccd1, Tccd3, Tccd5, and Tccd7 temperature sensors on Zen2-based Threadripper CPUs. Checking register maps on Threadripper 3970X confirms SMN register addresses and values for those sensors. Register values observed in an idle system: 0x059950: 00000000 00000abc 00000000 00000ad8 0x059960: 00000000 00000ade 00000000 00000ae4 Under load: 0x059950: 00000000 00000c02 00000000 00000c14 0x059960: 00000000 00000c30 00000000 00000c22 More analysis shows that EPYC CPUs support up to 8 CCD temperature sensors. EPYC 7601 supports three CCD temperature sensors. Unlike Zen2 CPUs, the register space in Zen1 CPUs supports a maximum of four sensors, so only search for a maximum of four sensors on Zen1 CPUs. On top of that, in thm_10_0_sh_mask.h in the Linux kernel, we find definitions for THM_DIE{1-3}_TEMP__VALID_MASK, set to 0x00000800, as well as matching SMN addresses. This lets us conclude that bit 11 of the respective registers is a valid bit. With this assumption, the temperature offset is now 49 degrees C. This conveniently matches the documented temperature offset for Tdie, again suggesting that the above registers indeed report temperature sensor values. Assume that bit 11 is indeed a valid bit, and add support for the additional sensors. With this patch applied, output from 3970X (idle) looks as follows: k10temp-pci-00c3 Adapter: PCI adapter Tdie: +55.9°C Tctl: +55.9°C Tccd1: +39.8°C Tccd3: +43.8°C Tccd5: +43.8°C Tccd7: +44.8°C Tested-by: Michael Larabel <[email protected]> Signed-off-by: Guenter Roeck <[email protected]>
1 parent 9c4a38f commit fd8bdb2

File tree

1 file changed

+41
-41
lines changed

1 file changed

+41
-41
lines changed

drivers/hwmon/k10temp.c

Lines changed: 41 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Copyright (c) 2020 Guenter Roeck <[email protected]>
88
*
99
* Implementation notes:
10-
* - CCD1 and CCD2 register address information as well as the calculation to
10+
* - CCD register address information as well as the calculation to
1111
* convert raw register values is from https://github.com/ocerman/zenpower.
1212
* The information is not confirmed from chip datasheets, but experiments
1313
* suggest that it provides reasonable temperature values.
@@ -18,11 +18,6 @@
1818
* normalized to report 1A/LSB for core current and 0.25A/LSB for SoC
1919
* current. Reported values can be adjusted using the sensors configuration
2020
* file.
21-
* - It is unknown if the mechanism to read CCD1/CCD2 temperature as well as
22-
* current and voltage information works on higher-end Ryzen CPUs.
23-
* Information reported by Windows tools suggests that additional sensors
24-
* (both temperature and voltage/current) are supported, but their register
25-
* location is currently unknown.
2621
*/
2722

2823
#include <linux/bitops.h>
@@ -80,8 +75,10 @@ static DEFINE_MUTEX(nb_smu_ind_mutex);
8075

8176
/* F17h M01h Access through SMN */
8277
#define F17H_M01H_REPORTED_TEMP_CTRL_OFFSET 0x00059800
83-
#define F17H_M70H_CCD1_TEMP 0x00059954
84-
#define F17H_M70H_CCD2_TEMP 0x00059958
78+
79+
#define F17H_M70H_CCD_TEMP(x) (0x00059954 + ((x) * 4))
80+
#define F17H_M70H_CCD_TEMP_VALID BIT(11)
81+
#define F17H_M70H_CCD_TEMP_MASK GENMASK(10, 0)
8582

8683
#define F17H_M01H_SVI 0x0005A000
8784
#define F17H_M01H_SVI_TEL_PLANE0 (F17H_M01H_SVI + 0xc)
@@ -100,8 +97,7 @@ struct k10temp_data {
10097
int temp_offset;
10198
u32 temp_adjust_mask;
10299
bool show_tdie;
103-
bool show_tccd1;
104-
bool show_tccd2;
100+
u32 show_tccd;
105101
u32 svi_addr[2];
106102
bool show_current;
107103
int cfactor[2];
@@ -188,6 +184,12 @@ const char *k10temp_temp_label[] = {
188184
"Tctl",
189185
"Tccd1",
190186
"Tccd2",
187+
"Tccd3",
188+
"Tccd4",
189+
"Tccd5",
190+
"Tccd6",
191+
"Tccd7",
192+
"Tccd8",
191193
};
192194

193195
const char *k10temp_in_label[] = {
@@ -277,15 +279,10 @@ static int k10temp_read_temp(struct device *dev, u32 attr, int channel,
277279
if (*val < 0)
278280
*val = 0;
279281
break;
280-
case 2: /* Tccd1 */
281-
amd_smn_read(amd_pci_dev_to_node_id(data->pdev),
282-
F17H_M70H_CCD1_TEMP, &regval);
283-
*val = (regval & 0xfff) * 125 - 305000;
284-
break;
285-
case 3: /* Tccd2 */
282+
case 2 ... 9: /* Tccd{1-8} */
286283
amd_smn_read(amd_pci_dev_to_node_id(data->pdev),
287-
F17H_M70H_CCD2_TEMP, &regval);
288-
*val = (regval & 0xfff) * 125 - 305000;
284+
F17H_M70H_CCD_TEMP(channel - 2), &regval);
285+
*val = (regval & F17H_M70H_CCD_TEMP_MASK) * 125 - 49000;
289286
break;
290287
default:
291288
return -EOPNOTSUPP;
@@ -343,12 +340,8 @@ static umode_t k10temp_is_visible(const void *_data,
343340
if (!data->show_tdie)
344341
return 0;
345342
break;
346-
case 2: /* Tccd1 */
347-
if (!data->show_tccd1)
348-
return 0;
349-
break;
350-
case 3: /* Tccd2 */
351-
if (!data->show_tccd2)
343+
case 2 ... 9: /* Tccd{1-8} */
344+
if (!(data->show_tccd & BIT(channel - 2)))
352345
return 0;
353346
break;
354347
default:
@@ -382,12 +375,8 @@ static umode_t k10temp_is_visible(const void *_data,
382375
case 0: /* Tdie */
383376
case 1: /* Tctl */
384377
break;
385-
case 2: /* Tccd1 */
386-
if (!data->show_tccd1)
387-
return 0;
388-
break;
389-
case 3: /* Tccd2 */
390-
if (!data->show_tccd2)
378+
case 2 ... 9: /* Tccd{1-8} */
379+
if (!(data->show_tccd & BIT(channel - 2)))
391380
return 0;
392381
break;
393382
default:
@@ -520,6 +509,12 @@ static const struct hwmon_channel_info *k10temp_info[] = {
520509
HWMON_T_LABEL,
521510
HWMON_T_INPUT | HWMON_T_LABEL,
522511
HWMON_T_INPUT | HWMON_T_LABEL,
512+
HWMON_T_INPUT | HWMON_T_LABEL,
513+
HWMON_T_INPUT | HWMON_T_LABEL,
514+
HWMON_T_INPUT | HWMON_T_LABEL,
515+
HWMON_T_INPUT | HWMON_T_LABEL,
516+
HWMON_T_INPUT | HWMON_T_LABEL,
517+
HWMON_T_INPUT | HWMON_T_LABEL,
523518
HWMON_T_INPUT | HWMON_T_LABEL),
524519
HWMON_CHANNEL_INFO(in,
525520
HWMON_I_INPUT | HWMON_I_LABEL,
@@ -541,6 +536,20 @@ static const struct hwmon_chip_info k10temp_chip_info = {
541536
.info = k10temp_info,
542537
};
543538

539+
static void k10temp_get_ccd_support(struct pci_dev *pdev,
540+
struct k10temp_data *data, int limit)
541+
{
542+
u32 regval;
543+
int i;
544+
545+
for (i = 0; i < limit; i++) {
546+
amd_smn_read(amd_pci_dev_to_node_id(pdev),
547+
F17H_M70H_CCD_TEMP(i), &regval);
548+
if (regval & F17H_M70H_CCD_TEMP_VALID)
549+
data->show_tccd |= BIT(i);
550+
}
551+
}
552+
544553
static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
545554
{
546555
int unreliable = has_erratum_319(pdev);
@@ -571,8 +580,6 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
571580
data->read_htcreg = read_htcreg_nb_f15;
572581
data->read_tempreg = read_tempreg_nb_f15;
573582
} else if (boot_cpu_data.x86 == 0x17 || boot_cpu_data.x86 == 0x18) {
574-
u32 regval;
575-
576583
data->temp_adjust_mask = CUR_TEMP_RANGE_SEL_MASK;
577584
data->read_tempreg = read_tempreg_nb_f17;
578585
data->show_tdie = true;
@@ -587,6 +594,7 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
587594
data->svi_addr[1] = F17H_M01H_SVI_TEL_PLANE1;
588595
data->cfactor[0] = CFACTOR_ICORE;
589596
data->cfactor[1] = CFACTOR_ISOC;
597+
k10temp_get_ccd_support(pdev, data, 4);
590598
break;
591599
case 0x31: /* Zen2 Threadripper */
592600
case 0x71: /* Zen2 */
@@ -595,15 +603,7 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
595603
data->cfactor[1] = CFACTOR_ISOC;
596604
data->svi_addr[0] = F17H_M01H_SVI_TEL_PLANE1;
597605
data->svi_addr[1] = F17H_M01H_SVI_TEL_PLANE0;
598-
amd_smn_read(amd_pci_dev_to_node_id(pdev),
599-
F17H_M70H_CCD1_TEMP, &regval);
600-
if (regval & 0xfff)
601-
data->show_tccd1 = true;
602-
603-
amd_smn_read(amd_pci_dev_to_node_id(pdev),
604-
F17H_M70H_CCD2_TEMP, &regval);
605-
if (regval & 0xfff)
606-
data->show_tccd2 = true;
606+
k10temp_get_ccd_support(pdev, data, 8);
607607
break;
608608
}
609609
} else {

0 commit comments

Comments
 (0)