Skip to content

Commit ef5fca9

Browse files
lyndonli authored and alexdeucher committed
drm/amdgpu: add the fan abnormal detection feature
Update the SW CTF limit from the existing register when a fan failure is detected via an SMU interrupt.

Signed-off-by: lyndonli <[email protected]>
Reviewed-by: Hawking Zhang <[email protected]>
Reviewed-by: Kenneth Feng <[email protected]>
Reviewed-by: Evan Quan <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
1 parent 1ec1321 commit ef5fca9

File tree

3 files changed

+30
-0
lines changed

3 files changed

+30
-0
lines changed

drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ struct smu_temperature_range {
168168
int mem_crit_max;
169169
int mem_emergency_max;
170170
int software_shutdown_temp;
171+
int software_shutdown_temp_offset;
171172
};
172173

173174
struct smu_state_validation_block {

drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1376,6 +1376,7 @@ static int smu_v13_0_irq_process(struct amdgpu_device *adev,
13761376
*/
13771377
uint32_t ctxid = entry->src_data[0];
13781378
uint32_t data;
1379+
uint32_t high;
13791380

13801381
if (client_id == SOC15_IH_CLIENTID_THM) {
13811382
switch (src_id) {
@@ -1432,6 +1433,33 @@ static int smu_v13_0_irq_process(struct amdgpu_device *adev,
14321433
schedule_work(&smu->throttling_logging_work);
14331434

14341435
break;
1436+
case 0x8:
1437+
high = smu->thermal_range.software_shutdown_temp +
1438+
smu->thermal_range.software_shutdown_temp_offset;
1439+
high = min(SMU_THERMAL_MAXIMUM_ALERT_TEMP, high);
1440+
dev_emerg(adev->dev, "Reduce soft CTF limit to %d (by an offset %d)\n",
1441+
high,
1442+
smu->thermal_range.software_shutdown_temp_offset);
1443+
1444+
data = RREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL);
1445+
data = REG_SET_FIELD(data, THM_THERMAL_INT_CTRL,
1446+
DIG_THERM_INTH,
1447+
(high & 0xff));
1448+
data = data & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK);
1449+
WREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL, data);
1450+
break;
1451+
case 0x9:
1452+
high = min(SMU_THERMAL_MAXIMUM_ALERT_TEMP,
1453+
smu->thermal_range.software_shutdown_temp);
1454+
dev_emerg(adev->dev, "Recover soft CTF limit to %d\n", high);
1455+
1456+
data = RREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL);
1457+
data = REG_SET_FIELD(data, THM_THERMAL_INT_CTRL,
1458+
DIG_THERM_INTH,
1459+
(high & 0xff));
1460+
data = data & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK);
1461+
WREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL, data);
1462+
break;
14351463
}
14361464
}
14371465
}

drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1223,6 +1223,7 @@ static int smu_v13_0_7_get_thermal_temperature_range(struct smu_context *smu,
12231223
range->mem_emergency_max = (pptable->SkuTable.TemperatureLimit[TEMP_MEM] + CTF_OFFSET_MEM)*
12241224
SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
12251225
range->software_shutdown_temp = powerplay_table->software_shutdown_temp;
1226+
range->software_shutdown_temp_offset = pptable->SkuTable.FanAbnormalTempLimitOffset;
12261227

12271228
return 0;
12281229
}

0 commit comments

Comments
 (0)