Skip to content

Commit 708f220

Browse files
Danijel Slivka, alexdeucher
authored and committed
drm/amd/pm: Ignore initial value in smu response register
Why: If the register mmMP1_SMN_C2PMSG_90 is written to during amdgpu driver load or driver unload, a subsequent amdgpu driver load will fail at smu_hw_init. The default value of the mmMP1_SMN_C2PMSG_90 register in a clean environment is 0x1, and if the value differs from the expected one, amdgpu driver load will fail. How to fix: Ignore the initial value in the smu response register before the first smu message is sent; if smc is in the SMU_FW_INIT state, just proceed further to send the message. If the register holds an unexpected value after an smu message was sent, set smc_fw_state to the SMU_FW_HANG state, and no further smu messages will be sent. v2: Set the SMU_FW_INIT state at the start of smu hw_init/resume. Check smc_fw_state before sending an smu message; if in the hang state, skip sending the message. Set SMU_FW_HANG only in case an unexpected value is detected. Signed-off-by: Danijel Slivka <[email protected]> Reviewed-by: Kenneth Feng <[email protected]> Reviewed-by: Lijo Lazar <[email protected]> Reviewed-by: Asad Kamal <[email protected]> Signed-off-by: Alex Deucher <[email protected]>
1 parent d02ddef commit 708f220

File tree

3 files changed

+40
-5
lines changed

3 files changed

+40
-5
lines changed

drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1755,6 +1755,8 @@ static int smu_start_smc_engine(struct smu_context *smu)
17551755
struct amdgpu_device *adev = smu->adev;
17561756
int ret = 0;
17571757

1758+
smu->smc_fw_state = SMU_FW_INIT;
1759+
17581760
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
17591761
if (amdgpu_ip_version(adev, MP1_HWIP, 0) < IP_VERSION(11, 0, 0)) {
17601762
if (smu->ppt_funcs->load_microcode) {

drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -495,6 +495,12 @@ struct stb_context {
495495
spinlock_t lock;
496496
};
497497

498+
enum smu_fw_status {
499+
SMU_FW_INIT = 0,
500+
SMU_FW_RUNTIME,
501+
SMU_FW_HANG,
502+
};
503+
498504
#define WORKLOAD_POLICY_MAX 7
499505

500506
/*
@@ -562,6 +568,7 @@ struct smu_context {
562568
uint32_t smc_fw_if_version;
563569
uint32_t smc_fw_version;
564570
uint32_t smc_fw_caps;
571+
uint8_t smc_fw_state;
565572

566573
bool uploading_custom_pp_table;
567574
bool dc_controlled_by_gpio;

drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -315,11 +315,21 @@ int smu_cmn_send_msg_without_waiting(struct smu_context *smu,
315315
if (adev->no_hw_access)
316316
return 0;
317317

318-
reg = __smu_cmn_poll_stat(smu);
319-
res = __smu_cmn_reg2errno(smu, reg);
320-
if (reg == SMU_RESP_NONE ||
321-
res == -EREMOTEIO)
318+
if (smu->smc_fw_state == SMU_FW_HANG) {
319+
dev_err(adev->dev, "SMU is in hanged state, failed to send smu message!\n");
320+
res = -EREMOTEIO;
322321
goto Out;
322+
}
323+
324+
if (smu->smc_fw_state == SMU_FW_INIT) {
325+
smu->smc_fw_state = SMU_FW_RUNTIME;
326+
} else {
327+
reg = __smu_cmn_poll_stat(smu);
328+
res = __smu_cmn_reg2errno(smu, reg);
329+
if (reg == SMU_RESP_NONE || res == -EREMOTEIO)
330+
goto Out;
331+
}
332+
323333
__smu_cmn_send_msg(smu, msg_index, param);
324334
res = 0;
325335
Out:
@@ -350,6 +360,9 @@ int smu_cmn_wait_for_response(struct smu_context *smu)
350360
reg = __smu_cmn_poll_stat(smu);
351361
res = __smu_cmn_reg2errno(smu, reg);
352362

363+
if (res == -EREMOTEIO)
364+
smu->smc_fw_state = SMU_FW_HANG;
365+
353366
if (unlikely(smu->adev->pm.smu_debug_mask & SMU_DEBUG_HALT_ON_ERROR) &&
354367
res && (res != -ETIME)) {
355368
amdgpu_device_halt(smu->adev);
@@ -418,6 +431,16 @@ int smu_cmn_send_smc_msg_with_param(struct smu_context *smu,
418431
goto Out;
419432
}
420433

434+
if (smu->smc_fw_state == SMU_FW_HANG) {
435+
dev_err(adev->dev, "SMU is in hanged state, failed to send smu message!\n");
436+
res = -EREMOTEIO;
437+
goto Out;
438+
} else if (smu->smc_fw_state == SMU_FW_INIT) {
439+
/* Ignore initial smu response register value */
440+
poll = false;
441+
smu->smc_fw_state = SMU_FW_RUNTIME;
442+
}
443+
421444
if (poll) {
422445
reg = __smu_cmn_poll_stat(smu);
423446
res = __smu_cmn_reg2errno(smu, reg);
@@ -429,8 +452,11 @@ int smu_cmn_send_smc_msg_with_param(struct smu_context *smu,
429452
__smu_cmn_send_msg(smu, (uint16_t) index, param);
430453
reg = __smu_cmn_poll_stat(smu);
431454
res = __smu_cmn_reg2errno(smu, reg);
432-
if (res != 0)
455+
if (res != 0) {
456+
if (res == -EREMOTEIO)
457+
smu->smc_fw_state = SMU_FW_HANG;
433458
__smu_cmn_reg_print_error(smu, reg, index, param, msg);
459+
}
434460
if (read_arg) {
435461
smu_cmn_read_arg(smu, read_arg);
436462
dev_dbg(adev->dev, "smu send message: %s(%d) param: 0x%08x, resp: 0x%08x,\

0 commit comments

Comments
 (0)