Skip to content

Commit bf13da6

Browse files
Yang Wang authored and alexdeucher committed
drm/amdgpu: correct smu v13.0.6 umc ras error check
correct smu v13.0.6 umc ras error check. Signed-off-by: Yang Wang <[email protected]>; Reviewed-by: Hawking Zhang <[email protected]>; Signed-off-by: Alex Deucher <[email protected]>
1 parent bc3c566 commit bf13da6

File tree

3 files changed

+16
-7
lines changed

3 files changed

+16
-7
lines changed

drivers/gpu/drm/amd/amdgpu/umc_v12_0.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,15 +88,15 @@ static void umc_v12_0_reset_error_count(struct amdgpu_device *adev)
8888
umc_v12_0_reset_error_count_per_channel, NULL);
8989
}
9090

91-
static bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status)
91+
bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status)
9292
{
9393
return ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
9494
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
9595
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
9696
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1));
9797
}
9898

99-
static bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status)
99+
bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status)
100100
{
101101
return (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
102102
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1 ||

drivers/gpu/drm/amd/amdgpu/umc_v12_0.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,9 @@
117117
(pa) |= (UMC_V12_0_CHANNEL_HASH_CH6(channel_idx, pa) << UMC_V12_0_PA_CH6_BIT); \
118118
} while (0)
119119

120+
bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status);
121+
bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status);
122+
120123
extern const uint32_t
121124
umc_v12_0_channel_idx_tbl[]
122125
[UMC_V12_0_UMC_INSTANCE_NUM]

drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
#include "smu_cmn.h"
4949
#include "mp/mp_13_0_6_offset.h"
5050
#include "mp/mp_13_0_6_sh_mask.h"
51+
#include "umc_v12_0.h"
5152

5253
#undef MP1_Public
5354
#undef smnMP1_FIRMWARE_FLAGS
@@ -2481,7 +2482,7 @@ static int mca_decode_mca_ipid(struct amdgpu_device *adev, enum amdgpu_mca_error
24812482
return 0;
24822483
}
24832484

2484-
static int mca_normal_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev,
2485+
static int mca_umc_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev,
24852486
enum amdgpu_mca_error_type type, int idx, uint32_t *count)
24862487
{
24872488
uint64_t status0;
@@ -2491,10 +2492,15 @@ static int mca_normal_mca_get_err_count(const struct mca_ras_info *mca_ras, stru
24912492
if (ret)
24922493
return ret;
24932494

2494-
if (REG_GET_FIELD(status0, MCMP1_STATUST0, Val))
2495-
*count = 1;
2496-
else
2495+
if (!REG_GET_FIELD(status0, MCMP1_STATUST0, Val)) {
24972496
*count = 0;
2497+
return 0;
2498+
}
2499+
2500+
if (type == AMDGPU_MCA_ERROR_TYPE_UE && umc_v12_0_is_uncorrectable_error(status0))
2501+
*count = 1;
2502+
else if (type == AMDGPU_MCA_ERROR_TYPE_CE && umc_v12_0_is_correctable_error(status0))
2503+
*count = 1;
24982504

24992505
return 0;
25002506
}
@@ -2608,7 +2614,7 @@ static const struct mca_ras_info mca_ras_table[] = {
26082614
{
26092615
.blkid = AMDGPU_RAS_BLOCK__UMC,
26102616
.ip = AMDGPU_MCA_IP_UMC,
2611-
.get_err_count = mca_normal_mca_get_err_count,
2617+
.get_err_count = mca_umc_mca_get_err_count,
26122618
}, {
26132619
.blkid = AMDGPU_RAS_BLOCK__GFX,
26142620
.ip = AMDGPU_MCA_IP_MP5,

0 commit comments

Comments
 (0)