@@ -88,6 +88,27 @@ static void umc_v12_0_reset_error_count(struct amdgpu_device *adev)
8888 umc_v12_0_reset_error_count_per_channel , NULL );
8989}
9090
91+ static bool umc_v12_0_is_uncorrectable_error (uint64_t mc_umc_status )
92+ {
93+ return ((REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , Val ) == 1 ) &&
94+ (REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , Deferred ) == 1 ||
95+ REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , PCC ) == 1 ||
96+ REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , UC ) == 1 ||
97+ REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , TCC ) == 1 ));
98+ }
99+
100+ static bool umc_v12_0_is_correctable_error (uint64_t mc_umc_status )
101+ {
102+ return (REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , Val ) == 1 &&
103+ (REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , CECC ) == 1 ||
104+ (REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , UECC ) == 1 &&
105+ REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , UC ) == 0 ) ||
106+ /* Identify data parity error in replay mode */
107+ ((REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , ErrorCodeExt ) == 0x5 ||
108+ REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , ErrorCodeExt ) == 0xb ) &&
109+ !(umc_v12_0_is_uncorrectable_error (mc_umc_status )))));
110+ }
111+
91112static void umc_v12_0_query_correctable_error_count (struct amdgpu_device * adev ,
92113 uint64_t umc_reg_offset ,
93114 unsigned long * error_count )
@@ -104,10 +125,7 @@ static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev,
104125 mc_umc_status =
105126 RREG64_PCIE_EXT ((mc_umc_status_addr + umc_reg_offset ) * 4 );
106127
107- if (REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , Val ) == 1 &&
108- (REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , CECC ) == 1 ||
109- (REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , UECC ) == 1 &&
110- REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , UC ) == 0 )))
128+ if (umc_v12_0_is_correctable_error (mc_umc_status ))
111129 * error_count += 1 ;
112130}
113131
@@ -125,11 +143,7 @@ static void umc_v12_0_query_uncorrectable_error_count(struct amdgpu_device *adev
125143 mc_umc_status =
126144 RREG64_PCIE_EXT ((mc_umc_status_addr + umc_reg_offset ) * 4 );
127145
128- if ((REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , Val ) == 1 ) &&
129- (REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , Deferred ) == 1 ||
130- REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , PCC ) == 1 ||
131- REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , UC ) == 1 ||
132- REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , TCC ) == 1 ))
146+ if (umc_v12_0_is_uncorrectable_error (mc_umc_status ))
133147 * error_count += 1 ;
134148}
135149
0 commit comments