@@ -1165,13 +1165,53 @@ static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, struct ras_err_data *err_data)
 	}
 }
 
-/* query/inject/cure begin */
-int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
-				  struct ras_query_if *info)
+static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev,
+						struct ras_query_if *info,
+						struct ras_err_data *err_data,
+						unsigned int error_query_mode)
 {
+	enum amdgpu_ras_block blk = info ? info->head.block : AMDGPU_RAS_BLOCK_COUNT;
 	struct amdgpu_ras_block_object *block_obj = NULL;
+
+	if (error_query_mode == AMDGPU_RAS_INVALID_ERROR_QUERY)
+		return -EINVAL;
+
+	if (error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY) {
+		if (info->head.block == AMDGPU_RAS_BLOCK__UMC) {
+			amdgpu_ras_get_ecc_info(adev, err_data);
+		} else {
+			block_obj = amdgpu_ras_get_ras_block(adev, info->head.block, 0);
+			if (!block_obj || !block_obj->hw_ops) {
+				dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
+					     get_ras_block_str(&info->head));
+				return -EINVAL;
+			}
+
+			if (block_obj->hw_ops->query_ras_error_count)
+				block_obj->hw_ops->query_ras_error_count(adev, err_data);
+
+			if ((info->head.block == AMDGPU_RAS_BLOCK__SDMA) ||
+			    (info->head.block == AMDGPU_RAS_BLOCK__GFX) ||
+			    (info->head.block == AMDGPU_RAS_BLOCK__MMHUB)) {
+				if (block_obj->hw_ops->query_ras_error_status)
+					block_obj->hw_ops->query_ras_error_status(adev);
+			}
+		}
+	} else {
+		/* FIXME: add code to check return value later */
+		amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_UE, err_data);
+		amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_CE, err_data);
+	}
+
+	return 0;
+}
+
+/* query/inject/cure begin */
+int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_if *info)
+{
 	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
 	struct ras_err_data err_data;
+	unsigned int error_query_mode;
 	int ret;
 
 	if (!obj)
@@ -1181,27 +1221,14 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
 	if (ret)
 		return ret;
 
-	if (info->head.block == AMDGPU_RAS_BLOCK__UMC) {
-		amdgpu_ras_get_ecc_info(adev, &err_data);
-	} else {
-		block_obj = amdgpu_ras_get_ras_block(adev, info->head.block, 0);
-		if (!block_obj || !block_obj->hw_ops) {
-			dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
-				     get_ras_block_str(&info->head));
-			ret = -EINVAL;
-			goto out_fini_err_data;
-		}
-
-		if (block_obj->hw_ops->query_ras_error_count)
-			block_obj->hw_ops->query_ras_error_count(adev, &err_data);
+	if (!amdgpu_ras_get_error_query_mode(adev, &error_query_mode))
+		return -EINVAL;
 
-		if ((info->head.block == AMDGPU_RAS_BLOCK__SDMA) ||
-		    (info->head.block == AMDGPU_RAS_BLOCK__GFX) ||
-		    (info->head.block == AMDGPU_RAS_BLOCK__MMHUB)) {
-			if (block_obj->hw_ops->query_ras_error_status)
-				block_obj->hw_ops->query_ras_error_status(adev);
-		}
-	}
+	ret = amdgpu_ras_query_error_status_helper(adev, info,
+						   &err_data,
+						   error_query_mode);
+	if (ret)
+		goto out_fini_err_data;
 
 	amdgpu_rasmgr_error_data_statistic_update(obj, &err_data);
 
@@ -3397,6 +3424,26 @@ bool amdgpu_ras_get_mca_debug_mode(struct amdgpu_device *adev)
 	return true;
 }
 
+bool amdgpu_ras_get_error_query_mode(struct amdgpu_device *adev,
+				     unsigned int *error_query_mode)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
+
+	if (!con) {
+		*error_query_mode = AMDGPU_RAS_INVALID_ERROR_QUERY;
+		return false;
+	}
+
+	if (mca_funcs && mca_funcs->mca_set_debug_mode)
+		*error_query_mode =
+			(con->is_mca_debug_mode) ? AMDGPU_RAS_DIRECT_ERROR_QUERY : AMDGPU_RAS_FIRMWARE_ERROR_QUERY;
+	else
+		*error_query_mode = AMDGPU_RAS_DIRECT_ERROR_QUERY;
+
+	return true;
+}
+
 /* Register each ip ras block into amdgpu ras */
 int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
 				  struct amdgpu_ras_block_object *ras_block_obj)
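
Note: the net effect of this patch is that amdgpu_ras_query_error_status() now resolves a query mode first and then delegates to amdgpu_ras_query_error_status_helper(), which either reads errors directly from the IP blocks (UMC ECC info, per-block query_ras_error_count/query_ras_error_status hooks) or pulls the logged errors from the SMU's MCA firmware via amdgpu_mca_smu_log_ras_error(). The standalone sketch below models only the mode-selection rule from amdgpu_ras_get_error_query_mode(); all mock_* names and the main() harness are illustrative assumptions for running outside the kernel, not part of the patch.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Mock stand-ins for the kernel types; names are illustrative only. */
enum mock_query_mode {
	MOCK_INVALID_ERROR_QUERY,  /* no RAS context available */
	MOCK_DIRECT_ERROR_QUERY,   /* read IP registers / ECC tables directly */
	MOCK_FIRMWARE_ERROR_QUERY, /* fetch logged errors from SMU/MCA firmware */
};

struct mock_mca_funcs {
	int (*mca_set_debug_mode)(void *handle, bool enable);
};

struct mock_ras_context {
	bool is_mca_debug_mode;
};

/* Mirrors the selection rule of amdgpu_ras_get_error_query_mode():
 * with an MCA-capable SMU, firmware query is the default and direct
 * query is used only while MCA debug mode is on; without one, direct
 * query is the only option. */
static bool mock_get_error_query_mode(const struct mock_ras_context *con,
				      const struct mock_mca_funcs *mca_funcs,
				      enum mock_query_mode *mode)
{
	if (!con) {
		*mode = MOCK_INVALID_ERROR_QUERY;
		return false;
	}

	if (mca_funcs && mca_funcs->mca_set_debug_mode)
		*mode = con->is_mca_debug_mode ?
			MOCK_DIRECT_ERROR_QUERY : MOCK_FIRMWARE_ERROR_QUERY;
	else
		*mode = MOCK_DIRECT_ERROR_QUERY;

	return true;
}

static int mock_set_debug_mode(void *handle, bool enable)
{
	(void)handle;
	(void)enable;
	return 0;
}

int main(void)
{
	const struct mock_mca_funcs funcs = {
		.mca_set_debug_mode = mock_set_debug_mode,
	};
	struct mock_ras_context con = { .is_mca_debug_mode = false };
	enum mock_query_mode mode;

	/* MCA-capable SMU, debug mode off -> firmware query. */
	mock_get_error_query_mode(&con, &funcs, &mode);
	printf("mca fw, debug off: %d\n", mode);

	/* MCA-capable SMU, debug mode on -> direct query. */
	con.is_mca_debug_mode = true;
	mock_get_error_query_mode(&con, &funcs, &mode);
	printf("mca fw, debug on:  %d\n", mode);

	/* No MCA backend -> direct query regardless of debug mode. */
	mock_get_error_query_mode(&con, NULL, &mode);
	printf("no mca backend:    %d\n", mode);

	return 0;
}

As the selection rule shows, firmware-managed querying becomes the default whenever an MCA-capable SMU is present, while enabling MCA debug mode switches back to direct register access.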