Skip to content

Commit 18eae36

Browse files
Tao Zhou authored and alexdeucher committed
drm/amdgpu: check recovery status of xgmi hive in ras_reset_error_count
Handle xgmi hive case.

Suggested-by: Hawking Zhang <[email protected]>
Signed-off-by: Tao Zhou <[email protected]>
Reviewed-by: Stanley.Yang <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
1 parent 88e5c8f commit 18eae36

File tree

1 file changed

+10
-1
lines changed

1 file changed

+10
-1
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1222,6 +1222,8 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev,
12221222
struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0);
12231223
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
12241224
const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
1225+
struct amdgpu_hive_info *hive;
1226+
int hive_ras_recovery = 0;
12251227

12261228
if (!block_obj || !block_obj->hw_ops) {
12271229
dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
@@ -1233,8 +1235,15 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev,
12331235
!amdgpu_ras_get_mca_debug_mode(adev))
12341236
return -EOPNOTSUPP;
12351237

1238+
hive = amdgpu_get_xgmi_hive(adev);
1239+
if (hive) {
1240+
hive_ras_recovery = atomic_read(&hive->ras_recovery);
1241+
amdgpu_put_xgmi_hive(hive);
1242+
}
1243+
12361244
/* skip ras error reset in gpu reset */
1237-
if ((amdgpu_in_reset(adev) || atomic_read(&ras->in_recovery)) &&
1245+
if ((amdgpu_in_reset(adev) || atomic_read(&ras->in_recovery) ||
1246+
hive_ras_recovery) &&
12381247
mca_funcs && mca_funcs->mca_set_debug_mode)
12391248
return -EOPNOTSUPP;
12401249

0 commit comments

Comments
 (0)