File tree Expand file tree Collapse file tree 2 files changed +12
-0
lines changed
drivers/gpu/drm/amd/amdgpu Expand file tree Collapse file tree 2 files changed +12
-0
lines changed Original file line number Diff line number Diff line change @@ -2065,6 +2065,14 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
2065
2065
ras -> gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE2_RESET ;
2066
2066
reset_context .method = AMD_RESET_METHOD_MODE2 ;
2067
2067
}
2068
+
2069
+ /* A fatal error occurred in poison mode; a mode1 reset is
2070
+ * used to recover the GPU.
2071
+ */
2072
+ if (ras -> gpu_reset_flags & AMDGPU_RAS_GPU_RESET_MODE1_RESET ) {
2073
+ ras -> gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE1_RESET ;
2074
+ set_bit (AMDGPU_NEED_FULL_RESET , & reset_context .flags );
2075
+ }
2068
2076
}
2069
2077
2070
2078
amdgpu_device_gpu_recover (ras -> adev , NULL , & reset_context );
@@ -2955,9 +2963,12 @@ void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
2955
2963
return ;
2956
2964
2957
2965
if (atomic_cmpxchg (& amdgpu_ras_in_intr , 0 , 1 ) == 0 ) {
2966
+ struct amdgpu_ras * ras = amdgpu_ras_get_context (adev );
2967
+
2958
2968
dev_info (adev -> dev , "uncorrectable hardware error"
2959
2969
"(ERREVENT_ATHUB_INTERRUPT) detected!\n" );
2960
2970
2971
+ ras -> gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET ;
2961
2972
amdgpu_ras_reset_gpu (adev );
2962
2973
}
2963
2974
}
Original file line number Diff line number Diff line change @@ -340,6 +340,7 @@ enum amdgpu_ras_ret {
340
340
#define AMDGPU_RAS_ERR_ADDRESS_VALID (1 << 2)
341
341
342
342
#define AMDGPU_RAS_GPU_RESET_MODE2_RESET (0x1 << 0)
343
+ #define AMDGPU_RAS_GPU_RESET_MODE1_RESET (0x1 << 1)
343
344
344
345
struct amdgpu_ras_err_status_reg_entry {
345
346
uint32_t hwip ;
You can’t perform that action at this time.
0 commit comments