@@ -2848,12 +2848,35 @@ static void amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev,
2848
2848
schedule_delayed_work (& con -> page_retirement_dwork , 0 );
2849
2849
}
2850
2850
2851
+ static int amdgpu_ras_poison_consumption_handler (struct amdgpu_device * adev ,
2852
+ struct ras_poison_msg * poison_msg )
2853
+ {
2854
+ struct amdgpu_ras * con = amdgpu_ras_get_context (adev );
2855
+ uint32_t reset = poison_msg -> reset ;
2856
+ uint16_t pasid = poison_msg -> pasid ;
2857
+
2858
+ kgd2kfd_set_sram_ecc_flag (adev -> kfd .dev );
2859
+
2860
+ if (poison_msg -> pasid_fn )
2861
+ poison_msg -> pasid_fn (adev , pasid , poison_msg -> data );
2862
+
2863
+ if (reset ) {
2864
+ flush_delayed_work (& con -> page_retirement_dwork );
2865
+
2866
+ con -> gpu_reset_flags |= reset ;
2867
+ amdgpu_ras_reset_gpu (adev );
2868
+ }
2869
+
2870
+ return 0 ;
2871
+ }
2872
+
2851
2873
static int amdgpu_ras_page_retirement_thread (void * param )
2852
2874
{
2853
2875
struct amdgpu_device * adev = (struct amdgpu_device * )param ;
2854
2876
struct amdgpu_ras * con = amdgpu_ras_get_context (adev );
2855
2877
struct ras_poison_msg poison_msg ;
2856
2878
enum amdgpu_ras_block ras_block ;
2879
+ bool poison_creation_is_handled = false;
2857
2880
2858
2881
while (!kthread_should_stop ()) {
2859
2882
@@ -2874,12 +2897,24 @@ static int amdgpu_ras_page_retirement_thread(void *param)
2874
2897
dev_info (adev -> dev , "Start processing ras block %s(%d)\n" ,
2875
2898
ras_block_str (ras_block ), ras_block );
2876
2899
2877
- if (ras_block == AMDGPU_RAS_BLOCK__UMC )
2900
+ if (ras_block == AMDGPU_RAS_BLOCK__UMC ) {
2878
2901
amdgpu_ras_poison_creation_handler (adev ,
2879
2902
MAX_UMC_POISON_POLLING_TIME_ASYNC );
2880
- else
2881
- amdgpu_umc_bad_page_polling_timeout (adev ,
2882
- false, MAX_UMC_POISON_POLLING_TIME_ASYNC );
2903
+ poison_creation_is_handled = true;
2904
+ } else {
2905
+ /* poison_creation_is_handled:
2906
+ * false: no poison creation interrupt has been handled yet, only
2907
+ * a poison consumption interrupt was received.
2908
+ * true: a poison creation interrupt was handled earlier and no
2909
+ * new one has arrived since.
2910
+ */
2911
+ amdgpu_ras_poison_creation_handler (adev ,
2912
+ poison_creation_is_handled ?
2913
+ 0 : MAX_UMC_POISON_POLLING_TIME_ASYNC );
2914
+
2915
+ amdgpu_ras_poison_consumption_handler (adev , & poison_msg );
2916
+ poison_creation_is_handled = false;
2917
+ }
2883
2918
}
2884
2919
2885
2920
return 0 ;
0 commit comments