@@ -2076,6 +2076,17 @@ static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj
2076
2076
{
2077
2077
dev_info (obj -> adev -> dev ,
2078
2078
"Poison is created\n" );
2079
+
2080
+ if (amdgpu_ip_version (obj -> adev , UMC_HWIP , 0 ) >= IP_VERSION (12 , 0 , 0 )) {
2081
+ struct amdgpu_ras * con = amdgpu_ras_get_context (obj -> adev );
2082
+
2083
+ amdgpu_ras_put_poison_req (obj -> adev ,
2084
+ AMDGPU_RAS_BLOCK__UMC , 0 , NULL , NULL , false);
2085
+
2086
+ atomic_inc (& con -> page_retirement_req_cnt );
2087
+
2088
+ wake_up (& con -> page_retirement_wq );
2089
+ }
2079
2090
}
2080
2091
2081
2092
static void amdgpu_ras_interrupt_umc_handler (struct ras_manager * obj ,
@@ -2727,15 +2738,13 @@ int amdgpu_ras_put_poison_req(struct amdgpu_device *adev,
2727
2738
return 0 ;
2728
2739
}
2729
2740
2730
- #ifdef PRE_DEFINED_FUNCTION
2731
2741
static int amdgpu_ras_get_poison_req (struct amdgpu_device * adev ,
2732
2742
struct ras_poison_msg * poison_msg )
2733
2743
{
2734
2744
struct amdgpu_ras * con = amdgpu_ras_get_context (adev );
2735
2745
2736
2746
return kfifo_get (& con -> poison_fifo , poison_msg );
2737
2747
}
2738
- #endif
2739
2748
2740
2749
static void amdgpu_ras_ecc_log_init (struct ras_ecc_log_info * ecc_log )
2741
2750
{
@@ -2766,10 +2775,54 @@ static void amdgpu_ras_ecc_log_fini(struct ras_ecc_log_info *ecc_log)
2766
2775
mutex_destroy (& ecc_log -> lock );
2767
2776
ecc_log -> de_updated = false;
2768
2777
}
2778
+
2779
+ static int amdgpu_ras_query_ecc_status (struct amdgpu_device * adev ,
2780
+ enum amdgpu_ras_block ras_block , uint32_t timeout_ms )
2781
+ {
2782
+ int ret = 0 ;
2783
+ struct ras_ecc_log_info * ecc_log ;
2784
+ struct ras_query_if info ;
2785
+ uint32_t timeout = timeout_ms ;
2786
+ struct amdgpu_ras * ras = amdgpu_ras_get_context (adev );
2787
+
2788
+ memset (& info , 0 , sizeof (info ));
2789
+ info .head .block = ras_block ;
2790
+
2791
+ ecc_log = & ras -> umc_ecc_log ;
2792
+ ecc_log -> de_updated = false;
2793
+ do {
2794
+ ret = amdgpu_ras_query_error_status (adev , & info );
2795
+ if (ret ) {
2796
+ dev_err (adev -> dev , "Failed to query ras error! ret:%d\n" , ret );
2797
+ return ret ;
2798
+ }
2799
+
2800
+ if (timeout && !ecc_log -> de_updated ) {
2801
+ msleep (1 );
2802
+ timeout -- ;
2803
+ }
2804
+ } while (timeout && !ecc_log -> de_updated );
2805
+
2806
+ if (timeout_ms && !timeout ) {
2807
+ dev_warn (adev -> dev , "Can't find deferred error\n" );
2808
+ return - ETIMEDOUT ;
2809
+ }
2810
+
2811
+ return 0 ;
2812
+ }
2813
+
2814
+ static void amdgpu_ras_poison_creation_handler (struct amdgpu_device * adev ,
2815
+ uint32_t timeout )
2816
+ {
2817
+ amdgpu_ras_query_ecc_status (adev , AMDGPU_RAS_BLOCK__UMC , timeout );
2818
+ }
2819
+
2769
2820
static int amdgpu_ras_page_retirement_thread (void * param )
2770
2821
{
2771
2822
struct amdgpu_device * adev = (struct amdgpu_device * )param ;
2772
2823
struct amdgpu_ras * con = amdgpu_ras_get_context (adev );
2824
+ struct ras_poison_msg poison_msg ;
2825
+ enum amdgpu_ras_block ras_block ;
2773
2826
2774
2827
while (!kthread_should_stop ()) {
2775
2828
@@ -2780,13 +2833,22 @@ static int amdgpu_ras_page_retirement_thread(void *param)
2780
2833
if (kthread_should_stop ())
2781
2834
break ;
2782
2835
2783
- dev_info (adev -> dev , "Start processing page retirement. request:%d\n" ,
2784
- atomic_read (& con -> page_retirement_req_cnt ));
2785
-
2786
2836
atomic_dec (& con -> page_retirement_req_cnt );
2787
2837
2788
- amdgpu_umc_bad_page_polling_timeout (adev ,
2789
- 0 , MAX_UMC_POISON_POLLING_TIME_ASYNC );
2838
+ if (!amdgpu_ras_get_poison_req (adev , & poison_msg ))
2839
+ continue ;
2840
+
2841
+ ras_block = poison_msg .block ;
2842
+
2843
+ dev_info (adev -> dev , "Start processing ras block %s(%d)\n" ,
2844
+ ras_block_str (ras_block ), ras_block );
2845
+
2846
+ if (ras_block == AMDGPU_RAS_BLOCK__UMC )
2847
+ amdgpu_ras_poison_creation_handler (adev ,
2848
+ MAX_UMC_POISON_POLLING_TIME_ASYNC );
2849
+ else
2850
+ amdgpu_umc_bad_page_polling_timeout (adev ,
2851
+ false, MAX_UMC_POISON_POLLING_TIME_ASYNC );
2790
2852
}
2791
2853
2792
2854
return 0 ;
0 commit comments