@@ -222,6 +222,66 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev,
222
222
}
223
223
}
224
224
225
+ static int umc_v12_0_convert_err_addr (struct amdgpu_device * adev ,
226
+ struct ta_ras_query_address_input * addr_in ,
227
+ uint64_t * pfns , int len )
228
+ {
229
+ uint32_t col , row , row_xor , bank , channel_index ;
230
+ uint64_t soc_pa , retired_page , column , err_addr ;
231
+ struct ta_ras_query_address_output addr_out ;
232
+ uint32_t pos = 0 ;
233
+
234
+ err_addr = addr_in -> ma .err_addr ;
235
+ addr_in -> addr_type = TA_RAS_MCA_TO_PA ;
236
+ if (psp_ras_query_address (& adev -> psp , addr_in , & addr_out )) {
237
+ dev_warn (adev -> dev , "Failed to query RAS physical address for 0x%llx" ,
238
+ err_addr );
239
+ return 0 ;
240
+ }
241
+
242
+ soc_pa = addr_out .pa .pa ;
243
+ bank = addr_out .pa .bank ;
244
+ channel_index = addr_out .pa .channel_idx ;
245
+
246
+ col = (err_addr >> 1 ) & 0x1fULL ;
247
+ row = (err_addr >> 10 ) & 0x3fffULL ;
248
+ row_xor = row ^ (0x1ULL << 13 );
249
+ /* clear [C3 C2] in soc physical address */
250
+ soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT );
251
+ /* clear [C4] in soc physical address */
252
+ soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT );
253
+
254
+ /* loop for all possibilities of [C4 C3 C2] */
255
+ for (column = 0 ; column < UMC_V12_0_NA_MAP_PA_NUM ; column ++ ) {
256
+ retired_page = soc_pa | ((column & 0x3 ) << UMC_V12_0_PA_C2_BIT );
257
+ retired_page |= (((column & 0x4 ) >> 2 ) << UMC_V12_0_PA_C4_BIT );
258
+
259
+ if (pos >= len )
260
+ return 0 ;
261
+ pfns [pos ++ ] = retired_page >> AMDGPU_GPU_PAGE_SHIFT ;
262
+
263
+ /* include column bit 0 and 1 */
264
+ col &= 0x3 ;
265
+ col |= (column << 2 );
266
+ dev_info (adev -> dev ,
267
+ "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n" ,
268
+ retired_page , row , col , bank , channel_index );
269
+
270
+ /* shift R13 bit */
271
+ retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT );
272
+
273
+ if (pos >= len )
274
+ return 0 ;
275
+ pfns [pos ++ ] = retired_page >> AMDGPU_GPU_PAGE_SHIFT ;
276
+
277
+ dev_info (adev -> dev ,
278
+ "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n" ,
279
+ retired_page , row_xor , col , bank , channel_index );
280
+ }
281
+
282
+ return pos ;
283
+ }
284
+
225
285
static int umc_v12_0_query_error_address (struct amdgpu_device * adev ,
226
286
uint32_t node_inst , uint32_t umc_inst ,
227
287
uint32_t ch_inst , void * data )
@@ -482,8 +542,12 @@ static int umc_v12_0_ras_late_init(struct amdgpu_device *adev, struct ras_common
482
542
static int umc_v12_0_update_ecc_status (struct amdgpu_device * adev ,
483
543
uint64_t status , uint64_t ipid , uint64_t addr )
484
544
{
485
- uint16_t hwid , mcatype ;
486
545
struct amdgpu_ras * con = amdgpu_ras_get_context (adev );
546
+ uint16_t hwid , mcatype ;
547
+ struct ta_ras_query_address_input addr_in ;
548
+ uint64_t page_pfn [UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL ];
549
+ uint64_t err_addr ;
550
+ int count ;
487
551
488
552
hwid = REG_GET_FIELD (ipid , MCMP1_IPIDT0 , HardwareID );
489
553
mcatype = REG_GET_FIELD (ipid , MCMP1_IPIDT0 , McaType );
@@ -497,6 +561,34 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev,
497
561
if (!umc_v12_0_is_deferred_error (adev , status ))
498
562
return 0 ;
499
563
564
+ err_addr = REG_GET_FIELD (addr ,
565
+ MCA_UMC_UMC0_MCUMC_ADDRT0 , ErrorAddr );
566
+
567
+ dev_info (adev -> dev ,
568
+ "UMC:IPID:0x%llx, socket:%llu, aid:%llu, inst:%llu, ch:%llu, err_addr:0x%llx\n" ,
569
+ ipid ,
570
+ MCA_IPID_2_SOCKET_ID (ipid ),
571
+ MCA_IPID_2_DIE_ID (ipid ),
572
+ MCA_IPID_2_UMC_INST (ipid ),
573
+ MCA_IPID_2_UMC_CH (ipid ),
574
+ err_addr );
575
+
576
+ memset (page_pfn , 0 , sizeof (page_pfn ));
577
+
578
+ memset (& addr_in , 0 , sizeof (addr_in ));
579
+ addr_in .ma .err_addr = err_addr ;
580
+ addr_in .ma .ch_inst = MCA_IPID_2_UMC_CH (ipid );
581
+ addr_in .ma .umc_inst = MCA_IPID_2_UMC_INST (ipid );
582
+ addr_in .ma .node_inst = MCA_IPID_2_DIE_ID (ipid );
583
+ addr_in .ma .socket_id = MCA_IPID_2_SOCKET_ID (ipid );
584
+
585
+ count = umc_v12_0_convert_err_addr (adev ,
586
+ & addr_in , page_pfn , ARRAY_SIZE (page_pfn ));
587
+ if (count <= 0 ) {
588
+ dev_warn (adev -> dev , "Fail to convert error address! count:%d\n" , count );
589
+ return 0 ;
590
+ }
591
+
500
592
con -> umc_ecc_log .de_updated = true;
501
593
502
594
return 0 ;
0 commit comments