Skip to content

Commit a734adf

Browse files
YiPeng Chai authored and alexdeucher committed
drm/amdgpu: add poison creation handler
Add poison creation handler.

Signed-off-by: YiPeng Chai <[email protected]>
Reviewed-by: Tao Zhou <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
1 parent f493dd6 commit a734adf

File tree

1 file changed

+69
-7
lines changed

1 file changed

+69
-7
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c

Lines changed: 69 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2076,6 +2076,17 @@ static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj
20762076
{
20772077
dev_info(obj->adev->dev,
20782078
"Poison is created\n");
2079+
2080+
if (amdgpu_ip_version(obj->adev, UMC_HWIP, 0) >= IP_VERSION(12, 0, 0)) {
2081+
struct amdgpu_ras *con = amdgpu_ras_get_context(obj->adev);
2082+
2083+
amdgpu_ras_put_poison_req(obj->adev,
2084+
AMDGPU_RAS_BLOCK__UMC, 0, NULL, NULL, false);
2085+
2086+
atomic_inc(&con->page_retirement_req_cnt);
2087+
2088+
wake_up(&con->page_retirement_wq);
2089+
}
20792090
}
20802091

20812092
static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj,
@@ -2727,15 +2738,13 @@ int amdgpu_ras_put_poison_req(struct amdgpu_device *adev,
27272738
return 0;
27282739
}
27292740

2730-
#ifdef PRE_DEFINED_FUNCTION
27312741
/*
 * Fetch one queued poison request from the RAS context's poison_fifo.
 *
 * @adev: device whose RAS context (amdgpu_ras_get_context()) owns the fifo
 * @poison_msg: destination that kfifo_get() fills with the fetched entry
 *
 * Returns the kfifo_get() result unchanged. The page retirement thread
 * treats a zero return as "no request to process" and skips the iteration.
 */
static int amdgpu_ras_get_poison_req(struct amdgpu_device *adev,
27322742
struct ras_poison_msg *poison_msg)
27332743
{
27342744
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
27352745

27362746
return kfifo_get(&con->poison_fifo, poison_msg);
27372747
}
2738-
#endif
27392748

27402749
static void amdgpu_ras_ecc_log_init(struct ras_ecc_log_info *ecc_log)
27412750
{
@@ -2766,10 +2775,54 @@ static void amdgpu_ras_ecc_log_fini(struct ras_ecc_log_info *ecc_log)
27662775
mutex_destroy(&ecc_log->lock);
27672776
ecc_log->de_updated = false;
27682777
}
2778+
2779+
/*
 * Query the RAS error status of @ras_block, retrying until the UMC ECC log
 * reports a deferred-error update (de_updated) or the retry budget is spent.
 *
 * @adev: device to query
 * @ras_block: block stored in the query header passed to
 *             amdgpu_ras_query_error_status()
 * @timeout_ms: number of retries; each retry is preceded by msleep(1).
 *              With 0 the status is queried exactly once and de_updated
 *              is not waited for.
 *
 * Note that the log that is polled is always ras->umc_ecc_log, whatever
 * @ras_block is.
 * NOTE(review): de_updated is presumably set somewhere under
 * amdgpu_ras_query_error_status(); that code is not visible here - confirm.
 *
 * Returns 0 on success, the error from amdgpu_ras_query_error_status() if a
 * query fails, or -ETIMEDOUT when a nonzero @timeout_ms budget is used up.
 */
static int amdgpu_ras_query_ecc_status(struct amdgpu_device *adev,
2780+
enum amdgpu_ras_block ras_block, uint32_t timeout_ms)
2781+
{
2782+
int ret = 0;
2783+
struct ras_ecc_log_info *ecc_log;
2784+
struct ras_query_if info;
2785+
uint32_t timeout = timeout_ms;
2786+
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
2787+
2788+
memset(&info, 0, sizeof(info));
2789+
info.head.block = ras_block;
2790+
2791+
ecc_log = &ras->umc_ecc_log;
2792+
/* Clear the flag before polling; the loop below waits for it to become true. */
ecc_log->de_updated = false;
2793+
do {
2794+
ret = amdgpu_ras_query_error_status(adev, &info);
2795+
if (ret) {
2796+
dev_err(adev->dev, "Failed to query ras error! ret:%d\n", ret);
2797+
return ret;
2798+
}
2799+
2800+
/* Flag still clear and retries left: sleep and consume one retry. */
if (timeout && !ecc_log->de_updated) {
2801+
msleep(1);
2802+
timeout--;
2803+
}
2804+
} while (timeout && !ecc_log->de_updated);
2805+
2806+
/* A nonzero budget that was fully consumed is reported as a timeout. */
if (timeout_ms && !timeout) {
2807+
dev_warn(adev->dev, "Can't find deferred error\n");
2808+
return -ETIMEDOUT;
2809+
}
2810+
2811+
return 0;
2812+
}
2813+
2814+
/*
 * Handle a poison creation request by querying the ECC status of the UMC
 * block through amdgpu_ras_query_ecc_status().
 *
 * @adev: device to query
 * @timeout: passed through as the timeout_ms argument of the query
 *
 * The query's return value is discarded, so a failed query or a timeout is
 * not propagated to the caller; the callee only logs those conditions.
 */
static void amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev,
2815+
uint32_t timeout)
2816+
{
2817+
amdgpu_ras_query_ecc_status(adev, AMDGPU_RAS_BLOCK__UMC, timeout);
2818+
}
2819+
27692820
static int amdgpu_ras_page_retirement_thread(void *param)
27702821
{
27712822
struct amdgpu_device *adev = (struct amdgpu_device *)param;
27722823
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
2824+
struct ras_poison_msg poison_msg;
2825+
enum amdgpu_ras_block ras_block;
27732826

27742827
while (!kthread_should_stop()) {
27752828

@@ -2780,13 +2833,22 @@ static int amdgpu_ras_page_retirement_thread(void *param)
27802833
if (kthread_should_stop())
27812834
break;
27822835

2783-
dev_info(adev->dev, "Start processing page retirement. request:%d\n",
2784-
atomic_read(&con->page_retirement_req_cnt));
2785-
27862836
atomic_dec(&con->page_retirement_req_cnt);
27872837

2788-
amdgpu_umc_bad_page_polling_timeout(adev,
2789-
0, MAX_UMC_POISON_POLLING_TIME_ASYNC);
2838+
if (!amdgpu_ras_get_poison_req(adev, &poison_msg))
2839+
continue;
2840+
2841+
ras_block = poison_msg.block;
2842+
2843+
dev_info(adev->dev, "Start processing ras block %s(%d)\n",
2844+
ras_block_str(ras_block), ras_block);
2845+
2846+
if (ras_block == AMDGPU_RAS_BLOCK__UMC)
2847+
amdgpu_ras_poison_creation_handler(adev,
2848+
MAX_UMC_POISON_POLLING_TIME_ASYNC);
2849+
else
2850+
amdgpu_umc_bad_page_polling_timeout(adev,
2851+
false, MAX_UMC_POISON_POLLING_TIME_ASYNC);
27902852
}
27912853

27922854
return 0;

0 commit comments

Comments
 (0)