Skip to content

Commit 370fbff

Browse files
YiPeng Chai authored and alexdeucher committed
drm/amdgpu: add poison consumption handler
Add poison consumption handler. Signed-off-by: YiPeng Chai <[email protected]> Reviewed-by: Tao Zhou <[email protected]> Signed-off-by: Alex Deucher <[email protected]>
1 parent bfa579b commit 370fbff

File tree

1 file changed

+39
-4
lines changed

1 file changed

+39
-4
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c

Lines changed: 39 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2848,12 +2848,35 @@ static void amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev,
28482848
schedule_delayed_work(&con->page_retirement_dwork, 0);
28492849
}
28502850

2851+
/*
 * amdgpu_ras_poison_consumption_handler - act on one poison consumption message
 * @adev: device the message belongs to
 * @poison_msg: message supplying the reset flags, the pasid, and an optional
 *              pasid_fn callback together with its data argument
 *
 * Calls kgd2kfd_set_sram_ecc_flag() on the device's KFD handle, invokes
 * poison_msg->pasid_fn (when it is non-NULL) with the pasid and data taken
 * from the message and, if poison_msg->reset is non-zero, flushes the page
 * retirement delayed work, ORs the reset bits into con->gpu_reset_flags and
 * calls amdgpu_ras_reset_gpu().
 *
 * Return: always 0.
 */
static int amdgpu_ras_poison_consumption_handler(struct amdgpu_device *adev,
2852+
struct ras_poison_msg *poison_msg)
2853+
{
2854+
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
2855+
uint32_t reset = poison_msg->reset; /* non-zero means a GPU reset was requested */
2856+
uint16_t pasid = poison_msg->pasid;
2857+
2858+
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); /* NOTE(review): presumably tells KFD an ECC/poison event occurred - confirm */
2859+
2860+
if (poison_msg->pasid_fn) /* the callback is optional */
2861+
poison_msg->pasid_fn(adev, pasid, poison_msg->data);
2862+
2863+
if (reset) {
2864+
flush_delayed_work(&con->page_retirement_dwork); /* run pending page retirement work to completion before resetting */
2865+
2866+
con->gpu_reset_flags |= reset; /* add the requested bits; bits already set are kept */
2867+
amdgpu_ras_reset_gpu(adev);
2868+
}
2869+
2870+
return 0;
2871+
}
2872+
28512873
static int amdgpu_ras_page_retirement_thread(void *param)
28522874
{
28532875
struct amdgpu_device *adev = (struct amdgpu_device *)param;
28542876
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
28552877
struct ras_poison_msg poison_msg;
28562878
enum amdgpu_ras_block ras_block;
2879+
bool poison_creation_is_handled = false;
28572880

28582881
while (!kthread_should_stop()) {
28592882

@@ -2874,12 +2897,24 @@ static int amdgpu_ras_page_retirement_thread(void *param)
28742897
dev_info(adev->dev, "Start processing ras block %s(%d)\n",
28752898
ras_block_str(ras_block), ras_block);
28762899

2877-
if (ras_block == AMDGPU_RAS_BLOCK__UMC)
2900+
if (ras_block == AMDGPU_RAS_BLOCK__UMC) {
28782901
amdgpu_ras_poison_creation_handler(adev,
28792902
MAX_UMC_POISON_POLLING_TIME_ASYNC);
2880-
else
2881-
amdgpu_umc_bad_page_polling_timeout(adev,
2882-
false, MAX_UMC_POISON_POLLING_TIME_ASYNC);
2903+
poison_creation_is_handled = true;
2904+
} else {
2905+
/* poison_creation_is_handled:
2906+
* false: no poison creation interrupt, but it has poison
2907+
* consumption interrupt.
2908+
* true: It has poison creation interrupt at the beginning,
2909+
* but it has no poison creation interrupt later.
2910+
*/
2911+
amdgpu_ras_poison_creation_handler(adev,
2912+
poison_creation_is_handled ?
2913+
0 : MAX_UMC_POISON_POLLING_TIME_ASYNC);
2914+
2915+
amdgpu_ras_poison_consumption_handler(adev, &poison_msg);
2916+
poison_creation_is_handled = false;
2917+
}
28832918
}
28842919

28852920
return 0;

0 commit comments

Comments
 (0)