Skip to content

Commit bfa579b

Browse files
YiPeng Chai authored and alexdeucher committed
drm/amdgpu: prepare to handle pasid poison consumption
Prepare to handle pasid poison consumption.

Signed-off-by: YiPeng Chai <[email protected]>
Reviewed-by: Tao Zhou <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
1 parent 314c38c commit bfa579b

File tree

5 files changed

+31
-9
lines changed

5 files changed

+31
-9
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -747,10 +747,17 @@ bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev)
747747
return amdgpu_ras_get_fed_status(adev);
748748
}
749749

750+
void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *adev,
751+
enum amdgpu_ras_block block, uint16_t pasid,
752+
pasid_notify pasid_fn, void *data, uint32_t reset)
753+
{
754+
amdgpu_umc_pasid_poison_handler(adev, block, pasid, pasid_fn, data, reset);
755+
}
756+
750757
void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
751758
enum amdgpu_ras_block block, uint32_t reset)
752759
{
753-
amdgpu_umc_poison_handler(adev, block, reset);
760+
amdgpu_umc_pasid_poison_handler(adev, block, 0, NULL, NULL, reset);
754761
}
755762

756763
int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -337,6 +337,11 @@ int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev,
337337
struct tile_config *config);
338338
void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
339339
enum amdgpu_ras_block block, uint32_t reset);
340+
341+
void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *adev,
342+
enum amdgpu_ras_block block, uint16_t pasid,
343+
pasid_notify pasid_fn, void *data, uint32_t reset);
344+
340345
bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev);
341346
bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem);
342347
void amdgpu_amdkfd_block_mmu_notifications(void *p);

drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c

Lines changed: 13 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -252,8 +252,9 @@ int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev,
252252
return 0;
253253
}
254254

255-
int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
256-
enum amdgpu_ras_block block, uint32_t reset)
255+
int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev,
256+
enum amdgpu_ras_block block, uint16_t pasid,
257+
pasid_notify pasid_fn, void *data, uint32_t reset)
257258
{
258259
int ret = AMDGPU_RAS_SUCCESS;
259260

@@ -291,16 +292,14 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
291292

292293
amdgpu_ras_error_data_fini(&err_data);
293294
} else {
294-
if (reset) {
295-
amdgpu_umc_bad_page_polling_timeout(adev,
296-
reset, MAX_UMC_POISON_POLLING_TIME_SYNC);
297-
} else {
298295
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
299296

297+
amdgpu_ras_put_poison_req(adev,
298+
block, pasid, pasid_fn, data, reset);
299+
300300
atomic_inc(&con->page_retirement_req_cnt);
301301

302302
wake_up(&con->page_retirement_wq);
303-
}
304303
}
305304
} else {
306305
if (adev->virt.ops && adev->virt.ops->ras_poison_handler)
@@ -313,6 +312,13 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
313312
return ret;
314313
}
315314

315+
int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
316+
enum amdgpu_ras_block block, uint32_t reset)
317+
{
318+
return amdgpu_umc_pasid_poison_handler(adev,
319+
block, 0, NULL, NULL, reset);
320+
}
321+
316322
int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
317323
void *ras_error_status,
318324
struct amdgpu_iv_entry *entry)

drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -106,6 +106,9 @@ int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev);
106106
int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
107107
int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
108108
enum amdgpu_ras_block block, uint32_t reset);
109+
int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev,
110+
enum amdgpu_ras_block block, uint16_t pasid,
111+
pasid_notify pasid_fn, void *data, uint32_t reset);
109112
int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
110113
struct amdgpu_irq_src *source,
111114
struct amdgpu_iv_entry *entry);

drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -190,7 +190,8 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev,
190190
dev_warn(dev->adev->dev,
191191
"poison is consumed by client %d, kick off gpu reset flow\n", client_id);
192192

193-
amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, reset);
193+
amdgpu_amdkfd_ras_pasid_poison_consumption_handler(dev->adev,
194+
block, pasid, NULL, NULL, reset);
194195
}
195196

196197
static bool context_id_expected(struct kfd_dev *dev)

0 commit comments

Comments
 (0)