Skip to content

Commit e37ccf4

Browse files
PhilipYang authored and alexdeucher committed
drm/amdgpu: Show warning message if IH ring overflow
If the IH primary ring and the KFD ih fifo overflow, we may miss CP and SDMA interrupts and cause an application soft hang. Show a warning message with the ring name if an overflow happens. Add a function to get the ih ring name to avoid duplicating it. To keep the warning message consistent between GPU generations, change all *_ih.c files except those for ASICs older than Vega, which have only one ih ring. Signed-off-by: Philip Yang <[email protected]> Reviewed-by: Christian König <[email protected]> Reviewed-by: Felix Kuehling <[email protected]> Signed-off-by: Alex Deucher <[email protected]>
1 parent de84484 commit e37ccf4

File tree

6 files changed

+15
-11
lines changed

6 files changed

+15
-11
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,3 +298,9 @@ uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr,
298298
dw2 = le32_to_cpu(ih->ring[ring_index + 2]);
299299
return dw1 | ((u64)(dw2 & 0xffff) << 32);
300300
}
301+
302+
/**
 * amdgpu_ih_ring_name - return a printable name for an IH ring
 * @adev: device whose irq.ih, irq.ih_soft, irq.ih1 and irq.ih2 rings
 *        are compared against @ih
 * @ih: ring to identify
 *
 * The ring is identified purely by pointer comparison with the rings
 * embedded in @adev->irq; nothing is dereferenced through @ih.
 *
 * Return: "ih", "sw ih", "ih1" or "ih2" for the matching ring, or
 * "unknown" if @ih is none of them. The result is a string literal.
 */
const char *amdgpu_ih_ring_name(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
303+
{
304+
return ih == &adev->irq.ih ? "ih" : ih == &adev->irq.ih_soft ? "sw ih" :
305+
ih == &adev->irq.ih1 ? "ih1" : ih == &adev->irq.ih2 ? "ih2" : "unknown";
306+
}

drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,4 +110,5 @@ void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
110110
struct amdgpu_iv_entry *entry);
111111
uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr,
112112
signed int offset);
113+
const char *amdgpu_ih_ring_name(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
113114
#endif

drivers/gpu/drm/amd/amdgpu/navi10_ih.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -434,9 +434,8 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev,
434434
* this should allow us to catch up.
435435
*/
436436
tmp = (wptr + 32) & ih->ptr_mask;
437-
dev_warn(adev->dev, "IH ring buffer overflow "
438-
"(0x%08X, 0x%08X, 0x%08X)\n",
439-
wptr, ih->rptr, tmp);
437+
dev_warn_ratelimited(adev->dev, "%s ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
438+
amdgpu_ih_ring_name(adev, ih), wptr, ih->rptr, tmp);
440439
ih->rptr = tmp;
441440

442441
tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);

drivers/gpu/drm/amd/amdgpu/vega10_ih.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -364,9 +364,8 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev,
364364
* this should allow us to catchup.
365365
*/
366366
tmp = (wptr + 32) & ih->ptr_mask;
367-
dev_warn(adev->dev, "IH ring buffer overflow "
368-
"(0x%08X, 0x%08X, 0x%08X)\n",
369-
wptr, ih->rptr, tmp);
367+
dev_warn_ratelimited(adev->dev, "%s ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
368+
amdgpu_ih_ring_name(adev, ih), wptr, ih->rptr, tmp);
370369
ih->rptr = tmp;
371370

372371
tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);

drivers/gpu/drm/amd/amdgpu/vega20_ih.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -444,9 +444,8 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev,
444444
* this should allow us to catchup.
445445
*/
446446
tmp = (wptr + 32) & ih->ptr_mask;
447-
dev_warn(adev->dev, "IH ring buffer overflow "
448-
"(0x%08X, 0x%08X, 0x%08X)\n",
449-
wptr, ih->rptr, tmp);
447+
dev_warn_ratelimited(adev->dev, "%s ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
448+
amdgpu_ih_ring_name(adev, ih), wptr, ih->rptr, tmp);
450449
ih->rptr = tmp;
451450

452451
tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);

drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,8 @@ void kfd_interrupt_exit(struct kfd_node *node)
108108
bool enqueue_ih_ring_entry(struct kfd_node *node, const void *ih_ring_entry)
109109
{
110110
if (kfifo_is_full(&node->ih_fifo)) {
111-
dev_dbg_ratelimited(node->adev->dev,
112-
"Interrupt ring overflow, dropping interrupt\n");
111+
dev_warn_ratelimited(node->adev->dev, "KFD node %d ih_fifo overflow\n",
112+
node->node_id);
113113
return false;
114114
}
115115

0 commit comments

Comments
 (0)