Skip to content

Commit 23eb492

Browse files
PhilipYangA authored and alexdeucher committed
drm/amdgpu: enable Navi retry fault wptr overflow
If xnack is on, VM retry fault interrupts are sent to IH ring1, and ring1 fills up quickly. IH then cannot receive other interrupts, which causes a deadlock if the driver migrates a buffer using sdma and waits for sdma to finish while handling a retry fault. Remove VMC from the IH storm client list and enable ring1 write pointer overflow, so that IH drops retry fault interrupts and is able to receive other interrupts while the driver is handling a retry fault. The IH ring1 write pointer is not written back to memory by IH, and the ring1 write pointer recorded by self-irq is not updated, so always read the latest ring1 write pointer from the register. Signed-off-by: Philip Yang <[email protected]> Reviewed-by: Felix Kuehling <[email protected]> Signed-off-by: Alex Deucher <[email protected]>
1 parent 71ee923 commit 23eb492

File tree

1 file changed

+12
-21
lines changed

1 file changed

+12
-21
lines changed

drivers/gpu/drm/amd/amdgpu/navi10_ih.c

Lines changed: 12 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -276,10 +276,8 @@ static int navi10_ih_enable_ring(struct amdgpu_device *adev,
276276
tmp = navi10_ih_rb_cntl(ih, tmp);
277277
if (ih == &adev->irq.ih)
278278
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RPTR_REARM, !!adev->irq.msi_enabled);
279-
if (ih == &adev->irq.ih1) {
280-
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 0);
279+
if (ih == &adev->irq.ih1)
281280
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1);
282-
}
283281

284282
if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
285283
if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) {
@@ -320,7 +318,6 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev)
320318
{
321319
struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1, &adev->irq.ih2};
322320
u32 ih_chicken;
323-
u32 tmp;
324321
int ret;
325322
int i;
326323

@@ -364,15 +361,6 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev)
364361
adev->nbio.funcs->ih_doorbell_range(adev, ih[0]->use_doorbell,
365362
ih[0]->doorbell_index);
366363

367-
tmp = RREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL);
368-
tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL,
369-
CLIENT18_IS_STORM_CLIENT, 1);
370-
WREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL, tmp);
371-
372-
tmp = RREG32_SOC15(OSSSYS, 0, mmIH_INT_FLOOD_CNTL);
373-
tmp = REG_SET_FIELD(tmp, IH_INT_FLOOD_CNTL, FLOOD_CNTL_ENABLE, 1);
374-
WREG32_SOC15(OSSSYS, 0, mmIH_INT_FLOOD_CNTL, tmp);
375-
376364
pci_set_master(adev->pdev);
377365

378366
/* enable interrupts */
@@ -421,12 +409,19 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev,
421409
u32 wptr, tmp;
422410
struct amdgpu_ih_regs *ih_regs;
423411

424-
wptr = le32_to_cpu(*ih->wptr_cpu);
425-
ih_regs = &ih->ih_regs;
412+
if (ih == &adev->irq.ih) {
413+
/* Only ring0 supports writeback. On other rings fall back
414+
* to register-based code with overflow checking below.
415+
*/
416+
wptr = le32_to_cpu(*ih->wptr_cpu);
426417

427-
if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
428-
goto out;
418+
if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
419+
goto out;
420+
}
429421

422+
ih_regs = &ih->ih_regs;
423+
424+
/* Double check that the overflow wasn't already cleared. */
430425
wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr);
431426
if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
432427
goto out;
@@ -514,15 +509,11 @@ static int navi10_ih_self_irq(struct amdgpu_device *adev,
514509
struct amdgpu_irq_src *source,
515510
struct amdgpu_iv_entry *entry)
516511
{
517-
uint32_t wptr = cpu_to_le32(entry->src_data[0]);
518-
519512
switch (entry->ring_id) {
520513
case 1:
521-
*adev->irq.ih1.wptr_cpu = wptr;
522514
schedule_work(&adev->irq.ih1_work);
523515
break;
524516
case 2:
525-
*adev->irq.ih2.wptr_cpu = wptr;
526517
schedule_work(&adev->irq.ih2_work);
527518
break;
528519
default: break;

0 commit comments

Comments
 (0)