Skip to content

Commit 072b441

Browse files
Prike Liang authored and alexdeucher committed
drm/amdgpu/gfx11: fallback to driver reset compute queue directly (v2)
Resetting a kernel compute queue (KCQ) through the MES FW always fails, which may be caused by the KIQ failing to process the KCQ unmap request. So, until the MES FW works properly, fall back to having the driver issue the dequeue request and reset the queue through the SPI directly. Besides, rework the ring reset function so that each busy ring type is reset in its own function. Acked-by: Vitaly Prosyak <[email protected]> Signed-off-by: Prike Liang <[email protected]> Reviewed-by: Alex Deucher <[email protected]> Signed-off-by: Alex Deucher <[email protected]>
1 parent f2ea269 commit 072b441

File tree

1 file changed

+71
-13
lines changed

1 file changed

+71
-13
lines changed

drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c

Lines changed: 71 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -3984,13 +3984,13 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
39843984
return 0;
39853985
}
39863986

3987-
static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring)
3987+
static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring, bool reset)
39883988
{
39893989
struct amdgpu_device *adev = ring->adev;
39903990
struct v11_gfx_mqd *mqd = ring->mqd_ptr;
39913991
int mqd_idx = ring - &adev->gfx.gfx_ring[0];
39923992

3993-
if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
3993+
if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
39943994
memset((void *)mqd, 0, sizeof(*mqd));
39953995
mutex_lock(&adev->srbm_mutex);
39963996
soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
@@ -4026,7 +4026,7 @@ static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
40264026

40274027
r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
40284028
if (!r) {
4029-
r = gfx_v11_0_gfx_init_queue(ring);
4029+
r = gfx_v11_0_gfx_init_queue(ring, false);
40304030
amdgpu_bo_kunmap(ring->mqd_obj);
40314031
ring->mqd_ptr = NULL;
40324032
}
@@ -4321,13 +4321,13 @@ static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring)
43214321
return 0;
43224322
}
43234323

4324-
static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring)
4324+
static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset)
43254325
{
43264326
struct amdgpu_device *adev = ring->adev;
43274327
struct v11_compute_mqd *mqd = ring->mqd_ptr;
43284328
int mqd_idx = ring - &adev->gfx.compute_ring[0];
43294329

4330-
if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4330+
if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
43314331
memset((void *)mqd, 0, sizeof(*mqd));
43324332
mutex_lock(&adev->srbm_mutex);
43334333
soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
@@ -4391,7 +4391,7 @@ static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev)
43914391
goto done;
43924392
r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
43934393
if (!r) {
4394-
r = gfx_v11_0_kcq_init_queue(ring);
4394+
r = gfx_v11_0_kcq_init_queue(ring, false);
43954395
amdgpu_bo_kunmap(ring->mqd_obj);
43964396
ring->mqd_ptr = NULL;
43974397
}
@@ -6544,18 +6544,76 @@ static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring)
65446544
amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
65456545
}
65466546

6547-
static int gfx_v11_0_reset_ring(struct amdgpu_ring *ring, unsigned int vmid)
6547+
static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
65486548
{
6549+
struct amdgpu_device *adev = ring->adev;
65496550
int r;
65506551

65516552
r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid);
65526553
if (r)
65536554
return r;
65546555

6555-
/* reset the ring */
6556-
ring->wptr = 0;
6557-
*ring->wptr_cpu_addr = 0;
6558-
amdgpu_ring_clear_ring(ring);
6556+
r = amdgpu_bo_reserve(ring->mqd_obj, false);
6557+
if (unlikely(r != 0)) {
6558+
dev_err(adev->dev, "fail to resv mqd_obj\n");
6559+
return r;
6560+
}
6561+
r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
6562+
if (!r) {
6563+
r = gfx_v11_0_gfx_init_queue(ring, true);
6564+
amdgpu_bo_kunmap(ring->mqd_obj);
6565+
ring->mqd_ptr = NULL;
6566+
}
6567+
amdgpu_bo_unreserve(ring->mqd_obj);
6568+
if (r) {
6569+
dev_err(adev->dev, "fail to unresv mqd_obj\n");
6570+
return r;
6571+
}
6572+
6573+
r = amdgpu_mes_map_legacy_queue(adev, ring);
6574+
if (r) {
6575+
dev_err(adev->dev, "failed to remap kgq\n");
6576+
return r;
6577+
}
6578+
6579+
return amdgpu_ring_test_ring(ring);
6580+
}
6581+
6582+
static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid)
6583+
{
6584+
struct amdgpu_device *adev = ring->adev;
6585+
int r;
6586+
6587+
gfx_v11_0_set_safe_mode(adev, 0);
6588+
mutex_lock(&adev->srbm_mutex);
6589+
soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6590+
WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
6591+
WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
6592+
soc21_grbm_select(adev, 0, 0, 0, 0);
6593+
mutex_unlock(&adev->srbm_mutex);
6594+
gfx_v11_0_unset_safe_mode(adev, 0);
6595+
6596+
r = amdgpu_bo_reserve(ring->mqd_obj, false);
6597+
if (unlikely(r != 0)) {
6598+
dev_err(adev->dev, "fail to resv mqd_obj\n");
6599+
return r;
6600+
}
6601+
r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
6602+
if (!r) {
6603+
r = gfx_v11_0_kcq_init_queue(ring, true);
6604+
amdgpu_bo_kunmap(ring->mqd_obj);
6605+
ring->mqd_ptr = NULL;
6606+
}
6607+
amdgpu_bo_unreserve(ring->mqd_obj);
6608+
if (r) {
6609+
dev_err(adev->dev, "fail to unresv mqd_obj\n");
6610+
return r;
6611+
}
6612+
r = amdgpu_mes_map_legacy_queue(adev, ring);
6613+
if (r) {
6614+
dev_err(adev->dev, "failed to remap kcq\n");
6615+
return r;
6616+
}
65596617

65606618
return amdgpu_ring_test_ring(ring);
65616619
}
@@ -6761,7 +6819,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
67616819
.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
67626820
.soft_recovery = gfx_v11_0_ring_soft_recovery,
67636821
.emit_mem_sync = gfx_v11_0_emit_mem_sync,
6764-
.reset = gfx_v11_0_reset_ring,
6822+
.reset = gfx_v11_0_reset_kgq,
67656823
};
67666824

67676825
static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
@@ -6799,7 +6857,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
67996857
.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
68006858
.soft_recovery = gfx_v11_0_ring_soft_recovery,
68016859
.emit_mem_sync = gfx_v11_0_emit_mem_sync,
6802-
.reset = gfx_v11_0_reset_ring,
6860+
.reset = gfx_v11_0_reset_kcq,
68036861
};
68046862

68056863
static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {

0 commit comments

Comments
 (0)