Skip to content

Commit 8fe4fde

Browse files
committed
drm/amdgpu/gfx12: fall back to resetting the compute queue directly from the driver
Resetting a kernel compute queue (KCQ) through the MES FW always fails; this may be caused by the KIQ failing to process the KCQ unmap request. So, until the MES FW works properly, fall back to having the driver issue the dequeue request and reset the queue through the SPI directly. In addition, rework the ring reset function so that each busy ring type is reset in its own function. Acked-by: Vitaly Prosyak <[email protected]> Signed-off-by: Alex Deucher <[email protected]>
1 parent 2480599 commit 8fe4fde

File tree

1 file changed

+79
-14
lines changed

1 file changed

+79
-14
lines changed

drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c

Lines changed: 79 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2916,13 +2916,13 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
29162916
return 0;
29172917
}
29182918

2919-
static int gfx_v12_0_gfx_init_queue(struct amdgpu_ring *ring)
2919+
static int gfx_v12_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset)
29202920
{
29212921
struct amdgpu_device *adev = ring->adev;
29222922
struct v12_gfx_mqd *mqd = ring->mqd_ptr;
29232923
int mqd_idx = ring - &adev->gfx.gfx_ring[0];
29242924

2925-
if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
2925+
if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
29262926
memset((void *)mqd, 0, sizeof(*mqd));
29272927
mutex_lock(&adev->srbm_mutex);
29282928
soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
@@ -2958,7 +2958,7 @@ static int gfx_v12_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
29582958

29592959
r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
29602960
if (!r) {
2961-
r = gfx_v12_0_gfx_init_queue(ring);
2961+
r = gfx_v12_0_kgq_init_queue(ring, false);
29622962
amdgpu_bo_kunmap(ring->mqd_obj);
29632963
ring->mqd_ptr = NULL;
29642964
}
@@ -3262,13 +3262,13 @@ static int gfx_v12_0_kiq_init_queue(struct amdgpu_ring *ring)
32623262
return 0;
32633263
}
32643264

3265-
static int gfx_v12_0_kcq_init_queue(struct amdgpu_ring *ring)
3265+
static int gfx_v12_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset)
32663266
{
32673267
struct amdgpu_device *adev = ring->adev;
32683268
struct v12_compute_mqd *mqd = ring->mqd_ptr;
32693269
int mqd_idx = ring - &adev->gfx.compute_ring[0];
32703270

3271-
if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
3271+
if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
32723272
memset((void *)mqd, 0, sizeof(*mqd));
32733273
mutex_lock(&adev->srbm_mutex);
32743274
soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
@@ -3332,7 +3332,7 @@ static int gfx_v12_0_kcq_resume(struct amdgpu_device *adev)
33323332
goto done;
33333333
r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
33343334
if (!r) {
3335-
r = gfx_v12_0_kcq_init_queue(ring);
3335+
r = gfx_v12_0_kcq_init_queue(ring, false);
33363336
amdgpu_bo_kunmap(ring->mqd_obj);
33373337
ring->mqd_ptr = NULL;
33383338
}
@@ -5158,18 +5158,83 @@ static void gfx_v12_ip_dump(void *handle)
51585158
amdgpu_gfx_off_ctrl(adev, true);
51595159
}
51605160

5161-
static int gfx_v12_0_reset_ring(struct amdgpu_ring *ring, unsigned int vmid)
5161+
static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
51625162
{
5163+
struct amdgpu_device *adev = ring->adev;
51635164
int r;
51645165

51655166
r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid);
5166-
if (r)
5167+
if (r) {
5168+
dev_err(adev->dev, "reset via MES failed %d\n", r);
51675169
return r;
5170+
}
51685171

5169-
/* reset the ring */
5170-
ring->wptr = 0;
5171-
*ring->wptr_cpu_addr = 0;
5172-
amdgpu_ring_clear_ring(ring);
5172+
r = amdgpu_bo_reserve(ring->mqd_obj, false);
5173+
if (unlikely(r != 0)) {
5174+
dev_err(adev->dev, "fail to resv mqd_obj\n");
5175+
return r;
5176+
}
5177+
r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
5178+
if (!r) {
5179+
r = gfx_v12_0_kgq_init_queue(ring, true);
5180+
amdgpu_bo_kunmap(ring->mqd_obj);
5181+
ring->mqd_ptr = NULL;
5182+
}
5183+
amdgpu_bo_unreserve(ring->mqd_obj);
5184+
if (r) {
5185+
DRM_ERROR("fail to unresv mqd_obj\n");
5186+
return r;
5187+
}
5188+
5189+
r = amdgpu_mes_map_legacy_queue(adev, ring);
5190+
if (r) {
5191+
dev_err(adev->dev, "failed to remap kgq\n");
5192+
return r;
5193+
}
5194+
5195+
return amdgpu_ring_test_ring(ring);
5196+
}
5197+
5198+
static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid)
5199+
{
5200+
struct amdgpu_device *adev = ring->adev;
5201+
int r, i;
5202+
5203+
gfx_v12_0_set_safe_mode(adev, 0);
5204+
mutex_lock(&adev->srbm_mutex);
5205+
soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5206+
WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
5207+
WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
5208+
for (i = 0; i < adev->usec_timeout; i++) {
5209+
if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
5210+
break;
5211+
udelay(1);
5212+
}
5213+
soc24_grbm_select(adev, 0, 0, 0, 0);
5214+
mutex_unlock(&adev->srbm_mutex);
5215+
gfx_v12_0_unset_safe_mode(adev, 0);
5216+
5217+
r = amdgpu_bo_reserve(ring->mqd_obj, false);
5218+
if (unlikely(r != 0)) {
5219+
DRM_ERROR("fail to resv mqd_obj\n");
5220+
return r;
5221+
}
5222+
r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
5223+
if (!r) {
5224+
r = gfx_v12_0_kcq_init_queue(ring, true);
5225+
amdgpu_bo_kunmap(ring->mqd_obj);
5226+
ring->mqd_ptr = NULL;
5227+
}
5228+
amdgpu_bo_unreserve(ring->mqd_obj);
5229+
if (r) {
5230+
DRM_ERROR("fail to unresv mqd_obj\n");
5231+
return r;
5232+
}
5233+
r = amdgpu_mes_map_legacy_queue(adev, ring);
5234+
if (r) {
5235+
dev_err(adev->dev, "failed to remap kcq\n");
5236+
return r;
5237+
}
51735238

51745239
return amdgpu_ring_test_ring(ring);
51755240
}
@@ -5236,7 +5301,7 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_gfx = {
52365301
.emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait,
52375302
.soft_recovery = gfx_v12_0_ring_soft_recovery,
52385303
.emit_mem_sync = gfx_v12_0_emit_mem_sync,
5239-
.reset = gfx_v12_0_reset_ring,
5304+
.reset = gfx_v12_0_reset_kgq,
52405305
};
52415306

52425307
static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = {
@@ -5271,7 +5336,7 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = {
52715336
.emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait,
52725337
.soft_recovery = gfx_v12_0_ring_soft_recovery,
52735338
.emit_mem_sync = gfx_v12_0_emit_mem_sync,
5274-
.reset = gfx_v12_0_reset_ring,
5339+
.reset = gfx_v12_0_reset_kcq,
52755340
};
52765341

52775342
static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_kiq = {

0 commit comments

Comments
 (0)