Skip to content

Commit a72002c

Browse files
committed
drm/amdgpu: Make use of drm_wedge_task_info
To notify userspace about which task (if any) caused the device to enter a wedged state, make use of the drm_wedge_task_info parameter, filling it with the task's PID and name.

Reviewed-by: Christian König <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: André Almeida <[email protected]>
1 parent 35dc4ce commit a72002c

File tree

2 files changed

+16
-4
lines changed

2 files changed

+16
-4
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6363,8 +6363,17 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
63636363

63646364
atomic_set(&adev->reset_domain->reset_res, r);
63656365

6366-
if (!r)
6367-
drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE, NULL);
6366+
if (!r) {
6367+
struct amdgpu_task_info *ti = NULL;
6368+
6369+
if (job)
6370+
ti = amdgpu_vm_get_task_info_pasid(adev, job->pasid);
6371+
6372+
drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE,
6373+
ti ? &ti->task : NULL);
6374+
6375+
amdgpu_vm_put_task_info(ti);
6376+
}
63686377

63696378
return r;
63706379
}

drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -89,6 +89,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
8989
{
9090
struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
9191
struct amdgpu_job *job = to_amdgpu_job(s_job);
92+
struct drm_wedge_task_info *info = NULL;
9293
struct amdgpu_task_info *ti;
9394
struct amdgpu_device *adev = ring->adev;
9495
int idx;
@@ -125,7 +126,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
125126
ti = amdgpu_vm_get_task_info_pasid(ring->adev, job->pasid);
126127
if (ti) {
127128
amdgpu_vm_print_task_info(adev, ti);
128-
amdgpu_vm_put_task_info(ti);
129+
info = &ti->task;
129130
}
130131

131132
/* attempt a per ring reset */
@@ -164,13 +165,15 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
164165
if (amdgpu_ring_sched_ready(ring))
165166
drm_sched_start(&ring->sched, 0);
166167
dev_err(adev->dev, "Ring %s reset succeeded\n", ring->sched.name);
167-
drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE, NULL);
168+
drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE, info);
168169
goto exit;
169170
}
170171
dev_err(adev->dev, "Ring %s reset failure\n", ring->sched.name);
171172
}
172173
dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
173174

175+
amdgpu_vm_put_task_info(ti);
176+
174177
if (amdgpu_device_should_recover_gpu(ring->adev)) {
175178
struct amdgpu_reset_context reset_context;
176179
memset(&reset_context, 0, sizeof(reset_context));

0 commit comments

Comments
 (0)