Skip to content

Commit 60b57dc

Browse files
Jie1zhang authored and gregkh committed
drm/amdkfd: pause autosuspend when creating pdd
[ Upstream commit 438b39a ]

When using MES, creating a pdd requires talking to the GPU to set up the relevant context. The code here forgot to wake up the GPU in case it was in suspend; this causes KVM to EFAULT for a passthrough GPU, for example. This issue can be masked if the GPU was woken up by other things (e.g. opening the KMS node) first and has not yet gone back to sleep.

v4: do the allocation of proc_ctx_bo in a lazy fashion when the first queue is created in a process (Felix)

Signed-off-by: Jesse Zhang <[email protected]>
Reviewed-by: Yunxiang Li <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
Cc: [email protected]
Signed-off-by: Sasha Levin <[email protected]>
1 parent 4312b60 commit 60b57dc

File tree

2 files changed

+17
-21
lines changed

2 files changed

+17
-21
lines changed

drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -197,6 +197,21 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
197197
if (dqm->is_hws_hang)
198198
return -EIO;
199199

200+
if (!pdd->proc_ctx_cpu_ptr) {
201+
r = amdgpu_amdkfd_alloc_gtt_mem(adev,
202+
AMDGPU_MES_PROC_CTX_SIZE,
203+
&pdd->proc_ctx_bo,
204+
&pdd->proc_ctx_gpu_addr,
205+
&pdd->proc_ctx_cpu_ptr,
206+
false);
207+
if (r) {
208+
dev_err(adev->dev,
209+
"failed to allocate process context bo\n");
210+
return r;
211+
}
212+
memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
213+
}
214+
200215
memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
201216
queue_input.process_id = qpd->pqm->process->pasid;
202217
queue_input.page_table_base_addr = qpd->page_table_base;

drivers/gpu/drm/amd/amdkfd/kfd_process.c

Lines changed: 2 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -1046,7 +1046,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
10461046

10471047
kfd_free_process_doorbells(pdd->dev->kfd, pdd);
10481048

1049-
if (pdd->dev->kfd->shared_resources.enable_mes)
1049+
if (pdd->dev->kfd->shared_resources.enable_mes &&
1050+
pdd->proc_ctx_cpu_ptr)
10501051
amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev,
10511052
&pdd->proc_ctx_bo);
10521053
/*
@@ -1572,7 +1573,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
15721573
struct kfd_process *p)
15731574
{
15741575
struct kfd_process_device *pdd = NULL;
1575-
int retval = 0;
15761576

15771577
if (WARN_ON_ONCE(p->n_pdds >= MAX_GPU_INSTANCE))
15781578
return NULL;
@@ -1596,21 +1596,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
15961596
pdd->user_gpu_id = dev->id;
15971597
atomic64_set(&pdd->evict_duration_counter, 0);
15981598

1599-
if (dev->kfd->shared_resources.enable_mes) {
1600-
retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
1601-
AMDGPU_MES_PROC_CTX_SIZE,
1602-
&pdd->proc_ctx_bo,
1603-
&pdd->proc_ctx_gpu_addr,
1604-
&pdd->proc_ctx_cpu_ptr,
1605-
false);
1606-
if (retval) {
1607-
dev_err(dev->adev->dev,
1608-
"failed to allocate process context bo\n");
1609-
goto err_free_pdd;
1610-
}
1611-
memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
1612-
}
1613-
16141599
p->pdds[p->n_pdds++] = pdd;
16151600
if (kfd_dbg_is_per_vmid_supported(pdd->dev))
16161601
pdd->spi_dbg_override = pdd->dev->kfd2kgd->disable_debug_trap(
@@ -1622,10 +1607,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
16221607
idr_init(&pdd->alloc_idr);
16231608

16241609
return pdd;
1625-
1626-
err_free_pdd:
1627-
kfree(pdd);
1628-
return NULL;
16291610
}
16301611

16311612
/**

0 commit comments

Comments
 (0)