Skip to content

Commit 8ecee4c

Browse files
Guchun Chen authored and alexdeucher committed
drm/amdgpu: fix slab-out-of-bounds issue in amdgpu_vm_pt_create
Recent code stores the xcp_id from the file private data (set when the device is opened) in the amdgpu bo, for memory usage accounting etc. However, not all VMs are attached to this fpriv structure, for example the VM used in amdgpu_mes_self_test; for those, KASAN complains about the out-of-bounds access below. More importantly, VM code should not touch the fpriv structure, so drop the fpriv handling from amdgpu_vm_pt.

[ 77.292314] BUG: KASAN: slab-out-of-bounds in amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
[ 77.293845] Read of size 4 at addr ffff888102c48a48 by task modprobe/1069
[ 77.294146] Call Trace:
[ 77.294178] <TASK>
[ 77.294208] dump_stack_lvl+0x49/0x63
[ 77.294260] print_report+0x16f/0x4a6
[ 77.294307] ? amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
[ 77.295979] ? kasan_complete_mode_report_info+0x3c/0x200
[ 77.296057] ? amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
[ 77.297556] kasan_report+0xb4/0x130
[ 77.297609] ? amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
[ 77.299202] __asan_load4+0x6f/0x90
[ 77.299272] amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
[ 77.300796] ? amdgpu_init+0x6e/0x1000 [amdgpu]
[ 77.302222] ? amdgpu_vm_pt_clear+0x750/0x750 [amdgpu]
[ 77.303721] ? preempt_count_sub+0x18/0xc0
[ 77.303786] amdgpu_vm_init+0x39e/0x870 [amdgpu]
[ 77.305186] ? amdgpu_vm_wait_idle+0x90/0x90 [amdgpu]
[ 77.306683] ? kasan_set_track+0x25/0x30
[ 77.306737] ? kasan_save_alloc_info+0x1b/0x30
[ 77.306795] ? __kasan_kmalloc+0x87/0xa0
[ 77.306852] amdgpu_mes_self_test+0x169/0x620 [amdgpu]

v2: when no xcp partition is specified for a PD/PT bo, the xcp id is -1.

Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2686
Fixes: 3ebfd22 ("drm/amdkfd: Store xcp partition id to amdgpu bo")
Signed-off-by: Guchun Chen <[email protected]>
Tested-by: Mikhail Gavrilov <[email protected]>
Reviewed-by: Felix Kuehling <[email protected]>
Reviewed-by: Christian König <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
1 parent dcaa32e commit 8ecee4c

File tree

5 files changed

+14
-11
lines changed

5 files changed

+14
-11
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1233,7 +1233,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
12331233
if (r)
12341234
goto error_pasid;
12351235

1236-
r = amdgpu_vm_init(adev, &fpriv->vm);
1236+
r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id);
12371237
if (r)
12381238
goto error_pasid;
12391239

drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1382,7 +1382,7 @@ int amdgpu_mes_self_test(struct amdgpu_device *adev)
13821382
goto error_pasid;
13831383
}
13841384

1385-
r = amdgpu_vm_init(adev, vm);
1385+
r = amdgpu_vm_init(adev, vm, -1);
13861386
if (r) {
13871387
DRM_ERROR("failed to initialize vm\n");
13881388
goto error_pasid;

drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2121,13 +2121,14 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
21212121
*
21222122
* @adev: amdgpu_device pointer
21232123
* @vm: requested vm
2124+
* @xcp_id: GPU partition selection id
21242125
*
21252126
* Init @vm fields.
21262127
*
21272128
* Returns:
21282129
* 0 for success, error for failure.
21292130
*/
2130-
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
2131+
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id)
21312132
{
21322133
struct amdgpu_bo *root_bo;
21332134
struct amdgpu_bo_vm *root;
@@ -2177,7 +2178,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
21772178
vm->evicting = false;
21782179

21792180
r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level,
2180-
false, &root);
2181+
false, &root, xcp_id);
21812182
if (r)
21822183
goto error_free_delayed;
21832184
root_bo = &root->bo;

drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -392,7 +392,7 @@ int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
392392
u32 pasid);
393393

394394
long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout);
395-
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm);
395+
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id);
396396
int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
397397
void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
398398
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
@@ -475,7 +475,8 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
475475
int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
476476
struct amdgpu_bo_vm *vmbo, bool immediate);
477477
int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
478-
int level, bool immediate, struct amdgpu_bo_vm **vmbo);
478+
int level, bool immediate, struct amdgpu_bo_vm **vmbo,
479+
int32_t xcp_id);
479480
void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm);
480481
bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev,
481482
struct amdgpu_vm *vm);

drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -498,11 +498,12 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
498498
* @level: the page table level
499499
* @immediate: use a immediate update
500500
* @vmbo: pointer to the buffer object pointer
501+
* @xcp_id: GPU partition id
501502
*/
502503
int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
503-
int level, bool immediate, struct amdgpu_bo_vm **vmbo)
504+
int level, bool immediate, struct amdgpu_bo_vm **vmbo,
505+
int32_t xcp_id)
504506
{
505-
struct amdgpu_fpriv *fpriv = container_of(vm, struct amdgpu_fpriv, vm);
506507
struct amdgpu_bo_param bp;
507508
struct amdgpu_bo *bo;
508509
struct dma_resv *resv;
@@ -535,7 +536,7 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
535536

536537
bp.type = ttm_bo_type_kernel;
537538
bp.no_wait_gpu = immediate;
538-
bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1;
539+
bp.xcp_id_plus1 = xcp_id + 1;
539540

540541
if (vm->root.bo)
541542
bp.resv = vm->root.bo->tbo.base.resv;
@@ -561,7 +562,7 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
561562
bp.type = ttm_bo_type_kernel;
562563
bp.resv = bo->tbo.base.resv;
563564
bp.bo_ptr_size = sizeof(struct amdgpu_bo);
564-
bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1;
565+
bp.xcp_id_plus1 = xcp_id + 1;
565566

566567
r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow);
567568

@@ -606,7 +607,7 @@ static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
606607
return 0;
607608

608609
amdgpu_vm_eviction_unlock(vm);
609-
r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt);
610+
r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt, 0);
610611
amdgpu_vm_eviction_lock(vm);
611612
if (r)
612613
return r;

0 commit comments

Comments
 (0)