Skip to content

Commit 74ef952

Browse files
yunxiali authored and ChristianKoenigAMD committed
drm/amdgpu: track bo memory stats at runtime
Before, every time fdinfo is queried we try to lock all the BOs in the VM and calculate memory usage from scratch. This works okay if the fdinfo is rarely read and the VMs don't have a ton of BOs. If either of these conditions is not true, we get a massive performance hit.

In this new revision, we track the BOs as they change states. This way when the fdinfo is queried we only need to take the status lock and copy out the usage stats with minimal impact to the runtime performance. With this new approach however, we would no longer be able to track active buffers.

Signed-off-by: Yunxiang Li <[email protected]>
Reviewed-by: Christian König <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
Signed-off-by: Christian König <[email protected]>
1 parent a541a6e commit 74ef952

File tree

9 files changed

+232
-139
lines changed

9 files changed

+232
-139
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include "amdgpu_gem.h"
3737
#include "amdgpu_dma_buf.h"
3838
#include "amdgpu_xgmi.h"
39+
#include "amdgpu_vm.h"
3940
#include <drm/amdgpu_drm.h>
4041
#include <drm/ttm/ttm_tt.h>
4142
#include <linux/dma-buf.h>
@@ -60,6 +61,8 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
6061
if (pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0)
6162
attach->peer2peer = false;
6263

64+
amdgpu_vm_bo_update_shared(bo);
65+
6366
return 0;
6467
}
6568

drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file)
6060
struct amdgpu_fpriv *fpriv = file->driver_priv;
6161
struct amdgpu_vm *vm = &fpriv->vm;
6262

63-
struct amdgpu_mem_stats stats[__AMDGPU_PL_LAST + 1] = { };
63+
struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM];
6464
ktime_t usage[AMDGPU_HW_IP_NUM];
6565
const char *pl_name[] = {
6666
[TTM_PL_VRAM] = "vram",
@@ -72,15 +72,8 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file)
7272
[AMDGPU_PL_DOORBELL] = "doorbell",
7373
};
7474
unsigned int hw_ip, i;
75-
int ret;
76-
77-
ret = amdgpu_bo_reserve(vm->root.bo, false);
78-
if (ret)
79-
return;
80-
81-
amdgpu_vm_get_memory(vm, stats, ARRAY_SIZE(stats));
82-
amdgpu_bo_unreserve(vm->root.bo);
8375

76+
amdgpu_vm_get_memory(vm, stats);
8477
amdgpu_ctx_mgr_usage(&fpriv->ctx_mgr, usage);
8578

8679
/*
@@ -97,7 +90,6 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file)
9790

9891
drm_print_memory_stats(p,
9992
&stats[i].drm,
100-
DRM_GEM_OBJECT_ACTIVE |
10193
DRM_GEM_OBJECT_RESIDENT |
10294
DRM_GEM_OBJECT_PURGEABLE,
10395
pl_name[i]);
@@ -115,9 +107,11 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file)
115107
drm_printf(p, "amd-evicted-vram:\t%llu KiB\n",
116108
stats[TTM_PL_VRAM].evicted/1024UL);
117109
drm_printf(p, "amd-requested-vram:\t%llu KiB\n",
118-
stats[TTM_PL_VRAM].requested/1024UL);
110+
(stats[TTM_PL_VRAM].drm.shared +
111+
stats[TTM_PL_VRAM].drm.private) / 1024UL);
119112
drm_printf(p, "amd-requested-gtt:\t%llu KiB\n",
120-
stats[TTM_PL_TT].requested/1024UL);
113+
(stats[TTM_PL_TT].drm.shared +
114+
stats[TTM_PL_TT].drm.private) / 1024UL);
121115

122116
for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
123117
if (!usage[hw_ip])

drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
#include "amdgpu_dma_buf.h"
4343
#include "amdgpu_hmm.h"
4444
#include "amdgpu_xgmi.h"
45+
#include "amdgpu_vm.h"
4546

4647
static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf)
4748
{
@@ -179,6 +180,7 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj,
179180
if (r)
180181
return r;
181182

183+
amdgpu_vm_bo_update_shared(abo);
182184
bo_va = amdgpu_vm_bo_find(vm, abo);
183185
if (!bo_va)
184186
bo_va = amdgpu_vm_bo_add(adev, vm, abo);
@@ -252,6 +254,7 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj,
252254
goto out_unlock;
253255

254256
amdgpu_vm_bo_del(adev, bo_va);
257+
amdgpu_vm_bo_update_shared(bo);
255258
if (!amdgpu_vm_ready(vm))
256259
goto out_unlock;
257260

drivers/gpu/drm/amd/amdgpu/amdgpu_object.c

Lines changed: 40 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -1157,7 +1157,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
11571157
return;
11581158

11591159
abo = ttm_to_amdgpu_bo(bo);
1160-
amdgpu_vm_bo_invalidate(abo, evict);
1160+
amdgpu_vm_bo_move(abo, new_mem, evict);
11611161

11621162
amdgpu_bo_kunmap(abo);
11631163

@@ -1170,75 +1170,6 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
11701170
old_mem ? old_mem->mem_type : -1);
11711171
}
11721172

1173-
void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
1174-
struct amdgpu_mem_stats *stats,
1175-
unsigned int sz)
1176-
{
1177-
const unsigned int domain_to_pl[] = {
1178-
[ilog2(AMDGPU_GEM_DOMAIN_CPU)] = TTM_PL_SYSTEM,
1179-
[ilog2(AMDGPU_GEM_DOMAIN_GTT)] = TTM_PL_TT,
1180-
[ilog2(AMDGPU_GEM_DOMAIN_VRAM)] = TTM_PL_VRAM,
1181-
[ilog2(AMDGPU_GEM_DOMAIN_GDS)] = AMDGPU_PL_GDS,
1182-
[ilog2(AMDGPU_GEM_DOMAIN_GWS)] = AMDGPU_PL_GWS,
1183-
[ilog2(AMDGPU_GEM_DOMAIN_OA)] = AMDGPU_PL_OA,
1184-
[ilog2(AMDGPU_GEM_DOMAIN_DOORBELL)] = AMDGPU_PL_DOORBELL,
1185-
};
1186-
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
1187-
struct ttm_resource *res = bo->tbo.resource;
1188-
struct drm_gem_object *obj = &bo->tbo.base;
1189-
uint64_t size = amdgpu_bo_size(bo);
1190-
unsigned int type;
1191-
1192-
if (!res) {
1193-
/*
1194-
* If no backing store use one of the preferred domain for basic
1195-
* stats. We take the MSB since that should give a reasonable
1196-
* view.
1197-
*/
1198-
BUILD_BUG_ON(TTM_PL_VRAM < TTM_PL_TT ||
1199-
TTM_PL_VRAM < TTM_PL_SYSTEM);
1200-
type = fls(bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK);
1201-
if (!type)
1202-
return;
1203-
type--;
1204-
if (drm_WARN_ON_ONCE(&adev->ddev,
1205-
type >= ARRAY_SIZE(domain_to_pl)))
1206-
return;
1207-
type = domain_to_pl[type];
1208-
} else {
1209-
type = res->mem_type;
1210-
}
1211-
1212-
if (drm_WARN_ON_ONCE(&adev->ddev, type >= sz))
1213-
return;
1214-
1215-
/* DRM stats common fields: */
1216-
1217-
if (drm_gem_object_is_shared_for_memory_stats(obj))
1218-
stats[type].drm.shared += size;
1219-
else
1220-
stats[type].drm.private += size;
1221-
1222-
if (res) {
1223-
stats[type].drm.resident += size;
1224-
1225-
if (!dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_BOOKKEEP))
1226-
stats[type].drm.active += size;
1227-
else if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE)
1228-
stats[type].drm.purgeable += size;
1229-
}
1230-
1231-
/* amdgpu specific stats: */
1232-
1233-
if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) {
1234-
stats[TTM_PL_VRAM].requested += size;
1235-
if (type != TTM_PL_VRAM)
1236-
stats[TTM_PL_VRAM].evicted += size;
1237-
} else if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_GTT) {
1238-
stats[TTM_PL_TT].requested += size;
1239-
}
1240-
}
1241-
12421173
/**
12431174
* amdgpu_bo_release_notify - notification about a BO being released
12441175
* @bo: pointer to a buffer object
@@ -1453,6 +1384,45 @@ u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo)
14531384
return amdgpu_gmc_sign_extend(offset);
14541385
}
14551386

1387+
/**
1388+
* amdgpu_bo_mem_stats_placement - bo placement for memory accounting
1389+
* @bo: the buffer object we should look at
1390+
*
1391+
* BO can have multiple preferred placements, to avoid double counting we want
1392+
* to file it under a single placement for memory stats.
1393+
* Luckily, if we take the highest set bit in preferred_domains the result is
1394+
* quite sensible.
1395+
*
1396+
* Returns:
1397+
* Which of the placements should the BO be accounted under.
1398+
*/
1399+
uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo)
1400+
{
1401+
uint32_t domain = bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK;
1402+
1403+
if (!domain)
1404+
return TTM_PL_SYSTEM;
1405+
1406+
switch (rounddown_pow_of_two(domain)) {
1407+
case AMDGPU_GEM_DOMAIN_CPU:
1408+
return TTM_PL_SYSTEM;
1409+
case AMDGPU_GEM_DOMAIN_GTT:
1410+
return TTM_PL_TT;
1411+
case AMDGPU_GEM_DOMAIN_VRAM:
1412+
return TTM_PL_VRAM;
1413+
case AMDGPU_GEM_DOMAIN_GDS:
1414+
return AMDGPU_PL_GDS;
1415+
case AMDGPU_GEM_DOMAIN_GWS:
1416+
return AMDGPU_PL_GWS;
1417+
case AMDGPU_GEM_DOMAIN_OA:
1418+
return AMDGPU_PL_OA;
1419+
case AMDGPU_GEM_DOMAIN_DOORBELL:
1420+
return AMDGPU_PL_DOORBELL;
1421+
default:
1422+
return TTM_PL_SYSTEM;
1423+
}
1424+
}
1425+
14561426
/**
14571427
* amdgpu_bo_get_preferred_domain - get preferred domain
14581428
* @adev: amdgpu device object

drivers/gpu/drm/amd/amdgpu/amdgpu_object.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -300,9 +300,7 @@ int amdgpu_bo_sync_wait_resv(struct amdgpu_device *adev, struct dma_resv *resv,
300300
int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr);
301301
u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo);
302302
u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo);
303-
void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
304-
struct amdgpu_mem_stats *stats,
305-
unsigned int size);
303+
uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo);
306304
uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev,
307305
uint32_t domain);
308306

drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,15 @@
2626

2727
#include <linux/dma-direction.h>
2828
#include <drm/gpu_scheduler.h>
29+
#include <drm/ttm/ttm_placement.h>
2930
#include "amdgpu_vram_mgr.h"
30-
#include "amdgpu.h"
3131

3232
#define AMDGPU_PL_GDS (TTM_PL_PRIV + 0)
3333
#define AMDGPU_PL_GWS (TTM_PL_PRIV + 1)
3434
#define AMDGPU_PL_OA (TTM_PL_PRIV + 2)
3535
#define AMDGPU_PL_PREEMPT (TTM_PL_PRIV + 3)
3636
#define AMDGPU_PL_DOORBELL (TTM_PL_PRIV + 4)
37-
#define __AMDGPU_PL_LAST (TTM_PL_PRIV + 4)
37+
#define __AMDGPU_PL_NUM (TTM_PL_PRIV + 5)
3838

3939
#define AMDGPU_GTT_MAX_TRANSFER_SIZE 512
4040
#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2

0 commit comments

Comments (0)