Commit b88baab

drm/nouveau: implement new VM_BIND uAPI
This commit provides the implementation for the new uAPI motivated by the
Vulkan API. It allows user mode drivers (UMDs) to:

1) Initialize a GPU virtual address (VA) space via the new
   DRM_IOCTL_NOUVEAU_VM_INIT ioctl, for UMDs to specify the portion of VA
   space managed by the kernel and userspace, respectively.

2) Allocate and free a VA space region as well as bind and unbind memory
   to the GPU's VA space via the new DRM_IOCTL_NOUVEAU_VM_BIND ioctl.
   UMDs can request the named operations to be processed either
   synchronously or asynchronously. It supports DRM syncobjs
   (incl. timelines) as synchronization mechanism. The management of the
   GPU VA mappings is implemented with the DRM GPU VA manager.

3) Execute push buffers with the new DRM_IOCTL_NOUVEAU_EXEC ioctl. The
   execution happens asynchronously. It supports DRM syncobjs (incl.
   timelines) as synchronization mechanism. DRM GEM object locking is
   handled with drm_exec.

Both DRM_IOCTL_NOUVEAU_VM_BIND and DRM_IOCTL_NOUVEAU_EXEC use the DRM GPU
scheduler for the asynchronous paths.

Reviewed-by: Dave Airlie <[email protected]>
Signed-off-by: Danilo Krummrich <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
Parent: 6b252cf

19 files changed, 3321 insertions(+), 69 deletions(-)
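For orientation, below is a minimal userspace sketch of how a UMD might drive the three new ioctls in order: initialize the VA space split, bind a GEM object into the GPU VA space, and submit a push buffer. It is illustrative only; the struct and field names (drm_nouveau_vm_init, drm_nouveau_vm_bind_op, drm_nouveau_exec_push, and so on) are assumed from the uAPI header added by this commit and should be checked against include/uapi/drm/nouveau_drm.h. Channel allocation, syncobj wiring and error handling are omitted.

/* Illustrative sketch only -- struct layouts and field names are assumptions
 * based on the uAPI header added by this commit (include/uapi/drm/nouveau_drm.h).
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/nouveau_drm.h>    /* exact include path depends on how the uAPI headers are installed */

/* 1) Split the VA space: here the low 4 GiB stays kernel-managed and the
 *    rest is handed to the UMD (assumed field names).
 */
static int init_va_space(int drm_fd)
{
        struct drm_nouveau_vm_init init = {
                .kernel_managed_addr = 0,
                .kernel_managed_size = 1ULL << 32,
        };

        return ioctl(drm_fd, DRM_IOCTL_NOUVEAU_VM_INIT, &init);
}

/* 2) Map a GEM object at a fixed GPU VA; without an async flag the bind is
 *    processed synchronously.
 */
static int map_bo(int drm_fd, uint32_t handle, uint64_t addr, uint64_t range)
{
        struct drm_nouveau_vm_bind_op op = {
                .op = DRM_NOUVEAU_VM_BIND_OP_MAP,
                .handle = handle,
                .bo_offset = 0,
                .addr = addr,
                .range = range,
        };
        struct drm_nouveau_vm_bind bind = {
                .op_count = 1,
                .op_ptr = (uintptr_t)&op,
        };

        return ioctl(drm_fd, DRM_IOCTL_NOUVEAU_VM_BIND, &bind);
}

/* 3) Submit a single push buffer on a previously allocated channel; execution
 *    is asynchronous, completion would be tracked via the signal syncobj
 *    fields (omitted here).
 */
static int exec_push(int drm_fd, uint32_t channel, uint64_t va, uint32_t va_len)
{
        struct drm_nouveau_exec_push push = {
                .va = va,
                .va_len = va_len,
        };
        struct drm_nouveau_exec exec = {
                .channel = channel,
                .push_count = 1,
                .push_ptr = (uintptr_t)&push,
        };

        return ioctl(drm_fd, DRM_IOCTL_NOUVEAU_EXEC, &exec);
}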

Documentation/gpu/driver-uapi.rst

Lines changed: 3 additions & 0 deletions
@@ -13,4 +13,7 @@ drm/nouveau uAPI
 VM_BIND / EXEC uAPI
 -------------------
 
+.. kernel-doc:: drivers/gpu/drm/nouveau/nouveau_exec.c
+   :doc: Overview
+
 .. kernel-doc:: include/uapi/drm/nouveau_drm.h

drivers/gpu/drm/nouveau/Kbuild

Lines changed: 3 additions & 0 deletions
@@ -47,6 +47,9 @@ nouveau-y += nouveau_prime.o
 nouveau-y += nouveau_sgdma.o
 nouveau-y += nouveau_ttm.o
 nouveau-y += nouveau_vmm.o
+nouveau-y += nouveau_exec.o
+nouveau-y += nouveau_sched.o
+nouveau-y += nouveau_uvmm.o
 
 # DRM - modesetting
 nouveau-$(CONFIG_DRM_NOUVEAU_BACKLIGHT) += nouveau_backlight.o

drivers/gpu/drm/nouveau/Kconfig

Lines changed: 2 additions & 0 deletions
@@ -10,6 +10,8 @@ config DRM_NOUVEAU
         select DRM_KMS_HELPER
         select DRM_TTM
         select DRM_TTM_HELPER
+        select DRM_EXEC
+        select DRM_SCHED
         select I2C
         select I2C_ALGOBIT
         select BACKLIGHT_CLASS_DEVICE if DRM_NOUVEAU_BACKLIGHT

drivers/gpu/drm/nouveau/nouveau_abi16.c

Lines changed: 24 additions & 0 deletions
@@ -35,6 +35,7 @@
 #include "nouveau_chan.h"
 #include "nouveau_abi16.h"
 #include "nouveau_vmm.h"
+#include "nouveau_sched.h"
 
 static struct nouveau_abi16 *
 nouveau_abi16(struct drm_file *file_priv)
@@ -125,6 +126,17 @@ nouveau_abi16_chan_fini(struct nouveau_abi16 *abi16,
 {
         struct nouveau_abi16_ntfy *ntfy, *temp;
 
+        /* When a client exits without waiting for it's queued up jobs to
+         * finish it might happen that we fault the channel. This is due to
+         * drm_file_free() calling drm_gem_release() before the postclose()
+         * callback. Hence, we can't tear down this scheduler entity before
+         * uvmm mappings are unmapped. Currently, we can't detect this case.
+         *
+         * However, this should be rare and harmless, since the channel isn't
+         * needed anymore.
+         */
+        nouveau_sched_entity_fini(&chan->sched_entity);
+
         /* wait for all activity to stop before cleaning up */
         if (chan->chan)
                 nouveau_channel_idle(chan->chan);
@@ -261,6 +273,13 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
         if (!drm->channel)
                 return nouveau_abi16_put(abi16, -ENODEV);
 
+        /* If uvmm wasn't initialized until now disable it completely to prevent
+         * userspace from mixing up UAPIs.
+         *
+         * The client lock is already acquired by nouveau_abi16_get().
+         */
+        __nouveau_cli_disable_uvmm_noinit(cli);
+
         device = &abi16->device;
         engine = NV_DEVICE_HOST_RUNLIST_ENGINES_GR;
 
@@ -304,6 +323,11 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
         if (ret)
                 goto done;
 
+        ret = nouveau_sched_entity_init(&chan->sched_entity, &drm->sched,
+                                        drm->sched_wq);
+        if (ret)
+                goto done;
+
         init->channel = chan->chan->chid;
 
         if (device->info.family >= NV_DEVICE_INFO_V0_TESLA)

drivers/gpu/drm/nouveau/nouveau_abi16.h

Lines changed: 1 addition & 0 deletions
@@ -26,6 +26,7 @@ struct nouveau_abi16_chan {
         struct nouveau_bo *ntfy;
         struct nouveau_vma *ntfy_vma;
         struct nvkm_mm heap;
+        struct nouveau_sched_entity sched_entity;
 };
 
 struct nouveau_abi16 {

drivers/gpu/drm/nouveau/nouveau_bo.c

Lines changed: 102 additions & 57 deletions
@@ -199,7 +199,7 @@ nouveau_bo_fixup_align(struct nouveau_bo *nvbo, int *align, u64 *size)
 
 struct nouveau_bo *
 nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int *align, u32 domain,
-                 u32 tile_mode, u32 tile_flags)
+                 u32 tile_mode, u32 tile_flags, bool internal)
 {
         struct nouveau_drm *drm = cli->drm;
         struct nouveau_bo *nvbo;
@@ -233,68 +233,103 @@ nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int *align, u32 domain,
                 nvbo->force_coherent = true;
         }
 
-        if (cli->device.info.family >= NV_DEVICE_INFO_V0_FERMI) {
-                nvbo->kind = (tile_flags & 0x0000ff00) >> 8;
-                if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) {
-                        kfree(nvbo);
-                        return ERR_PTR(-EINVAL);
+        nvbo->contig = !(tile_flags & NOUVEAU_GEM_TILE_NONCONTIG);
+        if (!nouveau_cli_uvmm(cli) || internal) {
+                /* for BO noVM allocs, don't assign kinds */
+                if (cli->device.info.family >= NV_DEVICE_INFO_V0_FERMI) {
+                        nvbo->kind = (tile_flags & 0x0000ff00) >> 8;
+                        if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) {
+                                kfree(nvbo);
+                                return ERR_PTR(-EINVAL);
+                        }
+
+                        nvbo->comp = mmu->kind[nvbo->kind] != nvbo->kind;
+                } else if (cli->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
+                        nvbo->kind = (tile_flags & 0x00007f00) >> 8;
+                        nvbo->comp = (tile_flags & 0x00030000) >> 16;
+                        if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) {
+                                kfree(nvbo);
+                                return ERR_PTR(-EINVAL);
+                        }
+                } else {
+                        nvbo->zeta = (tile_flags & 0x00000007);
                 }
+                nvbo->mode = tile_mode;
+
+                /* Determine the desirable target GPU page size for the buffer. */
+                for (i = 0; i < vmm->page_nr; i++) {
+                        /* Because we cannot currently allow VMM maps to fail
+                         * during buffer migration, we need to determine page
+                         * size for the buffer up-front, and pre-allocate its
+                         * page tables.
+                         *
+                         * Skip page sizes that can't support needed domains.
+                         */
+                        if (cli->device.info.family > NV_DEVICE_INFO_V0_CURIE &&
+                            (domain & NOUVEAU_GEM_DOMAIN_VRAM) && !vmm->page[i].vram)
+                                continue;
+                        if ((domain & NOUVEAU_GEM_DOMAIN_GART) &&
+                            (!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT))
+                                continue;
 
-                nvbo->comp = mmu->kind[nvbo->kind] != nvbo->kind;
-        } else
-        if (cli->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
-                nvbo->kind = (tile_flags & 0x00007f00) >> 8;
-                nvbo->comp = (tile_flags & 0x00030000) >> 16;
-                if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) {
+                        /* Select this page size if it's the first that supports
+                         * the potential memory domains, or when it's compatible
+                         * with the requested compression settings.
+                         */
+                        if (pi < 0 || !nvbo->comp || vmm->page[i].comp)
+                                pi = i;
+
+                        /* Stop once the buffer is larger than the current page size. */
+                        if (*size >= 1ULL << vmm->page[i].shift)
+                                break;
+                }
+
+                if (WARN_ON(pi < 0)) {
                         kfree(nvbo);
                         return ERR_PTR(-EINVAL);
                 }
-        } else {
-                nvbo->zeta = (tile_flags & 0x00000007);
-        }
-        nvbo->mode = tile_mode;
-        nvbo->contig = !(tile_flags & NOUVEAU_GEM_TILE_NONCONTIG);
-
-        /* Determine the desirable target GPU page size for the buffer. */
-        for (i = 0; i < vmm->page_nr; i++) {
-                /* Because we cannot currently allow VMM maps to fail
-                 * during buffer migration, we need to determine page
-                 * size for the buffer up-front, and pre-allocate its
-                 * page tables.
-                 *
-                 * Skip page sizes that can't support needed domains.
-                 */
-                if (cli->device.info.family > NV_DEVICE_INFO_V0_CURIE &&
-                    (domain & NOUVEAU_GEM_DOMAIN_VRAM) && !vmm->page[i].vram)
-                        continue;
-                if ((domain & NOUVEAU_GEM_DOMAIN_GART) &&
-                    (!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT))
-                        continue;
-
-                /* Select this page size if it's the first that supports
-                 * the potential memory domains, or when it's compatible
-                 * with the requested compression settings.
-                 */
-                if (pi < 0 || !nvbo->comp || vmm->page[i].comp)
-                        pi = i;
 
-                /* Stop once the buffer is larger than the current page size. */
-                if (*size >= 1ULL << vmm->page[i].shift)
-                        break;
-        }
+                /* Disable compression if suitable settings couldn't be found. */
+                if (nvbo->comp && !vmm->page[pi].comp) {
+                        if (mmu->object.oclass >= NVIF_CLASS_MMU_GF100)
+                                nvbo->kind = mmu->kind[nvbo->kind];
+                        nvbo->comp = 0;
+                }
+                nvbo->page = vmm->page[pi].shift;
+        } else {
+                /* reject other tile flags when in VM mode. */
+                if (tile_mode)
+                        return ERR_PTR(-EINVAL);
+                if (tile_flags & ~NOUVEAU_GEM_TILE_NONCONTIG)
+                        return ERR_PTR(-EINVAL);
 
-        if (WARN_ON(pi < 0)) {
-                kfree(nvbo);
-                return ERR_PTR(-EINVAL);
-        }
+                /* Determine the desirable target GPU page size for the buffer. */
+                for (i = 0; i < vmm->page_nr; i++) {
+                        /* Because we cannot currently allow VMM maps to fail
+                         * during buffer migration, we need to determine page
+                         * size for the buffer up-front, and pre-allocate its
+                         * page tables.
+                         *
+                         * Skip page sizes that can't support needed domains.
+                         */
+                        if ((domain & NOUVEAU_GEM_DOMAIN_VRAM) && !vmm->page[i].vram)
+                                continue;
+                        if ((domain & NOUVEAU_GEM_DOMAIN_GART) &&
+                            (!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT))
+                                continue;
 
-        /* Disable compression if suitable settings couldn't be found. */
-        if (nvbo->comp && !vmm->page[pi].comp) {
-                if (mmu->object.oclass >= NVIF_CLASS_MMU_GF100)
-                        nvbo->kind = mmu->kind[nvbo->kind];
-                nvbo->comp = 0;
+                        if (pi < 0)
+                                pi = i;
+                        /* Stop once the buffer is larger than the current page size. */
+                        if (*size >= 1ULL << vmm->page[i].shift)
+                                break;
+                }
+                if (WARN_ON(pi < 0)) {
+                        kfree(nvbo);
+                        return ERR_PTR(-EINVAL);
+                }
+                nvbo->page = vmm->page[pi].shift;
         }
-        nvbo->page = vmm->page[pi].shift;
 
         nouveau_bo_fixup_align(nvbo, align, size);
 
@@ -307,18 +342,26 @@ nouveau_bo_init(struct nouveau_bo *nvbo, u64 size, int align, u32 domain,
 {
         int type = sg ? ttm_bo_type_sg : ttm_bo_type_device;
         int ret;
+        struct ttm_operation_ctx ctx = {
+                .interruptible = false,
+                .no_wait_gpu = false,
+                .resv = robj,
+        };
 
         nouveau_bo_placement_set(nvbo, domain, 0);
         INIT_LIST_HEAD(&nvbo->io_reserve_lru);
 
-        ret = ttm_bo_init_validate(nvbo->bo.bdev, &nvbo->bo, type,
-                                   &nvbo->placement, align >> PAGE_SHIFT, false,
+        ret = ttm_bo_init_reserved(nvbo->bo.bdev, &nvbo->bo, type,
+                                   &nvbo->placement, align >> PAGE_SHIFT, &ctx,
                                    sg, robj, nouveau_bo_del_ttm);
         if (ret) {
                 /* ttm will call nouveau_bo_del_ttm if it fails.. */
                 return ret;
         }
 
+        if (!robj)
+                ttm_bo_unreserve(&nvbo->bo);
+
         return 0;
 }
 
@@ -332,7 +375,7 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align,
         int ret;
 
         nvbo = nouveau_bo_alloc(cli, &size, &align, domain, tile_mode,
-                                tile_flags);
+                                tile_flags, true);
         if (IS_ERR(nvbo))
                 return PTR_ERR(nvbo);
 
@@ -951,6 +994,7 @@ static void nouveau_bo_move_ntfy(struct ttm_buffer_object *bo,
                 list_for_each_entry(vma, &nvbo->vma_list, head) {
                         nouveau_vma_map(vma, mem);
                 }
+                nouveau_uvmm_bo_map_all(nvbo, mem);
         } else {
                 list_for_each_entry(vma, &nvbo->vma_list, head) {
                         ret = dma_resv_wait_timeout(bo->base.resv,
@@ -959,6 +1003,7 @@ static void nouveau_bo_move_ntfy(struct ttm_buffer_object *bo,
                         WARN_ON(ret <= 0);
                         nouveau_vma_unmap(vma);
                 }
+                nouveau_uvmm_bo_unmap_all(nvbo);
         }
 
         if (new_reg)

drivers/gpu/drm/nouveau/nouveau_bo.h

Lines changed: 2 additions & 1 deletion
@@ -26,6 +26,7 @@ struct nouveau_bo {
         struct list_head entry;
         int pbbo_index;
         bool validate_mapped;
+        bool no_share;
 
         /* GPU address space is independent of CPU word size */
         uint64_t offset;
@@ -73,7 +74,7 @@ extern struct ttm_device_funcs nouveau_bo_driver;
 
 void nouveau_bo_move_init(struct nouveau_drm *);
 struct nouveau_bo *nouveau_bo_alloc(struct nouveau_cli *, u64 *size, int *align,
-                                    u32 domain, u32 tile_mode, u32 tile_flags);
+                                    u32 domain, u32 tile_mode, u32 tile_flags, bool internal);
 int nouveau_bo_init(struct nouveau_bo *, u64 size, int align, u32 domain,
                     struct sg_table *sg, struct dma_resv *robj);
 int nouveau_bo_new(struct nouveau_cli *, u64 size, int align, u32 domain,
