Skip to content

Commit a4500b8

Browse files
Al Viro authored and alexdeucher committed
drm/amdkfd: CRIU fixes
Instead of trying to use close_fd() on failure exits, just have criu_get_prime_handle() store the file reference without inserting it into the descriptor table. Then, once the callers are past the last failure exit, they can either insert all those file references into the corresponding slots of the descriptor table, or drop all those file references and free the unused descriptors.

Reviewed-by: Felix Kuehling <[email protected]>
Signed-off-by: Al Viro <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
1 parent 6c6ca71 commit a4500b8

File tree

1 file changed

+46
-18
lines changed

1 file changed

+46
-18
lines changed

drivers/gpu/drm/amd/amdkfd/kfd_chardev.c

Lines changed: 46 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,6 @@
3636
#include <linux/mman.h>
3737
#include <linux/ptrace.h>
3838
#include <linux/dma-buf.h>
39-
#include <linux/fdtable.h>
4039
#include <linux/processor.h>
4140
#include "kfd_priv.h"
4241
#include "kfd_device_queue_manager.h"
@@ -1835,7 +1834,8 @@ static uint32_t get_process_num_bos(struct kfd_process *p)
18351834
}
18361835

18371836
static int criu_get_prime_handle(struct kgd_mem *mem,
1838-
int flags, u32 *shared_fd)
1837+
int flags, u32 *shared_fd,
1838+
struct file **file)
18391839
{
18401840
struct dma_buf *dmabuf;
18411841
int ret;
@@ -1846,20 +1846,40 @@ static int criu_get_prime_handle(struct kgd_mem *mem,
18461846
return ret;
18471847
}
18481848

1849-
ret = dma_buf_fd(dmabuf, flags);
1849+
ret = get_unused_fd_flags(flags);
18501850
if (ret < 0) {
18511851
pr_err("dmabuf create fd failed, ret:%d\n", ret);
18521852
goto out_free_dmabuf;
18531853
}
18541854

18551855
*shared_fd = ret;
1856+
*file = dmabuf->file;
18561857
return 0;
18571858

18581859
out_free_dmabuf:
18591860
dma_buf_put(dmabuf);
18601861
return ret;
18611862
}
18621863

1864+
static void commit_files(struct file **files,
1865+
struct kfd_criu_bo_bucket *bo_buckets,
1866+
unsigned int count,
1867+
int err)
1868+
{
1869+
while (count--) {
1870+
struct file *file = files[count];
1871+
1872+
if (!file)
1873+
continue;
1874+
if (err) {
1875+
fput(file);
1876+
put_unused_fd(bo_buckets[count].dmabuf_fd);
1877+
} else {
1878+
fd_install(bo_buckets[count].dmabuf_fd, file);
1879+
}
1880+
}
1881+
}
1882+
18631883
static int criu_checkpoint_bos(struct kfd_process *p,
18641884
uint32_t num_bos,
18651885
uint8_t __user *user_bos,
@@ -1868,6 +1888,7 @@ static int criu_checkpoint_bos(struct kfd_process *p,
18681888
{
18691889
struct kfd_criu_bo_bucket *bo_buckets;
18701890
struct kfd_criu_bo_priv_data *bo_privs;
1891+
struct file **files = NULL;
18711892
int ret = 0, pdd_index, bo_index = 0, id;
18721893
void *mem;
18731894

@@ -1881,6 +1902,12 @@ static int criu_checkpoint_bos(struct kfd_process *p,
18811902
goto exit;
18821903
}
18831904

1905+
files = kvzalloc(num_bos * sizeof(struct file *), GFP_KERNEL);
1906+
if (!files) {
1907+
ret = -ENOMEM;
1908+
goto exit;
1909+
}
1910+
18841911
for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {
18851912
struct kfd_process_device *pdd = p->pdds[pdd_index];
18861913
struct amdgpu_bo *dumper_bo;
@@ -1923,7 +1950,7 @@ static int criu_checkpoint_bos(struct kfd_process *p,
19231950
ret = criu_get_prime_handle(kgd_mem,
19241951
bo_bucket->alloc_flags &
19251952
KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0,
1926-
&bo_bucket->dmabuf_fd);
1953+
&bo_bucket->dmabuf_fd, &files[bo_index]);
19271954
if (ret)
19281955
goto exit;
19291956
} else {
@@ -1974,12 +2001,8 @@ static int criu_checkpoint_bos(struct kfd_process *p,
19742001
*priv_offset += num_bos * sizeof(*bo_privs);
19752002

19762003
exit:
1977-
while (ret && bo_index--) {
1978-
if (bo_buckets[bo_index].alloc_flags
1979-
& (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
1980-
close_fd(bo_buckets[bo_index].dmabuf_fd);
1981-
}
1982-
2004+
commit_files(files, bo_buckets, bo_index, ret);
2005+
kvfree(files);
19832006
kvfree(bo_buckets);
19842007
kvfree(bo_privs);
19852008
return ret;
@@ -2331,7 +2354,8 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
23312354

23322355
static int criu_restore_bo(struct kfd_process *p,
23332356
struct kfd_criu_bo_bucket *bo_bucket,
2334-
struct kfd_criu_bo_priv_data *bo_priv)
2357+
struct kfd_criu_bo_priv_data *bo_priv,
2358+
struct file **file)
23352359
{
23362360
struct kfd_process_device *pdd;
23372361
struct kgd_mem *kgd_mem;
@@ -2383,7 +2407,7 @@ static int criu_restore_bo(struct kfd_process *p,
23832407
if (bo_bucket->alloc_flags
23842408
& (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
23852409
ret = criu_get_prime_handle(kgd_mem, DRM_RDWR,
2386-
&bo_bucket->dmabuf_fd);
2410+
&bo_bucket->dmabuf_fd, file);
23872411
if (ret)
23882412
return ret;
23892413
} else {
@@ -2400,6 +2424,7 @@ static int criu_restore_bos(struct kfd_process *p,
24002424
{
24012425
struct kfd_criu_bo_bucket *bo_buckets = NULL;
24022426
struct kfd_criu_bo_priv_data *bo_privs = NULL;
2427+
struct file **files = NULL;
24032428
int ret = 0;
24042429
uint32_t i = 0;
24052430

@@ -2413,6 +2438,12 @@ static int criu_restore_bos(struct kfd_process *p,
24132438
if (!bo_buckets)
24142439
return -ENOMEM;
24152440

2441+
files = kvzalloc(args->num_bos * sizeof(struct file *), GFP_KERNEL);
2442+
if (!files) {
2443+
ret = -ENOMEM;
2444+
goto exit;
2445+
}
2446+
24162447
ret = copy_from_user(bo_buckets, (void __user *)args->bos,
24172448
args->num_bos * sizeof(*bo_buckets));
24182449
if (ret) {
@@ -2438,7 +2469,7 @@ static int criu_restore_bos(struct kfd_process *p,
24382469

24392470
/* Create and map new BOs */
24402471
for (; i < args->num_bos; i++) {
2441-
ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i]);
2472+
ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i], &files[i]);
24422473
if (ret) {
24432474
pr_debug("Failed to restore BO[%d] ret%d\n", i, ret);
24442475
goto exit;
@@ -2453,11 +2484,8 @@ static int criu_restore_bos(struct kfd_process *p,
24532484
ret = -EFAULT;
24542485

24552486
exit:
2456-
while (ret && i--) {
2457-
if (bo_buckets[i].alloc_flags
2458-
& (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
2459-
close_fd(bo_buckets[i].dmabuf_fd);
2460-
}
2487+
commit_files(files, bo_buckets, i, ret);
2488+
kvfree(files);
24612489
kvfree(bo_buckets);
24622490
kvfree(bo_privs);
24632491
return ret;

0 commit comments

Comments
 (0)