Skip to content

Commit 17ff53b

Browse files
sourabgupta3 authored and nvidia-bfigg committed
NVIDIA: SAUCE: Add NVMe Patches to enable GDS
BugLink: https://bugs.launchpad.net/bugs/1982519 With this change, the NVMe driver would be enabled to support GPUDirectStorage(GDS). The change is around nvme/nvme rdma map_data() and unmap_data(), where the IO request is first intercepted to check for GDS pages and if it is a GDS page then the request is served by GDS driver component called nvidia-fs, else the request would be served by the standard NVMe driver code. Acked-by: Rebanta Mitra <[email protected]> Acked-by: Prashant Prabhu <[email protected]> Signed-off-by: Sourab Gupta <[email protected]> Acked-by: Brad Figg <[email protected]> Acked-by: Ian May <[email protected]> Acked-by: Jacob Martin <[email protected]> Signed-off-by: Brad Figg <[email protected]>
1 parent 3a14fb2 commit 17ff53b

File tree

5 files changed

+308
-1
lines changed

5 files changed

+308
-1
lines changed

drivers/nvme/host/Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# SPDX-License-Identifier: GPL-2.0
22

33
ccflags-y += -I$(src)
4-
4+
ccflags-y += -DCONFIG_NVFS
55
obj-$(CONFIG_NVME_CORE) += nvme-core.o
66
obj-$(CONFIG_BLK_DEV_NVME) += nvme.o
77
obj-$(CONFIG_NVME_FABRICS) += nvme-fabrics.o
@@ -20,6 +20,7 @@ nvme-core-$(CONFIG_NVME_HWMON) += hwmon.o
2020
nvme-core-$(CONFIG_NVME_AUTH) += auth.o
2121

2222
nvme-y += pci.o
23+
nvme-y += nvfs-dma.o
2324

2425
nvme-fabrics-y += fabrics.o
2526

drivers/nvme/host/nvfs-dma.c

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/*
3+
* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4+
*
5+
* This program is free software; you can redistribute it and/or modify it
6+
* under the terms and conditions of the GNU General Public License,
7+
* version 2, as published by the Free Software Foundation.
8+
*
9+
* This program is distributed in the hope it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12+
* more details.
13+
*/
14+
15+
#ifdef CONFIG_NVFS
16+
#define MODULE_PREFIX nvme_v1
17+
#include "nvfs.h"
18+
19+
struct nvfs_dma_rw_ops *nvfs_ops;
20+
21+
atomic_t nvfs_shutdown = ATOMIC_INIT(1);
22+
23+
DEFINE_PER_CPU(long, nvfs_n_ops);
24+
25+
// must have for compatibility
26+
#define NVIDIA_FS_COMPAT_FT(ops) \
27+
(NVIDIA_FS_CHECK_FT_SGLIST_PREP(ops) && NVIDIA_FS_CHECK_FT_SGLIST_DMA(ops))
28+
29+
// protected via nvfs_module_mutex
30+
int REGISTER_FUNC(struct nvfs_dma_rw_ops *ops)
31+
{
32+
if (NVIDIA_FS_COMPAT_FT(ops)) {
33+
nvfs_ops = ops;
34+
atomic_set(&nvfs_shutdown, 0);
35+
return 0;
36+
} else
37+
return -EOPNOTSUPP;
38+
39+
40+
}
41+
EXPORT_SYMBOL(REGISTER_FUNC);
42+
43+
// protected via nvfs_module_mutex
44+
void UNREGISTER_FUNC(void)
45+
{
46+
(void) atomic_cmpxchg(&nvfs_shutdown, 0, 1);
47+
do{
48+
msleep(NVFS_HOLD_TIME_MS);
49+
} while (nvfs_count_ops());
50+
nvfs_ops = NULL;
51+
}
52+
EXPORT_SYMBOL(UNREGISTER_FUNC);
53+
#endif

drivers/nvme/host/nvfs-dma.h

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
/*
3+
* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4+
*
5+
* This program is free software; you can redistribute it and/or modify it
6+
* under the terms and conditions of the GNU General Public License,
7+
* version 2, as published by the Free Software Foundation.
8+
*
9+
* This program is distributed in the hope it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12+
* more details.
13+
*/
14+
15+
#ifndef NVFS_DMA_H
16+
#define NVFS_DMA_H
17+
18+
static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
19+
struct request *req, struct nvme_rw_command *cmnd);
20+
21+
static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,
22+
struct request *req, struct nvme_rw_command *cmnd);
23+
24+
static bool nvme_nvfs_unmap_data(struct nvme_dev *dev, struct request *req)
25+
{
26+
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
27+
enum dma_data_direction dma_dir = rq_dma_dir(req);
28+
29+
if (!iod || !iod->sgt.nents)
30+
return false;
31+
32+
if (iod->sgt.sgl && !is_pci_p2pdma_page(sg_page(iod->sgt.sgl)) &&
33+
!blk_integrity_rq(req) &&
34+
!iod->dma_len &&
35+
nvfs_ops != NULL) {
36+
int count;
37+
count = nvfs_ops->nvfs_dma_unmap_sg(dev->dev, iod->sgt.sgl, iod->sgt.nents, dma_dir);
38+
if (!count)
39+
return false;
40+
41+
nvfs_put_ops();
42+
return true;
43+
}
44+
45+
return false;
46+
}
47+
48+
/*
 * Try to DMA-map a request through the registered nvfs callbacks.
 *
 * @dev:        NVMe device the request is issued to.
 * @req:        request to map.
 * @cmnd:       command being built; its rw part receives the PRP/SGL setup.
 * @is_nvfs_io: set to true once the nvfs scatterlist callback reports a
 *              non-zero result for this request; left false otherwise.
 *
 * When *is_nvfs_io is false the caller falls back to the regular mapping
 * path.  nvme_map_data() stores this function's return value in its own
 * "ret" before doing so, so every "not handled" exit returns
 * BLK_STS_RESOURCE, the value that variable is initialised with.
 *
 * When *is_nvfs_io is true the return value is final: BLK_STS_OK on
 * success (the ops reference taken here stays held until
 * nvme_nvfs_unmap_data() drops it), or an error status with the
 * scatterlist freed and the ops reference released.
 */
static blk_status_t nvme_nvfs_map_data(struct nvme_dev *dev, struct request *req,
		struct nvme_command *cmnd, bool *is_nvfs_io)
{
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
	enum dma_data_direction dma_dir = rq_dma_dir(req);
	blk_status_t ret = BLK_STS_RESOURCE;
	int nr_sg;
	int nr_mapped;

	*is_nvfs_io = false;

	if (blk_integrity_rq(req) || !nvfs_get_ops())
		return ret;

	iod->dma_len = 0;
	iod->sgt.sgl = mempool_alloc(dev->iod_mempool, GFP_ATOMIC);
	if (!iod->sgt.sgl) {
		nvfs_put_ops();
		return ret;
	}

	sg_init_table(iod->sgt.sgl, blk_rq_nr_phys_segments(req));

	/* associates bio pages to scatterlist */
	nr_sg = nvfs_ops->nvfs_blk_rq_map_sg(req->q, req, iod->sgt.sgl);
	iod->sgt.orig_nents = nr_sg;
	if (!nr_sg)
		goto out_free_sg;	/* not handled: ret is still BLK_STS_RESOURCE */

	/* From here on the request belongs to nvfs; failures are I/O errors. */
	*is_nvfs_io = true;
	ret = BLK_STS_IOERR;

	if (unlikely(nr_sg == NVFS_IO_ERR)) {
		pr_err("%s: failed to map sg_nents=:%d\n", __func__, nr_sg);
		goto out_free_sg;
	}

	nr_mapped = nvfs_ops->nvfs_dma_map_sg_attrs(dev->dev,
						    iod->sgt.sgl,
						    iod->sgt.orig_nents,
						    dma_dir,
						    DMA_ATTR_NO_WARN);

	if (unlikely(nr_mapped == NVFS_IO_ERR)) {
		/* Report the list size we tried to map; sgt.nents is not set yet. */
		pr_err("%s: failed to dma map sglist=:%d\n", __func__, nr_sg);
		goto out_free_sg;
	}

	if (unlikely(nr_mapped == NVFS_CPU_REQ)) {
		/* Fail this request instead of bringing the whole machine down. */
		WARN_ONCE(1, "%s: unexpected NVFS_CPU_REQ from nvfs dma map\n",
			  __func__);
		goto out_free_sg;
	}

	iod->sgt.nents = nr_mapped;

	/* TBD: not tested on SGL mode supporting drive */
	if (nvme_pci_use_sgls(dev, req))
		ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw);
	else
		/* push dma address to hw registers */
		ret = nvme_pci_setup_prps(dev, req, &cmnd->rw);

	if (ret != BLK_STS_OK) {
		/*
		 * nvme_nvfs_unmap_data() drops the ops reference only when it
		 * returns true; drop it here otherwise so the count that the
		 * unregister path waits on stays balanced.
		 */
		if (!nvme_nvfs_unmap_data(dev, req))
			nvfs_put_ops();
		mempool_free(iod->sgt.sgl, dev->iod_mempool);
	}
	return ret;

out_free_sg:
	mempool_free(iod->sgt.sgl, dev->iod_mempool);
	nvfs_put_ops();
	return ret;
}
122+
123+
#endif /* NVFS_DMA_H */

drivers/nvme/host/nvfs.h

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
/*
3+
* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4+
*
5+
* This program is free software; you can redistribute it and/or modify it
6+
* under the terms and conditions of the GNU General Public License,
7+
* version 2, as published by the Free Software Foundation.
8+
*
9+
* This program is distributed in the hope it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12+
* more details.
13+
*/
14+
15+
#ifndef NVFS_H
16+
#define NVFS_H
17+
18+
#include <linux/types.h>
19+
#include <linux/delay.h>
20+
#include <linux/blkdev.h>
21+
#include <linux/cpumask.h>
22+
#include <linux/scatterlist.h>
23+
#include <linux/percpu-defs.h>
24+
#include <linux/dma-direction.h>
25+
26+
#define REGSTR2(x) x##_register_nvfs_dma_ops
27+
#define REGSTR(x) REGSTR2(x)
28+
29+
#define UNREGSTR2(x) x##_unregister_nvfs_dma_ops
30+
#define UNREGSTR(x) UNREGSTR2(x)
31+
32+
#define REGISTER_FUNC REGSTR(MODULE_PREFIX)
33+
#define UNREGISTER_FUNC UNREGSTR(MODULE_PREFIX)
34+
35+
#define NVFS_IO_ERR -1
36+
#define NVFS_CPU_REQ -2
37+
38+
#define NVFS_HOLD_TIME_MS 1000
39+
40+
extern struct nvfs_dma_rw_ops *nvfs_ops;
41+
42+
extern atomic_t nvfs_shutdown;
43+
44+
DECLARE_PER_CPU(long, nvfs_n_ops);
45+
46+
static inline long nvfs_count_ops(void)
47+
{
48+
int i;
49+
long sum = 0;
50+
51+
for_each_possible_cpu(i)
52+
sum += per_cpu(nvfs_n_ops, i);
53+
return sum;
54+
}
55+
56+
static inline bool nvfs_get_ops(void)
57+
{
58+
if (nvfs_ops && !atomic_read(&nvfs_shutdown)) {
59+
this_cpu_inc(nvfs_n_ops);
60+
return true;
61+
}
62+
return false;
63+
}
64+
65+
static inline void nvfs_put_ops(void)
66+
{
67+
this_cpu_dec(nvfs_n_ops);
68+
}
69+
70+
/*
 * Callback table a module hands to REGISTER_FUNC().
 *
 * Return-value notes below describe how the NVMe PCI glue in nvfs-dma.h
 * interprets each callback; they are not a specification of the provider.
 */
struct nvfs_dma_rw_ops {
	/* Feature bitmap, tested with the NVIDIA_FS_CHECK_FT_* macros. */
	unsigned long long ft_bmap;

	/*
	 * Fill @sglist from @req.  The caller treats 0 as "request not
	 * handled here", NVFS_IO_ERR as failure, and any other value as the
	 * number of scatterlist entries.
	 */
	int (*nvfs_blk_rq_map_sg)(struct request_queue *q,
				  struct request *req,
				  struct scatterlist *sglist);

	/*
	 * DMA-map @nents entries of @sglist.  The caller checks for
	 * NVFS_IO_ERR and NVFS_CPU_REQ and otherwise stores the result as the
	 * mapped entry count.
	 */
	int (*nvfs_dma_map_sg_attrs)(struct device *device,
				     struct scatterlist *sglist,
				     int nents,
				     enum dma_data_direction dma_dir,
				     unsigned long attrs);

	/*
	 * Undo a mapping.  The caller treats 0 as "not handled here" and
	 * falls back to the regular unmap.
	 */
	int (*nvfs_dma_unmap_sg)(struct device *device,
				 struct scatterlist *sglist,
				 int nents,
				 enum dma_data_direction dma_dir);

	/*
	 * The three callbacks below are not invoked by the code in this
	 * header set; presumably they are used by other transports.
	 * TODO confirm their contracts against the provider module.
	 */
	bool (*nvfs_is_gpu_page)(struct page *page);

	unsigned int (*nvfs_gpu_index)(struct page *page);

	unsigned int (*nvfs_device_priority)(struct device *dev,
					     unsigned int gpu_index);
};
94+
95+
// feature list for dma_ops; each value is a single-bit mask tested against ft_bmap
96+
/*
 * Feature flags carried in nvfs_dma_rw_ops.ft_bmap.  Each enumerator is a
 * one-bit mask; the NVIDIA_FS_CHECK_FT_* macros AND it with ft_bmap.
 */
enum ft_bits {
	nvfs_ft_prep_sglist	= 1ULL << 0,	/* NVIDIA_FS_CHECK_FT_SGLIST_PREP */
	nvfs_ft_map_sglist	= 1ULL << 1,	/* NVIDIA_FS_CHECK_FT_SGLIST_DMA */
	nvfs_ft_is_gpu_page	= 1ULL << 2,	/* NVIDIA_FS_CHECK_FT_GPU_PAGE */
	nvfs_ft_device_priority	= 1ULL << 3,	/* NVIDIA_FS_CHECK_FT_DEVICE_PRIORITY */
};
102+
103+
// check features for use in registration with vendor drivers
104+
#define NVIDIA_FS_CHECK_FT_SGLIST_PREP(ops) ((ops)->ft_bmap & nvfs_ft_prep_sglist)
105+
#define NVIDIA_FS_CHECK_FT_SGLIST_DMA(ops) ((ops)->ft_bmap & nvfs_ft_map_sglist)
106+
#define NVIDIA_FS_CHECK_FT_GPU_PAGE(ops) ((ops)->ft_bmap & nvfs_ft_is_gpu_page)
107+
#define NVIDIA_FS_CHECK_FT_DEVICE_PRIORITY(ops) ((ops)->ft_bmap & nvfs_ft_device_priority)
108+
109+
int REGISTER_FUNC (struct nvfs_dma_rw_ops *ops);
110+
111+
void UNREGISTER_FUNC (void);
112+
113+
#endif /* NVFS_H */

drivers/nvme/host/pci.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@
3232

3333
#include "trace.h"
3434
#include "nvme.h"
35+
#ifdef CONFIG_NVFS
36+
#include "nvfs.h"
37+
#endif
3538

3639
#define SQ_SIZE(q) ((q)->q_depth << (q)->sqes)
3740
#define CQ_SIZE(q) ((q)->q_depth * sizeof(struct nvme_completion))
@@ -565,6 +568,9 @@ static void nvme_free_sgls(struct nvme_dev *dev, struct request *req)
565568
}
566569
}
567570

571+
#ifdef CONFIG_NVFS
572+
#include "nvfs-dma.h"
573+
#endif
568574
static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
569575
{
570576
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
@@ -577,7 +583,12 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
577583

578584
WARN_ON_ONCE(!iod->sgt.nents);
579585

586+
#ifdef CONFIG_NVFS
587+
if (!nvme_nvfs_unmap_data(dev, req))
588+
dma_unmap_sgtable(dev->dev, &iod->sgt, rq_dma_dir(req), 0);
589+
#else
580590
dma_unmap_sgtable(dev->dev, &iod->sgt, rq_dma_dir(req), 0);
591+
#endif
581592

582593
if (iod->nr_allocations == 0)
583594
dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0],
@@ -824,6 +835,12 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
824835
blk_status_t ret = BLK_STS_RESOURCE;
825836
int rc;
826837

838+
#ifdef CONFIG_NVFS
839+
bool is_nvfs_io = false;
840+
ret = nvme_nvfs_map_data(dev, req, cmnd, &is_nvfs_io);
841+
if (is_nvfs_io)
842+
return ret;
843+
#endif
827844
if (blk_rq_nr_phys_segments(req) == 1) {
828845
struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
829846
struct bio_vec bv = req_bvec(req);

0 commit comments

Comments
 (0)