Skip to content

Commit 15da3dd

Browse files
committed
Merge tag 'nvme-6.13-2024-11-13' of git://git.infradead.org/nvme into for-6.13/block
Pull NVMe updates from Keith: "nvme updates for Linux 6.13 - Use uring_cmd helper (Pavel) - Host Memory Buffer allocation enhancements (Christoph) - Target persistent reservation support (Guixin) - Persistent reservation tracing (Guixin) - NVMe 2.1 specification support (Keith) - Rotational Meta Support (Matias, Wang, Keith) - Volatile cache detection enhancement (Guixin)" * tag 'nvme-6.13-2024-11-13' of git://git.infradead.org/nvme: (22 commits) nvmet: add tracing of reservation commands nvme: parse reservation commands's action and rtype to string nvmet: report ns's vwc not present nvme: check ns's volatile write cache not present nvme: add rotational support nvme: use command set independent id ns if available nvmet: support for csi identify ns nvmet: implement rotational media information log nvmet: implement endurance groups nvmet: declare 2.1 version compliance nvmet: implement crto property nvmet: implement supported features log nvmet: implement supported log pages nvmet: implement active command set ns list nvmet: implement id ns for nvm command set nvmet: support reservation feature nvme: add reservation command's defines nvme-core: remove repeated wq flags nvmet: make nvmet_wq visible in sysfs nvme-pci: use dma_alloc_noncontigous if possible ...
2 parents 6975c1a + 50bee38 commit 15da3dd

File tree

14 files changed

+1968
-52
lines changed

14 files changed

+1968
-52
lines changed

drivers/nvme/host/core.c

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ struct nvme_ns_info {
4242
bool is_readonly;
4343
bool is_ready;
4444
bool is_removed;
45+
bool is_rotational;
46+
bool no_vwc;
4547
};
4648

4749
unsigned int admin_timeout = 60;
@@ -1615,6 +1617,8 @@ static int nvme_ns_info_from_id_cs_indep(struct nvme_ctrl *ctrl,
16151617
info->is_shared = id->nmic & NVME_NS_NMIC_SHARED;
16161618
info->is_readonly = id->nsattr & NVME_NS_ATTR_RO;
16171619
info->is_ready = id->nstat & NVME_NSTAT_NRDY;
1620+
info->is_rotational = id->nsfeat & NVME_NS_ROTATIONAL;
1621+
info->no_vwc = id->nsfeat & NVME_NS_VWC_NOT_PRESENT;
16181622
}
16191623
kfree(id);
16201624
return ret;
@@ -2157,11 +2161,14 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
21572161
ns->head->ids.csi == NVME_CSI_ZNS)
21582162
nvme_update_zone_info(ns, &lim, &zi);
21592163

2160-
if (ns->ctrl->vwc & NVME_CTRL_VWC_PRESENT)
2164+
if ((ns->ctrl->vwc & NVME_CTRL_VWC_PRESENT) && !info->no_vwc)
21612165
lim.features |= BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA;
21622166
else
21632167
lim.features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA);
21642168

2169+
if (info->is_rotational)
2170+
lim.features |= BLK_FEAT_ROTATIONAL;
2171+
21652172
/*
21662173
* Register a metadata profile for PI, or the plain non-integrity NVMe
21672174
* metadata masquerading as Type 0 if supported, otherwise reject block
@@ -3608,6 +3615,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
36083615
head->ns_id = info->nsid;
36093616
head->ids = info->ids;
36103617
head->shared = info->is_shared;
3618+
head->rotational = info->is_rotational;
36113619
ratelimit_state_init(&head->rs_nuse, 5 * HZ, 1);
36123620
ratelimit_set_flags(&head->rs_nuse, RATELIMIT_MSG_ON_RELEASE);
36133621
kref_init(&head->ref);
@@ -3988,7 +3996,7 @@ static void nvme_scan_ns(struct nvme_ctrl *ctrl, unsigned nsid)
39883996
{
39893997
struct nvme_ns_info info = { .nsid = nsid };
39903998
struct nvme_ns *ns;
3991-
int ret;
3999+
int ret = 1;
39924000

39934001
if (nvme_identify_ns_descs(ctrl, &info))
39944002
return;
@@ -4005,9 +4013,10 @@ static void nvme_scan_ns(struct nvme_ctrl *ctrl, unsigned nsid)
40054013
* set up a namespace. If not fall back to the legacy version.
40064014
*/
40074015
if ((ctrl->cap & NVME_CAP_CRMS_CRIMS) ||
4008-
(info.ids.csi != NVME_CSI_NVM && info.ids.csi != NVME_CSI_ZNS))
4016+
(info.ids.csi != NVME_CSI_NVM && info.ids.csi != NVME_CSI_ZNS) ||
4017+
ctrl->vs >= NVME_VS(2, 0, 0))
40094018
ret = nvme_ns_info_from_id_cs_indep(ctrl, &info);
4010-
else
4019+
if (ret > 0)
40114020
ret = nvme_ns_info_from_identify(ctrl, &info);
40124021

40134022
if (info.is_removed)
@@ -5006,6 +5015,8 @@ static inline void _nvme_check_size(void)
50065015
BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_nvm) != NVME_IDENTIFY_DATA_SIZE);
50075016
BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
50085017
BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
5018+
BUILD_BUG_ON(sizeof(struct nvme_endurance_group_log) != 512);
5019+
BUILD_BUG_ON(sizeof(struct nvme_rotational_media_log) != 512);
50095020
BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
50105021
BUILD_BUG_ON(sizeof(struct nvme_directive_cmd) != 64);
50115022
BUILD_BUG_ON(sizeof(struct nvme_feat_host_behavior) != 512);
@@ -5014,22 +5025,20 @@ static inline void _nvme_check_size(void)
50145025

50155026
static int __init nvme_core_init(void)
50165027
{
5028+
unsigned int wq_flags = WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS;
50175029
int result = -ENOMEM;
50185030

50195031
_nvme_check_size();
50205032

5021-
nvme_wq = alloc_workqueue("nvme-wq",
5022-
WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
5033+
nvme_wq = alloc_workqueue("nvme-wq", wq_flags, 0);
50235034
if (!nvme_wq)
50245035
goto out;
50255036

5026-
nvme_reset_wq = alloc_workqueue("nvme-reset-wq",
5027-
WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
5037+
nvme_reset_wq = alloc_workqueue("nvme-reset-wq", wq_flags, 0);
50285038
if (!nvme_reset_wq)
50295039
goto destroy_wq;
50305040

5031-
nvme_delete_wq = alloc_workqueue("nvme-delete-wq",
5032-
WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
5041+
nvme_delete_wq = alloc_workqueue("nvme-delete-wq", wq_flags, 0);
50335042
if (!nvme_delete_wq)
50345043
goto destroy_reset_wq;
50355044

drivers/nvme/host/ioctl.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -401,7 +401,7 @@ struct nvme_uring_cmd_pdu {
401401
static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu(
402402
struct io_uring_cmd *ioucmd)
403403
{
404-
return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu;
404+
return io_uring_cmd_to_pdu(ioucmd, struct nvme_uring_cmd_pdu);
405405
}
406406

407407
static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd,
@@ -631,8 +631,6 @@ static int nvme_ns_uring_cmd(struct nvme_ns *ns, struct io_uring_cmd *ioucmd,
631631
struct nvme_ctrl *ctrl = ns->ctrl;
632632
int ret;
633633

634-
BUILD_BUG_ON(sizeof(struct nvme_uring_cmd_pdu) > sizeof(ioucmd->pdu));
635-
636634
ret = nvme_uring_cmd_checks(issue_flags);
637635
if (ret)
638636
return ret;

drivers/nvme/host/nvme.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,7 @@ struct nvme_ns_head {
474474
struct list_head entry;
475475
struct kref ref;
476476
bool shared;
477+
bool rotational;
477478
bool passthru_err_log_enabled;
478479
struct nvme_effects_log *effects;
479480
u64 nuse;

drivers/nvme/host/pci.c

Lines changed: 62 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ struct nvme_dev {
141141
struct nvme_ctrl ctrl;
142142
u32 last_ps;
143143
bool hmb;
144+
struct sg_table *hmb_sgt;
144145

145146
mempool_t *iod_mempool;
146147

@@ -153,6 +154,7 @@ struct nvme_dev {
153154
/* host memory buffer support: */
154155
u64 host_mem_size;
155156
u32 nr_host_mem_descs;
157+
u32 host_mem_descs_size;
156158
dma_addr_t host_mem_descs_dma;
157159
struct nvme_host_mem_buf_desc *host_mem_descs;
158160
void **host_mem_desc_bufs;
@@ -1951,7 +1953,7 @@ static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
19511953
return ret;
19521954
}
19531955

1954-
static void nvme_free_host_mem(struct nvme_dev *dev)
1956+
static void nvme_free_host_mem_multi(struct nvme_dev *dev)
19551957
{
19561958
int i;
19571959

@@ -1966,18 +1968,54 @@ static void nvme_free_host_mem(struct nvme_dev *dev)
19661968

19671969
kfree(dev->host_mem_desc_bufs);
19681970
dev->host_mem_desc_bufs = NULL;
1969-
dma_free_coherent(dev->dev,
1970-
dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs),
1971+
}
1972+
1973+
static void nvme_free_host_mem(struct nvme_dev *dev)
1974+
{
1975+
if (dev->hmb_sgt)
1976+
dma_free_noncontiguous(dev->dev, dev->host_mem_size,
1977+
dev->hmb_sgt, DMA_BIDIRECTIONAL);
1978+
else
1979+
nvme_free_host_mem_multi(dev);
1980+
1981+
dma_free_coherent(dev->dev, dev->host_mem_descs_size,
19711982
dev->host_mem_descs, dev->host_mem_descs_dma);
19721983
dev->host_mem_descs = NULL;
1984+
dev->host_mem_descs_size = 0;
19731985
dev->nr_host_mem_descs = 0;
19741986
}
19751987

1976-
static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
1988+
/*
 * Try to set up the host memory buffer as a single segment.
 *
 * Allocates @size bytes with dma_alloc_noncontiguous() and a one-entry
 * descriptor table, then fills descriptor 0 with the DMA address of the
 * first scatterlist entry and the size in NVME_CTRL_PAGE_SIZE units.
 *
 * Returns 0 on success, or -ENOMEM if either allocation fails.  On failure
 * the buffer is released again and dev->hmb_sgt is left NULL.
 */
static int nvme_alloc_host_mem_single(struct nvme_dev *dev, u64 size)
{
	dev->hmb_sgt = dma_alloc_noncontiguous(dev->dev, size,
				DMA_BIDIRECTIONAL, GFP_KERNEL, 0);
	if (!dev->hmb_sgt)
		return -ENOMEM;

	dev->host_mem_descs = dma_alloc_coherent(dev->dev,
			sizeof(*dev->host_mem_descs), &dev->host_mem_descs_dma,
			GFP_KERNEL);
	if (!dev->host_mem_descs) {
		/*
		 * dev->host_mem_size is only assigned further down, so the
		 * buffer must be freed with the size it was allocated with.
		 */
		dma_free_noncontiguous(dev->dev, size, dev->hmb_sgt,
				DMA_BIDIRECTIONAL);
		dev->hmb_sgt = NULL;
		return -ENOMEM;
	}
	dev->host_mem_size = size;
	dev->host_mem_descs_size = sizeof(*dev->host_mem_descs);
	dev->nr_host_mem_descs = 1;

	dev->host_mem_descs[0].addr =
		cpu_to_le64(dev->hmb_sgt->sgl->dma_address);
	dev->host_mem_descs[0].size = cpu_to_le32(size / NVME_CTRL_PAGE_SIZE);
	return 0;
}
2013+
2014+
static int nvme_alloc_host_mem_multi(struct nvme_dev *dev, u64 preferred,
19772015
u32 chunk_size)
19782016
{
19792017
struct nvme_host_mem_buf_desc *descs;
1980-
u32 max_entries, len;
2018+
u32 max_entries, len, descs_size;
19812019
dma_addr_t descs_dma;
19822020
int i = 0;
19832021
void **bufs;
@@ -1990,8 +2028,9 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
19902028
if (dev->ctrl.hmmaxd && dev->ctrl.hmmaxd < max_entries)
19912029
max_entries = dev->ctrl.hmmaxd;
19922030

1993-
descs = dma_alloc_coherent(dev->dev, max_entries * sizeof(*descs),
1994-
&descs_dma, GFP_KERNEL);
2031+
descs_size = max_entries * sizeof(*descs);
2032+
descs = dma_alloc_coherent(dev->dev, descs_size, &descs_dma,
2033+
GFP_KERNEL);
19952034
if (!descs)
19962035
goto out;
19972036

@@ -2020,6 +2059,7 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
20202059
dev->host_mem_size = size;
20212060
dev->host_mem_descs = descs;
20222061
dev->host_mem_descs_dma = descs_dma;
2062+
dev->host_mem_descs_size = descs_size;
20232063
dev->host_mem_desc_bufs = bufs;
20242064
return 0;
20252065

@@ -2034,8 +2074,7 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
20342074

20352075
kfree(bufs);
20362076
out_free_descs:
2037-
dma_free_coherent(dev->dev, max_entries * sizeof(*descs), descs,
2038-
descs_dma);
2077+
dma_free_coherent(dev->dev, descs_size, descs, descs_dma);
20392078
out:
20402079
dev->host_mem_descs = NULL;
20412080
return -ENOMEM;
@@ -2047,9 +2086,18 @@ static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
20472086
u64 hmminds = max_t(u32, dev->ctrl.hmminds * 4096, PAGE_SIZE * 2);
20482087
u64 chunk_size;
20492088

2089+
/*
2090+
* If there is an IOMMU that can merge pages, try a virtually
2091+
* non-contiguous allocation for a single segment first.
2092+
*/
2093+
if (!(PAGE_SIZE & dma_get_merge_boundary(dev->dev))) {
2094+
if (!nvme_alloc_host_mem_single(dev, preferred))
2095+
return 0;
2096+
}
2097+
20502098
/* start big and work our way down */
20512099
for (chunk_size = min_chunk; chunk_size >= hmminds; chunk_size /= 2) {
2052-
if (!__nvme_alloc_host_mem(dev, preferred, chunk_size)) {
2100+
if (!nvme_alloc_host_mem_multi(dev, preferred, chunk_size)) {
20532101
if (!min || dev->host_mem_size >= min)
20542102
return 0;
20552103
nvme_free_host_mem(dev);
@@ -2097,8 +2145,10 @@ static int nvme_setup_host_mem(struct nvme_dev *dev)
20972145
}
20982146

20992147
dev_info(dev->ctrl.device,
2100-
"allocated %lld MiB host memory buffer.\n",
2101-
dev->host_mem_size >> ilog2(SZ_1M));
2148+
"allocated %lld MiB host memory buffer (%u segment%s).\n",
2149+
dev->host_mem_size >> ilog2(SZ_1M),
2150+
dev->nr_host_mem_descs,
2151+
str_plural(dev->nr_host_mem_descs));
21022152
}
21032153

21042154
ret = nvme_set_host_mem(dev, enable_bits);

drivers/nvme/host/trace.c

Lines changed: 52 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -228,41 +228,87 @@ static const char *nvme_trace_zone_mgmt_recv(struct trace_seq *p, u8 *cdw10)
228228

229229
static const char *nvme_trace_resv_reg(struct trace_seq *p, u8 *cdw10)
230230
{
231+
static const char * const rrega_strs[] = {
232+
[0x00] = "register",
233+
[0x01] = "unregister",
234+
[0x02] = "replace",
235+
};
231236
const char *ret = trace_seq_buffer_ptr(p);
232237
u8 rrega = cdw10[0] & 0x7;
233238
u8 iekey = (cdw10[0] >> 3) & 0x1;
234239
u8 ptpl = (cdw10[3] >> 6) & 0x3;
240+
const char *rrega_str;
241+
242+
if (rrega < ARRAY_SIZE(rrega_strs) && rrega_strs[rrega])
243+
rrega_str = rrega_strs[rrega];
244+
else
245+
rrega_str = "reserved";
235246

236-
trace_seq_printf(p, "rrega=%u, iekey=%u, ptpl=%u",
237-
rrega, iekey, ptpl);
247+
trace_seq_printf(p, "rrega=%u:%s, iekey=%u, ptpl=%u",
248+
rrega, rrega_str, iekey, ptpl);
238249
trace_seq_putc(p, 0);
239250

240251
return ret;
241252
}
242253

254+
static const char * const rtype_strs[] = {
255+
[0x00] = "reserved",
256+
[0x01] = "write exclusive",
257+
[0x02] = "exclusive access",
258+
[0x03] = "write exclusive registrants only",
259+
[0x04] = "exclusive access registrants only",
260+
[0x05] = "write exclusive all registrants",
261+
[0x06] = "exclusive access all registrants",
262+
};
263+
243264
static const char *nvme_trace_resv_acq(struct trace_seq *p, u8 *cdw10)
244265
{
266+
static const char * const racqa_strs[] = {
267+
[0x00] = "acquire",
268+
[0x01] = "preempt",
269+
[0x02] = "preempt and abort",
270+
};
245271
const char *ret = trace_seq_buffer_ptr(p);
246272
u8 racqa = cdw10[0] & 0x7;
247273
u8 iekey = (cdw10[0] >> 3) & 0x1;
248274
u8 rtype = cdw10[1];
275+
const char *racqa_str = "reserved";
276+
const char *rtype_str = "reserved";
249277

250-
trace_seq_printf(p, "racqa=%u, iekey=%u, rtype=%u",
251-
racqa, iekey, rtype);
278+
if (racqa < ARRAY_SIZE(racqa_strs) && racqa_strs[racqa])
279+
racqa_str = racqa_strs[racqa];
280+
281+
if (rtype < ARRAY_SIZE(rtype_strs) && rtype_strs[rtype])
282+
rtype_str = rtype_strs[rtype];
283+
284+
trace_seq_printf(p, "racqa=%u:%s, iekey=%u, rtype=%u:%s",
285+
racqa, racqa_str, iekey, rtype, rtype_str);
252286
trace_seq_putc(p, 0);
253287

254288
return ret;
255289
}
256290

257291
static const char *nvme_trace_resv_rel(struct trace_seq *p, u8 *cdw10)
258292
{
293+
static const char * const rrela_strs[] = {
294+
[0x00] = "release",
295+
[0x01] = "clear",
296+
};
259297
const char *ret = trace_seq_buffer_ptr(p);
260298
u8 rrela = cdw10[0] & 0x7;
261299
u8 iekey = (cdw10[0] >> 3) & 0x1;
262300
u8 rtype = cdw10[1];
301+
const char *rrela_str = "reserved";
302+
const char *rtype_str = "reserved";
303+
304+
if (rrela < ARRAY_SIZE(rrela_strs) && rrela_strs[rrela])
305+
rrela_str = rrela_strs[rrela];
306+
307+
if (rtype < ARRAY_SIZE(rtype_strs) && rtype_strs[rtype])
308+
rtype_str = rtype_strs[rtype];
263309

264-
trace_seq_printf(p, "rrela=%u, iekey=%u, rtype=%u",
265-
rrela, iekey, rtype);
310+
trace_seq_printf(p, "rrela=%u:%s, iekey=%u, rtype=%u:%s",
311+
rrela, rrela_str, iekey, rtype, rtype_str);
266312
trace_seq_putc(p, 0);
267313

268314
return ret;

drivers/nvme/target/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ obj-$(CONFIG_NVME_TARGET_FCLOOP) += nvme-fcloop.o
1010
obj-$(CONFIG_NVME_TARGET_TCP) += nvmet-tcp.o
1111

1212
nvmet-y += core.o configfs.o admin-cmd.o fabrics-cmd.o \
13-
discovery.o io-cmd-file.o io-cmd-bdev.o
13+
discovery.o io-cmd-file.o io-cmd-bdev.o pr.o
1414
nvmet-$(CONFIG_NVME_TARGET_DEBUGFS) += debugfs.o
1515
nvmet-$(CONFIG_NVME_TARGET_PASSTHRU) += passthru.o
1616
nvmet-$(CONFIG_BLK_DEV_ZONED) += zns.o

0 commit comments

Comments
 (0)