Skip to content

Commit 80a0d64

Browse files
committed
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe: "Small collection of fixes that would be nice to have in -rc1. This contains: - NVMe pull request from Christoph, mostly with fixes for nvme-pci, host memory buffer in particular. - Error handling fixup for cgwb_create(), in case allocation of 'wb' fails. From Christophe Jaillet. - Ensure that trace_block_getrq() gets the 'dev' in an appropriate fashion, to avoid a potential NULL deref. From Greg Thelen. - Regression fix for dm-mq with blk-mq, fixing a problem with stacking IO schedulers. From me. - string.h fixup, fixing an issue with memcpy_and_pad(). This original change came in through an NVMe dependency, which is why I'm including it here. From Martin Wilck. - Fix potential int overflow in __blkdev_sectors_to_bio_pages(), from Mikulas. - MBR enable fix for sed-opal, from Scott" * 'for-linus' of git://git.kernel.dk/linux-block: block: directly insert blk-mq request from blk_insert_cloned_request() mm/backing-dev.c: fix an error handling path in 'cgwb_create()' string.h: un-fortify memcpy_and_pad nvme-pci: implement the HMB entry number and size limitations nvme-pci: propagate (some) errors from host memory buffer setup nvme-pci: use appropriate initial chunk size for HMB allocation nvme-pci: fix host memory buffer allocation fallback nvme: fix lightnvm check block: fix integer overflow in __blkdev_sectors_to_bio_pages() block: sed-opal: Set MBRDone on S3 resume path if TPER is MBREnabled block: tolerate tracing of NULL bio
2 parents 20e52ee + 157f377 commit 80a0d64

File tree

14 files changed

+134
-81
lines changed

14 files changed

+134
-81
lines changed

block/blk-core.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2342,7 +2342,12 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *
23422342
if (q->mq_ops) {
23432343
if (blk_queue_io_stat(q))
23442344
blk_account_io_start(rq, true);
2345-
blk_mq_sched_insert_request(rq, false, true, false, false);
2345+
/*
2346+
* Since we have a scheduler attached on the top device,
2347+
* bypass a potential scheduler on the bottom device for
2348+
* insert.
2349+
*/
2350+
blk_mq_request_bypass_insert(rq);
23462351
return BLK_STS_OK;
23472352
}
23482353

block/blk-lib.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -269,9 +269,9 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev,
269269
*/
270270
static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects)
271271
{
272-
sector_t bytes = (nr_sects << 9) + PAGE_SIZE - 1;
272+
sector_t pages = DIV_ROUND_UP_SECTOR_T(nr_sects, PAGE_SIZE / 512);
273273

274-
return min(bytes >> PAGE_SHIFT, (sector_t)BIO_MAX_PAGES);
274+
return min(pages, (sector_t)BIO_MAX_PAGES);
275275
}
276276

277277
/**

block/blk-mq.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1401,6 +1401,22 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
14011401
blk_mq_hctx_mark_pending(hctx, ctx);
14021402
}
14031403

1404+
/*
1405+
* Should only be used carefully, when the caller knows we want to
1406+
* bypass a potential IO scheduler on the target device.
1407+
*/
1408+
void blk_mq_request_bypass_insert(struct request *rq)
1409+
{
1410+
struct blk_mq_ctx *ctx = rq->mq_ctx;
1411+
struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(rq->q, ctx->cpu);
1412+
1413+
spin_lock(&hctx->lock);
1414+
list_add_tail(&rq->queuelist, &hctx->dispatch);
1415+
spin_unlock(&hctx->lock);
1416+
1417+
blk_mq_run_hw_queue(hctx, false);
1418+
}
1419+
14041420
void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
14051421
struct list_head *list)
14061422

block/blk-mq.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
5454
*/
5555
void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
5656
bool at_head);
57+
void blk_mq_request_bypass_insert(struct request *rq);
5758
void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
5859
struct list_head *list);
5960

block/opal_proto.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ enum opal_response_token {
4646
#define GENERIC_HOST_SESSION_NUM 0x41
4747

4848
#define TPER_SYNC_SUPPORTED 0x01
49+
#define MBR_ENABLED_MASK 0x10
4950

5051
#define TINY_ATOM_DATA_MASK 0x3F
5152
#define TINY_ATOM_SIGNED 0x40

block/sed-opal.c

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ struct parsed_resp {
8080

8181
struct opal_dev {
8282
bool supported;
83+
bool mbr_enabled;
8384

8485
void *data;
8586
sec_send_recv *send_recv;
@@ -283,6 +284,14 @@ static bool check_tper(const void *data)
283284
return true;
284285
}
285286

287+
static bool check_mbrenabled(const void *data)
288+
{
289+
const struct d0_locking_features *lfeat = data;
290+
u8 sup_feat = lfeat->supported_features;
291+
292+
return !!(sup_feat & MBR_ENABLED_MASK);
293+
}
294+
286295
static bool check_sum(const void *data)
287296
{
288297
const struct d0_single_user_mode *sum = data;
@@ -417,6 +426,7 @@ static int opal_discovery0_end(struct opal_dev *dev)
417426
u32 hlen = be32_to_cpu(hdr->length);
418427

419428
print_buffer(dev->resp, hlen);
429+
dev->mbr_enabled = false;
420430

421431
if (hlen > IO_BUFFER_LENGTH - sizeof(*hdr)) {
422432
pr_debug("Discovery length overflows buffer (%zu+%u)/%u\n",
@@ -442,6 +452,8 @@ static int opal_discovery0_end(struct opal_dev *dev)
442452
check_geometry(dev, body);
443453
break;
444454
case FC_LOCKING:
455+
dev->mbr_enabled = check_mbrenabled(body->features);
456+
break;
445457
case FC_ENTERPRISE:
446458
case FC_DATASTORE:
447459
/* some ignored properties */
@@ -2190,6 +2202,21 @@ static int __opal_lock_unlock(struct opal_dev *dev,
21902202
return next(dev);
21912203
}
21922204

2205+
static int __opal_set_mbr_done(struct opal_dev *dev, struct opal_key *key)
2206+
{
2207+
u8 mbr_done_tf = 1;
2208+
const struct opal_step mbrdone_step [] = {
2209+
{ opal_discovery0, },
2210+
{ start_admin1LSP_opal_session, key },
2211+
{ set_mbr_done, &mbr_done_tf },
2212+
{ end_opal_session, },
2213+
{ NULL, }
2214+
};
2215+
2216+
dev->steps = mbrdone_step;
2217+
return next(dev);
2218+
}
2219+
21932220
static int opal_lock_unlock(struct opal_dev *dev,
21942221
struct opal_lock_unlock *lk_unlk)
21952222
{
@@ -2345,6 +2372,11 @@ bool opal_unlock_from_suspend(struct opal_dev *dev)
23452372
suspend->unlk.session.sum);
23462373
was_failure = true;
23472374
}
2375+
if (dev->mbr_enabled) {
2376+
ret = __opal_set_mbr_done(dev, &suspend->unlk.session.opal_key);
2377+
if (ret)
2378+
pr_debug("Failed to set MBR Done in S3 resume\n");
2379+
}
23482380
}
23492381
mutex_unlock(&dev->dev_lock);
23502382
return was_failure;

drivers/nvme/host/core.c

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1897,6 +1897,8 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
18971897
ctrl->cntlid = le16_to_cpu(id->cntlid);
18981898
ctrl->hmpre = le32_to_cpu(id->hmpre);
18991899
ctrl->hmmin = le32_to_cpu(id->hmmin);
1900+
ctrl->hmminds = le32_to_cpu(id->hmminds);
1901+
ctrl->hmmaxd = le16_to_cpu(id->hmmaxd);
19001902
}
19011903

19021904
kfree(id);
@@ -2377,10 +2379,11 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
23772379

23782380
nvme_report_ns_ids(ctrl, ns->ns_id, id, ns->eui, ns->nguid, &ns->uuid);
23792381

2380-
if (nvme_nvm_ns_supported(ns, id) &&
2381-
nvme_nvm_register(ns, disk_name, node)) {
2382-
dev_warn(ctrl->device, "%s: LightNVM init failure\n", __func__);
2383-
goto out_free_id;
2382+
if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
2383+
if (nvme_nvm_register(ns, disk_name, node)) {
2384+
dev_warn(ctrl->device, "LightNVM init failure\n");
2385+
goto out_free_id;
2386+
}
23842387
}
23852388

23862389
disk = alloc_disk_node(0, node);

drivers/nvme/host/lightnvm.c

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -955,29 +955,3 @@ void nvme_nvm_unregister_sysfs(struct nvme_ns *ns)
955955
sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
956956
&nvm_dev_attr_group);
957957
}
958-
959-
/* move to shared place when used in multiple places. */
960-
#define PCI_VENDOR_ID_CNEX 0x1d1d
961-
#define PCI_DEVICE_ID_CNEX_WL 0x2807
962-
#define PCI_DEVICE_ID_CNEX_QEMU 0x1f1f
963-
964-
int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id)
965-
{
966-
struct nvme_ctrl *ctrl = ns->ctrl;
967-
/* XXX: this is poking into PCI structures from generic code! */
968-
struct pci_dev *pdev = to_pci_dev(ctrl->dev);
969-
970-
/* QEMU NVMe simulator - PCI ID + Vendor specific bit */
971-
if (pdev->vendor == PCI_VENDOR_ID_CNEX &&
972-
pdev->device == PCI_DEVICE_ID_CNEX_QEMU &&
973-
id->vs[0] == 0x1)
974-
return 1;
975-
976-
/* CNEX Labs - PCI ID + Vendor specific bit */
977-
if (pdev->vendor == PCI_VENDOR_ID_CNEX &&
978-
pdev->device == PCI_DEVICE_ID_CNEX_WL &&
979-
id->vs[0] == 0x1)
980-
return 1;
981-
982-
return 0;
983-
}

drivers/nvme/host/nvme.h

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,11 @@ enum nvme_quirks {
7575
* The deepest sleep state should not be used.
7676
*/
7777
NVME_QUIRK_NO_DEEPEST_PS = (1 << 5),
78+
79+
/*
80+
* Supports the LightNVM command set if indicated in vs[1].
81+
*/
82+
NVME_QUIRK_LIGHTNVM = (1 << 6),
7883
};
7984

8085
/*
@@ -176,8 +181,11 @@ struct nvme_ctrl {
176181
u64 ps_max_latency_us;
177182
bool apst_enabled;
178183

184+
/* PCIe only: */
179185
u32 hmpre;
180186
u32 hmmin;
187+
u32 hmminds;
188+
u16 hmmaxd;
181189

182190
/* Fabrics only */
183191
u16 sqsize;
@@ -320,7 +328,6 @@ void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
320328
int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
321329

322330
#ifdef CONFIG_NVM
323-
int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id);
324331
int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node);
325332
void nvme_nvm_unregister(struct nvme_ns *ns);
326333
int nvme_nvm_register_sysfs(struct nvme_ns *ns);
@@ -339,10 +346,6 @@ static inline int nvme_nvm_register_sysfs(struct nvme_ns *ns)
339346
return 0;
340347
}
341348
static inline void nvme_nvm_unregister_sysfs(struct nvme_ns *ns) {};
342-
static inline int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id)
343-
{
344-
return 0;
345-
}
346349
static inline int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd,
347350
unsigned long arg)
348351
{

drivers/nvme/host/pci.c

Lines changed: 50 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1612,21 +1612,23 @@ static void nvme_free_host_mem(struct nvme_dev *dev)
16121612
dev->host_mem_descs = NULL;
16131613
}
16141614

1615-
static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
1615+
static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
1616+
u32 chunk_size)
16161617
{
16171618
struct nvme_host_mem_buf_desc *descs;
1618-
u32 chunk_size, max_entries, len;
1619+
u32 max_entries, len;
16191620
dma_addr_t descs_dma;
16201621
int i = 0;
16211622
void **bufs;
16221623
u64 size = 0, tmp;
16231624

1624-
/* start big and work our way down */
1625-
chunk_size = min(preferred, (u64)PAGE_SIZE << MAX_ORDER);
1626-
retry:
16271625
tmp = (preferred + chunk_size - 1);
16281626
do_div(tmp, chunk_size);
16291627
max_entries = tmp;
1628+
1629+
if (dev->ctrl.hmmaxd && dev->ctrl.hmmaxd < max_entries)
1630+
max_entries = dev->ctrl.hmmaxd;
1631+
16301632
descs = dma_zalloc_coherent(dev->dev, max_entries * sizeof(*descs),
16311633
&descs_dma, GFP_KERNEL);
16321634
if (!descs)
@@ -1650,15 +1652,9 @@ static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
16501652
i++;
16511653
}
16521654

1653-
if (!size || (min && size < min)) {
1654-
dev_warn(dev->ctrl.device,
1655-
"failed to allocate host memory buffer.\n");
1655+
if (!size)
16561656
goto out_free_bufs;
1657-
}
16581657

1659-
dev_info(dev->ctrl.device,
1660-
"allocated %lld MiB host memory buffer.\n",
1661-
size >> ilog2(SZ_1M));
16621658
dev->nr_host_mem_descs = i;
16631659
dev->host_mem_size = size;
16641660
dev->host_mem_descs = descs;
@@ -1679,29 +1675,43 @@ static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
16791675
dma_free_coherent(dev->dev, max_entries * sizeof(*descs), descs,
16801676
descs_dma);
16811677
out:
1682-
/* try a smaller chunk size if we failed early */
1683-
if (chunk_size >= PAGE_SIZE * 2 && (i == 0 || size < min)) {
1684-
chunk_size /= 2;
1685-
goto retry;
1686-
}
16871678
dev->host_mem_descs = NULL;
16881679
return -ENOMEM;
16891680
}
16901681

1691-
static void nvme_setup_host_mem(struct nvme_dev *dev)
1682+
static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
1683+
{
1684+
u32 chunk_size;
1685+
1686+
/* start big and work our way down */
1687+
for (chunk_size = min_t(u64, preferred, PAGE_SIZE * MAX_ORDER_NR_PAGES);
1688+
chunk_size >= max_t(u32, dev->ctrl.hmminds * 4096, PAGE_SIZE * 2);
1689+
chunk_size /= 2) {
1690+
if (!__nvme_alloc_host_mem(dev, preferred, chunk_size)) {
1691+
if (!min || dev->host_mem_size >= min)
1692+
return 0;
1693+
nvme_free_host_mem(dev);
1694+
}
1695+
}
1696+
1697+
return -ENOMEM;
1698+
}
1699+
1700+
static int nvme_setup_host_mem(struct nvme_dev *dev)
16921701
{
16931702
u64 max = (u64)max_host_mem_size_mb * SZ_1M;
16941703
u64 preferred = (u64)dev->ctrl.hmpre * 4096;
16951704
u64 min = (u64)dev->ctrl.hmmin * 4096;
16961705
u32 enable_bits = NVME_HOST_MEM_ENABLE;
1706+
int ret = 0;
16971707

16981708
preferred = min(preferred, max);
16991709
if (min > max) {
17001710
dev_warn(dev->ctrl.device,
17011711
"min host memory (%lld MiB) above limit (%d MiB).\n",
17021712
min >> ilog2(SZ_1M), max_host_mem_size_mb);
17031713
nvme_free_host_mem(dev);
1704-
return;
1714+
return 0;
17051715
}
17061716

17071717
/*
@@ -1715,12 +1725,21 @@ static void nvme_setup_host_mem(struct nvme_dev *dev)
17151725
}
17161726

17171727
if (!dev->host_mem_descs) {
1718-
if (nvme_alloc_host_mem(dev, min, preferred))
1719-
return;
1728+
if (nvme_alloc_host_mem(dev, min, preferred)) {
1729+
dev_warn(dev->ctrl.device,
1730+
"failed to allocate host memory buffer.\n");
1731+
return 0; /* controller must work without HMB */
1732+
}
1733+
1734+
dev_info(dev->ctrl.device,
1735+
"allocated %lld MiB host memory buffer.\n",
1736+
dev->host_mem_size >> ilog2(SZ_1M));
17201737
}
17211738

1722-
if (nvme_set_host_mem(dev, enable_bits))
1739+
ret = nvme_set_host_mem(dev, enable_bits);
1740+
if (ret)
17231741
nvme_free_host_mem(dev);
1742+
return ret;
17241743
}
17251744

17261745
static int nvme_setup_io_queues(struct nvme_dev *dev)
@@ -2164,8 +2183,11 @@ static void nvme_reset_work(struct work_struct *work)
21642183
"unable to allocate dma for dbbuf\n");
21652184
}
21662185

2167-
if (dev->ctrl.hmpre)
2168-
nvme_setup_host_mem(dev);
2186+
if (dev->ctrl.hmpre) {
2187+
result = nvme_setup_host_mem(dev);
2188+
if (result < 0)
2189+
goto out;
2190+
}
21692191

21702192
result = nvme_setup_io_queues(dev);
21712193
if (result)
@@ -2497,6 +2519,10 @@ static const struct pci_device_id nvme_id_table[] = {
24972519
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
24982520
{ PCI_DEVICE(0x144d, 0xa822), /* Samsung PM1725a */
24992521
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
2522+
{ PCI_DEVICE(0x1d1d, 0x1f1f), /* LightNVM qemu device */
2523+
.driver_data = NVME_QUIRK_LIGHTNVM, },
2524+
{ PCI_DEVICE(0x1d1d, 0x2807), /* CNEX WL */
2525+
.driver_data = NVME_QUIRK_LIGHTNVM, },
25002526
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
25012527
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
25022528
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },

0 commit comments

Comments
 (0)