Skip to content

Commit c4bd70e

Browse files
committed
Merge tag 'for-linus-2019-10-03' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe: - Mandate timespec64 for the io_uring timeout ABI (Arnd) - Set of NVMe changes via Sagi: - controller removal race fix from Balbir - quirk additions from Gabriel and Jian-Hong - nvme-pci power state save fix from Mario - Add 64bit user commands (for 64bit registers) from Marta - nvme-rdma/nvme-tcp fixes from Max, Mark and Me - Minor cleanups and nits from James, Dan and John - Two s390 dasd fixes (Jan, Stefan) - Have loop change block size in DIO mode (Martijn) - paride pg header ifdef guard (Masahiro) - Two blk-mq queue scheduler tweaks, fixing an ordering issue on zoned devices and suboptimal performance on others (Ming) * tag 'for-linus-2019-10-03' of git://git.kernel.dk/linux-block: (22 commits) block: sed-opal: fix sparse warning: convert __be64 data block: sed-opal: fix sparse warning: obsolete array init. block: pg: add header include guard Revert "s390/dasd: Add discard support for ESE volumes" s390/dasd: Fix error handling during online processing io_uring: use __kernel_timespec in timeout ABI loop: change queue block size to match when using DIO blk-mq: apply normal plugging for HDD blk-mq: honor IO scheduler for multiqueue devices nvme-rdma: fix possible use-after-free in connect timeout nvme: Move ctrl sqsize to generic space nvme: Add ctrl attributes for queue_count and sqsize nvme: allow 64-bit results in passthru commands nvme: Add quirk for Kingston NVME SSD running FW E8FK11.T nvmet-tcp: remove superflous check on request sgl Added QUIRKs for ADATA XPG SX8200 Pro 512GB nvme-rdma: Fix max_hw_sectors calculation nvme: fix an error code in nvme_init_subsystem() nvme-pci: Save PCI state before putting drive into deepest state nvme-tcp: fix wrong stop condition in io_work ...
2 parents cc3a7bf + a9eb49c commit c4bd70e

File tree

14 files changed

+218
-132
lines changed

14 files changed

+218
-132
lines changed

block/blk-mq.c

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1992,10 +1992,14 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
19921992
/* bypass scheduler for flush rq */
19931993
blk_insert_flush(rq);
19941994
blk_mq_run_hw_queue(data.hctx, true);
1995-
} else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs)) {
1995+
} else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs ||
1996+
!blk_queue_nonrot(q))) {
19961997
/*
19971998
* Use plugging if we have a ->commit_rqs() hook as well, as
19981999
* we know the driver uses bd->last in a smart fashion.
2000+
*
2001+
* Use normal plugging if this disk is slow HDD, as sequential
2002+
* IO may benefit a lot from plug merging.
19992003
*/
20002004
unsigned int request_count = plug->rq_count;
20012005
struct request *last = NULL;
@@ -2012,6 +2016,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
20122016
}
20132017

20142018
blk_add_rq_to_plug(plug, rq);
2019+
} else if (q->elevator) {
2020+
blk_mq_sched_insert_request(rq, false, true, true);
20152021
} else if (plug && !blk_queue_nomerges(q)) {
20162022
/*
20172023
* We do limited plugging. If the bio can be merged, do that.
@@ -2035,8 +2041,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
20352041
blk_mq_try_issue_directly(data.hctx, same_queue_rq,
20362042
&cookie);
20372043
}
2038-
} else if ((q->nr_hw_queues > 1 && is_sync) || (!q->elevator &&
2039-
!data.hctx->dispatch_busy)) {
2044+
} else if ((q->nr_hw_queues > 1 && is_sync) ||
2045+
!data.hctx->dispatch_busy) {
20402046
blk_mq_try_issue_directly(data.hctx, rq, &cookie);
20412047
} else {
20422048
blk_mq_sched_insert_request(rq, false, true, true);

block/sed-opal.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ static const u8 opaluid[][OPAL_UID_LENGTH] = {
129129
{ 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x84, 0x01 },
130130

131131
/* tables */
132-
[OPAL_TABLE_TABLE]
132+
[OPAL_TABLE_TABLE] =
133133
{ 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01 },
134134
[OPAL_LOCKINGRANGE_GLOBAL] =
135135
{ 0x00, 0x00, 0x08, 0x02, 0x00, 0x00, 0x00, 0x01 },
@@ -372,8 +372,8 @@ static void check_geometry(struct opal_dev *dev, const void *data)
372372
{
373373
const struct d0_geometry_features *geo = data;
374374

375-
dev->align = geo->alignment_granularity;
376-
dev->lowest_lba = geo->lowest_aligned_lba;
375+
dev->align = be64_to_cpu(geo->alignment_granularity);
376+
dev->lowest_lba = be64_to_cpu(geo->lowest_aligned_lba);
377377
}
378378

379379
static int execute_step(struct opal_dev *dev,

drivers/block/loop.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -994,6 +994,16 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
994994
if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
995995
blk_queue_write_cache(lo->lo_queue, true, false);
996996

997+
if (io_is_direct(lo->lo_backing_file) && inode->i_sb->s_bdev) {
998+
/* In case of direct I/O, match underlying block size */
999+
unsigned short bsize = bdev_logical_block_size(
1000+
inode->i_sb->s_bdev);
1001+
1002+
blk_queue_logical_block_size(lo->lo_queue, bsize);
1003+
blk_queue_physical_block_size(lo->lo_queue, bsize);
1004+
blk_queue_io_min(lo->lo_queue, bsize);
1005+
}
1006+
9971007
loop_update_rotational(lo);
9981008
loop_update_dio(lo);
9991009
set_capacity(lo->lo_disk, size);

drivers/nvme/host/core.c

Lines changed: 113 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -102,10 +102,13 @@ static void nvme_set_queue_dying(struct nvme_ns *ns)
102102
*/
103103
if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags))
104104
return;
105-
revalidate_disk(ns->disk);
106105
blk_set_queue_dying(ns->queue);
107106
/* Forcibly unquiesce queues to avoid blocking dispatch */
108107
blk_mq_unquiesce_queue(ns->queue);
108+
/*
109+
* Revalidate after unblocking dispatchers that may be holding bd_mutex
110+
*/
111+
revalidate_disk(ns->disk);
109112
}
110113

111114
static void nvme_queue_scan(struct nvme_ctrl *ctrl)
@@ -847,7 +850,7 @@ static void *nvme_add_user_metadata(struct bio *bio, void __user *ubuf,
847850
static int nvme_submit_user_cmd(struct request_queue *q,
848851
struct nvme_command *cmd, void __user *ubuffer,
849852
unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
850-
u32 meta_seed, u32 *result, unsigned timeout)
853+
u32 meta_seed, u64 *result, unsigned timeout)
851854
{
852855
bool write = nvme_is_write(cmd);
853856
struct nvme_ns *ns = q->queuedata;
@@ -888,7 +891,7 @@ static int nvme_submit_user_cmd(struct request_queue *q,
888891
else
889892
ret = nvme_req(req)->status;
890893
if (result)
891-
*result = le32_to_cpu(nvme_req(req)->result.u32);
894+
*result = le64_to_cpu(nvme_req(req)->result.u64);
892895
if (meta && !ret && !write) {
893896
if (copy_to_user(meta_buffer, meta, meta_len))
894897
ret = -EFAULT;
@@ -1335,6 +1338,54 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
13351338
struct nvme_command c;
13361339
unsigned timeout = 0;
13371340
u32 effects;
1341+
u64 result;
1342+
int status;
1343+
1344+
if (!capable(CAP_SYS_ADMIN))
1345+
return -EACCES;
1346+
if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
1347+
return -EFAULT;
1348+
if (cmd.flags)
1349+
return -EINVAL;
1350+
1351+
memset(&c, 0, sizeof(c));
1352+
c.common.opcode = cmd.opcode;
1353+
c.common.flags = cmd.flags;
1354+
c.common.nsid = cpu_to_le32(cmd.nsid);
1355+
c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
1356+
c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
1357+
c.common.cdw10 = cpu_to_le32(cmd.cdw10);
1358+
c.common.cdw11 = cpu_to_le32(cmd.cdw11);
1359+
c.common.cdw12 = cpu_to_le32(cmd.cdw12);
1360+
c.common.cdw13 = cpu_to_le32(cmd.cdw13);
1361+
c.common.cdw14 = cpu_to_le32(cmd.cdw14);
1362+
c.common.cdw15 = cpu_to_le32(cmd.cdw15);
1363+
1364+
if (cmd.timeout_ms)
1365+
timeout = msecs_to_jiffies(cmd.timeout_ms);
1366+
1367+
effects = nvme_passthru_start(ctrl, ns, cmd.opcode);
1368+
status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
1369+
(void __user *)(uintptr_t)cmd.addr, cmd.data_len,
1370+
(void __user *)(uintptr_t)cmd.metadata,
1371+
cmd.metadata_len, 0, &result, timeout);
1372+
nvme_passthru_end(ctrl, effects);
1373+
1374+
if (status >= 0) {
1375+
if (put_user(result, &ucmd->result))
1376+
return -EFAULT;
1377+
}
1378+
1379+
return status;
1380+
}
1381+
1382+
static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
1383+
struct nvme_passthru_cmd64 __user *ucmd)
1384+
{
1385+
struct nvme_passthru_cmd64 cmd;
1386+
struct nvme_command c;
1387+
unsigned timeout = 0;
1388+
u32 effects;
13381389
int status;
13391390

13401391
if (!capable(CAP_SYS_ADMIN))
@@ -1405,6 +1456,41 @@ static void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx)
14051456
srcu_read_unlock(&head->srcu, idx);
14061457
}
14071458

1459+
static bool is_ctrl_ioctl(unsigned int cmd)
1460+
{
1461+
if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD)
1462+
return true;
1463+
if (is_sed_ioctl(cmd))
1464+
return true;
1465+
return false;
1466+
}
1467+
1468+
static int nvme_handle_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
1469+
void __user *argp,
1470+
struct nvme_ns_head *head,
1471+
int srcu_idx)
1472+
{
1473+
struct nvme_ctrl *ctrl = ns->ctrl;
1474+
int ret;
1475+
1476+
nvme_get_ctrl(ns->ctrl);
1477+
nvme_put_ns_from_disk(head, srcu_idx);
1478+
1479+
switch (cmd) {
1480+
case NVME_IOCTL_ADMIN_CMD:
1481+
ret = nvme_user_cmd(ctrl, NULL, argp);
1482+
break;
1483+
case NVME_IOCTL_ADMIN64_CMD:
1484+
ret = nvme_user_cmd64(ctrl, NULL, argp);
1485+
break;
1486+
default:
1487+
ret = sed_ioctl(ctrl->opal_dev, cmd, argp);
1488+
break;
1489+
}
1490+
nvme_put_ctrl(ctrl);
1491+
return ret;
1492+
}
1493+
14081494
static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
14091495
unsigned int cmd, unsigned long arg)
14101496
{
@@ -1422,20 +1508,8 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
14221508
* separately and drop the ns SRCU reference early. This avoids a
14231509
* deadlock when deleting namespaces using the passthrough interface.
14241510
*/
1425-
if (cmd == NVME_IOCTL_ADMIN_CMD || is_sed_ioctl(cmd)) {
1426-
struct nvme_ctrl *ctrl = ns->ctrl;
1427-
1428-
nvme_get_ctrl(ns->ctrl);
1429-
nvme_put_ns_from_disk(head, srcu_idx);
1430-
1431-
if (cmd == NVME_IOCTL_ADMIN_CMD)
1432-
ret = nvme_user_cmd(ctrl, NULL, argp);
1433-
else
1434-
ret = sed_ioctl(ctrl->opal_dev, cmd, argp);
1435-
1436-
nvme_put_ctrl(ctrl);
1437-
return ret;
1438-
}
1511+
if (is_ctrl_ioctl(cmd))
1512+
return nvme_handle_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);
14391513

14401514
switch (cmd) {
14411515
case NVME_IOCTL_ID:
@@ -1448,6 +1522,9 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
14481522
case NVME_IOCTL_SUBMIT_IO:
14491523
ret = nvme_submit_io(ns, argp);
14501524
break;
1525+
case NVME_IOCTL_IO64_CMD:
1526+
ret = nvme_user_cmd64(ns->ctrl, ns, argp);
1527+
break;
14511528
default:
14521529
if (ns->ndev)
14531530
ret = nvme_nvm_ioctl(ns, cmd, arg);
@@ -2289,6 +2366,16 @@ static const struct nvme_core_quirk_entry core_quirks[] = {
22892366
.vid = 0x14a4,
22902367
.fr = "22301111",
22912368
.quirks = NVME_QUIRK_SIMPLE_SUSPEND,
2369+
},
2370+
{
2371+
/*
2372+
* This Kingston E8FK11.T firmware version has no interrupt
2373+
* after resume with actions related to suspend to idle
2374+
* https://bugzilla.kernel.org/show_bug.cgi?id=204887
2375+
*/
2376+
.vid = 0x2646,
2377+
.fr = "E8FK11.T",
2378+
.quirks = NVME_QUIRK_SIMPLE_SUSPEND,
22922379
}
22932380
};
22942381

@@ -2540,8 +2627,9 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
25402627
list_add_tail(&subsys->entry, &nvme_subsystems);
25412628
}
25422629

2543-
if (sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj,
2544-
dev_name(ctrl->device))) {
2630+
ret = sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj,
2631+
dev_name(ctrl->device));
2632+
if (ret) {
25452633
dev_err(ctrl->device,
25462634
"failed to create sysfs link from subsystem.\n");
25472635
goto out_put_subsystem;
@@ -2838,6 +2926,8 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
28382926
switch (cmd) {
28392927
case NVME_IOCTL_ADMIN_CMD:
28402928
return nvme_user_cmd(ctrl, NULL, argp);
2929+
case NVME_IOCTL_ADMIN64_CMD:
2930+
return nvme_user_cmd64(ctrl, NULL, argp);
28412931
case NVME_IOCTL_IO_CMD:
28422932
return nvme_dev_user_cmd(ctrl, argp);
28432933
case NVME_IOCTL_RESET:
@@ -3045,6 +3135,8 @@ static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);
30453135

30463136
nvme_show_int_function(cntlid);
30473137
nvme_show_int_function(numa_node);
3138+
nvme_show_int_function(queue_count);
3139+
nvme_show_int_function(sqsize);
30483140

30493141
static ssize_t nvme_sysfs_delete(struct device *dev,
30503142
struct device_attribute *attr, const char *buf,
@@ -3125,6 +3217,8 @@ static struct attribute *nvme_dev_attrs[] = {
31253217
&dev_attr_address.attr,
31263218
&dev_attr_state.attr,
31273219
&dev_attr_numa_node.attr,
3220+
&dev_attr_queue_count.attr,
3221+
&dev_attr_sqsize.attr,
31283222
NULL
31293223
};
31303224

drivers/nvme/host/nvme.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,7 @@ struct nvme_ctrl {
221221
u16 oacs;
222222
u16 nssa;
223223
u16 nr_streams;
224+
u16 sqsize;
224225
u32 max_namespaces;
225226
atomic_t abort_limit;
226227
u8 vwc;
@@ -269,7 +270,6 @@ struct nvme_ctrl {
269270
u16 hmmaxd;
270271

271272
/* Fabrics only */
272-
u16 sqsize;
273273
u32 ioccsz;
274274
u32 iorcsz;
275275
u16 icdoff;

drivers/nvme/host/pci.c

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2946,26 +2946,29 @@ static int nvme_suspend(struct device *dev)
29462946
if (ret < 0)
29472947
goto unfreeze;
29482948

2949+
/*
2950+
* A saved state prevents pci pm from generically controlling the
2951+
* device's power. If we're using protocol specific settings, we don't
2952+
* want pci interfering.
2953+
*/
2954+
pci_save_state(pdev);
2955+
29492956
ret = nvme_set_power_state(ctrl, ctrl->npss);
29502957
if (ret < 0)
29512958
goto unfreeze;
29522959

29532960
if (ret) {
2961+
/* discard the saved state */
2962+
pci_load_saved_state(pdev, NULL);
2963+
29542964
/*
29552965
* Clearing npss forces a controller reset on resume. The
29562966
* correct value will be resdicovered then.
29572967
*/
29582968
nvme_dev_disable(ndev, true);
29592969
ctrl->npss = 0;
29602970
ret = 0;
2961-
goto unfreeze;
29622971
}
2963-
/*
2964-
* A saved state prevents pci pm from generically controlling the
2965-
* device's power. If we're using protocol specific settings, we don't
2966-
* want pci interfering.
2967-
*/
2968-
pci_save_state(pdev);
29692972
unfreeze:
29702973
nvme_unfreeze(ctrl);
29712974
return ret;
@@ -3090,6 +3093,9 @@ static const struct pci_device_id nvme_id_table[] = {
30903093
.driver_data = NVME_QUIRK_LIGHTNVM, },
30913094
{ PCI_DEVICE(0x10ec, 0x5762), /* ADATA SX6000LNP */
30923095
.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
3096+
{ PCI_DEVICE(0x1cc1, 0x8201), /* ADATA SX8200PNP 512GB */
3097+
.driver_data = NVME_QUIRK_NO_DEEPEST_PS |
3098+
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
30933099
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
30943100
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
30953101
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },

0 commit comments

Comments
 (0)