Skip to content

Commit 8df2a0a

Browse files
committed
Merge tag 'block-5.7-2020-04-10' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
 "Here's a set of fixes that should go into this merge window. This
  contains:

   - NVMe pull request from Christoph with various fixes

   - Better discard support for loop (Evan)

   - Only call ->commit_rqs() if we have queued IO (Keith)

   - blkcg offlining fixes (Tejun)

   - fix (and fix the fix) for busy partitions"

* tag 'block-5.7-2020-04-10' of git://git.kernel.dk/linux-block:
  block: fix busy device checking in blk_drop_partitions again
  block: fix busy device checking in blk_drop_partitions
  nvmet-rdma: fix double free of rdma queue
  blk-mq: don't commit_rqs() if none were queued
  nvme-fc: Revert "add module to ops template to allow module references"
  nvme: fix deadlock caused by ANA update wrong locking
  nvmet-rdma: fix bonding failover possible NULL deref
  loop: Better discard support for block devices
  loop: Report EOPNOTSUPP properly
  nvmet: fix NULL dereference when removing a referral
  nvme: inherit stable pages constraint in the mpath stack device
  blkcg: don't offline parent blkcg first
  blkcg: rename blkcg->cgwb_refcnt to ->online_pin and always use it
  nvme-tcp: fix possible crash in recv error flow
  nvme-tcp: don't poll a non-live queue
  nvme-tcp: fix possible crash in write_zeroes processing
  nvmet-fc: fix typo in comment
  nvme-rdma: Replace comma with a semicolon
  nvme-fcloop: fix deallocation of working context
  nvme: fix compat address handling in several ioctls
2 parents 172edde + cb6b771 commit 8df2a0a

File tree

18 files changed

+324
-180
lines changed

18 files changed

+324
-180
lines changed

block/blk-cgroup.c

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -883,8 +883,8 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css)
883883
/* this prevents anyone from attaching or migrating to this blkcg */
884884
wb_blkcg_offline(blkcg);
885885

886-
/* put the base cgwb reference allowing step 2 to be triggered */
887-
blkcg_cgwb_put(blkcg);
886+
/* put the base online pin allowing step 2 to be triggered */
887+
blkcg_unpin_online(blkcg);
888888
}
889889

890890
/**
@@ -983,11 +983,11 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
983983
}
984984

985985
spin_lock_init(&blkcg->lock);
986+
refcount_set(&blkcg->online_pin, 1);
986987
INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_NOWAIT | __GFP_NOWARN);
987988
INIT_HLIST_HEAD(&blkcg->blkg_list);
988989
#ifdef CONFIG_CGROUP_WRITEBACK
989990
INIT_LIST_HEAD(&blkcg->cgwb_list);
990-
refcount_set(&blkcg->cgwb_refcnt, 1);
991991
#endif
992992
list_add_tail(&blkcg->all_blkcgs_node, &all_blkcgs);
993993

@@ -1006,6 +1006,21 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
10061006
return ret;
10071007
}
10081008

1009+
static int blkcg_css_online(struct cgroup_subsys_state *css)
1010+
{
1011+
struct blkcg *blkcg = css_to_blkcg(css);
1012+
struct blkcg *parent = blkcg_parent(blkcg);
1013+
1014+
/*
1015+
* blkcg_pin_online() is used to delay blkcg offline so that blkgs
1016+
* don't go offline while cgwbs are still active on them. Pin the
1017+
* parent so that offline always happens towards the root.
1018+
*/
1019+
if (parent)
1020+
blkcg_pin_online(parent);
1021+
return 0;
1022+
}
1023+
10091024
/**
10101025
* blkcg_init_queue - initialize blkcg part of request queue
10111026
* @q: request_queue to initialize
@@ -1199,6 +1214,7 @@ static void blkcg_exit(struct task_struct *tsk)
11991214

12001215
struct cgroup_subsys io_cgrp_subsys = {
12011216
.css_alloc = blkcg_css_alloc,
1217+
.css_online = blkcg_css_online,
12021218
.css_offline = blkcg_css_offline,
12031219
.css_free = blkcg_css_free,
12041220
.can_attach = blkcg_can_attach,

block/blk-mq.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1289,7 +1289,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
12891289
* the driver there was more coming, but that turned out to
12901290
* be a lie.
12911291
*/
1292-
if (q->mq_ops->commit_rqs)
1292+
if (q->mq_ops->commit_rqs && queued)
12931293
q->mq_ops->commit_rqs(hctx);
12941294

12951295
spin_lock(&hctx->lock);
@@ -1911,6 +1911,8 @@ blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last)
19111911
void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
19121912
struct list_head *list)
19131913
{
1914+
int queued = 0;
1915+
19141916
while (!list_empty(list)) {
19151917
blk_status_t ret;
19161918
struct request *rq = list_first_entry(list, struct request,
@@ -1926,15 +1928,16 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
19261928
break;
19271929
}
19281930
blk_mq_end_request(rq, ret);
1929-
}
1931+
} else
1932+
queued++;
19301933
}
19311934

19321935
/*
19331936
* If we didn't flush the entire list, we could have told
19341937
* the driver there was more coming, but that turned out to
19351938
* be a lie.
19361939
*/
1937-
if (!list_empty(list) && hctx->queue->mq_ops->commit_rqs)
1940+
if (!list_empty(list) && hctx->queue->mq_ops->commit_rqs && queued)
19381941
hctx->queue->mq_ops->commit_rqs(hctx);
19391942
}
19401943

block/partitions/core.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -496,7 +496,7 @@ int blk_drop_partitions(struct gendisk *disk, struct block_device *bdev)
496496

497497
if (!disk_part_scan_enabled(disk))
498498
return 0;
499-
if (bdev->bd_part_count || bdev->bd_super)
499+
if (bdev->bd_part_count || bdev->bd_openers > 1)
500500
return -EBUSY;
501501
res = invalidate_partition(disk, 0);
502502
if (res)

drivers/block/loop.c

Lines changed: 36 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -429,11 +429,12 @@ static int lo_fallocate(struct loop_device *lo, struct request *rq, loff_t pos,
429429
* information.
430430
*/
431431
struct file *file = lo->lo_backing_file;
432+
struct request_queue *q = lo->lo_queue;
432433
int ret;
433434

434435
mode |= FALLOC_FL_KEEP_SIZE;
435436

436-
if ((!file->f_op->fallocate) || lo->lo_encrypt_key_size) {
437+
if (!blk_queue_discard(q)) {
437438
ret = -EOPNOTSUPP;
438439
goto out;
439440
}
@@ -463,7 +464,7 @@ static void lo_complete_rq(struct request *rq)
463464
if (!cmd->use_aio || cmd->ret < 0 || cmd->ret == blk_rq_bytes(rq) ||
464465
req_op(rq) != REQ_OP_READ) {
465466
if (cmd->ret < 0)
466-
ret = BLK_STS_IOERR;
467+
ret = errno_to_blk_status(cmd->ret);
467468
goto end_io;
468469
}
469470

@@ -867,28 +868,47 @@ static void loop_config_discard(struct loop_device *lo)
867868
struct inode *inode = file->f_mapping->host;
868869
struct request_queue *q = lo->lo_queue;
869870

871+
/*
872+
* If the backing device is a block device, mirror its zeroing
873+
* capability. Set the discard sectors to the block device's zeroing
874+
* capabilities because loop discards result in blkdev_issue_zeroout(),
875+
* not blkdev_issue_discard(). This maintains consistent behavior with
876+
* file-backed loop devices: discarded regions read back as zero.
877+
*/
878+
if (S_ISBLK(inode->i_mode) && !lo->lo_encrypt_key_size) {
879+
struct request_queue *backingq;
880+
881+
backingq = bdev_get_queue(inode->i_bdev);
882+
blk_queue_max_discard_sectors(q,
883+
backingq->limits.max_write_zeroes_sectors);
884+
885+
blk_queue_max_write_zeroes_sectors(q,
886+
backingq->limits.max_write_zeroes_sectors);
887+
870888
/*
871889
* We use punch hole to reclaim the free space used by the
872890
* image a.k.a. discard. However we do not support discard if
873891
* encryption is enabled, because it may give an attacker
874892
* useful information.
875893
*/
876-
if ((!file->f_op->fallocate) ||
877-
lo->lo_encrypt_key_size) {
894+
} else if (!file->f_op->fallocate || lo->lo_encrypt_key_size) {
878895
q->limits.discard_granularity = 0;
879896
q->limits.discard_alignment = 0;
880897
blk_queue_max_discard_sectors(q, 0);
881898
blk_queue_max_write_zeroes_sectors(q, 0);
882-
blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
883-
return;
884-
}
885899

886-
q->limits.discard_granularity = inode->i_sb->s_blocksize;
887-
q->limits.discard_alignment = 0;
900+
} else {
901+
q->limits.discard_granularity = inode->i_sb->s_blocksize;
902+
q->limits.discard_alignment = 0;
903+
904+
blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
905+
blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9);
906+
}
888907

889-
blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
890-
blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9);
891-
blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
908+
if (q->limits.max_write_zeroes_sectors)
909+
blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
910+
else
911+
blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
892912
}
893913

894914
static void loop_unprepare_queue(struct loop_device *lo)
@@ -1955,7 +1975,10 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
19551975
failed:
19561976
/* complete non-aio request */
19571977
if (!cmd->use_aio || ret) {
1958-
cmd->ret = ret ? -EIO : 0;
1978+
if (ret == -EOPNOTSUPP)
1979+
cmd->ret = ret;
1980+
else
1981+
cmd->ret = ret ? -EIO : 0;
19591982
blk_mq_complete_request(rq);
19601983
}
19611984
}

drivers/nvme/host/core.c

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
#include <linux/blkdev.h>
88
#include <linux/blk-mq.h>
9+
#include <linux/compat.h>
910
#include <linux/delay.h>
1011
#include <linux/errno.h>
1112
#include <linux/hdreg.h>
@@ -1252,6 +1253,18 @@ static void nvme_enable_aen(struct nvme_ctrl *ctrl)
12521253
queue_work(nvme_wq, &ctrl->async_event_work);
12531254
}
12541255

1256+
/*
1257+
* Convert integer values from ioctl structures to user pointers, silently
1258+
* ignoring the upper bits in the compat case to match behaviour of 32-bit
1259+
* kernels.
1260+
*/
1261+
static void __user *nvme_to_user_ptr(uintptr_t ptrval)
1262+
{
1263+
if (in_compat_syscall())
1264+
ptrval = (compat_uptr_t)ptrval;
1265+
return (void __user *)ptrval;
1266+
}
1267+
12551268
static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
12561269
{
12571270
struct nvme_user_io io;
@@ -1275,7 +1288,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
12751288

12761289
length = (io.nblocks + 1) << ns->lba_shift;
12771290
meta_len = (io.nblocks + 1) * ns->ms;
1278-
metadata = (void __user *)(uintptr_t)io.metadata;
1291+
metadata = nvme_to_user_ptr(io.metadata);
12791292

12801293
if (ns->ext) {
12811294
length += meta_len;
@@ -1298,7 +1311,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
12981311
c.rw.appmask = cpu_to_le16(io.appmask);
12991312

13001313
return nvme_submit_user_cmd(ns->queue, &c,
1301-
(void __user *)(uintptr_t)io.addr, length,
1314+
nvme_to_user_ptr(io.addr), length,
13021315
metadata, meta_len, lower_32_bits(io.slba), NULL, 0);
13031316
}
13041317

@@ -1418,9 +1431,9 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
14181431

14191432
effects = nvme_passthru_start(ctrl, ns, cmd.opcode);
14201433
status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
1421-
(void __user *)(uintptr_t)cmd.addr, cmd.data_len,
1422-
(void __user *)(uintptr_t)cmd.metadata,
1423-
cmd.metadata_len, 0, &result, timeout);
1434+
nvme_to_user_ptr(cmd.addr), cmd.data_len,
1435+
nvme_to_user_ptr(cmd.metadata), cmd.metadata_len,
1436+
0, &result, timeout);
14241437
nvme_passthru_end(ctrl, effects);
14251438

14261439
if (status >= 0) {
@@ -1465,8 +1478,8 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
14651478

14661479
effects = nvme_passthru_start(ctrl, ns, cmd.opcode);
14671480
status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
1468-
(void __user *)(uintptr_t)cmd.addr, cmd.data_len,
1469-
(void __user *)(uintptr_t)cmd.metadata, cmd.metadata_len,
1481+
nvme_to_user_ptr(cmd.addr), cmd.data_len,
1482+
nvme_to_user_ptr(cmd.metadata), cmd.metadata_len,
14701483
0, &cmd.result, timeout);
14711484
nvme_passthru_end(ctrl, effects);
14721485

@@ -1884,6 +1897,13 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
18841897
if (ns->head->disk) {
18851898
nvme_update_disk_info(ns->head->disk, ns, id);
18861899
blk_queue_stack_limits(ns->head->disk->queue, ns->queue);
1900+
if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) {
1901+
struct backing_dev_info *info =
1902+
ns->head->disk->queue->backing_dev_info;
1903+
1904+
info->capabilities |= BDI_CAP_STABLE_WRITES;
1905+
}
1906+
18871907
revalidate_disk(ns->head->disk);
18881908
}
18891909
#endif

drivers/nvme/host/fc.c

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -342,8 +342,7 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo,
342342
!template->ls_req || !template->fcp_io ||
343343
!template->ls_abort || !template->fcp_abort ||
344344
!template->max_hw_queues || !template->max_sgl_segments ||
345-
!template->max_dif_sgl_segments || !template->dma_boundary ||
346-
!template->module) {
345+
!template->max_dif_sgl_segments || !template->dma_boundary) {
347346
ret = -EINVAL;
348347
goto out_reghost_failed;
349348
}
@@ -2016,7 +2015,6 @@ nvme_fc_ctrl_free(struct kref *ref)
20162015
{
20172016
struct nvme_fc_ctrl *ctrl =
20182017
container_of(ref, struct nvme_fc_ctrl, ref);
2019-
struct nvme_fc_lport *lport = ctrl->lport;
20202018
unsigned long flags;
20212019

20222020
if (ctrl->ctrl.tagset) {
@@ -2043,7 +2041,6 @@ nvme_fc_ctrl_free(struct kref *ref)
20432041
if (ctrl->ctrl.opts)
20442042
nvmf_free_options(ctrl->ctrl.opts);
20452043
kfree(ctrl);
2046-
module_put(lport->ops->module);
20472044
}
20482045

20492046
static void
@@ -3074,15 +3071,10 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
30743071
goto out_fail;
30753072
}
30763073

3077-
if (!try_module_get(lport->ops->module)) {
3078-
ret = -EUNATCH;
3079-
goto out_free_ctrl;
3080-
}
3081-
30823074
idx = ida_simple_get(&nvme_fc_ctrl_cnt, 0, 0, GFP_KERNEL);
30833075
if (idx < 0) {
30843076
ret = -ENOSPC;
3085-
goto out_mod_put;
3077+
goto out_free_ctrl;
30863078
}
30873079

30883080
ctrl->ctrl.opts = opts;
@@ -3232,8 +3224,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
32323224
out_free_ida:
32333225
put_device(ctrl->dev);
32343226
ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum);
3235-
out_mod_put:
3236-
module_put(lport->ops->module);
32373227
out_free_ctrl:
32383228
kfree(ctrl);
32393229
out_fail:

drivers/nvme/host/multipath.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -510,7 +510,7 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
510510
if (!nr_nsids)
511511
return 0;
512512

513-
down_write(&ctrl->namespaces_rwsem);
513+
down_read(&ctrl->namespaces_rwsem);
514514
list_for_each_entry(ns, &ctrl->namespaces, list) {
515515
unsigned nsid = le32_to_cpu(desc->nsids[n]);
516516

@@ -521,7 +521,7 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
521521
if (++n == nr_nsids)
522522
break;
523523
}
524-
up_write(&ctrl->namespaces_rwsem);
524+
up_read(&ctrl->namespaces_rwsem);
525525
return 0;
526526
}
527527

drivers/nvme/host/rdma.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1342,7 +1342,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
13421342
int ret;
13431343

13441344
sge->addr = qe->dma;
1345-
sge->length = sizeof(struct nvme_command),
1345+
sge->length = sizeof(struct nvme_command);
13461346
sge->lkey = queue->device->pd->local_dma_lkey;
13471347

13481348
wr.next = NULL;

0 commit comments

Comments
 (0)