Skip to content

Commit d418d07

Browse files
committed
Merge tag 'for-linus-2019-10-18' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe: - NVMe pull request from Keith that addresses deadlocks, double resets, memory leaks, and other regressions. - Fixup elv_support_iosched() for bio based devices (Damien) - Fixup for the ahci PCS quirk (Dan) - Socket O_NONBLOCK handling fix for io_uring (me) - Timeout sequence io_uring fixes (yangerkun) - MD warning fix for parameter default_layout (Song) - blkcg activation fixes (Tejun) - blk-rq-qos node deletion fix (Tejun) * tag 'for-linus-2019-10-18' of git://git.kernel.dk/linux-block: nvme-pci: Set the prp2 correctly when using more than 4k page io_uring: fix logic error in io_timeout io_uring: fix up O_NONBLOCK handling for sockets md/raid0: fix warning message for parameter default_layout libata/ahci: Fix PCS quirk application blk-rq-qos: fix first node deletion of rq_qos_del() blkcg: Fix multiple bugs in blkcg_activate_policy() io_uring: consider the overflow of sequence for timeout req nvme-tcp: fix possible leakage during error flow nvmet-loop: fix possible leakage during error flow block: Fix elv_support_iosched() nvme-tcp: Initialize sk->sk_ll_usec only with NET_RX_BUSY_POLL nvme: Wait for reset state when required nvme: Prevent resets during paused controller state nvme: Restart request timers in resetting state nvme: Remove ADMIN_ONLY state nvme-pci: Free tagset if no IO queues nvme: retain split access workaround for capability reads nvme: fix possible deadlock when nvme_update_formats fails
2 parents dfdcff3 + b55f009 commit d418d07

File tree

13 files changed

+266
-117
lines changed

13 files changed

+266
-117
lines changed

block/blk-cgroup.c

Lines changed: 51 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1362,57 +1362,90 @@ int blkcg_activate_policy(struct request_queue *q,
13621362
const struct blkcg_policy *pol)
13631363
{
13641364
struct blkg_policy_data *pd_prealloc = NULL;
1365-
struct blkcg_gq *blkg;
1365+
struct blkcg_gq *blkg, *pinned_blkg = NULL;
13661366
int ret;
13671367

13681368
if (blkcg_policy_enabled(q, pol))
13691369
return 0;
13701370

13711371
if (queue_is_mq(q))
13721372
blk_mq_freeze_queue(q);
1373-
pd_prealloc:
1374-
if (!pd_prealloc) {
1375-
pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q, &blkcg_root);
1376-
if (!pd_prealloc) {
1377-
ret = -ENOMEM;
1378-
goto out_bypass_end;
1379-
}
1380-
}
1381-
1373+
retry:
13821374
spin_lock_irq(&q->queue_lock);
13831375

1384-
/* blkg_list is pushed at the head, reverse walk to init parents first */
1376+
/* blkg_list is pushed at the head, reverse walk to allocate parents first */
13851377
list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) {
13861378
struct blkg_policy_data *pd;
13871379

13881380
if (blkg->pd[pol->plid])
13891381
continue;
13901382

1391-
pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q, &blkcg_root);
1392-
if (!pd)
1393-
swap(pd, pd_prealloc);
1383+
/* If prealloc matches, use it; otherwise try GFP_NOWAIT */
1384+
if (blkg == pinned_blkg) {
1385+
pd = pd_prealloc;
1386+
pd_prealloc = NULL;
1387+
} else {
1388+
pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q,
1389+
blkg->blkcg);
1390+
}
1391+
13941392
if (!pd) {
1393+
/*
1394+
* GFP_NOWAIT failed. Free the existing one and
1395+
* prealloc for @blkg w/ GFP_KERNEL.
1396+
*/
1397+
if (pinned_blkg)
1398+
blkg_put(pinned_blkg);
1399+
blkg_get(blkg);
1400+
pinned_blkg = blkg;
1401+
13951402
spin_unlock_irq(&q->queue_lock);
1396-
goto pd_prealloc;
1403+
1404+
if (pd_prealloc)
1405+
pol->pd_free_fn(pd_prealloc);
1406+
pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q,
1407+
blkg->blkcg);
1408+
if (pd_prealloc)
1409+
goto retry;
1410+
else
1411+
goto enomem;
13971412
}
13981413

13991414
blkg->pd[pol->plid] = pd;
14001415
pd->blkg = blkg;
14011416
pd->plid = pol->plid;
1402-
if (pol->pd_init_fn)
1403-
pol->pd_init_fn(pd);
14041417
}
14051418

1419+
/* all allocated, init in the same order */
1420+
if (pol->pd_init_fn)
1421+
list_for_each_entry_reverse(blkg, &q->blkg_list, q_node)
1422+
pol->pd_init_fn(blkg->pd[pol->plid]);
1423+
14061424
__set_bit(pol->plid, q->blkcg_pols);
14071425
ret = 0;
14081426

14091427
spin_unlock_irq(&q->queue_lock);
1410-
out_bypass_end:
1428+
out:
14111429
if (queue_is_mq(q))
14121430
blk_mq_unfreeze_queue(q);
1431+
if (pinned_blkg)
1432+
blkg_put(pinned_blkg);
14131433
if (pd_prealloc)
14141434
pol->pd_free_fn(pd_prealloc);
14151435
return ret;
1436+
1437+
enomem:
1438+
/* alloc failed, nothing's initialized yet, free everything */
1439+
spin_lock_irq(&q->queue_lock);
1440+
list_for_each_entry(blkg, &q->blkg_list, q_node) {
1441+
if (blkg->pd[pol->plid]) {
1442+
pol->pd_free_fn(blkg->pd[pol->plid]);
1443+
blkg->pd[pol->plid] = NULL;
1444+
}
1445+
}
1446+
spin_unlock_irq(&q->queue_lock);
1447+
ret = -ENOMEM;
1448+
goto out;
14161449
}
14171450
EXPORT_SYMBOL_GPL(blkcg_activate_policy);
14181451

block/blk-rq-qos.h

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -108,16 +108,13 @@ static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
108108

109109
static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
110110
{
111-
struct rq_qos *cur, *prev = NULL;
112-
for (cur = q->rq_qos; cur; cur = cur->next) {
113-
if (cur == rqos) {
114-
if (prev)
115-
prev->next = rqos->next;
116-
else
117-
q->rq_qos = cur;
111+
struct rq_qos **cur;
112+
113+
for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) {
114+
if (*cur == rqos) {
115+
*cur = rqos->next;
118116
break;
119117
}
120-
prev = cur;
121118
}
122119

123120
blk_mq_debugfs_unregister_rqos(rqos);

block/elevator.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -616,7 +616,8 @@ int elevator_switch_mq(struct request_queue *q,
616616

617617
static inline bool elv_support_iosched(struct request_queue *q)
618618
{
619-
if (q->tag_set && (q->tag_set->flags & BLK_MQ_F_NO_SCHED))
619+
if (!q->mq_ops ||
620+
(q->tag_set && (q->tag_set->flags & BLK_MQ_F_NO_SCHED)))
620621
return false;
621622
return true;
622623
}

drivers/ata/ahci.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1600,7 +1600,9 @@ static void ahci_intel_pcs_quirk(struct pci_dev *pdev, struct ahci_host_priv *hp
16001600
*/
16011601
if (!id || id->vendor != PCI_VENDOR_ID_INTEL)
16021602
return;
1603-
if (((enum board_ids) id->driver_data) < board_ahci_pcs7)
1603+
1604+
/* Skip applying the quirk on Denverton and beyond */
1605+
if (((enum board_ids) id->driver_data) >= board_ahci_pcs7)
16041606
return;
16051607

16061608
/*

drivers/md/raid0.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
154154
} else {
155155
pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
156156
mdname(mddev));
157-
pr_err("md/raid0: please set raid.default_layout to 1 or 2\n");
157+
pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n");
158158
err = -ENOTSUPP;
159159
goto abort;
160160
}

drivers/nvme/host/core.c

Lines changed: 68 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -116,10 +116,26 @@ static void nvme_queue_scan(struct nvme_ctrl *ctrl)
116116
/*
117117
* Only schedule new queue scan work when admin and IO queues are both alive
118118
*/
119-
if (ctrl->state == NVME_CTRL_LIVE)
119+
if (ctrl->state == NVME_CTRL_LIVE && ctrl->tagset)
120120
queue_work(nvme_wq, &ctrl->scan_work);
121121
}
122122

123+
/*
124+
* Use this function to proceed with scheduling reset_work for a controller
125+
* that had previously been set to the resetting state. This is intended for
126+
* code paths that can't be interrupted by other reset attempts. A hot removal
127+
* may prevent this from succeeding.
128+
*/
129+
int nvme_try_sched_reset(struct nvme_ctrl *ctrl)
130+
{
131+
if (ctrl->state != NVME_CTRL_RESETTING)
132+
return -EBUSY;
133+
if (!queue_work(nvme_reset_wq, &ctrl->reset_work))
134+
return -EBUSY;
135+
return 0;
136+
}
137+
EXPORT_SYMBOL_GPL(nvme_try_sched_reset);
138+
123139
int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
124140
{
125141
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
@@ -137,8 +153,7 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
137153
ret = nvme_reset_ctrl(ctrl);
138154
if (!ret) {
139155
flush_work(&ctrl->reset_work);
140-
if (ctrl->state != NVME_CTRL_LIVE &&
141-
ctrl->state != NVME_CTRL_ADMIN_ONLY)
156+
if (ctrl->state != NVME_CTRL_LIVE)
142157
ret = -ENETRESET;
143158
}
144159

@@ -315,15 +330,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
315330

316331
old_state = ctrl->state;
317332
switch (new_state) {
318-
case NVME_CTRL_ADMIN_ONLY:
319-
switch (old_state) {
320-
case NVME_CTRL_CONNECTING:
321-
changed = true;
322-
/* FALLTHRU */
323-
default:
324-
break;
325-
}
326-
break;
327333
case NVME_CTRL_LIVE:
328334
switch (old_state) {
329335
case NVME_CTRL_NEW:
@@ -339,7 +345,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
339345
switch (old_state) {
340346
case NVME_CTRL_NEW:
341347
case NVME_CTRL_LIVE:
342-
case NVME_CTRL_ADMIN_ONLY:
343348
changed = true;
344349
/* FALLTHRU */
345350
default:
@@ -359,7 +364,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
359364
case NVME_CTRL_DELETING:
360365
switch (old_state) {
361366
case NVME_CTRL_LIVE:
362-
case NVME_CTRL_ADMIN_ONLY:
363367
case NVME_CTRL_RESETTING:
364368
case NVME_CTRL_CONNECTING:
365369
changed = true;
@@ -381,8 +385,10 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
381385
break;
382386
}
383387

384-
if (changed)
388+
if (changed) {
385389
ctrl->state = new_state;
390+
wake_up_all(&ctrl->state_wq);
391+
}
386392

387393
spin_unlock_irqrestore(&ctrl->lock, flags);
388394
if (changed && ctrl->state == NVME_CTRL_LIVE)
@@ -391,6 +397,39 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
391397
}
392398
EXPORT_SYMBOL_GPL(nvme_change_ctrl_state);
393399

400+
/*
401+
* Returns true for sink states that can't ever transition back to live.
402+
*/
403+
static bool nvme_state_terminal(struct nvme_ctrl *ctrl)
404+
{
405+
switch (ctrl->state) {
406+
case NVME_CTRL_NEW:
407+
case NVME_CTRL_LIVE:
408+
case NVME_CTRL_RESETTING:
409+
case NVME_CTRL_CONNECTING:
410+
return false;
411+
case NVME_CTRL_DELETING:
412+
case NVME_CTRL_DEAD:
413+
return true;
414+
default:
415+
WARN_ONCE(1, "Unhandled ctrl state:%d", ctrl->state);
416+
return true;
417+
}
418+
}
419+
420+
/*
421+
* Waits for the controller state to be resetting, or returns false if it is
422+
* not possible to ever transition to that state.
423+
*/
424+
bool nvme_wait_reset(struct nvme_ctrl *ctrl)
425+
{
426+
wait_event(ctrl->state_wq,
427+
nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING) ||
428+
nvme_state_terminal(ctrl));
429+
return ctrl->state == NVME_CTRL_RESETTING;
430+
}
431+
EXPORT_SYMBOL_GPL(nvme_wait_reset);
432+
394433
static void nvme_free_ns_head(struct kref *ref)
395434
{
396435
struct nvme_ns_head *head =
@@ -1306,8 +1345,6 @@ static void nvme_update_formats(struct nvme_ctrl *ctrl)
13061345
if (ns->disk && nvme_revalidate_disk(ns->disk))
13071346
nvme_set_queue_dying(ns);
13081347
up_read(&ctrl->namespaces_rwsem);
1309-
1310-
nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL);
13111348
}
13121349

13131350
static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
@@ -1323,6 +1360,7 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
13231360
nvme_unfreeze(ctrl);
13241361
nvme_mpath_unfreeze(ctrl->subsys);
13251362
mutex_unlock(&ctrl->subsys->lock);
1363+
nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL);
13261364
mutex_unlock(&ctrl->scan_lock);
13271365
}
13281366
if (effects & NVME_CMD_EFFECTS_CCC)
@@ -2874,7 +2912,6 @@ static int nvme_dev_open(struct inode *inode, struct file *file)
28742912

28752913
switch (ctrl->state) {
28762914
case NVME_CTRL_LIVE:
2877-
case NVME_CTRL_ADMIN_ONLY:
28782915
break;
28792916
default:
28802917
return -EWOULDBLOCK;
@@ -3168,7 +3205,6 @@ static ssize_t nvme_sysfs_show_state(struct device *dev,
31683205
static const char *const state_name[] = {
31693206
[NVME_CTRL_NEW] = "new",
31703207
[NVME_CTRL_LIVE] = "live",
3171-
[NVME_CTRL_ADMIN_ONLY] = "only-admin",
31723208
[NVME_CTRL_RESETTING] = "resetting",
31733209
[NVME_CTRL_CONNECTING] = "connecting",
31743210
[NVME_CTRL_DELETING] = "deleting",
@@ -3679,11 +3715,10 @@ static void nvme_scan_work(struct work_struct *work)
36793715
struct nvme_id_ctrl *id;
36803716
unsigned nn;
36813717

3682-
if (ctrl->state != NVME_CTRL_LIVE)
3718+
/* No tagset on a live ctrl means IO queues could not created */
3719+
if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset)
36833720
return;
36843721

3685-
WARN_ON_ONCE(!ctrl->tagset);
3686-
36873722
if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) {
36883723
dev_info(ctrl->device, "rescanning namespaces.\n");
36893724
nvme_clear_changed_ns_log(ctrl);
@@ -3844,13 +3879,13 @@ static void nvme_fw_act_work(struct work_struct *work)
38443879
if (time_after(jiffies, fw_act_timeout)) {
38453880
dev_warn(ctrl->device,
38463881
"Fw activation timeout, reset controller\n");
3847-
nvme_reset_ctrl(ctrl);
3848-
break;
3882+
nvme_try_sched_reset(ctrl);
3883+
return;
38493884
}
38503885
msleep(100);
38513886
}
38523887

3853-
if (ctrl->state != NVME_CTRL_LIVE)
3888+
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE))
38543889
return;
38553890

38563891
nvme_start_queues(ctrl);
@@ -3870,7 +3905,13 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
38703905
nvme_queue_scan(ctrl);
38713906
break;
38723907
case NVME_AER_NOTICE_FW_ACT_STARTING:
3873-
queue_work(nvme_wq, &ctrl->fw_act_work);
3908+
/*
3909+
* We are (ab)using the RESETTING state to prevent subsequent
3910+
* recovery actions from interfering with the controller's
3911+
* firmware activation.
3912+
*/
3913+
if (nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
3914+
queue_work(nvme_wq, &ctrl->fw_act_work);
38743915
break;
38753916
#ifdef CONFIG_NVME_MULTIPATH
38763917
case NVME_AER_NOTICE_ANA:
@@ -3993,6 +4034,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
39934034
INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
39944035
INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
39954036
INIT_WORK(&ctrl->delete_work, nvme_delete_ctrl_work);
4037+
init_waitqueue_head(&ctrl->state_wq);
39964038

39974039
INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
39984040
memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));

drivers/nvme/host/fabrics.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -182,8 +182,7 @@ bool nvmf_ip_options_match(struct nvme_ctrl *ctrl,
182182
static inline bool nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
183183
bool queue_live)
184184
{
185-
if (likely(ctrl->state == NVME_CTRL_LIVE ||
186-
ctrl->state == NVME_CTRL_ADMIN_ONLY))
185+
if (likely(ctrl->state == NVME_CTRL_LIVE))
187186
return true;
188187
return __nvmf_check_ready(ctrl, rq, queue_live);
189188
}

0 commit comments

Comments
 (0)