Skip to content

Commit 0a85ed6

Browse files
committed
Merge tag 'block-5.7-2020-05-09' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:

 - a small series fixing a use-after-free of bdi name (Christoph, Yufen)

 - NVMe fix for a regression with the smaller CQ update (Alexey)

 - NVMe fix for a hang at namespace scanning error recovery (Sagi)

 - fix race with blk-iocost iocg->abs_vdebt updates (Tejun)

* tag 'block-5.7-2020-05-09' of git://git.kernel.dk/linux-block:
  nvme: fix possible hang when ns scanning fails during error recovery
  nvme-pci: fix "slimmer CQ head update"
  bdi: add a ->dev_name field to struct backing_dev_info
  bdi: use bdi_dev_name() to get device name
  bdi: move bdi_dev_name out of line
  vboxsf: don't use the source name in the bdi name
  iocost: protect iocg->abs_vdebt with iocg->waitq.lock
2 parents e99332e + 59c7c3c commit 0a85ed6

File tree

12 files changed

+107
-68
lines changed

12 files changed

+107
-68
lines changed

block/bfq-iosched.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@
123123
#include <linux/ioprio.h>
124124
#include <linux/sbitmap.h>
125125
#include <linux/delay.h>
126+
#include <linux/backing-dev.h>
126127

127128
#include "blk.h"
128129
#include "blk-mq.h"
@@ -4976,8 +4977,9 @@ bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
49764977
ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio);
49774978
switch (ioprio_class) {
49784979
default:
4979-
dev_err(bfqq->bfqd->queue->backing_dev_info->dev,
4980-
"bfq: bad prio class %d\n", ioprio_class);
4980+
pr_err("bdi %s: bfq: bad prio class %d\n",
4981+
bdi_dev_name(bfqq->bfqd->queue->backing_dev_info),
4982+
ioprio_class);
49814983
/* fall through */
49824984
case IOPRIO_CLASS_NONE:
49834985
/*

block/blk-cgroup.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -496,7 +496,7 @@ const char *blkg_dev_name(struct blkcg_gq *blkg)
496496
{
497497
/* some drivers (floppy) instantiate a queue w/o disk registered */
498498
if (blkg->q->backing_dev_info->dev)
499-
return dev_name(blkg->q->backing_dev_info->dev);
499+
return bdi_dev_name(blkg->q->backing_dev_info);
500500
return NULL;
501501
}
502502

block/blk-iocost.c

Lines changed: 71 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -466,7 +466,7 @@ struct ioc_gq {
466466
*/
467467
atomic64_t vtime;
468468
atomic64_t done_vtime;
469-
atomic64_t abs_vdebt;
469+
u64 abs_vdebt;
470470
u64 last_vtime;
471471

472472
/*
@@ -1142,7 +1142,7 @@ static void iocg_kick_waitq(struct ioc_gq *iocg, struct ioc_now *now)
11421142
struct iocg_wake_ctx ctx = { .iocg = iocg };
11431143
u64 margin_ns = (u64)(ioc->period_us *
11441144
WAITQ_TIMER_MARGIN_PCT / 100) * NSEC_PER_USEC;
1145-
u64 abs_vdebt, vdebt, vshortage, expires, oexpires;
1145+
u64 vdebt, vshortage, expires, oexpires;
11461146
s64 vbudget;
11471147
u32 hw_inuse;
11481148

@@ -1152,18 +1152,15 @@ static void iocg_kick_waitq(struct ioc_gq *iocg, struct ioc_now *now)
11521152
vbudget = now->vnow - atomic64_read(&iocg->vtime);
11531153

11541154
/* pay off debt */
1155-
abs_vdebt = atomic64_read(&iocg->abs_vdebt);
1156-
vdebt = abs_cost_to_cost(abs_vdebt, hw_inuse);
1155+
vdebt = abs_cost_to_cost(iocg->abs_vdebt, hw_inuse);
11571156
if (vdebt && vbudget > 0) {
11581157
u64 delta = min_t(u64, vbudget, vdebt);
11591158
u64 abs_delta = min(cost_to_abs_cost(delta, hw_inuse),
1160-
abs_vdebt);
1159+
iocg->abs_vdebt);
11611160

11621161
atomic64_add(delta, &iocg->vtime);
11631162
atomic64_add(delta, &iocg->done_vtime);
1164-
atomic64_sub(abs_delta, &iocg->abs_vdebt);
1165-
if (WARN_ON_ONCE(atomic64_read(&iocg->abs_vdebt) < 0))
1166-
atomic64_set(&iocg->abs_vdebt, 0);
1163+
iocg->abs_vdebt -= abs_delta;
11671164
}
11681165

11691166
/*
@@ -1219,12 +1216,18 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now, u64 cost)
12191216
u64 expires, oexpires;
12201217
u32 hw_inuse;
12211218

1219+
lockdep_assert_held(&iocg->waitq.lock);
1220+
12221221
/* debt-adjust vtime */
12231222
current_hweight(iocg, NULL, &hw_inuse);
1224-
vtime += abs_cost_to_cost(atomic64_read(&iocg->abs_vdebt), hw_inuse);
1223+
vtime += abs_cost_to_cost(iocg->abs_vdebt, hw_inuse);
12251224

1226-
/* clear or maintain depending on the overage */
1227-
if (time_before_eq64(vtime, now->vnow)) {
1225+
/*
1226+
* Clear or maintain depending on the overage. Non-zero vdebt is what
1227+
* guarantees that @iocg is online and future iocg_kick_delay() will
1228+
* clear use_delay. Don't leave it on when there's no vdebt.
1229+
*/
1230+
if (!iocg->abs_vdebt || time_before_eq64(vtime, now->vnow)) {
12281231
blkcg_clear_delay(blkg);
12291232
return false;
12301233
}
@@ -1258,9 +1261,12 @@ static enum hrtimer_restart iocg_delay_timer_fn(struct hrtimer *timer)
12581261
{
12591262
struct ioc_gq *iocg = container_of(timer, struct ioc_gq, delay_timer);
12601263
struct ioc_now now;
1264+
unsigned long flags;
12611265

1266+
spin_lock_irqsave(&iocg->waitq.lock, flags);
12621267
ioc_now(iocg->ioc, &now);
12631268
iocg_kick_delay(iocg, &now, 0);
1269+
spin_unlock_irqrestore(&iocg->waitq.lock, flags);
12641270

12651271
return HRTIMER_NORESTART;
12661272
}
@@ -1368,14 +1374,13 @@ static void ioc_timer_fn(struct timer_list *timer)
13681374
* should have woken up in the last period and expire idle iocgs.
13691375
*/
13701376
list_for_each_entry_safe(iocg, tiocg, &ioc->active_iocgs, active_list) {
1371-
if (!waitqueue_active(&iocg->waitq) &&
1372-
!atomic64_read(&iocg->abs_vdebt) && !iocg_is_idle(iocg))
1377+
if (!waitqueue_active(&iocg->waitq) && iocg->abs_vdebt &&
1378+
!iocg_is_idle(iocg))
13731379
continue;
13741380

13751381
spin_lock(&iocg->waitq.lock);
13761382

1377-
if (waitqueue_active(&iocg->waitq) ||
1378-
atomic64_read(&iocg->abs_vdebt)) {
1383+
if (waitqueue_active(&iocg->waitq) || iocg->abs_vdebt) {
13791384
/* might be oversleeping vtime / hweight changes, kick */
13801385
iocg_kick_waitq(iocg, &now);
13811386
iocg_kick_delay(iocg, &now, 0);
@@ -1718,28 +1723,49 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
17181723
* tests are racy but the races aren't systemic - we only miss once
17191724
* in a while which is fine.
17201725
*/
1721-
if (!waitqueue_active(&iocg->waitq) &&
1722-
!atomic64_read(&iocg->abs_vdebt) &&
1726+
if (!waitqueue_active(&iocg->waitq) && !iocg->abs_vdebt &&
17231727
time_before_eq64(vtime + cost, now.vnow)) {
17241728
iocg_commit_bio(iocg, bio, cost);
17251729
return;
17261730
}
17271731

17281732
/*
1729-
* We're over budget. If @bio has to be issued regardless,
1730-
* remember the abs_cost instead of advancing vtime.
1731-
* iocg_kick_waitq() will pay off the debt before waking more IOs.
1733+
* We activated above but w/o any synchronization. Deactivation is
1734+
* synchronized with waitq.lock and we won't get deactivated as long
1735+
* as we're waiting or has debt, so we're good if we're activated
1736+
* here. In the unlikely case that we aren't, just issue the IO.
1737+
*/
1738+
spin_lock_irq(&iocg->waitq.lock);
1739+
1740+
if (unlikely(list_empty(&iocg->active_list))) {
1741+
spin_unlock_irq(&iocg->waitq.lock);
1742+
iocg_commit_bio(iocg, bio, cost);
1743+
return;
1744+
}
1745+
1746+
/*
1747+
* We're over budget. If @bio has to be issued regardless, remember
1748+
* the abs_cost instead of advancing vtime. iocg_kick_waitq() will pay
1749+
* off the debt before waking more IOs.
1750+
*
17321751
* This way, the debt is continuously paid off each period with the
1733-
* actual budget available to the cgroup. If we just wound vtime,
1734-
* we would incorrectly use the current hw_inuse for the entire
1735-
* amount which, for example, can lead to the cgroup staying
1736-
* blocked for a long time even with substantially raised hw_inuse.
1752+
* actual budget available to the cgroup. If we just wound vtime, we
1753+
* would incorrectly use the current hw_inuse for the entire amount
1754+
* which, for example, can lead to the cgroup staying blocked for a
1755+
* long time even with substantially raised hw_inuse.
1756+
*
1757+
* An iocg with vdebt should stay online so that the timer can keep
1758+
* deducting its vdebt and [de]activate use_delay mechanism
1759+
* accordingly. We don't want to race against the timer trying to
1760+
* clear them and leave @iocg inactive w/ dangling use_delay heavily
1761+
* penalizing the cgroup and its descendants.
17371762
*/
17381763
if (bio_issue_as_root_blkg(bio) || fatal_signal_pending(current)) {
1739-
atomic64_add(abs_cost, &iocg->abs_vdebt);
1764+
iocg->abs_vdebt += abs_cost;
17401765
if (iocg_kick_delay(iocg, &now, cost))
17411766
blkcg_schedule_throttle(rqos->q,
17421767
(bio->bi_opf & REQ_SWAP) == REQ_SWAP);
1768+
spin_unlock_irq(&iocg->waitq.lock);
17431769
return;
17441770
}
17451771

@@ -1756,20 +1782,6 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
17561782
* All waiters are on iocg->waitq and the wait states are
17571783
* synchronized using waitq.lock.
17581784
*/
1759-
spin_lock_irq(&iocg->waitq.lock);
1760-
1761-
/*
1762-
* We activated above but w/o any synchronization. Deactivation is
1763-
* synchronized with waitq.lock and we won't get deactivated as
1764-
* long as we're waiting, so we're good if we're activated here.
1765-
* In the unlikely case that we are deactivated, just issue the IO.
1766-
*/
1767-
if (unlikely(list_empty(&iocg->active_list))) {
1768-
spin_unlock_irq(&iocg->waitq.lock);
1769-
iocg_commit_bio(iocg, bio, cost);
1770-
return;
1771-
}
1772-
17731785
init_waitqueue_func_entry(&wait.wait, iocg_wake_fn);
17741786
wait.wait.private = current;
17751787
wait.bio = bio;
@@ -1801,6 +1813,7 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
18011813
struct ioc_now now;
18021814
u32 hw_inuse;
18031815
u64 abs_cost, cost;
1816+
unsigned long flags;
18041817

18051818
/* bypass if disabled or for root cgroup */
18061819
if (!ioc->enabled || !iocg->level)
@@ -1820,15 +1833,28 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
18201833
iocg->cursor = bio_end;
18211834

18221835
/*
1823-
* Charge if there's enough vtime budget and the existing request
1824-
* has cost assigned. Otherwise, account it as debt. See debt
1825-
* handling in ioc_rqos_throttle() for details.
1836+
* Charge if there's enough vtime budget and the existing request has
1837+
* cost assigned.
18261838
*/
18271839
if (rq->bio && rq->bio->bi_iocost_cost &&
1828-
time_before_eq64(atomic64_read(&iocg->vtime) + cost, now.vnow))
1840+
time_before_eq64(atomic64_read(&iocg->vtime) + cost, now.vnow)) {
18291841
iocg_commit_bio(iocg, bio, cost);
1830-
else
1831-
atomic64_add(abs_cost, &iocg->abs_vdebt);
1842+
return;
1843+
}
1844+
1845+
/*
1846+
* Otherwise, account it as debt if @iocg is online, which it should
1847+
* be for the vast majority of cases. See debt handling in
1848+
* ioc_rqos_throttle() for details.
1849+
*/
1850+
spin_lock_irqsave(&iocg->waitq.lock, flags);
1851+
if (likely(!list_empty(&iocg->active_list))) {
1852+
iocg->abs_vdebt += abs_cost;
1853+
iocg_kick_delay(iocg, &now, cost);
1854+
} else {
1855+
iocg_commit_bio(iocg, bio, cost);
1856+
}
1857+
spin_unlock_irqrestore(&iocg->waitq.lock, flags);
18321858
}
18331859

18341860
static void ioc_rqos_done_bio(struct rq_qos *rqos, struct bio *bio)
@@ -1998,7 +2024,6 @@ static void ioc_pd_init(struct blkg_policy_data *pd)
19982024
iocg->ioc = ioc;
19992025
atomic64_set(&iocg->vtime, now.vnow);
20002026
atomic64_set(&iocg->done_vtime, now.vnow);
2001-
atomic64_set(&iocg->abs_vdebt, 0);
20022027
atomic64_set(&iocg->active_period, atomic64_read(&ioc->cur_period));
20032028
INIT_LIST_HEAD(&iocg->active_list);
20042029
iocg->hweight_active = HWEIGHT_WHOLE;

drivers/nvme/host/core.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1110,7 +1110,7 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
11101110
* Don't treat an error as fatal, as we potentially already
11111111
* have a NGUID or EUI-64.
11121112
*/
1113-
if (status > 0)
1113+
if (status > 0 && !(status & NVME_SC_DNR))
11141114
status = 0;
11151115
goto free_data;
11161116
}

drivers/nvme/host/pci.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -973,9 +973,13 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
973973

974974
static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
975975
{
976-
if (++nvmeq->cq_head == nvmeq->q_depth) {
976+
u16 tmp = nvmeq->cq_head + 1;
977+
978+
if (tmp == nvmeq->q_depth) {
977979
nvmeq->cq_head = 0;
978980
nvmeq->cq_phase ^= 1;
981+
} else {
982+
nvmeq->cq_head = tmp;
979983
}
980984
}
981985

fs/ceph/debugfs.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,7 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
271271
&congestion_kb_fops);
272272

273273
snprintf(name, sizeof(name), "../../bdi/%s",
274-
dev_name(fsc->sb->s_bdi->dev));
274+
bdi_dev_name(fsc->sb->s_bdi));
275275
fsc->debugfs_bdi =
276276
debugfs_create_symlink("bdi",
277277
fsc->client->debugfs_dir,

fs/vboxsf/super.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc)
164164
goto fail_free;
165165
}
166166

167-
err = super_setup_bdi_name(sb, "vboxsf-%s.%d", fc->source, sbi->bdi_id);
167+
err = super_setup_bdi_name(sb, "vboxsf-%d", sbi->bdi_id);
168168
if (err)
169169
goto fail_free;
170170

include/linux/backing-dev-defs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,7 @@ struct backing_dev_info {
219219
wait_queue_head_t wb_waitq;
220220

221221
struct device *dev;
222+
char dev_name[64];
222223
struct device *owner;
223224

224225
struct timer_list laptop_mode_wb_timer;

include/linux/backing-dev.h

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -505,13 +505,6 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi)
505505
(1 << WB_async_congested));
506506
}
507507

508-
extern const char *bdi_unknown_name;
509-
510-
static inline const char *bdi_dev_name(struct backing_dev_info *bdi)
511-
{
512-
if (!bdi || !bdi->dev)
513-
return bdi_unknown_name;
514-
return dev_name(bdi->dev);
515-
}
508+
const char *bdi_dev_name(struct backing_dev_info *bdi);
516509

517510
#endif /* _LINUX_BACKING_DEV_H */

include/trace/events/wbt.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ TRACE_EVENT(wbt_stat,
3333
),
3434

3535
TP_fast_assign(
36-
strlcpy(__entry->name, dev_name(bdi->dev),
36+
strlcpy(__entry->name, bdi_dev_name(bdi),
3737
ARRAY_SIZE(__entry->name));
3838
__entry->rmean = stat[0].mean;
3939
__entry->rmin = stat[0].min;
@@ -68,7 +68,7 @@ TRACE_EVENT(wbt_lat,
6868
),
6969

7070
TP_fast_assign(
71-
strlcpy(__entry->name, dev_name(bdi->dev),
71+
strlcpy(__entry->name, bdi_dev_name(bdi),
7272
ARRAY_SIZE(__entry->name));
7373
__entry->lat = div_u64(lat, 1000);
7474
),
@@ -105,7 +105,7 @@ TRACE_EVENT(wbt_step,
105105
),
106106

107107
TP_fast_assign(
108-
strlcpy(__entry->name, dev_name(bdi->dev),
108+
strlcpy(__entry->name, bdi_dev_name(bdi),
109109
ARRAY_SIZE(__entry->name));
110110
__entry->msg = msg;
111111
__entry->step = step;
@@ -141,7 +141,7 @@ TRACE_EVENT(wbt_timer,
141141
),
142142

143143
TP_fast_assign(
144-
strlcpy(__entry->name, dev_name(bdi->dev),
144+
strlcpy(__entry->name, bdi_dev_name(bdi),
145145
ARRAY_SIZE(__entry->name));
146146
__entry->status = status;
147147
__entry->step = step;

0 commit comments

Comments
 (0)