Commit d1cf752

Merge tag 'block-6.17-20250828' of git://git.kernel.dk/linux
Pull block fixes from Jens Axboe:

 - Fix a lockdep spotted issue on recursive locking for zoned writes, in
   case of errors

 - Update bcache MAINTAINERS entry address for Coly

 - Fix for a ublk release issue, with selftests

 - Fix for a regression introduced in this cycle, where it assumed
   q->rq_qos was always set if the bio flag indicated that

 - Fix for a regression introduced in this cycle, where loop retrieving
   block device sizes got broken

* tag 'block-6.17-20250828' of git://git.kernel.dk/linux:
  bcache: change maintainer's email address
  ublk selftests: add --no_ublk_fixed_fd for not using registered ublk char device
  ublk: avoid ublk_io_release() called after ublk char dev is closed
  block: validate QoS before calling __rq_qos_done_bio()
  blk-zoned: Fix a lockdep complaint about recursive locking
  loop: fix zero sized loop for block special file
2 parents: 220374a + 95a7c50

File tree: 11 files changed, +175 -56 lines

MAINTAINERS

Lines changed: 1 addition & 1 deletion
@@ -4205,7 +4205,7 @@ W: http://www.baycom.org/~tom/ham/ham.html
 F: drivers/net/hamradio/baycom*
 
 BCACHE (BLOCK LAYER CACHE)
-M: Coly Li <colyli@kernel.org>
+M: Coly Li <colyli@fnnas.com>
 M: Kent Overstreet <[email protected]>
 L: linux-bcache@vger.kernel.org
 S: Maintained

block/blk-rq-qos.h

Lines changed: 8 additions & 5 deletions
@@ -149,12 +149,15 @@ static inline void rq_qos_done_bio(struct bio *bio)
         q = bdev_get_queue(bio->bi_bdev);
 
         /*
-         * If a bio has BIO_QOS_xxx set, it implicitly implies that
-         * q->rq_qos is present. So, we skip re-checking q->rq_qos
-         * here as an extra optimization and directly call
-         * __rq_qos_done_bio().
+         * A BIO may carry BIO_QOS_* flags even if the associated request_queue
+         * does not have rq_qos enabled. This can happen with stacked block
+         * devices — for example, NVMe multipath, where it's possible that the
+         * bottom device has QoS enabled but the top device does not. Therefore,
+         * always verify that q->rq_qos is present and QoS is enabled before
+         * calling __rq_qos_done_bio().
          */
-        __rq_qos_done_bio(q->rq_qos, bio);
+        if (test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags) && q->rq_qos)
+                __rq_qos_done_bio(q->rq_qos, bio);
 }
 
 static inline void rq_qos_throttle(struct request_queue *q, struct bio *bio)
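
Why the bio's flag alone cannot be trusted is worth spelling out. The sketch below is a self-contained userspace reduction of the post-fix check, using simplified stand-in types and flag values rather than the kernel's real definitions: a bio throttled by a bottom device's QoS policy can complete against a top-level queue whose rq_qos is NULL, and only re-validating the queue prevents a NULL dereference.

/* Stand-ins for the kernel types; simplified placeholders only. */
struct rq_qos { int dummy; };
struct request_queue {
        unsigned long queue_flags;      /* QUEUE_FLAG_QOS_ENABLED lives here */
        struct rq_qos *rq_qos;          /* NULL when no QoS policy is attached */
};
struct bio {
        unsigned int bi_flags;          /* may keep BIO_QOS_* set by a bottom device */
        struct request_queue *q;        /* stand-in for bdev_get_queue(bio->bi_bdev) */
};

#define BIO_QOS_THROTTLED       (1u << 0)
#define QUEUE_FLAG_QOS_ENABLED  (1ul << 0)

static void __rq_qos_done_bio(struct rq_qos *rqos, struct bio *bio)
{
        /* would walk the QoS policies; crashes if rqos is NULL */
        (void)rqos;
        (void)bio;
}

static void rq_qos_done_bio(struct bio *bio)
{
        struct request_queue *q = bio->q;

        if (!(bio->bi_flags & BIO_QOS_THROTTLED))
                return;
        /*
         * The bio may have been re-targeted by a stacked driver (e.g.
         * NVMe multipath) to a queue without QoS, so re-validate the
         * queue instead of trusting the bio's cached flag.
         */
        if ((q->queue_flags & QUEUE_FLAG_QOS_ENABLED) && q->rq_qos)
                __rq_qos_done_bio(q->rq_qos, bio);
}

The queue-flag test also lets a queue that has torn down its QoS policies short-circuit before the pointer is even inspected.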

block/blk-zoned.c

Lines changed: 6 additions & 5 deletions
@@ -1286,14 +1286,14 @@ static void blk_zone_wplug_bio_work(struct work_struct *work)
         struct block_device *bdev;
         unsigned long flags;
         struct bio *bio;
+        bool prepared;
 
         /*
          * Submit the next plugged BIO. If we do not have any, clear
          * the plugged flag.
          */
-        spin_lock_irqsave(&zwplug->lock, flags);
-
 again:
+        spin_lock_irqsave(&zwplug->lock, flags);
         bio = bio_list_pop(&zwplug->bio_list);
         if (!bio) {
                 zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED;
@@ -1304,13 +1304,14 @@ static void blk_zone_wplug_bio_work(struct work_struct *work)
         trace_blk_zone_wplug_bio(zwplug->disk->queue, zwplug->zone_no,
                                  bio->bi_iter.bi_sector, bio_sectors(bio));
 
-        if (!blk_zone_wplug_prepare_bio(zwplug, bio)) {
+        prepared = blk_zone_wplug_prepare_bio(zwplug, bio);
+        spin_unlock_irqrestore(&zwplug->lock, flags);
+
+        if (!prepared) {
                 blk_zone_wplug_bio_io_error(zwplug, bio);
                 goto again;
         }
 
-        spin_unlock_irqrestore(&zwplug->lock, flags);
-
         bdev = bio->bi_bdev;
 
         /*
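
The locking rule behind this fix generalizes: never invoke a completion callback that may itself take the plug lock while still holding it. A minimal pthread reduction under assumed shapes (pop(), prepare() and io_error() stand in for bio_list_pop(), blk_zone_wplug_prepare_bio() and blk_zone_wplug_bio_io_error(); none of this is the kernel code) shows the post-fix structure, with the lock dropped before the error path and re-taken at the again: label.

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

struct item { struct item *next; };

struct wplug {
        pthread_mutex_t lock;
        struct item *list;              /* plugged "BIOs" */
};

static struct item *pop(struct wplug *p)        /* caller holds p->lock */
{
        struct item *it = p->list;

        if (it)
                p->list = it->next;
        return it;
}

static bool prepare(struct item *it)            /* may fail, like *_prepare_bio() */
{
        return it->next != NULL;                /* arbitrary failure condition */
}

static void io_error(struct wplug *p, struct item *it)
{
        /*
         * Endio-style path: re-enters and takes the same lock. Calling
         * this while holding p->lock is the recursive-locking bug.
         */
        pthread_mutex_lock(&p->lock);
        (void)it;                               /* ... error bookkeeping ... */
        pthread_mutex_unlock(&p->lock);
}

static void submit_work(struct wplug *p)
{
        struct item *it;
        bool prepared;

again:
        pthread_mutex_lock(&p->lock);
        it = pop(p);
        if (!it) {
                pthread_mutex_unlock(&p->lock);
                return;
        }
        prepared = prepare(it);
        pthread_mutex_unlock(&p->lock);         /* drop before any callback */

        if (!prepared) {
                io_error(p, it);                /* safe: lock not held here */
                goto again;                     /* re-take the lock at the top */
        }
        /* ... submit the prepared item outside the lock ... */
}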

drivers/block/loop.c

Lines changed: 16 additions & 10 deletions
@@ -139,20 +139,26 @@ static int part_shift;
 
 static loff_t lo_calculate_size(struct loop_device *lo, struct file *file)
 {
-        struct kstat stat;
         loff_t loopsize;
         int ret;
 
-        /*
-         * Get the accurate file size. This provides better results than
-         * cached inode data, particularly for network filesystems where
-         * metadata may be stale.
-         */
-        ret = vfs_getattr_nosec(&file->f_path, &stat, STATX_SIZE, 0);
-        if (ret)
-                return 0;
+        if (S_ISBLK(file_inode(file)->i_mode)) {
+                loopsize = i_size_read(file->f_mapping->host);
+        } else {
+                struct kstat stat;
+
+                /*
+                 * Get the accurate file size. This provides better results than
+                 * cached inode data, particularly for network filesystems where
+                 * metadata may be stale.
+                 */
+                ret = vfs_getattr_nosec(&file->f_path, &stat, STATX_SIZE, 0);
+                if (ret)
+                        return 0;
+
+                loopsize = stat.size;
+        }
 
-        loopsize = stat.size;
         if (lo->lo_offset > 0)
                 loopsize -= lo->lo_offset;
         /* offset is beyond i_size, weird but possible */
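
The underlying stat(2) behavior is easy to confirm from userspace: for a block special file, st_size describes the device node itself (zero on Linux), while the device capacity lives on the bdev inode that i_size_read(file->f_mapping->host) reads in the fix. A short demo follows; the /dev/sda default is only an example path, pass any block device you can open.

#include <fcntl.h>
#include <linux/fs.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <unistd.h>

int main(int argc, char **argv)
{
        const char *path = argc > 1 ? argv[1] : "/dev/sda";
        struct stat st;
        uint64_t bytes = 0;
        int fd = open(path, O_RDONLY);

        if (fd < 0 || fstat(fd, &st) < 0)
                return 1;
        printf("st_size      = %lld\n", (long long)st.st_size);
        /* For block special files, ask the device for its capacity. */
        if (S_ISBLK(st.st_mode) && ioctl(fd, BLKGETSIZE64, &bytes) == 0)
                printf("BLKGETSIZE64 = %llu\n", (unsigned long long)bytes);
        close(fd);
        return 0;
}

On a typical system this prints st_size = 0 for the device node while BLKGETSIZE64 reports the real capacity, which is exactly the discrepancy that made lo_calculate_size() return zero for loop devices backed by block special files.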

drivers/block/ublk_drv.c

Lines changed: 70 additions & 2 deletions
@@ -239,6 +239,7 @@ struct ublk_device {
         struct mutex cancel_mutex;
         bool canceling;
         pid_t ublksrv_tgid;
+        struct delayed_work exit_work;
 };
 
 /* header of ublk_params */
@@ -1595,12 +1596,62 @@ static void ublk_set_canceling(struct ublk_device *ub, bool canceling)
                 ublk_get_queue(ub, i)->canceling = canceling;
 }
 
-static int ublk_ch_release(struct inode *inode, struct file *filp)
+static bool ublk_check_and_reset_active_ref(struct ublk_device *ub)
 {
-        struct ublk_device *ub = filp->private_data;
+        int i, j;
+
+        if (!(ub->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY |
+                                    UBLK_F_AUTO_BUF_REG)))
+                return false;
+
+        for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
+                struct ublk_queue *ubq = ublk_get_queue(ub, i);
+
+                for (j = 0; j < ubq->q_depth; j++) {
+                        struct ublk_io *io = &ubq->ios[j];
+                        unsigned int refs = refcount_read(&io->ref) +
+                                io->task_registered_buffers;
+
+                        /*
+                         * UBLK_REFCOUNT_INIT or zero means no active
+                         * reference
+                         */
+                        if (refs != UBLK_REFCOUNT_INIT && refs != 0)
+                                return true;
+
+                        /* reset to zero if the io has no active references */
+                        refcount_set(&io->ref, 0);
+                        io->task_registered_buffers = 0;
+                }
+        }
+        return false;
+}
+
+static void ublk_ch_release_work_fn(struct work_struct *work)
+{
+        struct ublk_device *ub =
+                container_of(work, struct ublk_device, exit_work.work);
         struct gendisk *disk;
         int i;
 
+        /*
+         * For zero-copy and auto buffer register modes, I/O references
+         * might not be dropped naturally when the daemon is killed, but
+         * io_uring guarantees that registered bvec kernel buffers are
+         * unregistered when the io_uring context is finally freed, and the
+         * active references are dropped then.
+         *
+         * Wait until the active references are dropped, to avoid a
+         * use-after-free.
+         *
+         * A registered buffer may be unregistered in io_uring's release
+         * handler, so we have to wait from a scheduled work function to
+         * avoid a dependency between the two file releases.
+         */
+        if (ublk_check_and_reset_active_ref(ub)) {
+                schedule_delayed_work(&ub->exit_work, 1);
+                return;
+        }
+
         /*
          * disk isn't attached yet, either device isn't live, or it has
          * been removed already, so we needn't to do anything
@@ -1673,6 +1724,23 @@ static int ublk_ch_release(struct inode *inode, struct file *filp)
         ublk_reset_ch_dev(ub);
 out:
         clear_bit(UB_STATE_OPEN, &ub->state);
+
+        /* put the reference grabbed in ublk_ch_release() */
+        ublk_put_device(ub);
+}
+
+static int ublk_ch_release(struct inode *inode, struct file *filp)
+{
+        struct ublk_device *ub = filp->private_data;
+
+        /*
+         * Grab a ublk device reference, so it won't be gone until we are
+         * really released from the work function.
+         */
+        ublk_get_device(ub);
+
+        INIT_DELAYED_WORK(&ub->exit_work, ublk_ch_release_work_fn);
+        schedule_delayed_work(&ub->exit_work, 0);
         return 0;
 }
 
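
The driver change above is an instance of a deferrable-teardown pattern. Here is a condensed kernel-style sketch with hypothetical names (dev_obj, has_active_refs() and teardown() are placeholders; only the workqueue calls mirror the driver): ->release() never blocks, and the work item re-arms itself until io_uring has dropped the last buffer reference.

#include <linux/workqueue.h>
#include <linux/fs.h>

struct dev_obj {
        struct delayed_work exit_work;
        /* ... refcounted per-I/O state ... */
};

static bool has_active_refs(struct dev_obj *d);  /* placeholder check */
static void teardown(struct dev_obj *d);         /* placeholder final put */

static void release_work_fn(struct work_struct *work)
{
        struct dev_obj *d = container_of(work, struct dev_obj,
                                         exit_work.work);

        if (has_active_refs(d)) {
                /* io_uring may still hold registered buffers; poll again. */
                schedule_delayed_work(&d->exit_work, 1);
                return;
        }
        teardown(d);    /* drops the reference taken in dev_release() */
}

static int dev_release(struct inode *inode, struct file *filp)
{
        struct dev_obj *d = filp->private_data;

        /*
         * Waiting here would create a release-on-release dependency with
         * the io_uring file, so hand everything to a work item instead.
         */
        INIT_DELAYED_WORK(&d->exit_work, release_work_fn);
        schedule_delayed_work(&d->exit_work, 0);
        return 0;
}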

tools/testing/selftests/ublk/file_backed.c

Lines changed: 5 additions & 5 deletions
@@ -20,7 +20,7 @@ static int loop_queue_flush_io(struct ublk_thread *t, struct ublk_queue *q,
         struct io_uring_sqe *sqe[1];
 
         ublk_io_alloc_sqes(t, sqe, 1);
-        io_uring_prep_fsync(sqe[0], 1 /*fds[1]*/, IORING_FSYNC_DATASYNC);
+        io_uring_prep_fsync(sqe[0], ublk_get_registered_fd(q, 1) /*fds[1]*/, IORING_FSYNC_DATASYNC);
         io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE);
         /* bit63 marks us as tgt io */
         sqe[0]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1);
@@ -42,7 +42,7 @@ static int loop_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q,
         if (!sqe[0])
                 return -ENOMEM;
 
-        io_uring_prep_rw(op, sqe[0], 1 /*fds[1]*/,
+        io_uring_prep_rw(op, sqe[0], ublk_get_registered_fd(q, 1) /*fds[1]*/,
                         addr,
                         iod->nr_sectors << 9,
                         iod->start_sector << 9);
@@ -56,19 +56,19 @@ static int loop_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q,
 
         ublk_io_alloc_sqes(t, sqe, 3);
 
-        io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index);
+        io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, ublk_get_io(q, tag)->buf_index);
         sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK;
         sqe[0]->user_data = build_user_data(tag,
                         ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1);
 
-        io_uring_prep_rw(op, sqe[1], 1 /*fds[1]*/, 0,
+        io_uring_prep_rw(op, sqe[1], ublk_get_registered_fd(q, 1) /*fds[1]*/, 0,
                         iod->nr_sectors << 9,
                         iod->start_sector << 9);
         sqe[1]->buf_index = tag;
         sqe[1]->flags |= IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK;
         sqe[1]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1);
 
-        io_uring_prep_buf_unregister(sqe[2], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index);
+        io_uring_prep_buf_unregister(sqe[2], q, tag, q->q_id, ublk_get_io(q, tag)->buf_index);
         sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, q->q_id, 1);
 
         return 2;
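
These hunks lean on ublk_get_registered_fd(), which is defined elsewhere in the selftest headers. A plausible shape of that helper, inferred from how the diffs use it and from the UBLKS_Q_NO_UBLK_FIXED_FD handling in kublk.c below (the actual definition may differ):

/* Inferred sketch -- the real helper lives in the selftest headers. */
static inline int ublk_get_registered_fd(struct ublk_queue *q, int fd_index)
{
        if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD) {
                /* ublk char dev is not in the fixed-file table ... */
                if (fd_index == 0)
                        return q->ublk_fd;      /* raw char-dev fd */
                /* ... so backing files shift down one fixed-file slot */
                return fd_index - 1;
        }
        return fd_index;                        /* plain fixed-file index */
}

Either way the call sites stay uniform: index 0 addresses the ublk control device and index N >= 1 the N-th backing file, whether or not the control device is registered.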

tools/testing/selftests/ublk/kublk.c

Lines changed: 31 additions & 7 deletions
@@ -432,7 +432,7 @@ static void ublk_thread_deinit(struct ublk_thread *t)
         }
 }
 
-static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags)
+static int ublk_queue_init(struct ublk_queue *q, unsigned long long extra_flags)
 {
         struct ublk_dev *dev = q->dev;
         int depth = dev->dev_info.queue_depth;
@@ -446,6 +446,9 @@ static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags)
         q->flags = dev->dev_info.flags;
         q->flags |= extra_flags;
 
+        /* Cache fd in queue for fast path access */
+        q->ublk_fd = dev->fds[0];
+
         cmd_buf_size = ublk_queue_cmd_buf_sz(q);
         off = UBLKSRV_CMD_BUF_OFFSET + q->q_id * ublk_queue_max_cmd_buf_sz();
         q->io_cmd_buf = mmap(0, cmd_buf_size, PROT_READ,
@@ -481,9 +484,10 @@ static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags)
         return -ENOMEM;
 }
 
-static int ublk_thread_init(struct ublk_thread *t)
+static int ublk_thread_init(struct ublk_thread *t, unsigned long long extra_flags)
 {
         struct ublk_dev *dev = t->dev;
+        unsigned long long flags = dev->dev_info.flags | extra_flags;
         int ring_depth = dev->tgt.sq_depth, cq_depth = dev->tgt.cq_depth;
         int ret;
 
@@ -512,7 +516,17 @@ static int ublk_thread_init(struct ublk_thread *t)
 
         io_uring_register_ring_fd(&t->ring);
 
-        ret = io_uring_register_files(&t->ring, dev->fds, dev->nr_fds);
+        if (flags & UBLKS_Q_NO_UBLK_FIXED_FD) {
+                /* Register only backing files starting from index 1, exclude ublk control device */
+                if (dev->nr_fds > 1) {
+                        ret = io_uring_register_files(&t->ring, &dev->fds[1], dev->nr_fds - 1);
+                } else {
+                        /* No backing files to register, skip file registration */
+                        ret = 0;
+                }
+        } else {
+                ret = io_uring_register_files(&t->ring, dev->fds, dev->nr_fds);
+        }
         if (ret) {
                 ublk_err("ublk dev %d thread %d register files failed %d\n",
                          t->dev->dev_info.dev_id, t->idx, ret);
@@ -626,9 +640,12 @@ int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io)
 
         /* These fields should be written once, never change */
         ublk_set_sqe_cmd_op(sqe[0], cmd_op);
-        sqe[0]->fd = 0; /* dev->fds[0] */
+        sqe[0]->fd = ublk_get_registered_fd(q, 0); /* dev->fds[0] */
         sqe[0]->opcode = IORING_OP_URING_CMD;
-        sqe[0]->flags = IOSQE_FIXED_FILE;
+        if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD)
+                sqe[0]->flags = 0; /* Use raw FD, not fixed file */
+        else
+                sqe[0]->flags = IOSQE_FIXED_FILE;
         sqe[0]->rw_flags = 0;
         cmd->tag = io->tag;
         cmd->q_id = q->q_id;
@@ -832,6 +849,7 @@ struct ublk_thread_info {
         unsigned idx;
         sem_t *ready;
         cpu_set_t *affinity;
+        unsigned long long extra_flags;
 };
 
 static void *ublk_io_handler_fn(void *data)
@@ -844,7 +862,7 @@ static void *ublk_io_handler_fn(void *data)
         t->dev = info->dev;
         t->idx = info->idx;
 
-        ret = ublk_thread_init(t);
+        ret = ublk_thread_init(t, info->extra_flags);
         if (ret) {
                 ublk_err("ublk dev %d thread %u init failed\n",
                          dev_id, t->idx);
@@ -934,6 +952,8 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
 
         if (ctx->auto_zc_fallback)
                 extra_flags = UBLKS_Q_AUTO_BUF_REG_FALLBACK;
+        if (ctx->no_ublk_fixed_fd)
+                extra_flags |= UBLKS_Q_NO_UBLK_FIXED_FD;
 
         for (i = 0; i < dinfo->nr_hw_queues; i++) {
                 dev->q[i].dev = dev;
@@ -951,6 +971,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
                 tinfo[i].dev = dev;
                 tinfo[i].idx = i;
                 tinfo[i].ready = &ready;
+                tinfo[i].extra_flags = extra_flags;
 
                 /*
                  * If threads are not tied 1:1 to queues, setting thread
@@ -1471,7 +1492,7 @@ static void __cmd_create_help(char *exe, bool recovery)
         printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n",
                exe, recovery ? "recover" : "add");
         printf("\t[--foreground] [--quiet] [-z] [--auto_zc] [--auto_zc_fallback] [--debug_mask mask] [-r 0|1 ] [-g]\n");
-        printf("\t[-e 0|1 ] [-i 0|1]\n");
+        printf("\t[-e 0|1 ] [-i 0|1] [--no_ublk_fixed_fd]\n");
         printf("\t[--nthreads threads] [--per_io_tasks]\n");
         printf("\t[target options] [backfile1] [backfile2] ...\n");
         printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n");
@@ -1534,6 +1555,7 @@ int main(int argc, char *argv[])
                 { "size", 1, NULL, 's'},
                 { "nthreads", 1, NULL, 0 },
                 { "per_io_tasks", 0, NULL, 0 },
+                { "no_ublk_fixed_fd", 0, NULL, 0 },
                 { 0, 0, 0, 0 }
         };
         const struct ublk_tgt_ops *ops = NULL;
@@ -1613,6 +1635,8 @@ int main(int argc, char *argv[])
                         ctx.nthreads = strtol(optarg, NULL, 10);
                 if (!strcmp(longopts[option_idx].name, "per_io_tasks"))
                         ctx.per_io_tasks = 1;
+                if (!strcmp(longopts[option_idx].name, "no_ublk_fixed_fd"))
+                        ctx.no_ublk_fixed_fd = 1;
                 break;
         case '?':
                 /*
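
For completeness, the new switch is exercised the same way as the other create options shown in __cmd_create_help(); a plausible invocation against the loop target (queue count, depth and backing file are placeholders) would be:

    kublk add -t loop -q 2 -d 128 --no_ublk_fixed_fd /tmp/backing.img

With the flag set, the SQEs address the ublk char device by raw fd (no IOSQE_FIXED_FILE), while backing files are still registered into the fixed-file table starting at index 0.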
