Skip to content

Commit f8eacd8

Browse files
committed
Merge tag 'block-6.12-20241018' of git://git.kernel.dk/linux
Pull block fixes from Jens Axboe:

 - NVMe pull request via Keith:
     - Fix target passthrough identifier (Nilay)
     - Fix tcp locking (Hannes)
     - Replace list with sbitmap for tracking RDMA rsp tags (Guixen)
     - Remove unnecessary fallthrough statements (Tokunori)
     - Remove ready-without-media support (Greg)
     - Fix multipath partition scan deadlock (Keith)
     - Fix concurrent PCI reset and remove queue mapping (Maurizio)
     - Fabrics shutdown fixes (Nilay)

 - Fix for a kerneldoc warning (Keith)
 - Fix a race with blk-rq-qos and wakeups (Omar)
 - Cleanup of checking for always-set tag_set (SurajSonawane2415)
 - Fix for a crash with CPU hotplug notifiers (Ming)
 - Don't allow zero-copy ublk on unprivileged device (Ming)
 - Use array_index_nospec() for CDROM (Josh)
 - Remove dead code in drbd (David)
 - Tweaks to elevator loading (Breno)

* tag 'block-6.12-20241018' of git://git.kernel.dk/linux:
  cdrom: Avoid barrier_nospec() in cdrom_ioctl_media_changed()
  nvme: use helper nvme_ctrl_state in nvme_keep_alive_finish function
  nvme: make keep-alive synchronous operation
  nvme-loop: flush off pending I/O while shutting down loop controller
  nvme-pci: fix race condition between reset and nvme_dev_disable()
  ublk: don't allow user copy for unprivileged device
  blk-rq-qos: fix crash on rq_qos_wait vs. rq_qos_wake_function race
  nvme-multipath: defer partition scanning
  blk-mq: setup queue ->tag_set before initializing hctx
  elevator: Remove argument from elevator_find_get
  elevator: do not request_module if elevator exists
  drbd: Remove unused conn_lowest_minor
  nvme: disable CC.CRIME (NVME_CC_CRIME)
  nvme: delete unnecessary fallthru comment
  nvmet-rdma: use sbitmap to replace rsp free list
  block: Fix elevator_get_default() checking for NULL q->tag_set
  nvme: tcp: avoid race between queue_lock lock and destroy
  nvmet-passthru: clear EUID/NGUID/UUID while using loop target
  block: fix blk_rq_map_integrity_sg kernel-doc
2 parents a041f47 + b0bf1af commit f8eacd8

File tree

16 files changed

+152
-98
lines changed

16 files changed

+152
-98
lines changed

block/blk-mq.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4310,6 +4310,12 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
43104310
/* mark the queue as mq asap */
43114311
q->mq_ops = set->ops;
43124312

4313+
/*
4314+
* ->tag_set has to be set up before initializing hctx, because the cpuhp
4315+
* handler needs it for checking queue mapping
4316+
*/
4317+
q->tag_set = set;
4318+
43134319
if (blk_mq_alloc_ctxs(q))
43144320
goto err_exit;
43154321

@@ -4328,8 +4334,6 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
43284334
INIT_WORK(&q->timeout_work, blk_mq_timeout_work);
43294335
blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ);
43304336

4331-
q->tag_set = set;
4332-
43334337
q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
43344338

43354339
INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work);

block/blk-rq-qos.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,8 +219,8 @@ static int rq_qos_wake_function(struct wait_queue_entry *curr,
219219

220220
data->got_token = true;
221221
smp_wmb();
222-
list_del_init(&curr->entry);
223222
wake_up_process(data->task);
223+
list_del_init_careful(&curr->entry);
224224
return 1;
225225
}
226226

block/elevator.c

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -106,8 +106,7 @@ static struct elevator_type *__elevator_find(const char *name)
106106
return NULL;
107107
}
108108

109-
static struct elevator_type *elevator_find_get(struct request_queue *q,
110-
const char *name)
109+
static struct elevator_type *elevator_find_get(const char *name)
111110
{
112111
struct elevator_type *e;
113112

@@ -551,7 +550,7 @@ EXPORT_SYMBOL_GPL(elv_unregister);
551550
static inline bool elv_support_iosched(struct request_queue *q)
552551
{
553552
if (!queue_is_mq(q) ||
554-
(q->tag_set && (q->tag_set->flags & BLK_MQ_F_NO_SCHED)))
553+
(q->tag_set->flags & BLK_MQ_F_NO_SCHED))
555554
return false;
556555
return true;
557556
}
@@ -562,14 +561,14 @@ static inline bool elv_support_iosched(struct request_queue *q)
562561
*/
563562
static struct elevator_type *elevator_get_default(struct request_queue *q)
564563
{
565-
if (q->tag_set && q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT)
564+
if (q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT)
566565
return NULL;
567566

568567
if (q->nr_hw_queues != 1 &&
569568
!blk_mq_is_shared_tags(q->tag_set->flags))
570569
return NULL;
571570

572-
return elevator_find_get(q, "mq-deadline");
571+
return elevator_find_get("mq-deadline");
573572
}
574573

575574
/*
@@ -697,7 +696,7 @@ static int elevator_change(struct request_queue *q, const char *elevator_name)
697696
if (q->elevator && elevator_match(q->elevator->type, elevator_name))
698697
return 0;
699698

700-
e = elevator_find_get(q, elevator_name);
699+
e = elevator_find_get(elevator_name);
701700
if (!e)
702701
return -EINVAL;
703702
ret = elevator_switch(q, e);
@@ -709,13 +708,21 @@ int elv_iosched_load_module(struct gendisk *disk, const char *buf,
709708
size_t count)
710709
{
711710
char elevator_name[ELV_NAME_MAX];
711+
struct elevator_type *found;
712+
const char *name;
712713

713714
if (!elv_support_iosched(disk->queue))
714715
return -EOPNOTSUPP;
715716

716717
strscpy(elevator_name, buf, sizeof(elevator_name));
718+
name = strstrip(elevator_name);
717719

718-
request_module("%s-iosched", strstrip(elevator_name));
720+
spin_lock(&elv_list_lock);
721+
found = __elevator_find(name);
722+
spin_unlock(&elv_list_lock);
723+
724+
if (!found)
725+
request_module("%s-iosched", name);
719726

720727
return 0;
721728
}

drivers/block/drbd/drbd_int.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1364,7 +1364,6 @@ extern struct bio_set drbd_io_bio_set;
13641364

13651365
extern struct mutex resources_mutex;
13661366

1367-
extern int conn_lowest_minor(struct drbd_connection *connection);
13681367
extern enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor);
13691368
extern void drbd_destroy_device(struct kref *kref);
13701369
extern void drbd_delete_device(struct drbd_device *device);

drivers/block/drbd/drbd_main.c

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -471,20 +471,6 @@ void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait)
471471
wait_for_completion(&thi->stop);
472472
}
473473

474-
int conn_lowest_minor(struct drbd_connection *connection)
475-
{
476-
struct drbd_peer_device *peer_device;
477-
int vnr = 0, minor = -1;
478-
479-
rcu_read_lock();
480-
peer_device = idr_get_next(&connection->peer_devices, &vnr);
481-
if (peer_device)
482-
minor = device_to_minor(peer_device->device);
483-
rcu_read_unlock();
484-
485-
return minor;
486-
}
487-
488474
#ifdef CONFIG_SMP
489475
/*
490476
* drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs

drivers/block/ublk_drv.c

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2380,10 +2380,19 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
23802380
* TODO: provide forward progress for RECOVERY handler, so that
23812381
* unprivileged device can benefit from it
23822382
*/
2383-
if (info.flags & UBLK_F_UNPRIVILEGED_DEV)
2383+
if (info.flags & UBLK_F_UNPRIVILEGED_DEV) {
23842384
info.flags &= ~(UBLK_F_USER_RECOVERY_REISSUE |
23852385
UBLK_F_USER_RECOVERY);
23862386

2387+
/*
2388+
* For USER_COPY, we depend on userspace to fill request
2389+
* buffer by pwrite() to ublk char device, which can't be
2390+
* used for unprivileged device
2391+
*/
2392+
if (info.flags & UBLK_F_USER_COPY)
2393+
return -EINVAL;
2394+
}
2395+
23872396
/* the created device is always owned by current user */
23882397
ublk_store_owner_uid_gid(&info.owner_uid, &info.owner_gid);
23892398

drivers/cdrom/cdrom.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2313,7 +2313,7 @@ static int cdrom_ioctl_media_changed(struct cdrom_device_info *cdi,
23132313
return -EINVAL;
23142314

23152315
/* Prevent arg from speculatively bypassing the length check */
2316-
barrier_nospec();
2316+
arg = array_index_nospec(arg, cdi->capacity);
23172317

23182318
info = kmalloc(sizeof(*info), GFP_KERNEL);
23192319
if (!info)

drivers/nvme/host/core.c

Lines changed: 17 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1292,14 +1292,12 @@ static void nvme_queue_keep_alive_work(struct nvme_ctrl *ctrl)
12921292
queue_delayed_work(nvme_wq, &ctrl->ka_work, delay);
12931293
}
12941294

1295-
static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq,
1296-
blk_status_t status)
1295+
static void nvme_keep_alive_finish(struct request *rq,
1296+
blk_status_t status, struct nvme_ctrl *ctrl)
12971297
{
1298-
struct nvme_ctrl *ctrl = rq->end_io_data;
1299-
unsigned long flags;
1300-
bool startka = false;
13011298
unsigned long rtt = jiffies - (rq->deadline - rq->timeout);
13021299
unsigned long delay = nvme_keep_alive_work_period(ctrl);
1300+
enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
13031301

13041302
/*
13051303
* Subtract off the keepalive RTT so nvme_keep_alive_work runs
@@ -1313,25 +1311,17 @@ static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq,
13131311
delay = 0;
13141312
}
13151313

1316-
blk_mq_free_request(rq);
1317-
13181314
if (status) {
13191315
dev_err(ctrl->device,
13201316
"failed nvme_keep_alive_end_io error=%d\n",
13211317
status);
1322-
return RQ_END_IO_NONE;
1318+
return;
13231319
}
13241320

13251321
ctrl->ka_last_check_time = jiffies;
13261322
ctrl->comp_seen = false;
1327-
spin_lock_irqsave(&ctrl->lock, flags);
1328-
if (ctrl->state == NVME_CTRL_LIVE ||
1329-
ctrl->state == NVME_CTRL_CONNECTING)
1330-
startka = true;
1331-
spin_unlock_irqrestore(&ctrl->lock, flags);
1332-
if (startka)
1323+
if (state == NVME_CTRL_LIVE || state == NVME_CTRL_CONNECTING)
13331324
queue_delayed_work(nvme_wq, &ctrl->ka_work, delay);
1334-
return RQ_END_IO_NONE;
13351325
}
13361326

13371327
static void nvme_keep_alive_work(struct work_struct *work)
@@ -1340,6 +1330,7 @@ static void nvme_keep_alive_work(struct work_struct *work)
13401330
struct nvme_ctrl, ka_work);
13411331
bool comp_seen = ctrl->comp_seen;
13421332
struct request *rq;
1333+
blk_status_t status;
13431334

13441335
ctrl->ka_last_check_time = jiffies;
13451336

@@ -1362,9 +1353,9 @@ static void nvme_keep_alive_work(struct work_struct *work)
13621353
nvme_init_request(rq, &ctrl->ka_cmd);
13631354

13641355
rq->timeout = ctrl->kato * HZ;
1365-
rq->end_io = nvme_keep_alive_end_io;
1366-
rq->end_io_data = ctrl;
1367-
blk_execute_rq_nowait(rq, false);
1356+
status = blk_execute_rq(rq, false);
1357+
nvme_keep_alive_finish(rq, status, ctrl);
1358+
blk_mq_free_request(rq);
13681359
}
13691360

13701361
static void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
@@ -2458,8 +2449,13 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
24582449
else
24592450
ctrl->ctrl_config = NVME_CC_CSS_NVM;
24602451

2461-
if (ctrl->cap & NVME_CAP_CRMS_CRWMS && ctrl->cap & NVME_CAP_CRMS_CRIMS)
2462-
ctrl->ctrl_config |= NVME_CC_CRIME;
2452+
/*
2453+
* Setting CRIME results in CSTS.RDY before the media is ready. This
2454+
* makes it possible for media related commands to return the error
2455+
* NVME_SC_ADMIN_COMMAND_MEDIA_NOT_READY. Until the driver is
2456+
* restructured to handle retries, disable CC.CRIME.
2457+
*/
2458+
ctrl->ctrl_config &= ~NVME_CC_CRIME;
24632459

24642460
ctrl->ctrl_config |= (NVME_CTRL_PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
24652461
ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE;
@@ -2489,10 +2485,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
24892485
* devices are known to get this wrong. Use the larger of the
24902486
* two values.
24912487
*/
2492-
if (ctrl->ctrl_config & NVME_CC_CRIME)
2493-
ready_timeout = NVME_CRTO_CRIMT(crto);
2494-
else
2495-
ready_timeout = NVME_CRTO_CRWMT(crto);
2488+
ready_timeout = NVME_CRTO_CRWMT(crto);
24962489

24972490
if (ready_timeout < timeout)
24982491
dev_warn_once(ctrl->device, "bad crto:%x cap:%llx\n",

drivers/nvme/host/multipath.c

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -431,7 +431,6 @@ static bool nvme_available_path(struct nvme_ns_head *head)
431431
case NVME_CTRL_LIVE:
432432
case NVME_CTRL_RESETTING:
433433
case NVME_CTRL_CONNECTING:
434-
/* fallthru */
435434
return true;
436435
default:
437436
break;
@@ -580,6 +579,20 @@ static int nvme_add_ns_head_cdev(struct nvme_ns_head *head)
580579
return ret;
581580
}
582581

582+
static void nvme_partition_scan_work(struct work_struct *work)
583+
{
584+
struct nvme_ns_head *head =
585+
container_of(work, struct nvme_ns_head, partition_scan_work);
586+
587+
if (WARN_ON_ONCE(!test_and_clear_bit(GD_SUPPRESS_PART_SCAN,
588+
&head->disk->state)))
589+
return;
590+
591+
mutex_lock(&head->disk->open_mutex);
592+
bdev_disk_changed(head->disk, false);
593+
mutex_unlock(&head->disk->open_mutex);
594+
}
595+
583596
static void nvme_requeue_work(struct work_struct *work)
584597
{
585598
struct nvme_ns_head *head =
@@ -606,6 +619,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
606619
bio_list_init(&head->requeue_list);
607620
spin_lock_init(&head->requeue_lock);
608621
INIT_WORK(&head->requeue_work, nvme_requeue_work);
622+
INIT_WORK(&head->partition_scan_work, nvme_partition_scan_work);
609623

610624
/*
611625
* Add a multipath node if the subsystems supports multiple controllers.
@@ -629,6 +643,16 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
629643
return PTR_ERR(head->disk);
630644
head->disk->fops = &nvme_ns_head_ops;
631645
head->disk->private_data = head;
646+
647+
/*
648+
* We need to suppress the partition scan from occurring within the
649+
* controller's scan_work context. If a path error occurs here, the IO
650+
* will wait until a path becomes available or all paths are torn down,
651+
* but that action also occurs within scan_work, so it would deadlock.
652+
* Defer the partition scan to a different context that does not block
653+
* scan_work.
654+
*/
655+
set_bit(GD_SUPPRESS_PART_SCAN, &head->disk->state);
632656
sprintf(head->disk->disk_name, "nvme%dn%d",
633657
ctrl->subsys->instance, head->instance);
634658
return 0;
@@ -655,6 +679,7 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
655679
return;
656680
}
657681
nvme_add_ns_head_cdev(head);
682+
kblockd_schedule_work(&head->partition_scan_work);
658683
}
659684

660685
mutex_lock(&head->lock);
@@ -974,14 +999,14 @@ void nvme_mpath_shutdown_disk(struct nvme_ns_head *head)
974999
return;
9751000
if (test_and_clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
9761001
nvme_cdev_del(&head->cdev, &head->cdev_device);
1002+
/*
1003+
* requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared
1004+
* to allow multipath to fail all I/O.
1005+
*/
1006+
synchronize_srcu(&head->srcu);
1007+
kblockd_schedule_work(&head->requeue_work);
9771008
del_gendisk(head->disk);
9781009
}
979-
/*
980-
* requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared
981-
* to allow multipath to fail all I/O.
982-
*/
983-
synchronize_srcu(&head->srcu);
984-
kblockd_schedule_work(&head->requeue_work);
9851010
}
9861011

9871012
void nvme_mpath_remove_disk(struct nvme_ns_head *head)
@@ -991,6 +1016,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
9911016
/* make sure all pending bios are cleaned up */
9921017
kblockd_schedule_work(&head->requeue_work);
9931018
flush_work(&head->requeue_work);
1019+
flush_work(&head->partition_scan_work);
9941020
put_disk(head->disk);
9951021
}
9961022

drivers/nvme/host/nvme.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -494,6 +494,7 @@ struct nvme_ns_head {
494494
struct bio_list requeue_list;
495495
spinlock_t requeue_lock;
496496
struct work_struct requeue_work;
497+
struct work_struct partition_scan_work;
497498
struct mutex lock;
498499
unsigned long flags;
499500
#define NVME_NSHEAD_DISK_LIVE 0

0 commit comments

Comments
 (0)