
Commit 2988dfe

Merge tag 'block-6.17-20250808' of git://git.kernel.dk/linux
Pull more block updates from Jens Axboe:

 - MD pull request via Yu:
     - mddev null-ptr-dereference fix, by Erkun
     - md-cluster fail to remove the faulty disk regression fix, by Heming
     - minor cleanup, by Li Nan and Jinchao
     - mdadm lifetime regression fix reported by syzkaller, by Yu Kuai

 - NVMe pull request via Christoph:
     - add support for getting the FDP feature in fabrics passthru path (Nitesh Shetty)
     - add capability to connect to an administrative controller (Kamaljit Singh)
     - fix a leak on sgl setup error (Keith Busch)
     - initialize discovery subsys after debugfs is initialized (Mohamed Khalfella)
     - fix various comment typos (Bjorn Helgaas)
     - remove unneeded semicolons (Jiapeng Chong)

 - nvmet debugfs ordering issue fix

 - Fix UAF in the tag_set in zloop

 - Ensure sbitmap shallow depth covers the entire set

 - Reduce lock roundtrips in io context lookup

 - Move scheduler tags alloc/free out of the elevator and freeze lock, to fix some lockdep-found issues

 - Improve robustness of queue limits checking

 - Fix a regression with IO priorities if no io context exists

* tag 'block-6.17-20250808' of git://git.kernel.dk/linux: (26 commits)
  lib/sbitmap: make sbitmap_get_shallow() internal
  lib/sbitmap: convert shallow_depth from one word to the whole sbitmap
  nvmet: exit debugfs after discovery subsystem exits
  block, bfq: Reorder struct bfq_iocq_bfqq_data
  md: make rdev_addable usable for rcu mode
  md/raid1: remove struct pool_info and related code
  md/raid1: change r1conf->r1bio_pool to a pointer type
  block: ensure discard_granularity is zero when discard is not supported
  zloop: fix KASAN use-after-free of tag set
  block: Fix default IO priority if there is no IO context
  nvme: fix various comment typos
  nvme-auth: remove unneeded semicolon
  nvme-pci: fix leak on sgl setup error
  nvmet: initialize discovery subsys after debugfs is initialized
  nvme: add capability to connect to an administrative controller
  nvmet: add support for FDP in fabrics passthru path
  md: rename recovery_cp to resync_offset
  md/md-cluster: handle REMOVE message earlier
  md: fix create on open mddev lifetime regression
  block: fix potential deadlock while running nr_hw_queue update
  ...
2 parents 24bbfb8 + 45fa9f9 commit 2988dfe
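A note on the headline sbitmap change ("convert shallow_depth from one word to the whole sbitmap"): previously a shallow depth was applied to each word of the bitmap separately, so the effective whole-set cap scaled with the word count; after this series, shallow_depth is a budget over the entire tag set, which is what lets the BFQ diff below express its limits directly in requests. A minimal userspace illustration of the difference, assuming 64-bit words and invented numbers (not code from the patches):

#include <stdio.h>

int main(void)
{
        unsigned int total_tags = 256;
        unsigned int word_bits = 64;     /* assumed sbitmap word size */
        unsigned int words = total_tags / word_bits;
        unsigned int shallow = 16;       /* invented budget */

        /* Old semantics: the budget applied per word, so the real
         * whole-set cap scaled with the number of words. */
        printf("per-word shallow %u => whole-set cap %u\n",
               shallow, shallow * words);        /* prints 64 */

        /* New semantics: shallow_depth caps the whole set directly. */
        printf("whole-set shallow %u => cap %u\n", shallow, shallow);
        return 0;
}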

38 files changed (+519, -442 lines)

block/bfq-iosched.c

Lines changed: 21 additions & 45 deletions

@@ -454,17 +454,10 @@ static struct bfq_io_cq *icq_to_bic(struct io_cq *icq)
  */
 static struct bfq_io_cq *bfq_bic_lookup(struct request_queue *q)
 {
-        struct bfq_io_cq *icq;
-        unsigned long flags;
-
         if (!current->io_context)
                 return NULL;
 
-        spin_lock_irqsave(&q->queue_lock, flags);
-        icq = icq_to_bic(ioc_lookup_icq(q));
-        spin_unlock_irqrestore(&q->queue_lock, flags);
-
-        return icq;
+        return icq_to_bic(ioc_lookup_icq(q));
 }
 
 /*
@@ -701,17 +694,13 @@ static void bfq_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
 {
         struct bfq_data *bfqd = data->q->elevator->elevator_data;
         struct bfq_io_cq *bic = bfq_bic_lookup(data->q);
-        int depth;
-        unsigned limit = data->q->nr_requests;
-        unsigned int act_idx;
+        unsigned int limit, act_idx;
 
         /* Sync reads have full depth available */
-        if (op_is_sync(opf) && !op_is_write(opf)) {
-                depth = 0;
-        } else {
-                depth = bfqd->word_depths[!!bfqd->wr_busy_queues][op_is_sync(opf)];
-                limit = (limit * depth) >> bfqd->full_depth_shift;
-        }
+        if (op_is_sync(opf) && !op_is_write(opf))
+                limit = data->q->nr_requests;
+        else
+                limit = bfqd->async_depths[!!bfqd->wr_busy_queues][op_is_sync(opf)];
 
         for (act_idx = 0; bic && act_idx < bfqd->num_actuators; act_idx++) {
                 /* Fast path to check if bfqq is already allocated. */
@@ -725,14 +714,16 @@ static void bfq_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
                  * available requests and thus starve other entities.
                  */
                 if (bfqq_request_over_limit(bfqd, bic, opf, act_idx, limit)) {
-                        depth = 1;
+                        limit = 1;
                         break;
                 }
         }
+
         bfq_log(bfqd, "[%s] wr_busy %d sync %d depth %u",
-                        __func__, bfqd->wr_busy_queues, op_is_sync(opf), depth);
-        if (depth)
-                data->shallow_depth = depth;
+                        __func__, bfqd->wr_busy_queues, op_is_sync(opf), limit);
+
+        if (limit < data->q->nr_requests)
+                data->shallow_depth = limit;
 }
 
 static struct bfq_queue *
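To make the new control flow in bfq_limit_depth() concrete: sync reads always get the full nr_requests, everything else gets one of four precomputed budgets, and shallow_depth is only written when the chosen limit actually restricts the queue. A userspace toy model with hypothetical values (the budgets mirror what bfq_update_depths() computes further down; none of this is kernel code):

#include <stdbool.h>
#include <stdio.h>

static unsigned int pick_limit(bool sync_read, bool wr_busy, bool op_sync,
                               unsigned int nr_requests,
                               unsigned int async_depths[2][2])
{
        if (sync_read)
                return nr_requests;             /* full depth for sync reads */
        return async_depths[wr_busy][op_sync];  /* precomputed budget */
}

int main(void)
{
        unsigned int nr_requests = 256;
        unsigned int d[2][2] = {
                { 128, 192 },   /* no weight-raising: 50%, 75% */
                {  48,  96 },   /* weight-raising active: ~18%, ~37% */
        };
        unsigned int limit = pick_limit(false, true, true, nr_requests, d);

        if (limit < nr_requests)        /* only then is shallow_depth set */
                printf("shallow_depth = %u\n", limit);  /* prints 96 */
        return 0;
}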
@@ -2457,15 +2448,8 @@ static bool bfq_bio_merge(struct request_queue *q, struct bio *bio,
                           unsigned int nr_segs)
 {
         struct bfq_data *bfqd = q->elevator->elevator_data;
-        struct request *free = NULL;
-        /*
-         * bfq_bic_lookup grabs the queue_lock: invoke it now and
-         * store its return value for later use, to avoid nesting
-         * queue_lock inside the bfqd->lock. We assume that the bic
-         * returned by bfq_bic_lookup does not go away before
-         * bfqd->lock is taken.
-         */
         struct bfq_io_cq *bic = bfq_bic_lookup(q);
+        struct request *free = NULL;
         bool ret;
 
         spin_lock_irq(&bfqd->lock);
@@ -7128,9 +7112,8 @@ void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg)
  */
 static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt)
 {
-        unsigned int depth = 1U << bt->sb.shift;
+        unsigned int nr_requests = bfqd->queue->nr_requests;
 
-        bfqd->full_depth_shift = bt->sb.shift;
         /*
          * In-word depths if no bfq_queue is being weight-raised:
          * leaving 25% of tags only for sync reads.
@@ -7142,13 +7125,13 @@ static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt)
          * limit 'something'.
          */
         /* no more than 50% of tags for async I/O */
-        bfqd->word_depths[0][0] = max(depth >> 1, 1U);
+        bfqd->async_depths[0][0] = max(nr_requests >> 1, 1U);
         /*
          * no more than 75% of tags for sync writes (25% extra tags
          * w.r.t. async I/O, to prevent async I/O from starving sync
          * writes)
          */
-        bfqd->word_depths[0][1] = max((depth * 3) >> 2, 1U);
+        bfqd->async_depths[0][1] = max((nr_requests * 3) >> 2, 1U);
 
         /*
          * In-word depths in case some bfq_queue is being weight-
@@ -7158,9 +7141,9 @@ static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt)
          * shortage.
          */
         /* no more than ~18% of tags for async I/O */
-        bfqd->word_depths[1][0] = max((depth * 3) >> 4, 1U);
+        bfqd->async_depths[1][0] = max((nr_requests * 3) >> 4, 1U);
         /* no more than ~37% of tags for sync writes (~20% extra tags) */
-        bfqd->word_depths[1][1] = max((depth * 6) >> 4, 1U);
+        bfqd->async_depths[1][1] = max((nr_requests * 6) >> 4, 1U);
 }
 
 static void bfq_depth_updated(struct blk_mq_hw_ctx *hctx)
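Plugging a sample queue depth into the new formulas confirms the percentages named in the comments. A quick standalone check, assuming nr_requests = 64 (the max(..., 1U) clamp only matters for very small queues):

#include <assert.h>

#define max(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
        unsigned int nr_requests = 64;

        assert(max(nr_requests >> 1, 1U) == 32);        /* 50% */
        assert(max((nr_requests * 3) >> 2, 1U) == 48);  /* 75% */
        assert(max((nr_requests * 3) >> 4, 1U) == 12);  /* ~18% */
        assert(max((nr_requests * 6) >> 4, 1U) == 24);  /* ~37% */
        return 0;
}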
@@ -7232,22 +7215,16 @@ static void bfq_init_root_group(struct bfq_group *root_group,
         root_group->sched_data.bfq_class_idle_last_service = jiffies;
 }
 
-static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
+static int bfq_init_queue(struct request_queue *q, struct elevator_queue *eq)
 {
         struct bfq_data *bfqd;
-        struct elevator_queue *eq;
         unsigned int i;
         struct blk_independent_access_ranges *ia_ranges = q->disk->ia_ranges;
 
-        eq = elevator_alloc(q, e);
-        if (!eq)
-                return -ENOMEM;
-
         bfqd = kzalloc_node(sizeof(*bfqd), GFP_KERNEL, q->node);
-        if (!bfqd) {
-                kobject_put(&eq->kobj);
+        if (!bfqd)
                 return -ENOMEM;
-        }
+
         eq->elevator_data = bfqd;
 
         spin_lock_irq(&q->queue_lock);
@@ -7405,7 +7382,6 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
 
 out_free:
         kfree(bfqd);
-        kobject_put(&eq->kobj);
         return -ENOMEM;
 }
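The bfq_init_queue() changes follow from the signature change: the elevator core now allocates the elevator_queue and passes it in, so the scheduler no longer calls elevator_alloc() and no longer needs kobject_put() on its error paths. Roughly, the core-side caller now looks like the sketch below; this is a simplified, hypothetical reconstruction, not the actual block-layer code:

static int elevator_init_sched(struct request_queue *q,
                               struct elevator_type *e)
{
        struct elevator_queue *eq;
        int ret;

        eq = elevator_alloc(q, e);      /* allocation moved to the core */
        if (!eq)
                return -ENOMEM;

        ret = e->ops.init_sched(q, eq); /* e.g. bfq_init_queue(q, eq) */
        if (ret)
                kobject_put(&eq->kobj); /* cleanup owned by the core too */
        return ret;
}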

block/bfq-iosched.h

Lines changed: 6 additions & 7 deletions

@@ -427,9 +427,6 @@ struct bfq_iocq_bfqq_data {
          */
         bool saved_IO_bound;
 
-        u64 saved_io_start_time;
-        u64 saved_tot_idle_time;
-
         /*
          * Same purpose as the previous fields for the values of the
          * field keeping the queue's belonging to a large burst
@@ -450,20 +447,23 @@ struct bfq_iocq_bfqq_data {
          */
         unsigned int saved_weight;
 
+        u64 saved_io_start_time;
+        u64 saved_tot_idle_time;
+
         /*
          * Similar to previous fields: save wr information.
          */
         unsigned long saved_wr_coeff;
         unsigned long saved_last_wr_start_finish;
         unsigned long saved_service_from_wr;
         unsigned long saved_wr_start_at_switch_to_srt;
-        unsigned int saved_wr_cur_max_time;
         struct bfq_ttime saved_ttime;
+        unsigned int saved_wr_cur_max_time;
 
         /* Save also injection state */
-        u64 saved_last_serv_time_ns;
         unsigned int saved_inject_limit;
         unsigned long saved_decrease_time_jif;
+        u64 saved_last_serv_time_ns;
 
         /* candidate queue for a stable merge (due to close creation time) */
         struct bfq_queue *stable_merge_bfqq;
@@ -813,8 +813,7 @@ struct bfq_data {
          * Depth limits used in bfq_limit_depth (see comments on the
          * function)
          */
-        unsigned int word_depths[2][2];
-        unsigned int full_depth_shift;
+        unsigned int async_depths[2][2];
 
         /*
          * Number of independent actuators. This is equal to 1 in

block/blk-ioc.c

Lines changed: 6 additions & 10 deletions

@@ -308,24 +308,23 @@ int __copy_io(unsigned long clone_flags, struct task_struct *tsk)
 
 #ifdef CONFIG_BLK_ICQ
 /**
- * ioc_lookup_icq - lookup io_cq from ioc
+ * ioc_lookup_icq - lookup io_cq from ioc in io issue path
  * @q: the associated request_queue
  *
  * Look up io_cq associated with @ioc - @q pair from @ioc. Must be called
- * with @q->queue_lock held.
+ * from io issue path, either return NULL if current issue io to @q for the
+ * first time, or return a valid icq.
  */
 struct io_cq *ioc_lookup_icq(struct request_queue *q)
 {
         struct io_context *ioc = current->io_context;
         struct io_cq *icq;
 
-        lockdep_assert_held(&q->queue_lock);
-
         /*
          * icq's are indexed from @ioc using radix tree and hint pointer,
-         * both of which are protected with RCU. All removals are done
-         * holding both q and ioc locks, and we're holding q lock - if we
-         * find a icq which points to us, it's guaranteed to be valid.
+         * both of which are protected with RCU, io issue path ensures that
+         * both request_queue and current task are valid, the found icq
+         * is guaranteed to be valid until the io is done.
          */
         rcu_read_lock();
         icq = rcu_dereference(ioc->icq_hint);
@@ -419,10 +418,7 @@ struct io_cq *ioc_find_get_icq(struct request_queue *q)
                 task_unlock(current);
         } else {
                 get_io_context(ioc);
-
-                spin_lock_irq(&q->queue_lock);
                 icq = ioc_lookup_icq(q);
-                spin_unlock_irq(&q->queue_lock);
         }
 
         if (!icq) {
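For context on why the lockdep assertion and the queue_lock round-trips could be dropped: the icq is found through a hint pointer and a radix tree, both published under RCU, so the issue path only needs an RCU read-side section. The body of the resulting ioc_lookup_icq() looks roughly like this; the radix-tree fallback is not visible in the hunk above, so treat it as a reconstruction from the surrounding source rather than part of the diff:

        rcu_read_lock();
        icq = rcu_dereference(ioc->icq_hint);
        if (icq && icq->q == q)
                goto out;

        /* hint miss: fall back to the radix tree keyed by queue id */
        icq = radix_tree_lookup(&ioc->icq_tree, q->id);
        if (icq && icq->q == q)
                rcu_assign_pointer(ioc->icq_hint, icq); /* refresh hint */
        else
                icq = NULL;
out:
        rcu_read_unlock();
        return icq;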
