
Commit 42ef30f

YuKuai-huawei authored and kawasaki committed
lib/sbitmap: convert shallow_depth from one word to the whole sbitmap
Currently, elevators record an internal 'async_depth' to throttle asynchronous requests, and they all calculate shallow_depth based on sb->shift, on the assumption that sb->shift is the number of available tags in one word. However, sb->shift is not the number of available tags in the last word; see __map_depth():

        if (index == sb->map_nr - 1)
                return sb->depth - (index << sb->shift);

As a consequence, if the last word is used, more tags can be obtained than expected. For example, assume nr_requests=256 so that there are four words; in the worst case, if the user sets nr_requests=32, the first word is also the last word, yet the bits per word, 64, is still used to calculate async_depth, which is wrong.

On the other hand, due to cgroup QoS, bfq may allow only one request to be allocated, yet setting shallow_depth=1 still allows as many requests as there are words to be allocated.

Fix these problems by applying shallow_depth to the whole sbitmap instead of to each word, and change kyber, mq-deadline and bfq to follow this.

Signed-off-by: Yu Kuai <[email protected]>
1 parent 0b59764 commit 42ef30f
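
To make the arithmetic above concrete, here is a minimal standalone sketch (not kernel code; the struct below only mirrors the depth/shift/map_nr fields of struct sbitmap, and 64-bit words, i.e. shift = 6, are assumed) showing how a per-word limit derived from sb->shift stops throttling once the last word is short:

#include <stdio.h>

/* Simplified stand-in for the fields of struct sbitmap used in the message. */
struct sbitmap {
        unsigned int depth;   /* total number of bits (nr_requests) */
        unsigned int shift;   /* log2 of bits per full word */
        unsigned int map_nr;  /* number of words */
};

/* Mirrors __map_depth(): only the last word may hold fewer bits. */
static unsigned int map_depth(const struct sbitmap *sb, int index)
{
        if (index == (int)sb->map_nr - 1)
                return sb->depth - (index << sb->shift);
        return 1U << sb->shift;
}

int main(void)
{
        /* Worst case from the commit message: nr_requests lowered to 32. */
        struct sbitmap sb = { .depth = 32, .shift = 6, .map_nr = 1 };

        /* Old scheme: a fraction (here 75%) of bits-per-word, not of depth. */
        unsigned int per_word_async = (1U << sb.shift) * 75 / 100;     /* 48 */

        printf("last word actually holds %u bits\n", map_depth(&sb, 0)); /* 32 */
        printf("old per-word async depth %u exceeds %u bits, so no throttling\n",
               per_word_async, map_depth(&sb, 0));
        return 0;
}

With the old scheme the computed limit (48) exceeds the 32 bits the word actually holds, so asynchronous requests are effectively unthrottled, which is the problem the commit message describes.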

File tree

6 files changed, +51 -73 lines changed


block/bfq-iosched.c

Lines changed: 16 additions & 19 deletions
@@ -701,17 +701,13 @@ static void bfq_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
 {
        struct bfq_data *bfqd = data->q->elevator->elevator_data;
        struct bfq_io_cq *bic = bfq_bic_lookup(data->q);
-       int depth;
-       unsigned limit = data->q->nr_requests;
-       unsigned int act_idx;
+       unsigned int limit, act_idx;

        /* Sync reads have full depth available */
-       if (op_is_sync(opf) && !op_is_write(opf)) {
-               depth = 0;
-       } else {
-               depth = bfqd->word_depths[!!bfqd->wr_busy_queues][op_is_sync(opf)];
-               limit = (limit * depth) >> bfqd->full_depth_shift;
-       }
+       if (op_is_sync(opf) && !op_is_write(opf))
+               limit = data->q->nr_requests;
+       else
+               limit = bfqd->async_depths[!!bfqd->wr_busy_queues][op_is_sync(opf)];

        for (act_idx = 0; bic && act_idx < bfqd->num_actuators; act_idx++) {
                /* Fast path to check if bfqq is already allocated. */
@@ -725,14 +721,16 @@ static void bfq_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
                 * available requests and thus starve other entities.
                 */
                if (bfqq_request_over_limit(bfqd, bic, opf, act_idx, limit)) {
-                       depth = 1;
+                       limit = 1;
                        break;
                }
        }
+
        bfq_log(bfqd, "[%s] wr_busy %d sync %d depth %u",
-                       __func__, bfqd->wr_busy_queues, op_is_sync(opf), depth);
-       if (depth)
-               data->shallow_depth = depth;
+                       __func__, bfqd->wr_busy_queues, op_is_sync(opf), limit);
+
+       if (limit < data->q->nr_requests)
+               data->shallow_depth = limit;
 }

 static struct bfq_queue *
@@ -7128,9 +7126,8 @@ void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg)
  */
 static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt)
 {
-       unsigned int depth = 1U << bt->sb.shift;
+       unsigned int nr_requests = bfqd->queue->nr_requests;

-       bfqd->full_depth_shift = bt->sb.shift;
        /*
         * In-word depths if no bfq_queue is being weight-raised:
         * leaving 25% of tags only for sync reads.
@@ -7142,13 +7139,13 @@ static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt)
         * limit 'something'.
         */
        /* no more than 50% of tags for async I/O */
-       bfqd->word_depths[0][0] = max(depth >> 1, 1U);
+       bfqd->async_depths[0][0] = max(nr_requests >> 1, 1U);
        /*
         * no more than 75% of tags for sync writes (25% extra tags
         * w.r.t. async I/O, to prevent async I/O from starving sync
         * writes)
         */
-       bfqd->word_depths[0][1] = max((depth * 3) >> 2, 1U);
+       bfqd->async_depths[0][1] = max((nr_requests * 3) >> 2, 1U);

        /*
         * In-word depths in case some bfq_queue is being weight-
@@ -7158,9 +7155,9 @@ static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt)
         * shortage.
         */
        /* no more than ~18% of tags for async I/O */
-       bfqd->word_depths[1][0] = max((depth * 3) >> 4, 1U);
+       bfqd->async_depths[1][0] = max((nr_requests * 3) >> 4, 1U);
        /* no more than ~37% of tags for sync writes (~20% extra tags) */
-       bfqd->word_depths[1][1] = max((depth * 6) >> 4, 1U);
+       bfqd->async_depths[1][1] = max((nr_requests * 6) >> 4, 1U);
 }

 static void bfq_depth_updated(struct blk_mq_hw_ctx *hctx)
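
For a quick check of the new bfq_update_depths() arithmetic, a standalone recomputation (a sketch only, assuming nr_requests = 256; max() is open-coded here) gives 128/192 tags without weight-raising and 48/96 with it:

#include <stdio.h>

#define MAX_U(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
        unsigned int nr_requests = 256;   /* example queue depth */
        unsigned int async_depths[2][2];

        /* Same formulas as the new bfq_update_depths() above. */
        async_depths[0][0] = MAX_U(nr_requests >> 1, 1U);        /* 128: async, not weight-raised */
        async_depths[0][1] = MAX_U((nr_requests * 3) >> 2, 1U);  /* 192: sync writes, not weight-raised */
        async_depths[1][0] = MAX_U((nr_requests * 3) >> 4, 1U);  /*  48: async, weight-raised */
        async_depths[1][1] = MAX_U((nr_requests * 6) >> 4, 1U);  /*  96: sync writes, weight-raised */

        printf("%u %u %u %u\n", async_depths[0][0], async_depths[0][1],
               async_depths[1][0], async_depths[1][1]);
        return 0;
}

The limits are now fractions of the whole queue depth rather than of one sbitmap word, so they stay meaningful when nr_requests does not fill the last word.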

block/bfq-iosched.h

Lines changed: 1 addition & 2 deletions
@@ -813,8 +813,7 @@ struct bfq_data {
         * Depth limits used in bfq_limit_depth (see comments on the
         * function)
         */
-       unsigned int word_depths[2][2];
-       unsigned int full_depth_shift;
+       unsigned int async_depths[2][2];

        /*
         * Number of independent actuators. This is equal to 1 in

block/kyber-iosched.c

Lines changed: 2 additions & 7 deletions
@@ -157,10 +157,7 @@ struct kyber_queue_data {
         */
        struct sbitmap_queue domain_tokens[KYBER_NUM_DOMAINS];

-       /*
-        * Async request percentage, converted to per-word depth for
-        * sbitmap_get_shallow().
-        */
+       /* Number of allowed async requests. */
        unsigned int async_depth;

        struct kyber_cpu_latency __percpu *cpu_latency;
@@ -454,10 +451,8 @@ static void kyber_depth_updated(struct blk_mq_hw_ctx *hctx)
 {
        struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data;
        struct blk_mq_tags *tags = hctx->sched_tags;
-       unsigned int shift = tags->bitmap_tags.sb.shift;
-
-       kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U;

+       kqd->async_depth = hctx->queue->nr_requests * KYBER_ASYNC_PERCENT / 100U;
        sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, kqd->async_depth);
 }
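
The practical effect of the kyber change, sketched with assumed values (KYBER_ASYNC_PERCENT is taken as 75 here, and a 64-bit word, shift = 6): the old code limited each word separately, while the new code limits the whole bitmap.

#include <stdio.h>

#define KYBER_ASYNC_PERCENT 75  /* value assumed from kyber-iosched.c */

int main(void)
{
        unsigned int nr_requests = 256; /* example queue depth */
        unsigned int shift = 6;         /* 64 bits per sbitmap word */

        /* Old: limit computed per word. */
        unsigned int old = (1U << shift) * KYBER_ASYNC_PERCENT / 100U;  /* 48 */
        /* New: limit applies to the whole bitmap. */
        unsigned int new = nr_requests * KYBER_ASYNC_PERCENT / 100U;    /* 192 */

        printf("per-word async depth %u, whole-bitmap async depth %u\n", old, new);
        return 0;
}

With four full 64-bit words the two schemes allow the same 192 asynchronous tags in total, but once the last word is shorter than a full word the per-word value no longer tracks the real queue depth.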

block/mq-deadline.c

Lines changed: 1 addition & 15 deletions
@@ -487,20 +487,6 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
        return rq;
 }

-/*
- * 'depth' is a number in the range 1..INT_MAX representing a number of
- * requests. Scale it with a factor (1 << bt->sb.shift) / q->nr_requests since
- * 1..(1 << bt->sb.shift) is the range expected by sbitmap_get_shallow().
- * Values larger than q->nr_requests have the same effect as q->nr_requests.
- */
-static int dd_to_word_depth(struct blk_mq_hw_ctx *hctx, unsigned int qdepth)
-{
-       struct sbitmap_queue *bt = &hctx->sched_tags->bitmap_tags;
-       const unsigned int nrr = hctx->queue->nr_requests;
-
-       return ((qdepth << bt->sb.shift) + nrr - 1) / nrr;
-}
-
 /*
  * Called by __blk_mq_alloc_request(). The shallow_depth value set by this
  * function is used by __blk_mq_get_tag().
@@ -517,7 +503,7 @@ static void dd_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
         * Throttle asynchronous requests and writes such that these requests
         * do not block the allocation of synchronous requests.
         */
-       data->shallow_depth = dd_to_word_depth(data->hctx, dd->async_depth);
+       data->shallow_depth = dd->async_depth;
 }

 /* Called by blk_mq_update_nr_requests(). */
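
To see what removing dd_to_word_depth() changes in practice, a standalone sketch (assuming nr_requests = 256, 64-bit words, and an async_depth of 3/4 of the queue depth, which is only an assumed default here):

#include <stdio.h>

int main(void)
{
        unsigned int nrr = 256;                 /* q->nr_requests (example) */
        unsigned int shift = 6;                 /* 64 bits per sbitmap word */
        unsigned int async_depth = nrr * 3 / 4; /* 192; assumed default ratio */

        /* Old: scale the request count into the 1..(1 << shift) per-word range. */
        unsigned int old = ((async_depth << shift) + nrr - 1) / nrr;    /* 48 */
        /* New: shallow_depth is simply the request count itself. */
        unsigned int new = async_depth;                                 /* 192 */

        printf("old per-word shallow_depth %u, new whole-bitmap shallow_depth %u\n",
               old, new);
        return 0;
}

Since shallow_depth is now interpreted against the whole sbitmap, the scheduler can pass its request count directly and the conversion helper becomes unnecessary.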

include/linux/sbitmap.h

Lines changed: 3 additions & 3 deletions
@@ -213,12 +213,12 @@ int sbitmap_get(struct sbitmap *sb);
  * sbitmap_get_shallow() - Try to allocate a free bit from a &struct sbitmap,
  * limiting the depth used from each word.
  * @sb: Bitmap to allocate from.
- * @shallow_depth: The maximum number of bits to allocate from a single word.
+ * @shallow_depth: The maximum number of bits to allocate from the bitmap.
  *
  * This rather specific operation allows for having multiple users with
  * different allocation limits. E.g., there can be a high-priority class that
  * uses sbitmap_get() and a low-priority class that uses sbitmap_get_shallow()
- * with a @shallow_depth of (1 << (@sb->shift - 1)). Then, the low-priority
+ * with a @shallow_depth of (sb->depth >> 1). Then, the low-priority
  * class can only allocate half of the total bits in the bitmap, preventing it
  * from starving out the high-priority class.
  *
@@ -478,7 +478,7 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags,
 * sbitmap_queue, limiting the depth used from each word, with preemption
 * already disabled.
 * @sbq: Bitmap queue to allocate from.
- * @shallow_depth: The maximum number of bits to allocate from a single word.
+ * @shallow_depth: The maximum number of bits to allocate from the queue.
 * See sbitmap_get_shallow().
 *
 * If you call this, make sure to call sbitmap_queue_min_shallow_depth() after

lib/sbitmap.c

Lines changed: 28 additions & 27 deletions
@@ -208,8 +208,27 @@ static int sbitmap_find_bit_in_word(struct sbitmap_word *map,
        return nr;
 }

+static unsigned int __map_depth_with_shallow(const struct sbitmap *sb,
+                                            int index,
+                                            unsigned int shallow_depth)
+{
+       unsigned int lower_bound = 0;
+
+       if (shallow_depth >= sb->depth)
+               return __map_depth(sb, index);
+
+       if (index > 0)
+               lower_bound += (index - 1) << sb->shift;
+
+       if (shallow_depth <= lower_bound)
+               return 0;
+
+       return min_t(unsigned int, __map_depth(sb, index),
+                    shallow_depth - lower_bound);
+}
+
 static int sbitmap_find_bit(struct sbitmap *sb,
-                           unsigned int depth,
+                           unsigned int shallow_depth,
                            unsigned int index,
                            unsigned int alloc_hint,
                            bool wrap)
@@ -218,12 +237,12 @@ static int sbitmap_find_bit(struct sbitmap *sb,
        int nr = -1;

        for (i = 0; i < sb->map_nr; i++) {
-               nr = sbitmap_find_bit_in_word(&sb->map[index],
-                                             min_t(unsigned int,
-                                                   __map_depth(sb, index),
-                                                   depth),
-                                             alloc_hint, wrap);
+               unsigned int depth = __map_depth_with_shallow(sb, index,
+                                                             shallow_depth);

+               if (depth)
+                       nr = sbitmap_find_bit_in_word(&sb->map[index], depth,
+                                                     alloc_hint, wrap);
                if (nr != -1) {
                        nr += index << sb->shift;
                        break;
@@ -406,27 +425,9 @@ EXPORT_SYMBOL_GPL(sbitmap_bitmap_show);
 static unsigned int sbq_calc_wake_batch(struct sbitmap_queue *sbq,
                                        unsigned int depth)
 {
-       unsigned int wake_batch;
-       unsigned int shallow_depth;
-
-       /*
-        * Each full word of the bitmap has bits_per_word bits, and there might
-        * be a partial word. There are depth / bits_per_word full words and
-        * depth % bits_per_word bits left over. In bitwise arithmetic:
-        *
-        * bits_per_word = 1 << shift
-        * depth / bits_per_word = depth >> shift
-        * depth % bits_per_word = depth & ((1 << shift) - 1)
-        *
-        * Each word can be limited to sbq->min_shallow_depth bits.
-        */
-       shallow_depth = min(1U << sbq->sb.shift, sbq->min_shallow_depth);
-       depth = ((depth >> sbq->sb.shift) * shallow_depth +
-                min(depth & ((1U << sbq->sb.shift) - 1), shallow_depth));
-       wake_batch = clamp_t(unsigned int, depth / SBQ_WAIT_QUEUES, 1,
-                            SBQ_WAKE_BATCH);
-
-       return wake_batch;
+       return clamp_t(unsigned int,
+                      min(depth, sbq->min_shallow_depth) / SBQ_WAIT_QUEUES,
+                      1, SBQ_WAKE_BATCH);
 }

 int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
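
The simplified sbq_calc_wake_batch() can be checked with a standalone sketch (a hedged recomputation only; SBQ_WAIT_QUEUES and SBQ_WAKE_BATCH are assumed to be 8, their usual values in include/linux/sbitmap.h):

#include <stdio.h>

#define SBQ_WAIT_QUEUES 8       /* values assumed from include/linux/sbitmap.h */
#define SBQ_WAKE_BATCH  8

static unsigned int clamp_uint(unsigned int v, unsigned int lo, unsigned int hi)
{
        return v < lo ? lo : (v > hi ? hi : v);
}

/* Same arithmetic as the simplified sbq_calc_wake_batch() above. */
static unsigned int calc_wake_batch(unsigned int depth, unsigned int min_shallow_depth)
{
        unsigned int d = depth < min_shallow_depth ? depth : min_shallow_depth;

        return clamp_uint(d / SBQ_WAIT_QUEUES, 1, SBQ_WAKE_BATCH);
}

int main(void)
{
        /* depth = 256 with no shallow users: 256 / 8 = 32, clamped to 8. */
        printf("%u\n", calc_wake_batch(256, ~0U));
        /* depth = 256 but a shallow user limited to 48 tags: 48 / 8 = 6. */
        printf("%u\n", calc_wake_batch(256, 48));
        return 0;
}

Because min_shallow_depth is now a bitmap-wide count, the wake batch follows directly from min(depth, min_shallow_depth) without any per-word bookkeeping.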
