Skip to content

Commit 31cf92f

Browse files
committed
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe: "Three patches that should go into this release. Two of them are from Paolo and fix up some corner cases with BFQ, and the last patch is from Ming and fixes up a potential usage count imbalance regression due to the recent NOWAIT work" * 'for-linus' of git://git.kernel.dk/linux-block: blk-mq: don't leak preempt counter/q_usage_counter when allocating rq failed block, bfq: consider also in_service_entity to state whether an entity is active block, bfq: reset in_service_entity if it becomes idle
2 parents d555eb6 + 1ad43c0 commit 31cf92f

File tree

3 files changed

+109
-76
lines changed

3 files changed

+109
-76
lines changed

block/bfq-iosched.h

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -71,17 +71,29 @@ struct bfq_service_tree {
7171
*
7272
* bfq_sched_data is the basic scheduler queue. It supports three
7373
* ioprio_classes, and can be used either as a toplevel queue or as an
74-
* intermediate queue on a hierarchical setup. @next_in_service
75-
* points to the active entity of the sched_data service trees that
76-
* will be scheduled next. It is used to reduce the number of steps
77-
* needed for each hierarchical-schedule update.
74+
* intermediate queue in a hierarchical setup.
7875
*
7976
* The supported ioprio_classes are the same as in CFQ, in descending
8077
* priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE.
8178
* Requests from higher priority queues are served before all the
8279
* requests from lower priority queues; among requests of the same
8380
* queue requests are served according to B-WF2Q+.
84-
* All the fields are protected by the queue lock of the containing bfqd.
81+
*
82+
* The schedule is implemented by the service trees, plus the field
83+
* @next_in_service, which points to the entity on the active trees
84+
* that will be served next, if 1) no changes in the schedule occurs
85+
* before the current in-service entity is expired, 2) the in-service
86+
* queue becomes idle when it expires, and 3) if the entity pointed by
87+
* in_service_entity is not a queue, then the in-service child entity
88+
* of the entity pointed by in_service_entity becomes idle on
89+
* expiration. This peculiar definition allows for the following
90+
* optimization, not yet exploited: while a given entity is still in
91+
* service, we already know which is the best candidate for next
92+
* service among the other active entitities in the same parent
93+
* entity. We can then quickly compare the timestamps of the
94+
* in-service entity with those of such best candidate.
95+
*
96+
* All fields are protected by the lock of the containing bfqd.
8597
*/
8698
struct bfq_sched_data {
8799
/* entity in service */

block/bfq-wf2q.c

Lines changed: 81 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -188,21 +188,23 @@ static bool bfq_update_parent_budget(struct bfq_entity *next_in_service)
188188

189189
/*
190190
* This function tells whether entity stops being a candidate for next
191-
* service, according to the following logic.
191+
* service, according to the restrictive definition of the field
192+
* next_in_service. In particular, this function is invoked for an
193+
* entity that is about to be set in service.
192194
*
193-
* This function is invoked for an entity that is about to be set in
194-
* service. If such an entity is a queue, then the entity is no longer
195-
* a candidate for next service (i.e, a candidate entity to serve
196-
* after the in-service entity is expired). The function then returns
197-
* true.
195+
* If entity is a queue, then the entity is no longer a candidate for
196+
* next service according to the that definition, because entity is
197+
* about to become the in-service queue. This function then returns
198+
* true if entity is a queue.
198199
*
199-
* In contrast, the entity could stil be a candidate for next service
200-
* if it is not a queue, and has more than one child. In fact, even if
201-
* one of its children is about to be set in service, other children
202-
* may still be the next to serve. As a consequence, a non-queue
203-
* entity is not a candidate for next-service only if it has only one
204-
* child. And only if this condition holds, then the function returns
205-
* true for a non-queue entity.
200+
* In contrast, entity could still be a candidate for next service if
201+
* it is not a queue, and has more than one active child. In fact,
202+
* even if one of its children is about to be set in service, other
203+
* active children may still be the next to serve, for the parent
204+
* entity, even according to the above definition. As a consequence, a
205+
* non-queue entity is not a candidate for next-service only if it has
206+
* only one active child. And only if this condition holds, then this
207+
* function returns true for a non-queue entity.
206208
*/
207209
static bool bfq_no_longer_next_in_service(struct bfq_entity *entity)
208210
{
@@ -213,6 +215,18 @@ static bool bfq_no_longer_next_in_service(struct bfq_entity *entity)
213215

214216
bfqg = container_of(entity, struct bfq_group, entity);
215217

218+
/*
219+
* The field active_entities does not always contain the
220+
* actual number of active children entities: it happens to
221+
* not account for the in-service entity in case the latter is
222+
* removed from its active tree (which may get done after
223+
* invoking the function bfq_no_longer_next_in_service in
224+
* bfq_get_next_queue). Fortunately, here, i.e., while
225+
* bfq_no_longer_next_in_service is not yet completed in
226+
* bfq_get_next_queue, bfq_active_extract has not yet been
227+
* invoked, and thus active_entities still coincides with the
228+
* actual number of active entities.
229+
*/
216230
if (bfqg->active_entities == 1)
217231
return true;
218232

@@ -954,7 +968,7 @@ static void bfq_update_fin_time_enqueue(struct bfq_entity *entity,
954968
* one of its children receives a new request.
955969
*
956970
* Basically, this function updates the timestamps of entity and
957-
* inserts entity into its active tree, ater possible extracting it
971+
* inserts entity into its active tree, ater possibly extracting it
958972
* from its idle tree.
959973
*/
960974
static void __bfq_activate_entity(struct bfq_entity *entity,
@@ -1048,17 +1062,16 @@ static void __bfq_requeue_entity(struct bfq_entity *entity)
10481062
entity->start = entity->finish;
10491063
/*
10501064
* In addition, if the entity had more than one child
1051-
* when set in service, then was not extracted from
1065+
* when set in service, then it was not extracted from
10521066
* the active tree. This implies that the position of
10531067
* the entity in the active tree may need to be
10541068
* changed now, because we have just updated the start
10551069
* time of the entity, and we will update its finish
10561070
* time in a moment (the requeueing is then, more
10571071
* precisely, a repositioning in this case). To
10581072
* implement this repositioning, we: 1) dequeue the
1059-
* entity here, 2) update the finish time and
1060-
* requeue the entity according to the new
1061-
* timestamps below.
1073+
* entity here, 2) update the finish time and requeue
1074+
* the entity according to the new timestamps below.
10621075
*/
10631076
if (entity->tree)
10641077
bfq_active_extract(st, entity);
@@ -1105,9 +1118,10 @@ static void __bfq_activate_requeue_entity(struct bfq_entity *entity,
11051118

11061119

11071120
/**
1108-
* bfq_activate_entity - activate or requeue an entity representing a bfq_queue,
1109-
* and activate, requeue or reposition all ancestors
1110-
* for which such an update becomes necessary.
1121+
* bfq_activate_requeue_entity - activate or requeue an entity representing a
1122+
* bfq_queue, and activate, requeue or reposition
1123+
* all ancestors for which such an update becomes
1124+
* necessary.
11111125
* @entity: the entity to activate.
11121126
* @non_blocking_wait_rq: true if this entity was waiting for a request
11131127
* @requeue: true if this is a requeue, which implies that bfqq is
@@ -1135,9 +1149,9 @@ static void bfq_activate_requeue_entity(struct bfq_entity *entity,
11351149
* @ins_into_idle_tree: if false, the entity will not be put into the
11361150
* idle tree.
11371151
*
1138-
* Deactivates an entity, independently from its previous state. Must
1152+
* Deactivates an entity, independently of its previous state. Must
11391153
* be invoked only if entity is on a service tree. Extracts the entity
1140-
* from that tree, and if necessary and allowed, puts it on the idle
1154+
* from that tree, and if necessary and allowed, puts it into the idle
11411155
* tree.
11421156
*/
11431157
bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree)
@@ -1158,8 +1172,10 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree)
11581172
st = bfq_entity_service_tree(entity);
11591173
is_in_service = entity == sd->in_service_entity;
11601174

1161-
if (is_in_service)
1175+
if (is_in_service) {
11621176
bfq_calc_finish(entity, entity->service);
1177+
sd->in_service_entity = NULL;
1178+
}
11631179

11641180
if (entity->tree == &st->active)
11651181
bfq_active_extract(st, entity);
@@ -1177,7 +1193,7 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree)
11771193
/**
11781194
* bfq_deactivate_entity - deactivate an entity representing a bfq_queue.
11791195
* @entity: the entity to deactivate.
1180-
* @ins_into_idle_tree: true if the entity can be put on the idle tree
1196+
* @ins_into_idle_tree: true if the entity can be put into the idle tree
11811197
*/
11821198
static void bfq_deactivate_entity(struct bfq_entity *entity,
11831199
bool ins_into_idle_tree,
@@ -1208,16 +1224,29 @@ static void bfq_deactivate_entity(struct bfq_entity *entity,
12081224
*/
12091225
bfq_update_next_in_service(sd, NULL);
12101226

1211-
if (sd->next_in_service)
1227+
if (sd->next_in_service || sd->in_service_entity) {
12121228
/*
1213-
* The parent entity is still backlogged,
1214-
* because next_in_service is not NULL. So, no
1215-
* further upwards deactivation must be
1216-
* performed. Yet, next_in_service has
1217-
* changed. Then the schedule does need to be
1218-
* updated upwards.
1229+
* The parent entity is still active, because
1230+
* either next_in_service or in_service_entity
1231+
* is not NULL. So, no further upwards
1232+
* deactivation must be performed. Yet,
1233+
* next_in_service has changed. Then the
1234+
* schedule does need to be updated upwards.
1235+
*
1236+
* NOTE If in_service_entity is not NULL, then
1237+
* next_in_service may happen to be NULL,
1238+
* although the parent entity is evidently
1239+
* active. This happens if 1) the entity
1240+
* pointed by in_service_entity is the only
1241+
* active entity in the parent entity, and 2)
1242+
* according to the definition of
1243+
* next_in_service, the in_service_entity
1244+
* cannot be considered as
1245+
* next_in_service. See the comments on the
1246+
* definition of next_in_service for details.
12191247
*/
12201248
break;
1249+
}
12211250

12221251
/*
12231252
* If we get here, then the parent is no more
@@ -1494,47 +1523,34 @@ struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
14941523

14951524
/*
14961525
* If entity is no longer a candidate for next
1497-
* service, then we extract it from its active tree,
1498-
* for the following reason. To further boost the
1499-
* throughput in some special case, BFQ needs to know
1500-
* which is the next candidate entity to serve, while
1501-
* there is already an entity in service. In this
1502-
* respect, to make it easy to compute/update the next
1503-
* candidate entity to serve after the current
1504-
* candidate has been set in service, there is a case
1505-
* where it is necessary to extract the current
1506-
* candidate from its service tree. Such a case is
1507-
* when the entity just set in service cannot be also
1508-
* a candidate for next service. Details about when
1509-
* this conditions holds are reported in the comments
1510-
* on the function bfq_no_longer_next_in_service()
1511-
* invoked below.
1526+
* service, then it must be extracted from its active
1527+
* tree, so as to make sure that it won't be
1528+
* considered when computing next_in_service. See the
1529+
* comments on the function
1530+
* bfq_no_longer_next_in_service() for details.
15121531
*/
15131532
if (bfq_no_longer_next_in_service(entity))
15141533
bfq_active_extract(bfq_entity_service_tree(entity),
15151534
entity);
15161535

15171536
/*
1518-
* For the same reason why we may have just extracted
1519-
* entity from its active tree, we may need to update
1520-
* next_in_service for the sched_data of entity too,
1521-
* regardless of whether entity has been extracted.
1522-
* In fact, even if entity has not been extracted, a
1523-
* descendant entity may get extracted. Such an event
1524-
* would cause a change in next_in_service for the
1525-
* level of the descendant entity, and thus possibly
1526-
* back to upper levels.
1537+
* Even if entity is not to be extracted according to
1538+
* the above check, a descendant entity may get
1539+
* extracted in one of the next iterations of this
1540+
* loop. Such an event could cause a change in
1541+
* next_in_service for the level of the descendant
1542+
* entity, and thus possibly back to this level.
15271543
*
1528-
* We cannot perform the resulting needed update
1529-
* before the end of this loop, because, to know which
1530-
* is the correct next-to-serve candidate entity for
1531-
* each level, we need first to find the leaf entity
1532-
* to set in service. In fact, only after we know
1533-
* which is the next-to-serve leaf entity, we can
1534-
* discover whether the parent entity of the leaf
1535-
* entity becomes the next-to-serve, and so on.
1544+
* However, we cannot perform the resulting needed
1545+
* update of next_in_service for this level before the
1546+
* end of the whole loop, because, to know which is
1547+
* the correct next-to-serve candidate entity for each
1548+
* level, we need first to find the leaf entity to set
1549+
* in service. In fact, only after we know which is
1550+
* the next-to-serve leaf entity, we can discover
1551+
* whether the parent entity of the leaf entity
1552+
* becomes the next-to-serve, and so on.
15361553
*/
1537-
15381554
}
15391555

15401556
bfqq = bfq_entity_to_bfqq(entity);

block/blk-mq.c

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -301,11 +301,12 @@ static struct request *blk_mq_get_request(struct request_queue *q,
301301
struct elevator_queue *e = q->elevator;
302302
struct request *rq;
303303
unsigned int tag;
304+
struct blk_mq_ctx *local_ctx = NULL;
304305

305306
blk_queue_enter_live(q);
306307
data->q = q;
307308
if (likely(!data->ctx))
308-
data->ctx = blk_mq_get_ctx(q);
309+
data->ctx = local_ctx = blk_mq_get_ctx(q);
309310
if (likely(!data->hctx))
310311
data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
311312
if (op & REQ_NOWAIT)
@@ -324,6 +325,10 @@ static struct request *blk_mq_get_request(struct request_queue *q,
324325

325326
tag = blk_mq_get_tag(data);
326327
if (tag == BLK_MQ_TAG_FAIL) {
328+
if (local_ctx) {
329+
blk_mq_put_ctx(local_ctx);
330+
data->ctx = NULL;
331+
}
327332
blk_queue_exit(q);
328333
return NULL;
329334
}
@@ -356,12 +361,12 @@ struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
356361

357362
rq = blk_mq_get_request(q, NULL, op, &alloc_data);
358363

359-
blk_mq_put_ctx(alloc_data.ctx);
360-
blk_queue_exit(q);
361-
362364
if (!rq)
363365
return ERR_PTR(-EWOULDBLOCK);
364366

367+
blk_mq_put_ctx(alloc_data.ctx);
368+
blk_queue_exit(q);
369+
365370
rq->__data_len = 0;
366371
rq->__sector = (sector_t) -1;
367372
rq->bio = rq->biotail = NULL;
@@ -407,11 +412,11 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
407412

408413
rq = blk_mq_get_request(q, NULL, op, &alloc_data);
409414

410-
blk_queue_exit(q);
411-
412415
if (!rq)
413416
return ERR_PTR(-EWOULDBLOCK);
414417

418+
blk_queue_exit(q);
419+
415420
return rq;
416421
}
417422
EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);

0 commit comments

Comments
 (0)