@@ -25,6 +25,7 @@ static void ioem_error_injection(struct request* rq);
 /**
  * struct ioem_data - the main data of ioem
  * @root: The rb tree root, which is sorted according to `time_to_send`
+ * @list: The list head, which stores the requests held back by the IOPS limitation
  * @lock: The spinlock of the whole structure
  * @timer: The timer used to trigger the dispatch after reaching the
  * `time_to_send`.
@@ -47,6 +48,8 @@ static void ioem_error_injection(struct request* rq);
  */
 struct ioem_data {
     struct rb_root_cached root;
+    struct list_head list;
+
     spinlock_t lock;
 
     struct hrtimer timer;
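
With this change the scheduler keeps requests in two places: the rb tree orders delayed requests by `time_to_send`, and the new `list` parks requests that ran into the IOPS quota until the limiter's counter resets. A minimal userspace sketch of how the two containers jointly decide the next timer expiry (all names below are illustrative stand-ins, not driver code; `0` encodes "empty" the same way `time_to_send == 0` does in `ioem_dequeue` further down):

```c
#include <stdio.h>

/* Illustrative stand-in for ioem_data: only the earliest deadline of each
 * container matters when (re)arming the timer.  0 means "container empty". */
struct sketch {
    unsigned long long tree_min;  /* earliest time_to_send in the rb tree */
    unsigned long long list_head; /* wake-up time of the first parked request */
};

/* mirrors the `time_to_send == 0 ? take : min(...)` pattern in ioem_dequeue */
static unsigned long long next_expiry(const struct sketch *s)
{
    if (s->tree_min == 0)
        return s->list_head;
    if (s->list_head == 0)
        return s->tree_min;
    return s->tree_min < s->list_head ? s->tree_min : s->list_head;
}

int main(void)
{
    struct sketch s = { 300, 150 };
    printf("arm hrtimer at t=%llu\n", next_expiry(&s)); /* prints t=150 */
    return 0;
}
```

In the driver, this min-of-both rule is what keeps the hrtimer armed for whichever request, parked or merely delayed, becomes ready first.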
@@ -81,6 +84,8 @@ static bool ioem_limit_should_affect(struct ioem_data* data, struct request* rq)
 struct ioem_priv {
     u64 time_to_send;
     bool ioem_limit_should_affect;
+    bool in_rbtree;
+    bool in_list;
 }__attribute__((packed));
 
 struct ioem_priv* ioem_priv(struct request* rq)
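
The two new flags record which container currently holds the request, so removal can be called unconditionally (see `ioem_erase_head` below). `ioem_priv` appears to be overlaid on storage borrowed from inside `struct request` (the accessor above returns a pointer rather than allocating), which is presumably why it stays `packed`. A quick userspace check of the layout; the field types mirror the diff, while the exact size assertion is my own assumption, not a driver requirement:

```c
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* same layout as the driver's struct, with u64 spelled as uint64_t */
struct ioem_priv {
    uint64_t time_to_send;
    bool ioem_limit_should_affect;
    bool in_rbtree;
    bool in_list;
} __attribute__((packed));

int main(void)
{
    /* packed: 8 + 1 + 1 + 1 = 11 bytes; without the attribute the
     * compiler would likely pad the struct out to 16 bytes */
    printf("sizeof(struct ioem_priv) = %zu\n", sizeof(struct ioem_priv));
    assert(sizeof(struct ioem_priv) == 11);
    return 0;
}
```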
@@ -232,9 +237,19 @@ static void ioem_data_sync_with_injections(struct ioem_data* data);
  */
 static void ioem_erase_head(struct ioem_data* data, struct request* rq)
 {
-    rb_erase_cached(&rq->rb_node, &data->root);
-    RB_CLEAR_NODE(&rq->rb_node);
-    INIT_LIST_HEAD(&rq->queuelist);
+    if (ioem_priv(rq)->in_rbtree) {
+        rb_erase_cached(&rq->rb_node, &data->root);
+        RB_CLEAR_NODE(&rq->rb_node);
+
+        ioem_priv(rq)->in_rbtree = false;
+    }
+
+    if (ioem_priv(rq)->in_list) {
+        list_del(&rq->queuelist);
+        INIT_LIST_HEAD(&rq->queuelist);
+
+        ioem_priv(rq)->in_list = false;
+    }
 }
 
 /**
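
Because a request can now sit in either the rb tree or the waiting list, or in neither, `ioem_erase_head` consults the membership flags and becomes idempotent. Here is the same idiom in a self-contained userspace form, with a hand-rolled intrusive list standing in for `list_head` (all names are mine):

```c
#include <assert.h>
#include <stdbool.h>

struct node {
    struct node *prev, *next; /* intrusive links, like rq->queuelist */
    bool linked;              /* membership flag, like in_list/in_rbtree */
};

static void node_init(struct node *n)
{
    n->prev = n->next = n; /* self-linked empty state, like INIT_LIST_HEAD */
    n->linked = false;
}

static void list_push(struct node *head, struct node *n)
{
    n->prev = head->prev;
    n->next = head;
    head->prev->next = n;
    head->prev = n;
    n->linked = true;
}

/* safe to call twice, or on a node that was never inserted */
static void list_remove(struct node *n)
{
    if (!n->linked)
        return;
    n->prev->next = n->next;
    n->next->prev = n->prev;
    node_init(n); /* also clears the flag */
}

int main(void)
{
    struct node head, a;
    node_init(&head);
    node_init(&a);
    list_push(&head, &a);
    list_remove(&a);
    list_remove(&a); /* idempotent: second call is a no-op */
    assert(head.next == &head && head.prev == &head);
    return 0;
}
```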
@@ -268,6 +283,8 @@ static void ioem_data_init(struct ioem_data* data, enum hrtimer_restart (*functi
 
     spin_lock_init(&data->lock);
     data->root = RB_ROOT_CACHED;
+    INIT_LIST_HEAD(&data->list);
+
     data->timer.function = function;
     data->next_expires = 0;
 }
@@ -302,6 +319,8 @@ static void ioem_enqueue(struct ioem_data *data, struct request *rq)
 
     rb_link_node(&rq->rb_node, parent, p);
     rb_insert_color_cached(&rq->rb_node, &data->root, leftmost);
+
+    ioem_priv(rq)->in_rbtree = true;
 }
 
 /**
@@ -318,60 +337,100 @@ static void ioem_enqueue(struct ioem_data *data, struct request *rq)
  */
 static struct request* ioem_dequeue(struct ioem_data* data)
 {
-    u64 now, time_to_send;
+    u64 now, time_to_send = 0;
     struct request* rq = NULL;
+    struct irl_dispatch_return irl_ret;
+
+    now = ktime_get_ns();
+    if (!list_empty(&data->list)) {
+        rq = list_first_entry(&data->list, struct request, queuelist);
+        // if now is earlier than the `time_to_send`, there is no need to try
+        // to dispatch
+        if (now >= ioem_priv(rq)->time_to_send) {
+            irl_ret = irl_dispatch(data, rq);
+            if (irl_ret.dispatch > 0) {
+                // not exceeded, return the request
+                ioem_erase_head(data, rq);
+                goto out;
+            } else {
+                ioem_priv(rq)->time_to_send = irl_ret.time_to_send;
+                time_to_send = irl_ret.time_to_send;
+            }
+        }
 
+        rq = NULL;
+    }
+
+    // at this point, rq is NULL, and `time_to_send` is either 0 or the next
+    // time at which the irl counter will be reset.
     if (RB_EMPTY_ROOT(&data->root.rb_root)) {
-        return NULL;
+        goto out;
     }
 
-    now = ktime_get_ns();
     while (true) {
-        struct irl_dispatch_return irl_ret;
-
         rq = ioem_peek_request(data);
-        time_to_send = ioem_priv(rq)->time_to_send;
+        if (time_to_send == 0) {
+            time_to_send = ioem_priv(rq)->time_to_send;
+        } else {
+            time_to_send = min(ioem_priv(rq)->time_to_send, time_to_send);
+        }
 
         // if this request's `time_to_send` is later than now, all later
         // requests will also be later than now, so we need to return without
         // any request dispatched.
         if (time_to_send > now) {
             rq = NULL;
-            break;
+            goto out;
         }
 
         // check the IRL to decide whether the quota has been exceeded
         ioem_erase_head(data, rq);
 
-        irl_ret = irl_dispatch(data, rq);
-        if (irl_ret.dispatch > 0) {
-            // not exceeded, return the request
-            break;
-        } else {
-            // exceeded. Modify the time_to_send of this request, and reinsert
-            // to the rb_tree.
-            ioem_priv(rq)->time_to_send = irl_ret.time_to_send;
-            ioem_enqueue(data, rq);
-
-            rq = NULL;
+        if (ioem_priv(rq)->ioem_limit_should_affect) {
+            irl_ret = irl_dispatch(data, rq);
+            if (irl_ret.dispatch > 0) {
+                // not exceeded, return the request
+                goto out;
+            } else {
+                // exceeded. Modify the time_to_send of this request, and move
+                // it to the waiting list.
+                ioem_priv(rq)->time_to_send = irl_ret.time_to_send;
+                list_add_tail(&rq->queuelist, &data->list);
+                ioem_priv(rq)->in_list = true;
+
+                rq = NULL;
+
+                // the rb tree may have just drained into the waiting list;
+                // bail out before peeking an empty tree
+                if (RB_EMPTY_ROOT(&data->root.rb_root)) {
+                    goto out;
+                }
+            }
+        } else {
+            // this request is not affected by the IOPS limit, dispatch it directly
+            goto out;
         }
     }
 
+out:
     // There are three possible situations to reach here:
     // 1. The request is not NULL and is prepared to send
     // 2. The earliest time_to_send is later than now
+    // 3. Both the rb tree and the waiting list are empty
     if (rq != NULL) {
         return rq;
     }
 
-    if (hrtimer_is_queued(&data->timer)) {
-        if (data->next_expires <= time_to_send) {
-            return NULL;
+    if (time_to_send != 0) {
+        if (hrtimer_is_queued(&data->timer)) {
+            if (data->next_expires <= time_to_send) {
+                return NULL;
+            }
         }
-    }
 
-    data->next_expires = time_to_send;
-    hrtimer_start(&data->timer, ns_to_ktime(time_to_send), HRTIMER_MODE_ABS_PINNED);
+        data->next_expires = time_to_send;
+        hrtimer_start(&data->timer, ns_to_ktime(time_to_send), HRTIMER_MODE_ABS_PINNED);
+    }
 
     return NULL;
 }
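
The loop above delegates the actual rate decision to `irl_dispatch`, which is defined elsewhere in the driver. The sketch below is only my reading of its contract, a fixed number of dispatches per time window, with a refusal reporting when the counter resets, so that the two outcomes `ioem_dequeue` handles (dispatch now vs. park until `time_to_send`) can be run standalone. The names and the userspace signature are illustrative; the real function takes the scheduler data and the request, and returns an integer-valued `dispatch` field:

```c
#include <stdbool.h>
#include <stdio.h>

/* Assumed contract: at most `quota` dispatches per window of `period_ns`;
 * a refusal reports when the counter resets. */
struct irl {
    unsigned long long period_ns;
    unsigned long long period_end;
    unsigned long long quota;
    unsigned long long issued;
};

struct irl_dispatch_return {
    bool dispatch;                   /* true: under quota, send the request */
    unsigned long long time_to_send; /* otherwise: retry at the window reset */
};

static struct irl_dispatch_return irl_dispatch(struct irl *irl,
                                               unsigned long long now)
{
    struct irl_dispatch_return ret = { false, 0 };

    if (now >= irl->period_end) {   /* window rolled over: reset counter */
        irl->period_end = now + irl->period_ns;
        irl->issued = 0;
    }
    if (irl->issued < irl->quota) { /* quota left: dispatch immediately */
        irl->issued++;
        ret.dispatch = true;
    } else {                        /* quota exhausted: park until reset */
        ret.time_to_send = irl->period_end;
    }
    return ret;
}

int main(void)
{
    struct irl irl = { 1000000000ULL, 0, 2, 0 }; /* 2 IOs per second */
    for (int i = 0; i < 4; i++) {
        struct irl_dispatch_return r = irl_dispatch(&irl, 100);
        printf("io %d: %s, retry at %llu\n", i,
               r.dispatch ? "dispatch" : "parked", r.time_to_send);
    }
    return 0;
}
```

The first two calls dispatch, the last two are parked with `time_to_send` set to the window reset, which is exactly the value `ioem_dequeue` uses to arm the hrtimer.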
@@ -523,7 +572,7 @@ static bool ioem_mq_has_work(struct blk_mq_hw_ctx* hctx)
     struct ioem_data* id = hctx->sched_data;
     bool has_work = 0;
 
-    has_work = !RB_EMPTY_ROOT(&id->root.rb_root);
+    has_work = !(RB_EMPTY_ROOT(&id->root.rb_root) && list_empty(&id->list));
 
     return has_work;
 }
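
Finally, `has_work` must report pending work if either container is non-empty. The diff states this with De Morgan's law over the two emptiness checks; a tiny truth-table program confirms that the form used is equivalent to the direct `!empty(tree) || !empty(list)`:

```c
#include <assert.h>
#include <stdbool.h>

int main(void)
{
    for (int tree_empty = 0; tree_empty < 2; tree_empty++) {
        for (int list_empty = 0; list_empty < 2; list_empty++) {
            bool demorgan = !(tree_empty && list_empty);
            bool direct   = !tree_empty || !list_empty;
            assert(demorgan == direct); /* holds for all four cases */
        }
    }
    return 0;
}
```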