@@ -29,7 +29,7 @@ static void ioem_error_injection(struct request* rq);
 /**
  * struct ioem_data - the main data of ioem
  * @root: The rb tree root, which is sorted according to `time_to_send`
- * @list: The list head, which is used to store the waiting requests for IOPS limitation
+ * @wait_queue: The wait_queue head, which is used to store the waiting requests for IOPS limitation
  * @lock: The spinlock of the whole structure
  * @timer: The timer used to trigger the dispatch after reaching the
  * `time_to_send`.
@@ -51,8 +51,15 @@ static void ioem_error_injection(struct request* rq);
  * this struct is only allocated per `request_queue`.
  */
 struct ioem_data {
+    // The rb tree root is used to handle requests with delay. The request
+    // with the smallest `time_to_send` is handled first. However, if the
+    // `delay` is the same, requests are inserted in increasing order of
+    // `time_to_send`, which may cause frequent rebalancing. Following the
+    // practice of netem, we could add a tail list to optimize for this
+    // situation. However, the current performance seems fine.
     struct rb_root_cached root;
-    struct list_head list;
+
+    struct list_head wait_queue;
 
     spinlock_t lock;
 
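For reference, here is a minimal sketch of the insertion pattern the new comment alludes to: with a cached rb-tree keyed on `time_to_send`, equal keys always descend to the right, which is exactly the degenerate case the netem-style tail list would avoid. `ioem_priv()` and the use of `rq->rb_node` are assumptions based on this diff, not the module's actual enqueue code.

```c
/*
 * Hypothetical sketch, not the module's real enqueue path: insert a
 * request into the cached rb-tree ordered by `time_to_send`.
 */
static void ioem_rb_insert_sketch(struct ioem_data *data, struct request *rq)
{
	struct rb_node **link = &data->root.rb_root.rb_node;
	struct rb_node *parent = NULL;
	bool leftmost = true;

	while (*link) {
		struct request *other;

		parent = *link;
		other = rb_entry(parent, struct request, rb_node);

		/*
		 * Requests with an equal `time_to_send` all fall to the
		 * right, so a burst of identical delays degenerates into
		 * repeated right-most insertions and rebalancing - the case
		 * a netem-style tail list would optimize.
		 */
		if (ioem_priv(rq)->time_to_send >= ioem_priv(other)->time_to_send) {
			link = &parent->rb_right;
			leftmost = false;
		} else {
			link = &parent->rb_left;
		}
	}

	rb_link_node(&rq->rb_node, parent, link);
	rb_insert_color_cached(&rq->rb_node, &data->root, leftmost);
}
```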
@@ -287,7 +294,7 @@ static void ioem_data_init(struct ioem_data* data, enum hrtimer_restart (*functi
 
     spin_lock_init(&data->lock);
     data->root = RB_ROOT_CACHED;
-    INIT_LIST_HEAD(&data->list);
+    INIT_LIST_HEAD(&data->wait_queue);
 
     data->timer.function = function;
     data->next_expires = 0;
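The `function` pointer assigned here must follow the hrtimer callback prototype named in the hunk header. A hedged sketch of what such a callback could look like; the actual way the dispatch is re-driven is not visible in this diff, and the `container_of()` layout simply assumes the hrtimer is embedded in `struct ioem_data` as `timer`.

```c
/* Assumed sketch of a callback matching the `function` parameter above. */
static enum hrtimer_restart ioem_timer_sketch(struct hrtimer *timer)
{
	struct ioem_data *data = container_of(timer, struct ioem_data, timer);

	/* Typically something like blk_mq_run_hw_queues() would be invoked
	 * here so that ioem_dequeue() runs again once the delay expires. */
	(void)data;
	return HRTIMER_NORESTART;
}
```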
@@ -346,8 +353,8 @@ static struct request* ioem_dequeue(struct ioem_data *data)
     struct irl_dispatch_return irl_ret;
 
     now = ktime_get_ns();
-    if (!list_empty(&data->list)) {
-        rq = list_first_entry(&data->list, struct request, queuelist);
+    if (!list_empty(&data->wait_queue)) {
+        rq = list_first_entry(&data->wait_queue, struct request, queuelist);
         // if now is earlier than the `time_to_send`, there is no need to try to
         // dispatch
         if (now >= ioem_priv(rq)->time_to_send) {
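The branch guarded by this check falls outside the hunk, so here is a sketch written as a hypothetical helper: the FIFO `wait_queue` only ever needs its head examined, and a due request is unlinked with its `in_list` flag cleared (mirroring the flag set when a request is re-queued later in this function) before being handed to the IOPS limiter.

```c
/* Hypothetical helper, assumed from the surrounding diff: pop the head of
 * the wait_queue once its `time_to_send` has passed. */
static struct request *ioem_pop_waiting_sketch(struct ioem_data *data, u64 now)
{
	struct request *rq;

	if (list_empty(&data->wait_queue))
		return NULL;

	rq = list_first_entry(&data->wait_queue, struct request, queuelist);
	if (now < ioem_priv(rq)->time_to_send)
		return NULL;

	list_del_init(&rq->queuelist);
	ioem_priv(rq)->in_list = false;
	return rq;
}
```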
@@ -399,9 +406,9 @@ static struct request* ioem_dequeue(struct ioem_data *data)
             goto out;
         } else {
             // exceeded. Modify the time_to_send of this request, and reinsert
-            // to the waiting list.
+            // into the wait_queue.
             ioem_priv(rq)->time_to_send = irl_ret.time_to_send;
-            list_add_tail(&rq->queuelist, &data->list);
+            list_add_tail(&rq->queuelist, &data->wait_queue);
             ioem_priv(rq)->in_list = true;
             time_to_send = min(time_to_send, irl_ret.time_to_send);
 
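After the reinsert, `time_to_send` holds the earliest deadline among the throttled requests. A hedged sketch of how that deadline might be armed on the hrtimer; `ns_to_ktime()` and `hrtimer_start()` are standard kernel APIs, but using `next_expires` as a dedup guard is an assumption based on the fields shown in this diff.

```c
/* Assumed sketch: arm the dispatch timer for the earliest pending deadline. */
static void ioem_arm_timer_sketch(struct ioem_data *data, u64 time_to_send)
{
	if (data->next_expires != 0 && data->next_expires <= time_to_send)
		return; /* an earlier expiry is already pending */

	data->next_expires = time_to_send;
	hrtimer_start(&data->timer, ns_to_ktime(time_to_send),
		      HRTIMER_MODE_ABS);
}
```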
@@ -583,7 +590,7 @@ static bool ioem_mq_has_work(struct blk_mq_hw_ctx * hctx)
     struct ioem_data* id = hctx->sched_data;
     bool has_work = 0;
 
-    has_work = !(RB_EMPTY_ROOT(&id->root.rb_root) && list_empty(&id->list));
+    has_work = !(RB_EMPTY_ROOT(&id->root.rb_root) && list_empty(&id->wait_queue));
 
     return has_work;
 }