@@ -20,16 +20,16 @@ struct request;
#include "comp.h"

#define rb_to_rq(rb) rb_entry_safe(rb, struct request, rb_node)
- #define rq_rb_first_cached(root) rb_to_rq(rb_first_cached(root))
-
- static void ioem_error_injection(struct request* rq);
+ #define rq_rb_first(root) rb_to_rq(rb_first(root))

+ #define IS_RHEL
#define IOEM_MQ_ENABLED ((LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0)) || (defined RHEL_MAJOR && RHEL_MAJOR >= 7 && defined RHEL_MINOR && RHEL_MINOR >= 6))

/**
 * struct ioem_data - the main data of ioem
 * @root: The rb tree root, which is sorted according to `time_to_send`
 * @wait_queue: The wait_queue head, which is used to store the waiting requests for IOPS limitation
+ * @device: The device of the current ioem_data
 * @lock: The spinlock of the whole structure
 * @timer: The timer used to trigger the dispatch after reaching the
 *   `time_to_send`.
@@ -57,10 +57,12 @@ struct ioem_data {
    // which may cause frequent rebalance. Following the practice of netem, we
    // should add a list to optimize for this situation.
    // However, current performance seems fine.
-     struct rb_root_cached root;
+     struct rb_root root;

    struct list_head wait_queue;

+     dev_t device;
+
    spinlock_t lock;

    struct hrtimer timer;
@@ -81,6 +83,8 @@ struct ioem_data {
#endif
};

+ static void ioem_error_injection(struct ioem_data* id, struct request* rq);
+
static bool ioem_limit_should_affect(struct ioem_data* data, struct request* rq);

/**
@@ -249,7 +253,7 @@ static void ioem_data_sync_with_injections(struct ioem_data* data);
static void ioem_erase_head(struct ioem_data* data, struct request* rq)
{
    if (ioem_priv(rq)->in_rbtree) {
-         rb_erase_cached(&rq->rb_node, &data->root);
+         rb_erase(&rq->rb_node, &data->root);
        RB_CLEAR_NODE(&rq->rb_node);

        ioem_priv(rq)->in_rbtree = false;
@@ -269,7 +273,7 @@ static void ioem_erase_head(struct ioem_data *data, struct request *rq)
 */
static struct request* ioem_peek_request(struct ioem_data* data)
{
-     struct request* ioem_rq = rq_rb_first_cached(&data->root);
+     struct request* ioem_rq = rq_rb_first(&data->root);

    return ioem_rq;
}
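
With a plain rb_root, rb_first() walks the rb_left spine on every call, so each peek is O(log n); rb_first_cached() on the old rb_root_cached returned a cached leftmost pointer in O(1). A minimal sketch of the new peek cost, assuming only the rq_rb_first macro defined above; the helper name is illustrative and not part of the patch:

/* Sketch only: peek the request with the smallest time_to_send.
 * rb_first() descends rb_left links until it reaches the leftmost
 * node, so each call is O(log n) on a plain rb_root. */
static struct request* ioem_peek_earliest(struct ioem_data* data)
{
    if (RB_EMPTY_ROOT(&data->root))
        return NULL;

    return rq_rb_first(&data->root);
}
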
@@ -293,7 +297,7 @@ static void ioem_data_init(struct ioem_data* data, enum hrtimer_restart (*functi
    hrtimer_init(&data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);

    spin_lock_init(&data->lock);
-     data->root = RB_ROOT_CACHED;
+     data->root = RB_ROOT;
    INIT_LIST_HEAD(&data->wait_queue);

    data->timer.function = function;
@@ -311,8 +315,7 @@ static void ioem_data_init(struct ioem_data* data, enum hrtimer_restart (*functi
 */
static void ioem_enqueue(struct ioem_data* data, struct request* rq)
{
-     struct rb_node **p = &data->root.rb_root.rb_node, *parent = NULL;
-     bool leftmost = true;
+     struct rb_node **p = &data->root.rb_node, *parent = NULL;

    while (*p) {
        struct request* parent_rq;
@@ -322,14 +325,13 @@ static void ioem_enqueue(struct ioem_data *data, struct request *rq)

        if (ioem_priv(rq)->time_to_send > ioem_priv(parent_rq)->time_to_send) {
            p = &parent->rb_right;
-             leftmost = false;
        }
        else
            p = &parent->rb_left;
    }

    rb_link_node(&rq->rb_node, parent, p);
-     rb_insert_color_cached(&rq->rb_node, &data->root, leftmost);
+     rb_insert_color(&rq->rb_node, &data->root);

    ioem_priv(rq)->in_rbtree = true;
}
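
Not part of the patch: a rough sketch of how ioem_enqueue, ioem_peek_request and the per-data hrtimer might compose on an insert path. The helper name, the locking granularity and the exact point at which the timer is (re)armed are assumptions here, not taken from the file:

/* Sketch only: queue a request sorted by time_to_send, then (re)arm
 * the per-data hrtimer to fire at the earliest pending deadline. */
static void ioem_queue_and_arm(struct ioem_data* data, struct request* rq)
{
    unsigned long flags;
    struct request* first;

    spin_lock_irqsave(&data->lock, flags);

    ioem_enqueue(data, rq);

    /* the leftmost node holds the smallest time_to_send */
    first = ioem_peek_request(data);
    if (first != NULL)
        hrtimer_start(&data->timer,
                      ns_to_ktime(ioem_priv(first)->time_to_send),
                      HRTIMER_MODE_ABS_PINNED);

    spin_unlock_irqrestore(&data->lock, flags);
}
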
@@ -374,11 +376,11 @@ static struct request* ioem_dequeue(struct ioem_data *data)

    // at this time, rq is NULL, and the `time_to_send` is 0, or the next time
    // when irl counter will be reset.
-     if (RB_EMPTY_ROOT(&data->root.rb_root)) {
+     if (RB_EMPTY_ROOT(&data->root)) {
        goto out;
    }

-     while (!RB_EMPTY_ROOT(&data->root.rb_root)) {
+     while (!RB_EMPTY_ROOT(&data->root)) {
        rq = ioem_peek_request(data);
        if (time_to_send == 0) {
            time_to_send = ioem_priv(rq)->time_to_send;
@@ -522,6 +524,12 @@ static int ioem_mq_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
    ioem_data_init(id, ioem_mq_timer, hctx->queue->elevator->elevator_data);
    id->hctx = hctx;

+ #if LINUX_VERSION_CODE > KERNEL_VERSION(4, 0, 0)
+     id->device = hctx->queue->backing_dev_info->dev->devt;
+ #else
+     id->device = hctx->queue->backing_dev_info.dev->devt;
+ #endif
+
    hctx->sched_data = id;
    return 0;
}
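
The same backing_dev_info lookup appears again in the single-queue init path further down, so it could live behind one accessor. A minimal sketch, reusing the version boundary the patch itself uses; the helper name is illustrative:

#include <linux/version.h>
#include <linux/blkdev.h>

/* Sketch only: hide the backing_dev_info layout difference (a pointer
 * on newer kernels, an embedded struct on older ones) behind one call. */
static dev_t ioem_queue_devt(struct request_queue* q)
{
#if LINUX_VERSION_CODE > KERNEL_VERSION(4, 0, 0)
    return q->backing_dev_info->dev->devt;
#else
    return q->backing_dev_info.dev->devt;
#endif
}
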
@@ -575,7 +583,7 @@ static void ioem_mq_insert_requests(struct blk_mq_hw_ctx * hctx, struct list_hea
        ioem_priv(rq)->in_list = false;
        ioem_priv(rq)->in_rbtree = false;

-         ioem_error_injection(rq);
+         ioem_error_injection(id, rq);
        ioem_enqueue(id, rq);

#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 12, 0)) && (LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0))
@@ -590,7 +598,7 @@ static bool ioem_mq_has_work(struct blk_mq_hw_ctx * hctx)
    struct ioem_data* id = hctx->sched_data;
    bool has_work = 0;

-     has_work = !(RB_EMPTY_ROOT(&id->root.rb_root) && list_empty(&id->wait_queue));
+     has_work = !(RB_EMPTY_ROOT(&id->root) && list_empty(&id->wait_queue));

    return has_work;
}
@@ -729,6 +737,7 @@ static int ioem_sq_init_sched(struct request_queue *q, struct elevator_type *e)

    ioem_data_init(id, ioem_sq_timer, irl);
    id->q = q;
+     id->device = q->backing_dev_info.dev->devt;
    INIT_WORK(&id->unplug_work, ioem_sq_kick_queue);

    eq->elevator_data = id;
@@ -744,7 +753,7 @@ static void ioem_sq_exit_sched(struct elevator_queue * e)
{
    struct ioem_data* id = e->elevator_data;

-     BUG_ON(!RB_EMPTY_ROOT(&id->root.rb_root));
+     BUG_ON(!RB_EMPTY_ROOT(&id->root));
    hrtimer_cancel(&id->irl->timer);
    kfree(id->irl);
    kfree(id);
@@ -756,7 +765,7 @@ static void ioem_sq_insert_request(struct request_queue *q, struct request *rq)

    ioem_data_sync_with_injections(id);
    ioem_priv(rq)->time_to_send = ktime_get_ns();
-     ioem_error_injection(rq);
+     ioem_error_injection(id, rq);

    ioem_enqueue(id, rq);

@@ -1066,7 +1075,7 @@ static s64 ioem_random(s64 mu, s32 jitter, struct crndstate *state) {
 * `current` should point to the current process, so that we can get the pid
 * namespace (or other information) of the process.
 */
- static bool ioem_should_inject(struct request* rq, struct ioem_injection* e) {
+ static bool ioem_should_inject(struct ioem_data* id, struct request* rq, struct ioem_injection* e) {
    if (rq->bio == NULL || e == NULL) {
        return 0;
    }
@@ -1075,7 +1084,7 @@ static bool ioem_should_inject(struct request* rq, struct ioem_injection* e) {
        return 0;
    }

-     if (e->arg.device != 0 && !bio_is_device(rq->bio, e->arg.device)) {
+     if (e->arg.device != 0 && e->arg.device != id->device) {
        return 0;
    }

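
id->device is a dev_t and e->arg.device is compared against it, so a rule supplied from user space as a major:minor pair would presumably be encoded with MKDEV() from <linux/kdev_t.h> before it reaches this check, with 0 keeping the "match every device" behaviour. A hedged sketch; the helper name and the user-space encoding are assumptions, not taken from the patch:

#include <linux/kdev_t.h>
#include <linux/types.h>

/* Sketch only: build the dev_t stored in an injection rule.
 * A zero dev_t means the rule is not filtered by device. */
static dev_t ioem_encode_target_device(unsigned int major, unsigned int minor)
{
    if (major == 0 && minor == 0)
        return 0; /* no device filter */

    return MKDEV(major, minor);
}
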
@@ -1093,15 +1102,15 @@ static bool ioem_should_inject(struct request* rq, struct ioem_injection* e) {
    return 1;
}

- static void ioem_error_injection(struct request* rq)
+ static void ioem_error_injection(struct ioem_data* id, struct request* rq)
{
    struct ioem_injection* e;
    u64 delay = 0;

    read_lock(&ioem_injections.lock);
    list_for_each_entry(e, &ioem_injections.list, list)
    {
-         if (!ioem_should_inject(rq, e)) {
+         if (!ioem_should_inject(id, rq, e)) {
            continue;
        }

@@ -1125,7 +1134,7 @@ static bool ioem_limit_should_affect(struct ioem_data* data, struct request* rq)
    bool should_affect;

    read_lock(&ioem_injections.lock);
-     should_affect = ioem_should_inject(rq, data->ioem_limit);
+     should_affect = ioem_should_inject(data, rq, data->ioem_limit);
    read_unlock(&ioem_injections.lock);

    return should_affect;
@@ -1160,11 +1169,13 @@ static void ioem_data_sync_with_injections(struct ioem_data* data)
    list_for_each_entry(e, &ioem_injections.list, list)
    {
        if (e->injector_type == IOEM_INJECTOR_TYPE_LIMIT) {
-             irl_change(data->irl, e->limit.period_us, e->limit.quota);
-             kref_get(&e->refcount);
-             data->ioem_limit = e;
-             // multiple limit is not supported
-             break;
+             if (e->arg.device == 0 || data->device == e->arg.device) {
+                 irl_change(data->irl, e->limit.period_us, e->limit.quota);
+                 kref_get(&e->refcount);
+                 data->ioem_limit = e;
+                 // multiple limit is not supported
+                 break;
+             }
        }
    }
    data->ioem_injection_version = atomic_read(&ioem_injections.version);