Skip to content

Commit be3c233

Browse files
committed
refine irl handling
Signed-off-by: YangKeao <[email protected]>
1 parent 3eb77e1 commit be3c233

File tree

6 files changed

+126
-50
lines changed

6 files changed

+126
-50
lines changed

driver/Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
chaos_driver-y += main.o chaos_device.o injection.o ioem.o
22
obj-m += chaos_driver.o
33

4-
KVERSION ?= $(shell uname -r)
5-
KBUILD_PATH ?= /lib/modules/$(KVERSION)/build
4+
KERNELRELEASE ?= $(shell uname -r)
5+
KBUILD_PATH ?= /lib/modules/$(KERNELRELEASE)/build
66
PWD = $(shell pwd)
77

88
ccflags-y := ${KBUILD_FLAGS}

driver/dkms.conf

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
PACKAGE_NAME="chaos-driver"
2+
PACKAGE_VERSION="0.1.0"
3+
MAKE="make"
4+
CLEAN="make clean"
5+
BUILT_MODULE_NAME[0]="chaos_driver"
6+
DEST_MODULE_LOCATION[0]="/kernel/extra"
7+
AUTOINSTALL="yes"

driver/ioem.c

Lines changed: 107 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,28 @@ struct ioem_data {
6969

7070
static bool ioem_limit_should_affect(struct ioem_data* data, struct request* rq);
7171

72+
/**
73+
* struct ioem_priv - The priv data stored in request
74+
* @time_to_send: The expected sending time of the request
75+
*
76+
* The expected sending time is calculated when this request comes into the
77+
* scheduler, then it will be stored in the `struct ioem_priv`. This struct
78+
* shouldn't be longer than three pointers, as `rq->elv` is only three
79+
* pointers long.
80+
*/
81+
struct ioem_priv {
82+
u64 time_to_send;
83+
bool ioem_limit_should_affect;
84+
}__attribute__((packed));
85+
86+
struct ioem_priv* ioem_priv(struct request *rq)
87+
{
88+
BUILD_BUG_ON(sizeof(struct ioem_priv) > sizeof(rq->elv));
89+
// `priv` is two pointers long, which is enough to store the `ioem_priv`.
90+
return (struct ioem_priv*)(&rq->elv.priv[0]);
91+
}
92+
93+
7294
/**
7395
* struct irl - request limit
7496
* @lock: The lock protects the config
@@ -90,6 +112,8 @@ struct irl {
90112
atomic64_t io_counter;
91113
atomic64_t last_expire_time;
92114
struct hrtimer timer;
115+
116+
atomic64_t affected_request_counter;
93117
};
94118

95119
/**
@@ -137,6 +161,7 @@ static void irl_init(struct irl* counter)
137161
rwlock_init(&counter->lock);
138162
hrtimer_init(&counter->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
139163
counter->timer.function = irl_timer_callback;
164+
atomic64_set(&counter->last_expire_time, ktime_get_ns());
140165
}
141166

142167
struct irl_dispatch_return {
@@ -145,7 +170,7 @@ struct irl_dispatch_return {
145170
};
146171

147172
/**
148-
* irl_dispatch() - change the config of irl
173+
* irl_dispatch() - check whether this request can dispatch
149174
* @data: The corresponding ioem_data struct
150175
* @rq: The request to be dispatched
151176
*
@@ -163,11 +188,12 @@ static struct irl_dispatch_return irl_dispatch(struct ioem_data* data, struct re
163188
u64 counter;
164189
u64 quota;
165190
u64 period;
191+
u64 last_expire_time = atomic64_read(&irl->last_expire_time);
166192

167193
read_lock(&irl->lock);
168194

169195
period = atomic64_read(&irl->io_period_us);
170-
if (period == 0 || !ioem_limit_should_affect(data, rq)) {
196+
if (period == 0 || !ioem_priv(rq)->ioem_limit_should_affect) {
171197
// the irl is not enabled
172198
ret.dispatch = 1;
173199
ret.time_to_send = 0;
@@ -182,12 +208,12 @@ static struct irl_dispatch_return irl_dispatch(struct ioem_data* data, struct re
182208
counter = atomic64_read(&irl->io_counter);
183209
}
184210
if (counter < quota) {
185-
//
186211
ret.dispatch = 1;
187212
ret.time_to_send = 0;
213+
atomic64_dec(&irl->affected_request_counter);
188214
} else {
189215
ret.dispatch = 0;
190-
ret.time_to_send = ktime_get_ns() + period * NSEC_PER_USEC;
216+
ret.time_to_send = last_expire_time + period * NSEC_PER_USEC;
191217
}
192218
}
193219

@@ -197,24 +223,33 @@ static struct irl_dispatch_return irl_dispatch(struct ioem_data* data, struct re
197223
}
198224

199225
/**
200-
* struct ioem_priv - The priv data stored in request
201-
* @time_to_send: The expected sending time of the request
226+
* irl_enqueue() - optimize the time_to_send of a request which will be enqueued
227+
* @data: The corresponding ioem_data struct
228+
* @rq: The request to be dispatch
202229
*
203-
* The expected sending time is calculated when this request comes into the
204-
* scheduler, then it will be stored in the `struct ioem_priv`. This struct
205-
* shouldn't be longer than three pointers, as the `rq->elv` only have three
206-
* pointers long.
230+
* This function will read the counter inside irl. If the counter is already
231+
* greater than the quota and the `time_to_send` is earlier than the next
232+
* period, it will set the `time_to_send` of the request to the next period.
207233
*/
208-
struct ioem_priv {
209-
u64 time_to_send;
210-
unsigned int pid_ns;
211-
}__attribute__((packed));
212-
213-
struct ioem_priv* ioem_priv(struct request *rq)
234+
static void irl_enqueue(struct ioem_data* data, struct request* rq)
214235
{
215-
BUILD_BUG_ON(sizeof(struct ioem_priv) > sizeof(rq->elv));
216-
// `priv` has two pointers long, is enough to store the `ioem_priv`.
217-
return (struct ioem_priv*)(&rq->elv.priv[0]);
236+
u64 next_period, period, counter;
237+
struct irl* irl = data->irl;
238+
239+
period = atomic64_read(&irl->io_period_us);
240+
if (period == 0 || !ioem_priv(rq)->ioem_limit_should_affect) {
241+
return;
242+
}
243+
244+
counter = atomic64_fetch_add(1, &irl->affected_request_counter);
245+
read_lock(&irl->lock);
246+
if (atomic64_read(&irl->io_counter) > irl->io_quota) {
247+
next_period = atomic64_read(&irl->last_expire_time) + atomic64_read(&irl->io_period_us) * NSEC_PER_USEC * (counter / irl->io_quota);
248+
if (ioem_priv(rq)->time_to_send < next_period) {
249+
ioem_priv(rq)->time_to_send = next_period;
250+
};
251+
}
252+
read_unlock(&irl->lock);
218253
}
219254

220255
static void ioem_data_sync_with_injections(struct ioem_data* data);
@@ -276,19 +311,23 @@ static void ioem_data_init(struct ioem_data* data, enum hrtimer_restart (*functi
276311
* @data: The `ioem_data` structure
277312
* @rq: The request
278313
*
279-
* The request will be inserted into the rb tree
314+
* The request will be inserted into the rb tree. Before inserting the request,
315+
* it will also check whether this request will be affected by the irl and
316+
* whether the irl quota has already been exceeded.
280317
*/
281318
static void ioem_enqueue(struct ioem_data *data, struct request *rq)
282319
{
283320
struct rb_node **p = &data->root.rb_node, *parent = NULL;
284321

322+
irl_enqueue(data, rq);
323+
285324
while (*p) {
286325
struct request* parent_rq;
287326

288327
parent = *p;
289328
parent_rq = rb_entry_safe(parent, struct request, rb_node);
290329

291-
if (ioem_priv(rq)->time_to_send >= ioem_priv(parent_rq)->time_to_send)
330+
if (ioem_priv(rq)->time_to_send > ioem_priv(parent_rq)->time_to_send)
292331
p = &parent->rb_right;
293332
else
294333
p = &parent->rb_left;
@@ -319,24 +358,41 @@ static struct request* ioem_dequeue(struct ioem_data *data)
319358
return NULL;
320359
}
321360

322-
rq = ioem_peek_request(data);
323-
324361
now = ktime_get_ns();
325-
time_to_send = ioem_priv(rq)->time_to_send;
326-
327-
if (time_to_send <= now) {
362+
while (true) {
328363
struct irl_dispatch_return irl_ret;
364+
365+
rq = ioem_peek_request(data);
366+
time_to_send = ioem_priv(rq)->time_to_send;
367+
368+
// if this request's `time_to_send` is later than now, all remaining requests
369+
// are later than now as well, so we need to return without any
370+
// request dispatched.
371+
if (time_to_send > now) {
372+
rq = NULL;
373+
break;
374+
}
375+
376+
// check the IRL to decide whether the quota has been exceeded
377+
ioem_erase_head(data, rq);
378+
329379
irl_ret = irl_dispatch(data, rq);
330380
if (irl_ret.dispatch > 0) {
331-
ioem_erase_head(data, rq);
381+
// not exceeded, return the request
382+
break;
332383
} else {
333-
time_to_send = irl_ret.time_to_send;
384+
// exceeded. Modify the time_to_send of this request, and reinsert
385+
// to the rb_tree.
386+
ioem_priv(rq)->time_to_send = irl_ret.time_to_send;
387+
ioem_enqueue(data, rq);
388+
334389
rq = NULL;
335390
}
336-
} else {
337-
rq = NULL;
338391
}
339392

393+
// There are two possible situations to reach here:
394+
// 1. The request is not NULL and is prepared to send
395+
// 2. The earliest time_to_send is later than now
340396
if (rq != NULL) {
341397
return rq;
342398
}
@@ -468,22 +524,26 @@ struct request* ioem_mq_dispatch_request(struct blk_mq_hw_ctx * hctx)
468524

469525
static void ioem_mq_insert_requests(struct blk_mq_hw_ctx * hctx, struct list_head * list, bool at_head)
470526
{
527+
struct request *rq, *next;
471528
struct ioem_data *id = hctx->sched_data;
472529

473530
spin_lock(&id->lock);
474531
ioem_data_sync_with_injections(id);
475532

476-
while (!list_empty(list)) {
477-
struct request *rq;
478-
533+
list_for_each_entry_safe(rq, next, list, queuelist) {
479534
rq = list_first_entry(list, struct request, queuelist);
480-
list_del_init(&rq->queuelist);
535+
536+
list_del(&rq->queuelist);
481537

482-
ioem_priv(rq)->time_to_send = ktime_get_ns();
483-
ioem_priv(rq)->pid_ns = ns_inum(task_active_pid_ns(current));
538+
if (at_head) {
539+
ioem_priv(rq)->time_to_send = 0;
540+
ioem_priv(rq)->ioem_limit_should_affect = 0;
541+
} else {
542+
ioem_priv(rq)->time_to_send = ktime_get_ns();
543+
ioem_priv(rq)->ioem_limit_should_affect = ioem_limit_should_affect(id, rq);
544+
}
484545

485546
ioem_error_injection(rq);
486-
487547
ioem_enqueue(id, rq);
488548

489549
#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 12, 0)) && (LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0))
@@ -964,12 +1024,21 @@ static s64 ioem_random(s64 mu, s32 jitter, struct crndstate *state) {
9641024
return ((rnd % (2 * (u32)jitter)) + mu) - jitter;
9651025
}
9661026

1027+
/**
1028+
* ioem_should_inject() - whether this request should be injected
1029+
* @rq: The io request
1030+
* @e: The ioem injection
1031+
*
1032+
* This function should be called under process context, which means the
1033+
* `current` should point to the current process, so that we can get the pid
1034+
* namespace (or other information) of the process.
1035+
*/
9671036
static bool ioem_should_inject(struct request* rq, struct ioem_injection* e) {
9681037
if (rq->bio == NULL || e == NULL) {
9691038
return 0;
9701039
}
9711040

972-
if (e->arg.pid_ns != 0 && ioem_priv(rq)->pid_ns != e->arg.pid_ns) {
1041+
if (e->arg.pid_ns != 0 && ns_inum(task_active_pid_ns(current)) != e->arg.pid_ns) {
9731042
return 0;
9741043
}
9751044

pkg/client/client.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ func (c *Client) GetVersion() int {
5555
return int(version)
5656
}
5757

58-
func (c *Client) InjectIOEMDelay(devPath string, op int, pid uint, delay int64, jitter int64, corr uint32) (int, error) {
58+
func (c *Client) InjectIOEMDelay(devPath string, op int, pidNs uint, delay int64, jitter int64, corr uint32) (int, error) {
5959
dev := C.uint32_t(0)
6060
if len(devPath) > 0 {
6161
info, err := os.Stat(devPath)
@@ -70,7 +70,7 @@ func (c *Client) InjectIOEMDelay(devPath string, op int, pid uint, delay int64,
7070
dev = C.uint32_t(stat.Rdev)
7171
}
7272

73-
ioem_injection := C.ioem_matcher_arg_new(C.uint32_t(dev), C.int(op), C.uint(pid))
73+
ioem_injection := C.ioem_matcher_arg_new(C.uint32_t(dev), C.int(op), C.uint(pidNs))
7474
delay_arg := C.ioem_injector_delay_arg_new(C.int64_t(delay), C.int64_t(jitter), C.uint32_t(corr))
7575

7676
id := C.add_injection(C.int(c.fd), 0, unsafe.Pointer(&ioem_injection), 0, unsafe.Pointer(&delay_arg))
@@ -81,7 +81,7 @@ func (c *Client) InjectIOEMDelay(devPath string, op int, pid uint, delay int64,
8181
return int(id), nil
8282
}
8383

84-
func (c *Client) InjectIOEMLimit(devPath string, op int, pid uint, period_us uint64, quota uint64) (int, error) {
84+
func (c *Client) InjectIOEMLimit(devPath string, op int, pidNs uint, period_us uint64, quota uint64) (int, error) {
8585
dev := C.uint32_t(0)
8686
if len(devPath) > 0 {
8787
info, err := os.Stat(devPath)
@@ -96,7 +96,7 @@ func (c *Client) InjectIOEMLimit(devPath string, op int, pid uint, period_us uin
9696
dev = C.uint32_t(stat.Rdev)
9797
}
9898

99-
ioem_injection := C.ioem_matcher_arg_new(C.uint32_t(dev), C.int(op), C.uint(pid))
99+
ioem_injection := C.ioem_matcher_arg_new(C.uint32_t(dev), C.int(op), C.uint(pidNs))
100100
limit_arg := C.ioem_injector_limit_arg_new(C.uint64_t(period_us), C.uint64_t(quota))
101101

102102
id := C.add_injection(C.int(c.fd), 0, unsafe.Pointer(&ioem_injection), 1, unsafe.Pointer(&limit_arg))

pkg/cmd/inject/ioem/delay/delay.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ import (
2525

2626
var dev_path string
2727
var op int
28-
var pid uint
28+
var pidNs uint
2929
var delay, corr int64
3030
var jitter uint32
3131

@@ -38,7 +38,7 @@ var Delay = &cobra.Command{
3838
os.Exit(1)
3939
}
4040

41-
id, err := c.InjectIOEMDelay(dev_path, op, pid, delay, corr, jitter)
41+
id, err := c.InjectIOEMDelay(dev_path, op, pidNs, delay, corr, jitter)
4242
if err != nil {
4343
fmt.Fprintln(os.Stderr, err)
4444
os.Exit(1)
@@ -50,7 +50,7 @@ var Delay = &cobra.Command{
5050

5151
func init() {
5252
Delay.Flags().IntVar(&op, "op", 0, "operation filter of the injection. 0 for all, 1 for write, 2 for read")
53-
Delay.Flags().UintVar(&pid, "pid", 0, "pid namespace filter of the injection. 0 for all, others for the pid namespace of the specified process")
53+
Delay.Flags().UintVar(&pidNs, "pid-ns", 0, "pid namespace filter of the injection. 0 for all, others for the pid namespace of the specified process")
5454
Delay.Flags().StringVar(&dev_path, "dev-path", "", "path of the injected device")
5555

5656
Delay.Flags().Int64Var(&delay, "delay", 0, "delay of the injection")

pkg/cmd/inject/ioem/limit/limit.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ import (
2525

2626
var dev_path string
2727
var op int
28-
var pid uint
28+
var pidNs uint
2929
var period_us, quota uint64
3030

3131
var Limit = &cobra.Command{
@@ -37,7 +37,7 @@ var Limit = &cobra.Command{
3737
os.Exit(1)
3838
}
3939

40-
id, err := c.InjectIOEMLimit(dev_path, op, pid, period_us, quota)
40+
id, err := c.InjectIOEMLimit(dev_path, op, pidNs, period_us, quota)
4141
if err != nil {
4242
fmt.Fprintln(os.Stderr, err)
4343
os.Exit(1)
@@ -49,7 +49,7 @@ var Limit = &cobra.Command{
4949

5050
func init() {
5151
Limit.Flags().IntVar(&op, "op", 0, "operation filter of the injection. 0 for all, 1 for write, 2 for read")
52-
Limit.Flags().UintVar(&pid, "pid", 0, "pid namespace filter of the injection. 0 for all, others for the pid namespace of the specified process")
52+
Limit.Flags().UintVar(&pidNs, "pid-ns", 0, "pid namespace filter of the injection. 0 for all, others for the pid namespace of the specified process")
5353
Limit.Flags().StringVar(&dev_path, "dev_path", "", "path of the injected device")
5454

5555
Limit.Flags().Uint64Var(&period_us, "period-us", 0, "the period time to reset counter")

0 commit comments

Comments
 (0)