Skip to content

Commit f0c0208

Browse files
kalyazin and roypat
committed
test(uffd_utils): accept guest_memfd and bitmap memfd
Accept receiving 3 fds instead of 1, where fds[1] is guest_memfd and fds[2] is userfault bitmap memfd. Also handle the FaultRequest message over the UDS socket by calling a new callback in the Runtime and sending a FaultReply. Co-authored-by: Patrick Roy <[email protected]> Signed-off-by: Patrick Roy <[email protected]> Signed-off-by: Nikita Kalyazin <[email protected]>
1 parent d6ad5a0 commit f0c0208

File tree

4 files changed

+350
-151
lines changed

4 files changed

+350
-151
lines changed

src/firecracker/examples/uffd/fault_all_handler.rs

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -23,27 +23,33 @@ fn main() {
2323
// Get Uffd from UDS. We'll use the uffd to handle PFs for Firecracker.
2424
let listener = UnixListener::bind(uffd_sock_path).expect("Cannot bind to socket path");
2525
let (stream, _) = listener.accept().expect("Cannot listen on UDS socket");
26+
stream
27+
.set_nonblocking(true)
28+
.expect("Cannot set non-blocking");
2629

2730
let mut runtime = Runtime::new(stream, file);
2831
runtime.install_panic_hook();
29-
runtime.run(|uffd_handler: &mut UffdHandler| {
30-
// Read an event from the userfaultfd.
31-
let event = uffd_handler
32-
.read_event()
33-
.expect("Failed to read uffd_msg")
34-
.expect("uffd_msg not ready");
35-
36-
match event {
37-
userfaultfd::Event::Pagefault { .. } => {
38-
let start = get_time_us(ClockType::Monotonic);
39-
for region in uffd_handler.mem_regions.clone() {
40-
uffd_handler.serve_pf(region.base_host_virt_addr as _, region.size);
32+
runtime.run(
33+
|uffd_handler: &mut UffdHandler| {
34+
// Read an event from the userfaultfd.
35+
let event = uffd_handler
36+
.read_event()
37+
.expect("Failed to read uffd_msg")
38+
.expect("uffd_msg not ready");
39+
40+
match event {
41+
userfaultfd::Event::Pagefault { .. } => {
42+
let start = get_time_us(ClockType::Monotonic);
43+
for region in uffd_handler.mem_regions.clone() {
44+
uffd_handler.serve_pf(region.base_host_virt_addr as _, region.size);
45+
}
46+
let end = get_time_us(ClockType::Monotonic);
47+
48+
println!("Finished Faulting All: {}us", end - start);
4149
}
42-
let end = get_time_us(ClockType::Monotonic);
43-
44-
println!("Finished Faulting All: {}us", end - start);
50+
_ => panic!("Unexpected event on userfaultfd"),
4551
}
46-
_ => panic!("Unexpected event on userfaultfd"),
47-
}
48-
});
52+
},
53+
|_uffd_handler: &mut UffdHandler, _offset: usize| {},
54+
);
4955
}

src/firecracker/examples/uffd/malicious_handler.rs

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,17 +21,23 @@ fn main() {
2121
// Get Uffd from UDS. We'll use the uffd to handle PFs for Firecracker.
2222
let listener = UnixListener::bind(uffd_sock_path).expect("Cannot bind to socket path");
2323
let (stream, _) = listener.accept().expect("Cannot listen on UDS socket");
24+
stream
25+
.set_nonblocking(true)
26+
.expect("Cannot set non-blocking");
2427

2528
let mut runtime = Runtime::new(stream, file);
26-
runtime.run(|uffd_handler: &mut UffdHandler| {
27-
// Read an event from the userfaultfd.
28-
let event = uffd_handler
29-
.read_event()
30-
.expect("Failed to read uffd_msg")
31-
.expect("uffd_msg not ready");
32-
33-
if let userfaultfd::Event::Pagefault { .. } = event {
34-
panic!("Fear me! I am the malicious page fault handler.")
35-
}
36-
});
29+
runtime.run(
30+
|uffd_handler: &mut UffdHandler| {
31+
// Read an event from the userfaultfd.
32+
let event = uffd_handler
33+
.read_event()
34+
.expect("Failed to read uffd_msg")
35+
.expect("uffd_msg not ready");
36+
37+
if let userfaultfd::Event::Pagefault { .. } = event {
38+
panic!("Fear me! I am the malicious page fault handler.")
39+
}
40+
},
41+
|_uffd_handler: &mut UffdHandler, _offset: usize| {},
42+
);
3743
}

src/firecracker/examples/uffd/on_demand_handler.rs

Lines changed: 80 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -22,84 +22,95 @@ fn main() {
2222
// Get Uffd from UDS. We'll use the uffd to handle PFs for Firecracker.
2323
let listener = UnixListener::bind(uffd_sock_path).expect("Cannot bind to socket path");
2424
let (stream, _) = listener.accept().expect("Cannot listen on UDS socket");
25+
stream
26+
.set_nonblocking(true)
27+
.expect("Cannot set non-blocking");
2528

2629
let mut runtime = Runtime::new(stream, file);
2730
runtime.install_panic_hook();
28-
runtime.run(|uffd_handler: &mut UffdHandler| {
29-
// !DISCLAIMER!
30-
// When using UFFD together with the balloon device, this handler needs to deal with
31-
// `remove` and `pagefault` events. There are multiple things to keep in mind in
32-
// such setups:
33-
//
34-
// As long as any `remove` event is pending in the UFFD queue, all ioctls return EAGAIN
35-
// -----------------------------------------------------------------------------------
36-
//
37-
// This means we cannot process UFFD events simply one-by-one anymore - if a `remove` event
38-
// arrives, we need to pre-fetch all other events up to the `remove` event, to unblock the
39-
// UFFD, and then go back to process the pre-fetched events.
40-
//
41-
// UFFD might receive events not in their causal order
42-
// -----------------------------------------------------
43-
//
44-
// For example, the guest
45-
// kernel might first respond to a balloon inflation by freeing some memory, and
46-
// telling Firecracker about this. Firecracker will then madvise(MADV_DONTNEED) the
47-
// free memory range, which causes a `remove` event to be sent to UFFD. Then, the
48-
// guest kernel might immediately fault the page in again (for example because
49-
// deflate_on_oom was set), which causes a `pagefault` event to be sent to UFFD.
50-
//
51-
// However, the pagefault will be triggered from inside KVM on the vCPU thread, while the
52-
// balloon device is handled by Firecracker on its VMM thread. This means that potentially
53-
// this handler can receive the `pagefault` _before_ the `remove` event.
54-
//
55-
// This means that the simple "greedy" strategy of simply prefetching _all_ UFFD events
56-
// to make sure no `remove` event is blocking us can result in the handler acting on
57-
// the `pagefault` event before the `remove` message (despite the `remove` event being
58-
// in the causal past of the `pagefault` event), which means that we will fault in a page
59-
// from the snapshot file, while really we should be faulting in a zero page.
60-
//
61-
// In this example handler, we ignore this problem, to avoid
62-
// complexity (under the assumption that the guest kernel will zero a newly faulted in
63-
// page anyway). A production handler will most likely want to ensure that `remove`
64-
// events for a specific range are always handled before `pagefault` events.
65-
//
66-
// Lastly, we still need to deal with the race condition where a `remove` event arrives
67-
// in the UFFD queue after we got done reading all events, in which case we need to go
68-
// back to reading more events before we can continue processing `pagefault`s.
69-
let mut deferred_events = Vec::new();
31+
runtime.run(
32+
|uffd_handler: &mut UffdHandler| {
33+
// !DISCLAIMER!
34+
// When using UFFD together with the balloon device, this handler needs to deal with
35+
// `remove` and `pagefault` events. There are multiple things to keep in mind in
36+
// such setups:
37+
//
38+
// As long as any `remove` event is pending in the UFFD queue, all ioctls return EAGAIN
39+
// -----------------------------------------------------------------------------------
40+
//
41+
// This means we cannot process UFFD events simply one-by-one anymore - if a `remove`
42+
// event arrives, we need to pre-fetch all other events up to the `remove`
43+
// event, to unblock the UFFD, and then go back to process the
44+
// pre-fetched events.
45+
//
46+
// UFFD might receive events not in their causal order
47+
// -----------------------------------------------------
48+
//
49+
// For example, the guest
50+
// kernel might first respond to a balloon inflation by freeing some memory, and
51+
// telling Firecracker about this. Firecracker will then madvise(MADV_DONTNEED) the
52+
// free memory range, which causes a `remove` event to be sent to UFFD. Then, the
53+
// guest kernel might immediately fault the page in again (for example because
54+
// deflate_on_oom was set), which causes a `pagefault` event to be sent to UFFD.
55+
//
56+
// However, the pagefault will be triggered from inside KVM on the vCPU thread, while
57+
// the balloon device is handled by Firecracker on its VMM thread. This
58+
// means that potentially this handler can receive the `pagefault` _before_
59+
// the `remove` event.
60+
//
61+
// This means that the simple "greedy" strategy of simply prefetching _all_ UFFD events
62+
// to make sure no `remove` event is blocking us can result in the handler acting on
63+
// the `pagefault` event before the `remove` message (despite the `remove` event being
64+
// in the causal past of the `pagefault` event), which means that we will fault in a
65+
// page from the snapshot file, while really we should be faulting in a zero
66+
// page.
67+
//
68+
// In this example handler, we ignore this problem, to avoid
69+
// complexity (under the assumption that the guest kernel will zero a newly faulted in
70+
// page anyway). A production handler will most likely want to ensure that `remove`
71+
// events for a specific range are always handled before `pagefault` events.
72+
//
73+
// Lastly, we still need to deal with the race condition where a `remove` event arrives
74+
// in the UFFD queue after we got done reading all events, in which case we need to go
75+
// back to reading more events before we can continue processing `pagefault`s.
76+
let mut deferred_events = Vec::new();
7077

71-
loop {
72-
// First, try events that we couldn't handle last round
73-
let mut events_to_handle = Vec::from_iter(deferred_events.drain(..));
78+
loop {
79+
// First, try events that we couldn't handle last round
80+
let mut events_to_handle = Vec::from_iter(deferred_events.drain(..));
7481

75-
// Read all events from the userfaultfd.
76-
while let Some(event) = uffd_handler.read_event().expect("Failed to read uffd_msg") {
77-
events_to_handle.push(event);
78-
}
82+
// Read all events from the userfaultfd.
83+
while let Some(event) = uffd_handler.read_event().expect("Failed to read uffd_msg")
84+
{
85+
events_to_handle.push(event);
86+
}
7987

80-
for event in events_to_handle.drain(..) {
81-
// We expect to receive either a Page Fault or `remove`
82-
// event (if the balloon device is enabled).
83-
match event {
84-
userfaultfd::Event::Pagefault { addr, .. } => {
85-
if !uffd_handler.serve_pf(addr.cast(), uffd_handler.page_size) {
86-
deferred_events.push(event);
88+
for event in events_to_handle.drain(..) {
89+
// We expect to receive either a Page Fault or `remove`
90+
// event (if the balloon device is enabled).
91+
match event {
92+
userfaultfd::Event::Pagefault { addr, .. } => {
93+
if !uffd_handler.serve_pf(addr.cast(), uffd_handler.page_size) {
94+
deferred_events.push(event);
95+
}
8796
}
97+
userfaultfd::Event::Remove { start, end } => {
98+
uffd_handler.mark_range_removed(start as u64, end as u64)
99+
}
100+
_ => panic!("Unexpected event on userfaultfd"),
88101
}
89-
userfaultfd::Event::Remove { start, end } => {
90-
uffd_handler.mark_range_removed(start as u64, end as u64)
91-
}
92-
_ => panic!("Unexpected event on userfaultfd"),
93102
}
94-
}
95103

96-
// We assume that really only the above removed/pagefault interaction can result in
97-
// deferred events. In that scenario, the loop will always terminate (unless
98-
// newly arriving `remove` events end up indefinitely blocking it, but there's nothing
99-
// we can do about that, and it's a largely theoretical problem).
100-
if deferred_events.is_empty() {
101-
break;
104+
// We assume that really only the above removed/pagefault interaction can result in
105+
// deferred events. In that scenario, the loop will always terminate (unless
106+
// newly arriving `remove` events end up indefinitely blocking it, but there's
107+
// nothing we can do about that, and it's a largely theoretical
108+
// problem).
109+
if deferred_events.is_empty() {
110+
break;
111+
}
102112
}
103-
}
104-
});
113+
},
114+
|_uffd_handler: &mut UffdHandler, _offset: usize| {},
115+
);
105116
}

0 commit comments

Comments
 (0)