@@ -28,81 +28,89 @@ fn main() {
2828
2929 let mut runtime = Runtime :: new ( stream, file) ;
3030 runtime. install_panic_hook ( ) ;
31- runtime. run ( |uffd_handler : & mut UffdHandler | {
32- // !DISCLAIMER!
33- // When using UFFD together with the balloon device, this handler needs to deal with
34- // `remove` and `pagefault` events. There are multiple things to keep in mind in
35- // such setups:
36- //
37- // As long as any `remove` event is pending in the UFFD queue, all ioctls return EAGAIN
38- // -----------------------------------------------------------------------------------
39- //
40- // This means we cannot process UFFD events simply one-by-one anymore - if a `remove` event
41- // arrives, we need to pre-fetch all other events up to the `remove` event, to unblock the
42- // UFFD, and then go back to the process the pre-fetched events.
43- //
44- // UFFD might receive events in not in their causal order
45- // -----------------------------------------------------
46- //
47- // For example, the guest
48- // kernel might first respond to a balloon inflation by freeing some memory, and
49- // telling Firecracker about this. Firecracker will then madvise(MADV_DONTNEED) the
50- // free memory range, which causes a `remove` event to be sent to UFFD. Then, the
51- // guest kernel might immediately fault the page in again (for example because
52- // default_on_oom was set). which causes a `pagefault` event to be sent to UFFD.
53- //
54- // However, the pagefault will be triggered from inside KVM on the vCPU thread, while the
55- // balloon device is handled by Firecracker on its VMM thread. This means that potentially
56- // this handler can receive the `pagefault` _before_ the `remove` event.
57- //
58- // This means that the simple "greedy" strategy of simply prefetching _all_ UFFD events
59- // to make sure no `remove` event is blocking us can result in the handler acting on
60- // the `pagefault` event before the `remove` message (despite the `remove` event being
61- // in the causal past of the `pagefault` event), which means that we will fault in a page
62- // from the snapshot file, while really we should be faulting in a zero page.
63- //
64- // In this example handler, we ignore this problem, to avoid
65- // complexity (under the assumption that the guest kernel will zero a newly faulted in
66- // page anyway). A production handler will most likely want to ensure that `remove`
67- // events for a specific range are always handled before `pagefault` events.
68- //
69- // Lastly, we still need to deal with the race condition where a `remove` event arrives
70- // in the UFFD queue after we got done reading all events, in which case we need to go
71- // back to reading more events before we can continue processing `pagefault`s.
72- let mut deferred_events = Vec :: new ( ) ;
31+ runtime. run (
32+ |uffd_handler : & mut UffdHandler | {
33+ // !DISCLAIMER!
34+ // When using UFFD together with the balloon device, this handler needs to deal with
35+ // `remove` and `pagefault` events. There are multiple things to keep in mind in
36+ // such setups:
37+ //
38+ // As long as any `remove` event is pending in the UFFD queue, all ioctls return EAGAIN
39+ // -----------------------------------------------------------------------------------
40+ //
41+ // This means we cannot process UFFD events simply one-by-one anymore - if a `remove`
42+ // event arrives, we need to pre-fetch all other events up to the `remove`
43+ // event, to unblock the UFFD, and then go back to process the
44+ // pre-fetched events.
45+ //
46+ // UFFD might receive events not in their causal order
47+ // -----------------------------------------------------
48+ //
49+ // For example, the guest
50+ // kernel might first respond to a balloon inflation by freeing some memory, and
51+ // telling Firecracker about this. Firecracker will then madvise(MADV_DONTNEED) the
52+ // free memory range, which causes a `remove` event to be sent to UFFD. Then, the
53+ // guest kernel might immediately fault the page in again (for example because
54+ // default_on_oom was set), which causes a `pagefault` event to be sent to UFFD.
55+ //
56+ // However, the pagefault will be triggered from inside KVM on the vCPU thread, while
57+ // the balloon device is handled by Firecracker on its VMM thread. This
58+ // means that potentially this handler can receive the `pagefault` _before_
59+ // the `remove` event.
60+ //
61+ // This means that the simple "greedy" strategy of simply prefetching _all_ UFFD events
62+ // to make sure no `remove` event is blocking us can result in the handler acting on
63+ // the `pagefault` event before the `remove` message (despite the `remove` event being
64+ // in the causal past of the `pagefault` event), which means that we will fault in a
65+ // page from the snapshot file, while really we should be faulting in a zero
66+ // page.
67+ //
68+ // In this example handler, we ignore this problem, to avoid
69+ // complexity (under the assumption that the guest kernel will zero a newly faulted in
70+ // page anyway). A production handler will most likely want to ensure that `remove`
71+ // events for a specific range are always handled before `pagefault` events.
72+ //
73+ // Lastly, we still need to deal with the race condition where a `remove` event arrives
74+ // in the UFFD queue after we got done reading all events, in which case we need to go
75+ // back to reading more events before we can continue processing `pagefault`s.
76+ let mut deferred_events = Vec :: new ( ) ;
7377
74- loop {
75- // First, try events that we couldn't handle last round
76- let mut events_to_handle = Vec :: from_iter ( deferred_events. drain ( ..) ) ;
78+ loop {
79+ // First, try events that we couldn't handle last round
80+ let mut events_to_handle = Vec :: from_iter ( deferred_events. drain ( ..) ) ;
7781
78- // Read all events from the userfaultfd.
79- while let Some ( event) = uffd_handler. read_event ( ) . expect ( "Failed to read uffd_msg" ) {
80- events_to_handle. push ( event) ;
81- }
82+ // Read all events from the userfaultfd.
83+ while let Some ( event) = uffd_handler. read_event ( ) . expect ( "Failed to read uffd_msg" )
84+ {
85+ events_to_handle. push ( event) ;
86+ }
8287
83- for event in events_to_handle. drain ( ..) {
84- // We expect to receive either a Page Fault or `remove`
85- // event (if the balloon device is enabled).
86- match event {
87- userfaultfd:: Event :: Pagefault { addr, .. } => {
88- if !uffd_handler. serve_pf ( addr. cast ( ) , uffd_handler. page_size ) {
89- deferred_events. push ( event) ;
88+ for event in events_to_handle. drain ( ..) {
89+ // We expect to receive either a Page Fault or `remove`
90+ // event (if the balloon device is enabled).
91+ match event {
92+ userfaultfd:: Event :: Pagefault { addr, .. } => {
93+ if !uffd_handler. serve_pf ( addr. cast ( ) , uffd_handler. page_size ) {
94+ deferred_events. push ( event) ;
95+ }
9096 }
97+ userfaultfd:: Event :: Remove { start, end } => {
98+ uffd_handler. mark_range_removed ( start as u64 , end as u64 )
99+ }
100+ _ => panic ! ( "Unexpected event on userfaultfd" ) ,
91101 }
92- userfaultfd:: Event :: Remove { start, end } => {
93- uffd_handler. mark_range_removed ( start as u64 , end as u64 )
94- }
95- _ => panic ! ( "Unexpected event on userfaultfd" ) ,
96102 }
97- }
98103
99- // We assume that really only the above removed/pagefault interaction can result in
100- // deferred events. In that scenario, the loop will always terminate (unless
101- // newly arriving `remove` events end up indefinitely blocking it, but there's nothing
102- // we can do about that, and it's a largely theoretical problem).
103- if deferred_events. is_empty ( ) {
104- break ;
104+ // We assume that really only the above removed/pagefault interaction can result in
105+ // deferred events. In that scenario, the loop will always terminate (unless
106+ // newly arriving `remove` events end up indefinitely blocking it, but there's
107+ // nothing we can do about that, and it's a largely theoretical
108+ // problem).
109+ if deferred_events. is_empty ( ) {
110+ break ;
111+ }
105112 }
106- }
107- } ) ;
113+ } ,
114+ |_uffd_handler : & mut UffdHandler , _offset : usize | { } ,
115+ ) ;
108116}
0 commit comments