@@ -25,81 +25,89 @@ fn main() {
2525
2626 let mut runtime = Runtime :: new ( stream, file) ;
2727 runtime. install_panic_hook ( ) ;
28- runtime. run ( |uffd_handler : & mut UffdHandler | {
29- // !DISCLAIMER!
30- // When using UFFD together with the balloon device, this handler needs to deal with
31- // `remove` and `pagefault` events. There are multiple things to keep in mind in
32- // such setups:
33- //
34- // As long as any `remove` event is pending in the UFFD queue, all ioctls return EAGAIN
35- // -----------------------------------------------------------------------------------
36- //
37- // This means we cannot process UFFD events simply one-by-one anymore - if a `remove` event
38- // arrives, we need to pre-fetch all other events up to the `remove` event, to unblock the
39- // UFFD, and then go back to the process the pre-fetched events.
40- //
41- // UFFD might receive events in not in their causal order
42- // -----------------------------------------------------
43- //
44- // For example, the guest
45- // kernel might first respond to a balloon inflation by freeing some memory, and
46- // telling Firecracker about this. Firecracker will then madvise(MADV_DONTNEED) the
47- // free memory range, which causes a `remove` event to be sent to UFFD. Then, the
48- // guest kernel might immediately fault the page in again (for example because
49- // default_on_oom was set). which causes a `pagefault` event to be sent to UFFD.
50- //
51- // However, the pagefault will be triggered from inside KVM on the vCPU thread, while the
52- // balloon device is handled by Firecracker on its VMM thread. This means that potentially
53- // this handler can receive the `pagefault` _before_ the `remove` event.
54- //
55- // This means that the simple "greedy" strategy of simply prefetching _all_ UFFD events
56- // to make sure no `remove` event is blocking us can result in the handler acting on
57- // the `pagefault` event before the `remove` message (despite the `remove` event being
58- // in the causal past of the `pagefault` event), which means that we will fault in a page
59- // from the snapshot file, while really we should be faulting in a zero page.
60- //
61- // In this example handler, we ignore this problem, to avoid
62- // complexity (under the assumption that the guest kernel will zero a newly faulted in
63- // page anyway). A production handler will most likely want to ensure that `remove`
64- // events for a specific range are always handled before `pagefault` events.
65- //
66- // Lastly, we still need to deal with the race condition where a `remove` event arrives
67- // in the UFFD queue after we got done reading all events, in which case we need to go
68- // back to reading more events before we can continue processing `pagefault`s.
69- let mut deferred_events = Vec :: new ( ) ;
28+ runtime. run (
29+ |uffd_handler : & mut UffdHandler | {
30+ // !DISCLAIMER!
31+ // When using UFFD together with the balloon device, this handler needs to deal with
32+ // `remove` and `pagefault` events. There are multiple things to keep in mind in
33+ // such setups:
34+ //
35+ // As long as any `remove` event is pending in the UFFD queue, all ioctls return EAGAIN
36+ // -----------------------------------------------------------------------------------
37+ //
38+ // This means we cannot process UFFD events simply one-by-one anymore - if a `remove`
39+ // event arrives, we need to pre-fetch all other events up to the `remove`
40+ // event, to unblock the UFFD, and then go back to process the
41+ // pre-fetched events.
42+ //
43+ // UFFD might receive events not in their causal order
44+ // ---------------------------------------------------
45+ //
46+ // For example, the guest
47+ // kernel might first respond to a balloon inflation by freeing some memory, and
48+ // telling Firecracker about this. Firecracker will then madvise(MADV_DONTNEED) the
49+ // free memory range, which causes a `remove` event to be sent to UFFD. Then, the
50+ // guest kernel might immediately fault the page in again (for example because
51+ // deflate_on_oom was set), which causes a `pagefault` event to be sent to UFFD.
52+ //
53+ // However, the pagefault will be triggered from inside KVM on the vCPU thread, while
54+ // the balloon device is handled by Firecracker on its VMM thread. This
55+ // means that potentially this handler can receive the `pagefault` _before_
56+ // the `remove` event.
57+ //
58+ // This means that the simple "greedy" strategy of simply prefetching _all_ UFFD events
59+ // to make sure no `remove` event is blocking us can result in the handler acting on
60+ // the `pagefault` event before the `remove` message (despite the `remove` event being
61+ // in the causal past of the `pagefault` event), which means that we will fault in a
62+ // page from the snapshot file, while really we should be faulting in a zero
63+ // page.
64+ //
65+ // In this example handler, we ignore this problem, to avoid
66+ // complexity (under the assumption that the guest kernel will zero a newly faulted in
67+ // page anyway). A production handler will most likely want to ensure that `remove`
68+ // events for a specific range are always handled before `pagefault` events.
69+ //
70+ // Lastly, we still need to deal with the race condition where a `remove` event arrives
71+ // in the UFFD queue after we got done reading all events, in which case we need to go
72+ // back to reading more events before we can continue processing `pagefault`s.
73+ let mut deferred_events = Vec :: new ( ) ;
7074
71- loop {
72- // First, try events that we couldn't handle last round
73- let mut events_to_handle = Vec :: from_iter ( deferred_events. drain ( ..) ) ;
75+ loop {
76+ // First, try events that we couldn't handle last round
77+ let mut events_to_handle = Vec :: from_iter ( deferred_events. drain ( ..) ) ;
7478
75- // Read all events from the userfaultfd.
76- while let Some ( event) = uffd_handler. read_event ( ) . expect ( "Failed to read uffd_msg" ) {
77- events_to_handle. push ( event) ;
78- }
79+ // Read all events from the userfaultfd.
80+ while let Some ( event) = uffd_handler. read_event ( ) . expect ( "Failed to read uffd_msg" )
81+ {
82+ events_to_handle. push ( event) ;
83+ }
7984
80- for event in events_to_handle. drain ( ..) {
81- // We expect to receive either a Page Fault or `remove`
82- // event (if the balloon device is enabled).
83- match event {
84- userfaultfd:: Event :: Pagefault { addr, .. } => {
85- if !uffd_handler. serve_pf ( addr. cast ( ) , uffd_handler. page_size ) {
86- deferred_events. push ( event) ;
85+ for event in events_to_handle. drain ( ..) {
86+ // We expect to receive either a Page Fault or `remove`
87+ // event (if the balloon device is enabled).
88+ match event {
89+ userfaultfd:: Event :: Pagefault { addr, .. } => {
90+ if !uffd_handler. serve_pf ( addr. cast ( ) , uffd_handler. page_size ) {
91+ deferred_events. push ( event) ;
92+ }
8793 }
94+ userfaultfd:: Event :: Remove { start, end } => {
95+ uffd_handler. mark_range_removed ( start as u64 , end as u64 )
96+ }
97+ _ => panic ! ( "Unexpected event on userfaultfd" ) ,
8898 }
89- userfaultfd:: Event :: Remove { start, end } => {
90- uffd_handler. mark_range_removed ( start as u64 , end as u64 )
91- }
92- _ => panic ! ( "Unexpected event on userfaultfd" ) ,
9399 }
94- }
95100
96- // We assume that really only the above removed/pagefault interaction can result in
97- // deferred events. In that scenario, the loop will always terminate (unless
98- // newly arriving `remove` events end up indefinitely blocking it, but there's nothing
99- // we can do about that, and it's a largely theoretical problem).
100- if deferred_events. is_empty ( ) {
101- break ;
101+ // We assume that really only the above `remove`/`pagefault` interaction can result in
102+ // deferred events. In that scenario, the loop will always terminate (unless
103+ // newly arriving `remove` events end up indefinitely blocking it, but there's
104+ // nothing we can do about that, and it's a largely theoretical
105+ // problem).
106+ if deferred_events. is_empty ( ) {
107+ break ;
108+ }
102109 }
103- }
104- } ) ;
110+ } ,
111+ |_uffd_handler : & mut UffdHandler , _offset : usize | { } ,
112+ ) ;
105113}
0 commit comments