Skip to content

Commit 520d08b

Browse files
committed
examples: Have UFFD handler kill Firecracker should it die
If the UFFD handler exits abnormally for some reason, have it take down Firecracker as well by SIGKILL-ing it from a panic hook. For this, reintroduce the "get peer creds" logic. We have to use SIGKILL because Firecracker could be inside the handler for a KVM-originated page fault that is not marked as interruptible, in which case all signals but SIGKILL are ignored (happens for example during KVM_SET_MSRS when it triggers the initialization of a gfn_to_pfn_cache for the kvm-clock page, which uses GUP without FOLL_INTERRUPTIBLE). While we're at it, add a hint to the generic "process not found" error message to indicate that potentially Firecracker died, and that the cause of this could be the UFFD handler crashing (for example, in #4601 the cause of the mystery hang is the UFFD handler crashing, but we were stumped by what's going on for over half a year. Let's avoid that going forward). Signed-off-by: Patrick Roy <[email protected]>
1 parent d235b05 commit 520d08b

File tree

2 files changed

+38
-0
lines changed

2 files changed

+38
-0
lines changed

src/firecracker/examples/uffd/uffd_utils.rs

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,19 @@ impl Runtime {
199199
panic!("mmap on backing file failed");
200200
}
201201

202+
let peer_creds = peer_process_credentials(&stream);
203+
204+
let default_panic_hook = std::panic::take_hook();
205+
std::panic::set_hook(Box::new(move |panic_info| {
206+
let r = unsafe { libc::kill(peer_creds.pid, libc::SIGKILL) };
207+
208+
if r != 0 {
209+
eprintln!("Failed to kill Firecracker process from panic hook");
210+
}
211+
212+
default_panic_hook(panic_info);
213+
}));
214+
202215
Self {
203216
stream,
204217
backing_file,
@@ -294,6 +307,28 @@ fn create_mem_regions(mappings: &Vec<GuestRegionUffdMapping>, page_size: usize)
294307
mem_regions
295308
}
296309

310+
fn peer_process_credentials(stream: &UnixStream) -> libc::ucred {
311+
let mut creds: libc::ucred = libc::ucred {
312+
pid: 0,
313+
gid: 0,
314+
uid: 0,
315+
};
316+
let mut creds_size = size_of::<libc::ucred>() as u32;
317+
let ret = unsafe {
318+
libc::getsockopt(
319+
stream.as_raw_fd(),
320+
libc::SOL_SOCKET,
321+
libc::SO_PEERCRED,
322+
&mut creds as *mut _ as *mut _,
323+
&mut creds_size as *mut libc::socklen_t,
324+
)
325+
};
326+
if ret != 0 {
327+
panic!("Failed to get peer process credentials");
328+
}
329+
creds
330+
}
331+
297332
#[cfg(test)]
298333
mod tests {
299334
use std::mem::MaybeUninit;

tests/framework/microvm.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,9 @@ def kill(self):
310310
if self.screen_pid:
311311
os.kill(self.screen_pid, signal.SIGKILL)
312312
except:
313+
LOG.error(
314+
"Failed to kill Firecracker Process. Did it already die (or did the UFFD handler process die and take it down)?"
315+
)
313316
LOG.error(self.log_data)
314317
raise
315318

0 commit comments

Comments
 (0)