From 4c5359ad31f93edc099de12d1d012990d8f7ef4d Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Wed, 19 Mar 2025 16:10:52 +0100 Subject: [PATCH] fix: try to make UFFD handlers more robust According to our UFFD protocol, UFFD handlers negotiate with Firecracker during initialization and wait for it to send over a UDS the UFFD file descriptor along with the memory mappings that are being handled over the UFFD. During this handshake, our (testing only/not production grade) UFFD handlers issue what essentially is a `recvmsg` that should return with the UFFD fd and the mappings. Some times instead of the file descriptor, the `recvmsg` wrapper returns a `None` value for the file descriptor. When this happens, the UFFD handler crashes and Firecracker process hangs. According to `man recv(2)`: ``` Datagram sockets in various domains (e.g., the UNIX and Internet domains) permit zero-length datagrams. When such a datagram is received, the return value is 0. ``` which means it is possible to receive a zero-length message (we are communicating with Firecracker over a UDS). Add logic to our UFFD handlers to retry the negotiation with Firecracker up to 5 times before giving up. This helps making them (slightly) more robust. Also, we add some logging in the receive logic so that we can inspect failures post-mortem. Signed-off-by: Babis Chalios --- src/firecracker/examples/uffd/uffd_utils.rs | 50 +++++++++++++++------ 1 file changed, 37 insertions(+), 13 deletions(-) diff --git a/src/firecracker/examples/uffd/uffd_utils.rs b/src/firecracker/examples/uffd/uffd_utils.rs index 3d4b92e0079..939a04fa23a 100644 --- a/src/firecracker/examples/uffd/uffd_utils.rs +++ b/src/firecracker/examples/uffd/uffd_utils.rs @@ -52,21 +52,45 @@ pub struct UffdHandler { } impl UffdHandler { - pub fn from_unix_stream(stream: &UnixStream, backing_buffer: *const u8, size: usize) -> Self { - let mut message_buf = vec![0u8; 1024]; - let (bytes_read, file) = stream - .recv_with_fd(&mut message_buf[..]) - .expect("Cannot read from a stream"); - message_buf.resize(bytes_read, 0); + fn get_mappings_and_file(stream: &UnixStream) -> (String, File) { + // Sometimes, reading from the stream succeeds but we don't receive any + // UFFD descriptor. We don't really have a good understanding why this is + // happening, but let's try to be a bit more robust and retry a few times + // before we declare defeat. + for _ in 1..=5 { + let mut message_buf = vec![0u8; 1024]; + let (bytes_read, file) = match stream.recv_with_fd(&mut message_buf[..]) { + Ok(res) => res, + Err(err) => { + println!("Could not receive message from stream: {err}"); + continue; + } + }; + message_buf.resize(bytes_read, 0); + + // We do not expect to receive non-UTF-8 data from Firecracker, so this is probably + // an error we can't recover from. Just immediately abort + let body = String::from_utf8(message_buf.clone()).unwrap_or_else(|_| { + panic!( + "Received body is not a utf-8 valid string. Raw bytes received: \ + {message_buf:#?}" + ) + }); + let file = match file { + Some(file) => file, + None => { + println!("Did not receive Uffd from UDS. Received body: {body}"); + continue; + } + }; + return (body, file); + } - let body = String::from_utf8(message_buf.clone()).unwrap_or_else(|_| { - panic!( - "Received body is not a utf-8 valid string. Raw bytes received: {message_buf:#?}" - ) - }); - let file = - file.unwrap_or_else(|| panic!("Did not receive Uffd from UDS. Received body: {body}")); + panic!("Could not get UFFD and mappings after 5 retries"); + } + pub fn from_unix_stream(stream: &UnixStream, backing_buffer: *const u8, size: usize) -> Self { + let (body, file) = Self::get_mappings_and_file(stream); let mappings = serde_json::from_str::>(&body).unwrap_or_else(|_| { panic!("Cannot deserialize memory mappings. Received body: {body}")