diff --git a/Cargo.lock b/Cargo.lock index 8c4744f2f..2e88c5be2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -894,6 +894,7 @@ dependencies = [ "kvm-ioctls", "libc", "log", + "nix", "polly", "procfs", "serde", diff --git a/init/init.c b/init/init.c index 1e9eed984..004d065d3 100644 --- a/init/init.c +++ b/init/init.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -756,6 +757,71 @@ void clock_worker() } #endif +int reopen_fd(int fd, char *path, int flags) +{ + int newfd = open(path,flags); + if (newfd < 0) { + printf("Failed to open '%s': %s\n", path,strerror(errno)); + return -1; + } + + close(fd); + if (dup2(newfd, fd) < 0) { + perror("dup2"); + close(newfd); + return -1; + } + close(newfd); + return 0; +} + +int setup_redirects() +{ + DIR *ports_dir = opendir("/sys/class/virtio-ports"); + if (ports_dir == NULL) { + printf("Unable to open ports directory!\n"); + return -4; + } + + char path[2048]; + char name_buf[1024]; + + struct dirent *entry = NULL; + while ((entry=readdir(ports_dir))) { + char* port_identifier = entry->d_name; + int result_len = snprintf(path, sizeof(path), "/sys/class/virtio-ports/%s/name", port_identifier); + + // result was truncated + if (result_len > sizeof(name_buf) - 1) { + printf("Path buffer too small"); + return -1; + } + + FILE *port_name_file = fopen(path, "r"); + if (port_name_file == NULL) { + continue; + } + + char *port_name = fgets(name_buf, sizeof(name_buf), port_name_file); + fclose(port_name_file); + + if (port_name != NULL && strcmp(port_name, "krun-stdin\n") == 0) { + // if previous snprintf didn't fail, this one cannot fail either + snprintf(path, sizeof(path), "/dev/%s", port_identifier); + reopen_fd(STDIN_FILENO, path, O_RDONLY); + } else if (port_name != NULL && strcmp(port_name, "krun-stdout\n") == 0) { + snprintf(path, sizeof(path), "/dev/%s", port_identifier); + reopen_fd(STDOUT_FILENO, path, O_WRONLY); + } else if (port_name != NULL && strcmp(port_name, "krun-stderr\n") == 0) { + snprintf(path, sizeof(path), "/dev/%s", port_identifier); + reopen_fd(STDERR_FILENO, path, O_WRONLY); + } + } + + closedir(ports_dir); + return 0; +} + int main(int argc, char **argv) { struct ifreq ifr; @@ -848,10 +914,23 @@ int main(int argc, char **argv) } #endif - if (execvp(exec_argv[0], exec_argv) < 0) { - printf("Couldn't execute '%s' inside the vm: %s\n", exec_argv[0], strerror(errno)); - exit(-3); - } + // We need to fork ourselves, because pid 1 cannot doesn't receive SIGINT signal + int pid = fork(); + if (pid < 0) { + perror("fork"); + exit(-3); + } if (pid == 0) { // child + if (setup_redirects() < 0) { + exit(-4); + } + if (execvp(exec_argv[0], exec_argv) < 0) { + printf("Couldn't execute '%s' inside the vm: %s\n", exec_argv[0], strerror(errno)); + exit(-3); + } + } else { // parent + // wait for children since we can't exit init + waitpid(pid, NULL, 0); + } return 0; } diff --git a/src/arch/src/x86_64/regs.rs b/src/arch/src/x86_64/regs.rs index 100702ab5..5f8ed13f4 100644 --- a/src/arch/src/x86_64/regs.rs +++ b/src/arch/src/x86_64/regs.rs @@ -213,7 +213,7 @@ mod tests { } fn read_u64(gm: &GuestMemoryMmap, offset: u64) -> u64 { - let read_addr = GuestAddress(offset as u64); + let read_addr = GuestAddress(offset); gm.read_obj(read_addr).unwrap() } @@ -253,7 +253,7 @@ mod tests { assert_eq!((i << 21) + 0x83u64, read_u64(gm, PDE_START + (i * 8))); } - assert_eq!(PML4_START as u64, sregs.cr3); + assert_eq!({ PML4_START }, sregs.cr3); assert!(sregs.cr4 & X86_CR4_PAE != 0); assert!(sregs.cr0 & X86_CR0_PG != 0); } @@ -298,9 +298,9 @@ mod tests { let expected_regs: kvm_regs = kvm_regs { rflags: 0x0000_0000_0000_0002u64, rip: 1, - rsp: super::super::layout::BOOT_STACK_POINTER as u64, - rbp: super::super::layout::BOOT_STACK_POINTER as u64, - rsi: super::super::layout::ZERO_PAGE_START as u64, + rsp: super::super::layout::BOOT_STACK_POINTER, + rbp: super::super::layout::BOOT_STACK_POINTER, + rsi: super::super::layout::ZERO_PAGE_START, ..Default::default() }; diff --git a/src/cpuid/src/cpu_leaf.rs b/src/cpuid/src/cpu_leaf.rs index e5f37fd67..6ebac86e3 100644 --- a/src/cpuid/src/cpu_leaf.rs +++ b/src/cpuid/src/cpu_leaf.rs @@ -84,6 +84,7 @@ pub mod leaf_0x4 { use crate::bit_helper::BitRange; // inherit eax from leaf_cache_parameters + #[allow(unused_imports)] pub use crate::cpu_leaf::leaf_cache_parameters::eax::*; pub const MAX_CORES_PER_PACKAGE_BITRANGE: BitRange = bit_range!(31, 26); @@ -300,6 +301,7 @@ pub mod leaf_0x8000001d { pub const LEAF_NUM: u32 = 0x8000_001d; // inherit eax from leaf_cache_parameters + #[allow(unused_imports)] pub use crate::cpu_leaf::leaf_cache_parameters::eax; } diff --git a/src/devices/Cargo.toml b/src/devices/Cargo.toml index 14cea8efe..267a4f1bf 100644 --- a/src/devices/Cargo.toml +++ b/src/devices/Cargo.toml @@ -15,7 +15,7 @@ crossbeam-channel = "0.5" env_logger = "0.9.0" libc = ">=0.2.39" log = "0.4.0" -nix = "0.24.1" +nix = { version = "0.24.1", features = ["poll"] } rand = "0.8.5" vm-memory = { version = ">=0.13", features = ["backend-mmap"] } diff --git a/src/devices/src/legacy/serial.rs b/src/devices/src/legacy/serial.rs index a74fc80ef..5210922fa 100644 --- a/src/devices/src/legacy/serial.rs +++ b/src/devices/src/legacy/serial.rs @@ -524,13 +524,13 @@ mod tests { fn test_serial_dlab() { let mut serial = Serial::new_sink(EventFd::new(utils::eventfd::EFD_NONBLOCK).unwrap()); - serial.write(0, u64::from(LCR), &[LCR_DLAB_BIT as u8]); + serial.write(0, u64::from(LCR), &[LCR_DLAB_BIT]); serial.write(0, u64::from(DLAB_LOW), &[0x12_u8]); serial.write(0, u64::from(DLAB_HIGH), &[0x34_u8]); let mut data = [0u8]; serial.read(0, u64::from(LCR), &mut data[..]); - assert_eq!(data[0], LCR_DLAB_BIT as u8); + assert_eq!(data[0], { LCR_DLAB_BIT }); serial.read(0, u64::from(DLAB_LOW), &mut data[..]); assert_eq!(data[0], 0x12); serial.read(0, u64::from(DLAB_HIGH), &mut data[..]); @@ -541,16 +541,16 @@ mod tests { fn test_serial_modem() { let mut serial = Serial::new_sink(EventFd::new(utils::eventfd::EFD_NONBLOCK).unwrap()); - serial.write(0, u64::from(MCR), &[MCR_LOOP_BIT as u8]); + serial.write(0, u64::from(MCR), &[MCR_LOOP_BIT]); serial.write(0, u64::from(DATA), &[b'a']); serial.write(0, u64::from(DATA), &[b'b']); serial.write(0, u64::from(DATA), &[b'c']); let mut data = [0u8]; serial.read(0, u64::from(MSR), &mut data[..]); - assert_eq!(data[0], DEFAULT_MODEM_STATUS as u8); + assert_eq!(data[0], { DEFAULT_MODEM_STATUS }); serial.read(0, u64::from(MCR), &mut data[..]); - assert_eq!(data[0], MCR_LOOP_BIT as u8); + assert_eq!(data[0], { MCR_LOOP_BIT }); serial.read(0, u64::from(DATA), &mut data[..]); assert_eq!(data[0], b'a'); serial.read(0, u64::from(DATA), &mut data[..]); diff --git a/src/devices/src/virtio/block/mod.rs b/src/devices/src/virtio/block/mod.rs index 60fdd9ff5..2fcc11d06 100644 --- a/src/devices/src/virtio/block/mod.rs +++ b/src/devices/src/virtio/block/mod.rs @@ -7,7 +7,6 @@ pub mod request; pub mod test_utils; pub use self::device::{Block, CacheType}; -pub use self::event_handler::*; pub use self::request::*; use vm_memory::GuestMemoryError; diff --git a/src/devices/src/virtio/console/console_control.rs b/src/devices/src/virtio/console/console_control.rs new file mode 100644 index 000000000..8c8153a36 --- /dev/null +++ b/src/devices/src/virtio/console/console_control.rs @@ -0,0 +1,152 @@ +use std::collections::VecDeque; +use std::ops::Deref; +use std::sync::{Arc, Mutex}; + +use utils::eventfd::EventFd; +use utils::eventfd::EFD_NONBLOCK; +use vm_memory::{ByteValued, GuestMemoryMmap}; + +use crate::virtio::console::defs::control_event::{ + VIRTIO_CONSOLE_CONSOLE_PORT, VIRTIO_CONSOLE_PORT_ADD, VIRTIO_CONSOLE_PORT_NAME, + VIRTIO_CONSOLE_PORT_OPEN, VIRTIO_CONSOLE_RESIZE, +}; + +#[derive(Copy, Clone, Debug, Default)] +#[repr(C, packed(4))] +pub struct VirtioConsoleControl { + /// Port number + pub id: u32, + /// The kind of control event + pub event: u16, + /// Extra information for the event + pub value: u16, +} + +// Safe because it only has data and has no implicit padding. +// But NOTE that this relies on CPU being little endian, to have correct semantics +unsafe impl ByteValued for VirtioConsoleControl {} + +#[derive(Copy, Clone, Debug, Default)] +#[repr(C, packed)] +pub struct VirtioConsoleResize { + // NOTE: the order of these fields in the actual kernel implementation and in the spec are swapped, + // we follow the order in the kernel to get it working correctly + pub rows: u16, + pub cols: u16, +} + +// Safe because it only has data and has no implicit padding. +// but NOTE, that we rely on CPU being little endian, for the values to be correct +unsafe impl ByteValued for VirtioConsoleResize {} + +pub enum Payload { + ConsoleControl(VirtioConsoleControl), + Bytes(Vec), +} + +impl Deref for Payload { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + match self { + Payload::ConsoleControl(b) => b.as_slice(), + Payload::Bytes(b) => b.as_slice(), + } + } +} + +// Utility for sending commands into control rx queue +pub struct ConsoleControl { + queue: Mutex>, + queue_evt: EventFd, +} + +impl ConsoleControl { + pub fn new() -> Arc { + Arc::new(Self { + queue: Default::default(), + queue_evt: EventFd::new(EFD_NONBLOCK).unwrap(), + }) + } + + pub fn mark_console_port(&self, _mem: &GuestMemoryMmap, port_id: u32) { + self.push_msg(VirtioConsoleControl { + id: port_id, + event: VIRTIO_CONSOLE_CONSOLE_PORT, + value: 1, + }) + } + + pub fn console_resize(&self, port_id: u32, new_size: VirtioConsoleResize) { + let mut buf = Vec::new(); + buf.extend( + VirtioConsoleControl { + id: port_id, + event: VIRTIO_CONSOLE_RESIZE, + value: 0, + } + .as_slice(), + ); + buf.extend(new_size.as_slice()); + self.push_vec(buf) + } + + /// Adds another port with the specified port_id + pub fn port_add(&self, port_id: u32) { + self.push_msg(VirtioConsoleControl { + id: port_id, + event: VIRTIO_CONSOLE_PORT_ADD, + value: 0, + }) + } + + pub fn port_open(&self, port_id: u32, open: bool) { + self.push_msg(VirtioConsoleControl { + id: port_id, + event: VIRTIO_CONSOLE_PORT_OPEN, + value: open as u16, + }) + } + + pub fn port_name(&self, port_id: u32, name: &str) { + let mut buf: Vec = Vec::new(); + + buf.extend_from_slice( + VirtioConsoleControl { + id: port_id, + event: VIRTIO_CONSOLE_PORT_NAME, + value: 1, // Unspecified/unused in the spec, lets use the same value as QEMU. + } + .as_slice(), + ); + + // The spec says the name shouldn't be NUL terminated. + buf.extend(name.as_bytes()); + self.push_vec(buf) + } + + pub fn queue_pop(&self) -> Option { + let mut queue = self.queue.lock().expect("Poisoned lock"); + queue.pop_front() + } + + pub fn queue_evt(&self) -> &EventFd { + &self.queue_evt + } + + fn push_msg(&self, msg: VirtioConsoleControl) { + let mut queue = self.queue.lock().expect("Poisoned lock"); + queue.push_back(Payload::ConsoleControl(msg)); + if let Err(e) = self.queue_evt.write(1) { + log::trace!("ConsoleControl failed to write to notify {e}") + } + } + + fn push_vec(&self, buf: Vec) { + let mut queue = self.queue.lock().expect("Poisoned lock"); + queue.push_back(Payload::Bytes(buf)); + if let Err(e) = self.queue_evt.write(1) { + log::trace!("ConsoleControl failed to write to notify {e}") + } + } +} diff --git a/src/devices/src/virtio/console/device.rs b/src/devices/src/virtio/console/device.rs index 8de438650..e8cf78a40 100644 --- a/src/devices/src/virtio/console/device.rs +++ b/src/devices/src/virtio/console/device.rs @@ -1,29 +1,37 @@ use std::cmp; -use std::collections::VecDeque; -use std::io; use std::io::Write; +use std::iter::zip; +use std::mem::{size_of, size_of_val}; use std::os::unix::io::{AsRawFd, RawFd}; -use std::result; -use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::atomic::AtomicUsize; use std::sync::{Arc, Mutex}; use libc::TIOCGWINSZ; use utils::eventfd::EventFd; -use vm_memory::{ByteValued, Bytes, GuestMemory, GuestMemoryMmap}; +use vm_memory::{ByteValued, Bytes, GuestMemoryMmap}; -use super::super::super::legacy::ReadableFd; use super::super::{ ActivateError, ActivateResult, ConsoleError, DeviceState, Queue as VirtQueue, VirtioDevice, - VIRTIO_MMIO_INT_CONFIG, VIRTIO_MMIO_INT_VRING, }; -use super::{defs, defs::uapi}; +use super::{defs, defs::control_event, defs::uapi}; use crate::legacy::Gic; -use crate::Error as DeviceError; +use crate::virtio::console::console_control::{ + ConsoleControl, VirtioConsoleControl, VirtioConsoleResize, +}; +use crate::virtio::console::defs::QUEUE_SIZE; +use crate::virtio::console::irq_signaler::IRQSignaler; +use crate::virtio::console::port::Port; +use crate::virtio::console::port_queue_mapping::{ + num_queues, port_id_to_queue_idx, QueueDirection, +}; +use crate::virtio::{PortDescription, VmmExitObserver}; -pub(crate) const RXQ_INDEX: usize = 0; -pub(crate) const TXQ_INDEX: usize = 1; -pub(crate) const AVAIL_FEATURES: u64 = - 1 << uapi::VIRTIO_CONSOLE_F_SIZE as u64 | 1 << uapi::VIRTIO_F_VERSION_1 as u64; +pub(crate) const CONTROL_RXQ_INDEX: usize = 2; +pub(crate) const CONTROL_TXQ_INDEX: usize = 3; + +pub(crate) const AVAIL_FEATURES: u64 = 1 << uapi::VIRTIO_CONSOLE_F_SIZE as u64 + | 1 << uapi::VIRTIO_CONSOLE_F_MULTIPORT as u64 + | 1 << uapi::VIRTIO_F_VERSION_1 as u64; pub(crate) fn get_win_size() -> (u16, u16) { #[repr(C)] @@ -56,47 +64,45 @@ pub struct VirtioConsoleConfig { unsafe impl ByteValued for VirtioConsoleConfig {} impl VirtioConsoleConfig { - pub fn new(cols: u16, rows: u16) -> Self { + pub fn new(cols: u16, rows: u16, max_nr_ports: u32) -> Self { VirtioConsoleConfig { cols, rows, - max_nr_ports: 1u32, + max_nr_ports, emerg_wr: 0u32, } } - - pub fn update_console_size(&mut self, cols: u16, rows: u16) { - self.cols = cols; - self.rows = rows; - } } pub struct Console { + pub(crate) device_state: DeviceState, + pub(crate) irq: IRQSignaler, + pub(crate) control: Arc, + pub(crate) ports: Vec, + pub(crate) queues: Vec, pub(crate) queue_events: Vec, + pub(crate) avail_features: u64, pub(crate) acked_features: u64, - pub(crate) interrupt_status: Arc, - pub(crate) interrupt_evt: EventFd, + pub(crate) activate_evt: EventFd, pub(crate) sigwinch_evt: EventFd, - pub(crate) device_state: DeviceState, - pub(crate) in_buffer: VecDeque, + config: VirtioConsoleConfig, - pub(crate) input: Box, - output: Box, - configured: bool, - pub(crate) interactive: bool, - intc: Option>>, - irq_line: Option, } impl Console { - pub(crate) fn with_queues( - input: Box, - output: Box, - queues: Vec, - ) -> super::Result { + pub fn new(ports: Vec) -> super::Result { + assert!(!ports.is_empty(), "Expected at least 1 port"); + assert!( + matches!(ports[0], PortDescription::Console { .. }), + "First port must be a console" + ); + + let num_queues = num_queues(ports.len()); + let queues = vec![VirtQueue::new(QUEUE_SIZE); num_queues]; + let mut queue_events = Vec::new(); for _ in 0..queues.len() { queue_events @@ -104,154 +110,172 @@ impl Console { } let (cols, rows) = get_win_size(); - let config = VirtioConsoleConfig::new(cols, rows); + let config = VirtioConsoleConfig::new(cols, rows, ports.len() as u32); + let ports = zip(0u32.., ports) + .map(|(port_id, description)| Port::new(port_id, description)) + .collect(); Ok(Console { + irq: IRQSignaler::new(), + control: ConsoleControl::new(), + ports, queues, queue_events, avail_features: AVAIL_FEATURES, acked_features: 0, - interrupt_status: Arc::new(AtomicUsize::new(0)), - interrupt_evt: EventFd::new(utils::eventfd::EFD_NONBLOCK) - .map_err(ConsoleError::EventFd)?, activate_evt: EventFd::new(utils::eventfd::EFD_NONBLOCK) .map_err(ConsoleError::EventFd)?, sigwinch_evt: EventFd::new(utils::eventfd::EFD_NONBLOCK) .map_err(ConsoleError::EventFd)?, device_state: DeviceState::Inactive, - in_buffer: VecDeque::new(), config, - input, - output, - configured: false, - interactive: true, - intc: None, - irq_line: None, }) } - pub fn new( - input: Box, - output: Box, - ) -> super::Result { - let queues: Vec = defs::QUEUE_SIZES - .iter() - .map(|&max_size| VirtQueue::new(max_size)) - .collect(); - Self::with_queues(input, output, queues) - } - pub fn id(&self) -> &str { defs::CONSOLE_DEV_ID } pub fn set_intc(&mut self, intc: Arc>) { - self.intc = Some(intc); + self.irq.set_intc(intc) } pub fn get_sigwinch_fd(&self) -> RawFd { self.sigwinch_evt.as_raw_fd() } - pub fn set_interactive(&mut self, interactive: bool) { - self.interactive = interactive; - } - - /// Signal the guest driver that we've used some virtio buffers that it had previously made - /// available. - pub fn signal_used_queue(&self) -> result::Result<(), DeviceError> { - debug!("console: raising IRQ"); - self.interrupt_status - .fetch_or(VIRTIO_MMIO_INT_VRING as usize, Ordering::SeqCst); - if let Some(intc) = &self.intc { - intc.lock().unwrap().set_irq(self.irq_line.unwrap()); - Ok(()) - } else { - self.interrupt_evt.write(1).map_err(|e| { - error!("Failed to signal used queue: {:?}", e); - DeviceError::FailedSignalingUsedQueue(e) - }) - } - } - - pub fn signal_config_update(&self) -> result::Result<(), DeviceError> { - debug!("console: raising IRQ for config update"); - self.interrupt_status - .fetch_or(VIRTIO_MMIO_INT_CONFIG as usize, Ordering::SeqCst); - self.interrupt_evt.write(1).map_err(|e| { - error!("Failed to signal used queue: {:?}", e); - DeviceError::FailedSignalingUsedQueue(e) - }) - } - pub fn update_console_size(&mut self, cols: u16, rows: u16) { - debug!("update_console_size: {} {}", cols, rows); - self.config.update_console_size(cols, rows); - self.signal_config_update().unwrap(); + log::debug!("update_console_size: {} {}", cols, rows); + // Note that we currently only support resizing on the first/main console + self.control + .console_resize(0, VirtioConsoleResize { rows, cols }); } - pub(crate) fn process_rx(&mut self) -> bool { - //debug!("console: RXQ queue event"); - let mem = match self.device_state { - DeviceState::Activated(ref mem) => mem, - // This should never happen, it's been already validated in the event handler. - DeviceState::Inactive => unreachable!(), + pub(crate) fn process_control_rx(&mut self) -> bool { + log::trace!("process_control_rx"); + let DeviceState::Activated(ref mem) = self.device_state else { + unreachable!() }; - - if self.in_buffer.is_empty() { - return false; - } - - let queue = &mut self.queues[RXQ_INDEX]; - let mut used_any = false; - while let Some(head) = queue.pop(mem) { - let len = cmp::min(head.len, self.in_buffer.len() as u32); - let source_slice = self.in_buffer.drain(..len as usize).collect::>(); - if let Err(e) = mem.write_slice(&source_slice[..], head.addr) { - error!("Failed to write slice: {:?}", e); - queue.go_to_previous_position(); - break; - } - - queue.add_used(mem, head.index, len); - used_any = true; - - if self.in_buffer.is_empty() { + let mut raise_irq = false; + + while let Some(head) = self.queues[CONTROL_RXQ_INDEX].pop(mem) { + if let Some(buf) = self.control.queue_pop() { + match mem.write(&buf, head.addr) { + Ok(n) => { + if n != buf.len() { + log::error!("process_control_rx: partial write"); + } + raise_irq = true; + log::trace!("process_control_rx wrote {n}"); + self.queues[CONTROL_RXQ_INDEX].add_used(mem, head.index, n as u32); + } + Err(e) => { + log::error!("process_control_rx failed to write: {e}"); + } + } + } else { + self.queues[CONTROL_RXQ_INDEX].undo_pop(); break; } } - - used_any + raise_irq } - pub(crate) fn process_tx(&mut self) -> bool { - //debug!("console: TXQ queue event"); - let mem = match self.device_state { - DeviceState::Activated(ref mem) => mem, - // This should never happen, it's been already validated in the event handler. - DeviceState::Inactive => unreachable!(), + pub(crate) fn process_control_tx(&mut self) -> bool { + log::trace!("process_control_tx"); + let DeviceState::Activated(ref mem) = self.device_state else { + unreachable!() }; - // This won't be needed once we support multiport - if !self.configured { - self.configured = true; - self.signal_config_update().unwrap(); + let tx_queue = &mut self.queues[CONTROL_TXQ_INDEX]; + let mut raise_irq = false; + + let mut ports_to_start = Vec::new(); + + while let Some(head) = tx_queue.pop(mem) { + raise_irq = true; + + let cmd: VirtioConsoleControl = match mem.read_obj(head.addr) { + Ok(cmd) => cmd, + Err(e) => { + log::error!( + "Failed to read VirtioConsoleControl struct: {e:?}, struct len = {len}, head.len = {head_len}", + len = size_of::(), + head_len = head.len, + ); + continue; + } + }; + tx_queue.add_used(mem, head.index, size_of_val(&cmd) as u32); + + log::trace!("VirtioConsoleControl cmd: {cmd:?}"); + match cmd.event { + control_event::VIRTIO_CONSOLE_DEVICE_READY => { + log::debug!( + "Device is ready: initialization {}", + if cmd.value == 1 { "ok" } else { "failed" } + ); + for port_id in 0..self.ports.len() { + self.control.port_add(port_id as u32); + } + } + control_event::VIRTIO_CONSOLE_PORT_READY => { + if cmd.value != 1 { + log::error!("Port initialization failed: {:?}", cmd); + continue; + } + + if self.ports[cmd.id as usize].is_console() { + self.control.mark_console_port(mem, cmd.id); + } else { + // We start with all ports open, this makes sense for now, + // because underlying file descriptors STDIN, STDOUT, STDERR are always open too + self.control.port_open(cmd.id, true) + } + + let name = self.ports[cmd.id as usize].name(); + log::trace!("Port ready {id}: {name}", id = cmd.id); + if !name.is_empty() { + self.control.port_name(cmd.id, name) + } + } + control_event::VIRTIO_CONSOLE_PORT_OPEN => { + let opened = match cmd.value { + 0 => false, + 1 => true, + _ => { + log::error!( + "Invalid value ({}) for VIRTIO_CONSOLE_PORT_OPEN on port {}", + cmd.value, + cmd.id + ); + continue; + } + }; + + if !opened { + log::debug!("Guest closed port {}", cmd.id); + continue; + } + + ports_to_start.push(cmd.id as usize); + } + _ => log::warn!("Unknown console control event {:x}", cmd.event), + } } - let queue = &mut self.queues[TXQ_INDEX]; - let mut used_any = false; - while let Some(head) = queue.pop(mem) { - let mut buf = vec![0; head.len as usize]; - mem.write_volatile_to(head.addr, &mut buf, head.len as usize) - .unwrap(); - self.output.write_all(&buf).unwrap(); - self.output.flush().unwrap(); - - queue.add_used(mem, head.index, head.len); - used_any = true; + for port_id in ports_to_start { + log::trace!("Starting port io for port {}", port_id); + self.ports[port_id].start( + mem.clone(), + self.queues[port_id_to_queue_idx(QueueDirection::Rx, port_id)].clone(), + self.queues[port_id_to_queue_idx(QueueDirection::Tx, port_id)].clone(), + self.irq.clone(), + self.control.clone(), + ); } - used_any + raise_irq } } @@ -285,15 +309,15 @@ impl VirtioDevice for Console { } fn interrupt_evt(&self) -> &EventFd { - &self.interrupt_evt + self.irq.interrupt_evt() } fn interrupt_status(&self) -> Arc { - self.interrupt_status.clone() + self.irq.interrupt_status() } fn set_irq_line(&mut self, irq: u32) { - self.irq_line = Some(irq); + self.irq.set_irq_line(irq) } fn read_config(&self, offset: u64, mut data: &mut [u8]) { @@ -319,17 +343,8 @@ impl VirtioDevice for Console { } fn activate(&mut self, mem: GuestMemoryMmap) -> ActivateResult { - if self.queues.len() != defs::NUM_QUEUES { - error!( - "Cannot perform activate. Expected {} queue(s), got {}", - defs::NUM_QUEUES, - self.queues.len() - ); - return Err(ActivateError::BadActivate); - } - if self.activate_evt.write(1).is_err() { - error!("Cannot write to activate_evt",); + error!("Cannot write to activate_evt"); return Err(ActivateError::BadActivate); } @@ -345,3 +360,13 @@ impl VirtioDevice for Console { } } } + +impl VmmExitObserver for Console { + fn on_vmm_exit(&mut self) { + for port in &mut self.ports { + port.flush(); + } + + log::trace!("Console on_vmm_exit finished"); + } +} diff --git a/src/devices/src/virtio/console/event_handler.rs b/src/devices/src/virtio/console/event_handler.rs index 16c43220f..707811c64 100644 --- a/src/devices/src/virtio/console/event_handler.rs +++ b/src/devices/src/virtio/console/event_handler.rs @@ -1,69 +1,42 @@ -use std::io::Read; use std::os::unix::io::AsRawFd; -use std::process; use polly::event_manager::{EventManager, Subscriber}; use utils::epoll::{EpollEvent, EventSet}; -use super::device::{get_win_size, Console, RXQ_INDEX, TXQ_INDEX}; +use super::device::{get_win_size, Console}; +use crate::virtio::console::device::{CONTROL_RXQ_INDEX, CONTROL_TXQ_INDEX}; +use crate::virtio::console::port_queue_mapping::{queue_idx_to_port_id, QueueDirection}; use crate::virtio::device::VirtioDevice; impl Console { - pub(crate) fn handle_rxq_event(&mut self, event: &EpollEvent) -> bool { - debug!("console: RX queue event"); + pub(crate) fn read_queue_event(&self, queue_index: usize, event: &EpollEvent) -> bool { + log::trace!("Event on queue {queue_index}: {:?}", event.event_set()); let event_set = event.event_set(); if event_set != EventSet::IN { - warn!("console: rxq unexpected event {:?}", event_set); + warn!("Unexpected event from queue index {queue_index}: {event_set:?}"); return false; } - let mut raise_irq = false; - if let Err(e) = self.queue_events[RXQ_INDEX].read() { - error!("Failed to get console rx queue event: {:?}", e); - } else { - raise_irq |= self.process_rx(); - } - raise_irq - } - - pub(crate) fn handle_txq_event(&mut self, event: &EpollEvent) -> bool { - debug!("console: TX queue event"); - - let event_set = event.event_set(); - if event_set != EventSet::IN { - warn!("console: txq unexpected event {:?}", event_set); + if let Err(e) = self.queue_events[queue_index].read() { + error!("Failed to read event from queue index {queue_index}: {e:?}"); return false; } - let mut raise_irq = false; - if let Err(e) = self.queue_events[TXQ_INDEX].read() { - error!("Failed to get console tx queue event: {:?}", e); - } else { - raise_irq |= self.process_tx(); - } - raise_irq + true } - pub(crate) fn handle_input(&mut self, event: &EpollEvent) { - debug!("console: input event"); - - let event_set = event.event_set(); - match event_set { - EventSet::HANG_UP => process::exit(0), - EventSet::IN => {} - _ => { - warn!("console: input unexpected event {:?}", event_set); - return; + fn notify_port_queue_event(&mut self, queue_index: usize) { + let (direction, port_id) = queue_idx_to_port_id(queue_index); + match direction { + QueueDirection::Rx => { + log::trace!("Notify rx (queue event)"); + self.ports[port_id].notify_rx() + } + QueueDirection::Tx => { + log::trace!("Notify tx (queue event)"); + self.ports[port_id].notify_tx() } - } - - let mut out = [0u8; 64]; - let count = self.input.read(&mut out).unwrap(); - self.in_buffer.extend(&out[..count]); - - if self.process_rx() { - self.signal_used_queue().unwrap(); } } @@ -79,31 +52,22 @@ impl Console { .subscriber(self.activate_evt.as_raw_fd()) .unwrap(); - event_manager - .register( - self.queue_events[RXQ_INDEX].as_raw_fd(), - EpollEvent::new( - EventSet::IN, - self.queue_events[RXQ_INDEX].as_raw_fd() as u64, - ), - self_subscriber.clone(), - ) - .unwrap_or_else(|e| { - error!("Failed to register fs rxq with event manager: {:?}", e); - }); - - event_manager - .register( - self.queue_events[TXQ_INDEX].as_raw_fd(), - EpollEvent::new( - EventSet::IN, - self.queue_events[TXQ_INDEX].as_raw_fd() as u64, - ), - self_subscriber.clone(), - ) - .unwrap_or_else(|e| { - error!("Failed to register fs txq with event manager: {:?}", e); - }); + for queue_index in 0..self.queues.len() { + event_manager + .register( + self.queue_events[queue_index].as_raw_fd(), + EpollEvent::new( + EventSet::IN, + self.queue_events[queue_index].as_raw_fd() as u64, + ), + self_subscriber.clone(), + ) + .unwrap_or_else(|e| { + error!( + "Failed to register queue index {queue_index} with event manager: {e:?}" + ); + }); + } event_manager .unregister(self.activate_evt.as_raw_fd()) @@ -127,33 +91,59 @@ impl Console { let (cols, rows) = get_win_size(); self.update_console_size(cols, rows); } + + fn read_control_queue_event(&mut self, event: &EpollEvent) { + let event_set = event.event_set(); + if event_set != EventSet::IN { + warn!("Unexpected event {:?}", event_set); + } + + if let Err(e) = self.control.queue_evt().read() { + error!("Failed to read the ConsoleControl event: {:?}", e); + } + } } impl Subscriber for Console { fn process(&mut self, event: &EpollEvent, event_manager: &mut EventManager) { let source = event.fd(); - let rxq = self.queue_events[RXQ_INDEX].as_raw_fd(); - let txq = self.queue_events[TXQ_INDEX].as_raw_fd(); + + let control_rxq = self.queue_events[CONTROL_RXQ_INDEX].as_raw_fd(); + let control_txq = self.queue_events[CONTROL_TXQ_INDEX].as_raw_fd(); + let control_rxq_control = self.control.queue_evt().as_raw_fd(); + let activate_evt = self.activate_evt.as_raw_fd(); let sigwinch_evt = self.sigwinch_evt.as_raw_fd(); - let input = self.input.as_raw_fd(); if self.is_activated() { let mut raise_irq = false; - match source { - _ if source == rxq => raise_irq = self.handle_rxq_event(event), - _ if source == txq => raise_irq = self.handle_txq_event(event), - _ if source == input => self.handle_input(event), - _ if source == activate_evt => { - self.handle_activate_event(event_manager); - } - _ if source == sigwinch_evt => { - self.handle_sigwinch_event(event); - } - _ => warn!("Unexpected console event received: {:?}", source), + + if source == control_txq { + raise_irq |= + self.read_queue_event(CONTROL_TXQ_INDEX, event) && self.process_control_tx() + } else if source == control_rxq_control { + self.read_control_queue_event(event); + raise_irq |= self.process_control_rx(); + } else if source == control_rxq { + raise_irq |= self.read_queue_event(CONTROL_RXQ_INDEX, event) + } + /* Guest signaled input/output on port */ + else if let Some(queue_index) = self + .queue_events + .iter() + .position(|fd| fd.as_raw_fd() == source) + { + raise_irq |= self.read_queue_event(queue_index, event); + self.notify_port_queue_event(queue_index); + } else if source == activate_evt { + self.handle_activate_event(event_manager); + } else if source == sigwinch_evt { + self.handle_sigwinch_event(event); + } else { + log::warn!("Unexpected console event received: {:?}", source) } if raise_irq { - self.signal_used_queue().unwrap_or_default(); + self.irq.signal_used_queue("event_handler"); } } else { warn!( @@ -164,17 +154,10 @@ impl Subscriber for Console { } fn interest_list(&self) -> Vec { - if self.interactive { - vec![ - EpollEvent::new(EventSet::IN, self.activate_evt.as_raw_fd() as u64), - EpollEvent::new(EventSet::IN, self.sigwinch_evt.as_raw_fd() as u64), - EpollEvent::new(EventSet::IN, self.input.as_raw_fd() as u64), - ] - } else { - vec![ - EpollEvent::new(EventSet::IN, self.activate_evt.as_raw_fd() as u64), - EpollEvent::new(EventSet::IN, self.sigwinch_evt.as_raw_fd() as u64), - ] - } + vec![ + EpollEvent::new(EventSet::IN, self.activate_evt.as_raw_fd() as u64), + EpollEvent::new(EventSet::IN, self.sigwinch_evt.as_raw_fd() as u64), + EpollEvent::new(EventSet::IN, self.control.queue_evt().as_raw_fd() as u64), + ] } } diff --git a/src/devices/src/virtio/console/irq_signaler.rs b/src/devices/src/virtio/console/irq_signaler.rs new file mode 100644 index 000000000..5deb5971e --- /dev/null +++ b/src/devices/src/virtio/console/irq_signaler.rs @@ -0,0 +1,65 @@ +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::{Arc, Mutex}; + +use utils::eventfd::EventFd; + +use crate::legacy::Gic; +use crate::virtio::{VIRTIO_MMIO_INT_CONFIG, VIRTIO_MMIO_INT_VRING}; + +#[derive(Clone)] +pub struct IRQSignaler { + interrupt_status: Arc, + interrupt_evt: Arc, + intc: Option>>, + irq_line: Option, +} + +impl IRQSignaler { + pub fn new() -> IRQSignaler { + Self { + interrupt_status: Arc::new(AtomicUsize::new(0)), + interrupt_evt: Arc::new( + EventFd::new(utils::eventfd::EFD_NONBLOCK) + .expect("Failed to create EventFd for interrupt_evt"), + ), + intc: None, + irq_line: None, + } + } + + pub fn signal_used_queue(&self, reason: &str) { + log::trace!("signal used queue because '{reason}'"); + self.interrupt_status + .fetch_or(VIRTIO_MMIO_INT_VRING as usize, Ordering::SeqCst); + if let Some(intc) = &self.intc { + intc.lock().unwrap().set_irq(self.irq_line.unwrap()); + } else if let Err(e) = self.interrupt_evt.write(1) { + error!("Failed to signal used queue: {e:?}"); + } + } + + #[allow(dead_code)] + pub fn signal_config_update(&self) { + self.interrupt_status + .fetch_or(VIRTIO_MMIO_INT_CONFIG as usize, Ordering::SeqCst); + if let Err(e) = self.interrupt_evt.write(1) { + error!("Failed to signal config update: {e:?}"); + } + } + + pub fn interrupt_evt(&self) -> &EventFd { + &self.interrupt_evt + } + + pub fn interrupt_status(&self) -> Arc { + self.interrupt_status.clone() + } + + pub fn set_intc(&mut self, intc: Arc>) { + self.intc = Some(intc); + } + + pub fn set_irq_line(&mut self, irq: u32) { + self.irq_line = Some(irq); + } +} diff --git a/src/devices/src/virtio/console/mod.rs b/src/devices/src/virtio/console/mod.rs index c2a20eeaa..bbaba4dde 100644 --- a/src/devices/src/virtio/console/mod.rs +++ b/src/devices/src/virtio/console/mod.rs @@ -1,20 +1,42 @@ +mod console_control; mod device; mod event_handler; +mod irq_signaler; +mod port; +pub mod port_io; +mod port_queue_mapping; +mod process_rx; +mod process_tx; pub use self::defs::uapi::VIRTIO_ID_CONSOLE as TYPE_CONSOLE; pub use self::device::Console; +pub use self::port::PortDescription; mod defs { pub const CONSOLE_DEV_ID: &str = "virtio_console"; - pub const NUM_QUEUES: usize = 2; - pub const QUEUE_SIZES: &[u16] = &[256; NUM_QUEUES]; + pub const QUEUE_SIZE: u16 = 32; pub mod uapi { /// The device conforms to the virtio spec version 1.0. pub const VIRTIO_CONSOLE_F_SIZE: u32 = 0; + pub const VIRTIO_CONSOLE_F_MULTIPORT: u32 = 1; pub const VIRTIO_F_VERSION_1: u32 = 32; pub const VIRTIO_ID_CONSOLE: u32 = 3; } + + #[allow(dead_code)] + pub mod control_event { + pub const VIRTIO_CONSOLE_DEVICE_READY: u16 = 0; + // Also known as VIRTIO_CONSOLE_DEVICE_ADD in spec, but kernel uses this (more descriptive) name + pub const VIRTIO_CONSOLE_PORT_ADD: u16 = 1; + /// Also known as VIRTIO_CONSOLE_DEVICE_REMOVE in spec, but kernel uses this (more descriptive) name + pub const VIRTIO_CONSOLE_PORT_REMOVE: u16 = 2; + pub const VIRTIO_CONSOLE_PORT_READY: u16 = 3; + pub const VIRTIO_CONSOLE_CONSOLE_PORT: u16 = 4; + pub const VIRTIO_CONSOLE_RESIZE: u16 = 5; + pub const VIRTIO_CONSOLE_PORT_OPEN: u16 = 6; + pub const VIRTIO_CONSOLE_PORT_NAME: u16 = 7; + } } #[derive(Debug)] diff --git a/src/devices/src/virtio/console/port.rs b/src/devices/src/virtio/console/port.rs new file mode 100644 index 000000000..09a25ba51 --- /dev/null +++ b/src/devices/src/virtio/console/port.rs @@ -0,0 +1,163 @@ +use std::borrow::Cow; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; +use std::thread::JoinHandle; +use std::{mem, thread}; + +use vm_memory::GuestMemoryMmap; + +use crate::virtio::console::console_control::ConsoleControl; +use crate::virtio::console::irq_signaler::IRQSignaler; +use crate::virtio::console::port_io::{PortInput, PortOutput}; +use crate::virtio::console::process_rx::process_rx; +use crate::virtio::console::process_tx::process_tx; +use crate::virtio::Queue; + +pub enum PortDescription { + Console { + input: Option>, + output: Option>, + }, + InputPipe { + name: Cow<'static, str>, + input: Box, + }, + OutputPipe { + name: Cow<'static, str>, + output: Box, + }, +} + +enum PortState { + Inactive { + input: Option>, + output: Option>, + }, + Active { + stop: Arc, + rx_thread: Option>, + tx_thread: Option>, + }, +} + +pub(crate) struct Port { + port_id: u32, + /// Empty if no name given + name: Cow<'static, str>, + represents_console: bool, + state: PortState, +} + +impl Port { + pub(crate) fn new(port_id: u32, description: PortDescription) -> Self { + match description { + PortDescription::Console { input, output } => Self { + port_id, + name: "".into(), + represents_console: true, + state: PortState::Inactive { input, output }, + }, + PortDescription::InputPipe { name, input } => Self { + port_id, + name, + represents_console: false, + state: PortState::Inactive { + input: Some(input), + output: None, + }, + }, + PortDescription::OutputPipe { name, output } => Self { + port_id, + name, + represents_console: false, + state: PortState::Inactive { + input: None, + output: Some(output), + }, + }, + } + } + + pub fn name(&self) -> &str { + &self.name + } + + pub fn is_console(&self) -> bool { + self.represents_console + } + + pub fn notify_rx(&self) { + if let PortState::Active { + rx_thread: Some(handle), + .. + } = &self.state + { + handle.thread().unpark() + } + } + + pub fn notify_tx(&self) { + if let PortState::Active { + tx_thread: Some(handle), + .. + } = &self.state + { + handle.thread().unpark() + } + } + + pub fn start( + &mut self, + mem: GuestMemoryMmap, + rx_queue: Queue, + tx_queue: Queue, + irq_signaler: IRQSignaler, + control: Arc, + ) { + let (input, output) = if let PortState::Inactive { input, output } = &mut self.state { + (mem::take(input), mem::take(output)) + } else { + // The threads are already started + return; + }; + + let rx_thread = input.map(|input| { + let mem = mem.clone(); + let irq_signaler = irq_signaler.clone(); + let port_id = self.port_id; + thread::spawn(move || process_rx(mem, rx_queue, irq_signaler, input, control, port_id)) + }); + + let stop = Arc::new(AtomicBool::new(false)); + let tx_thread = output.map(|output| { + let stop = stop.clone(); + thread::spawn(move || process_tx(mem, tx_queue, irq_signaler, output, stop)) + }); + + self.state = PortState::Active { + stop, + rx_thread, + tx_thread, + } + } + + pub fn flush(&mut self) { + if let PortState::Active { + stop, + tx_thread, + rx_thread: _, + } = &mut self.state + { + stop.store(true, Ordering::Release); + if let Some(tx_thread) = mem::take(tx_thread) { + tx_thread.thread().unpark(); + if let Err(e) = tx_thread.join() { + log::error!( + "Failed to flush tx for port {port_id}, thread panicked: {e:?}", + port_id = self.port_id + ) + } + } + }; + } +} diff --git a/src/devices/src/virtio/console/port_io.rs b/src/devices/src/virtio/console/port_io.rs new file mode 100644 index 000000000..cd0cb4fd2 --- /dev/null +++ b/src/devices/src/virtio/console/port_io.rs @@ -0,0 +1,220 @@ +use std::io::{self, ErrorKind}; +use std::os::fd::{AsRawFd, FromRawFd, OwnedFd, RawFd}; + +use libc::{fcntl, F_GETFL, F_SETFL, O_NONBLOCK, STDERR_FILENO, STDIN_FILENO, STDOUT_FILENO}; +use log::Level; +use nix::errno::Errno; +use nix::poll::{poll, PollFd, PollFlags}; +use nix::unistd::dup; +use utils::eventfd::EventFd; +use utils::eventfd::EFD_NONBLOCK; +use vm_memory::bitmap::Bitmap; +use vm_memory::{VolatileMemoryError, VolatileSlice, WriteVolatile}; + +pub trait PortInput { + fn read_volatile(&mut self, buf: &mut VolatileSlice) -> Result; + + fn wait_until_readable(&self); +} + +pub trait PortOutput { + fn write_volatile(&mut self, buf: &VolatileSlice) -> Result; + + fn wait_until_writable(&self); +} + +pub fn stdin() -> Result, nix::Error> { + let fd = dup_raw_fd_into_owned(STDIN_FILENO)?; + make_non_blocking(&fd)?; + Ok(Box::new(PortInputFd(fd))) +} + +pub fn stdout() -> Result, nix::Error> { + output_to_raw_fd_dup(STDOUT_FILENO) +} + +pub fn stderr() -> Result, nix::Error> { + output_to_raw_fd_dup(STDERR_FILENO) +} + +pub fn output_to_raw_fd_dup(fd: RawFd) -> Result, nix::Error> { + let fd = dup_raw_fd_into_owned(fd)?; + make_non_blocking(&fd)?; + Ok(Box::new(PortOutputFd(fd))) +} + +pub fn output_to_log_as_err() -> Box { + Box::new(PortOutputLog::new()) +} + +struct PortInputFd(OwnedFd); + +impl AsRawFd for PortInputFd { + fn as_raw_fd(&self) -> RawFd { + self.0.as_raw_fd() + } +} + +impl PortInput for PortInputFd { + fn read_volatile(&mut self, buf: &mut VolatileSlice) -> io::Result { + // This source code is copied from vm-memory, except it fixes an issue, where + // the original code would does not handle handle EWOULDBLOCK + + let fd = self.as_raw_fd(); + let guard = buf.ptr_guard_mut(); + + let dst = guard.as_ptr().cast::(); + + // SAFETY: We got a valid file descriptor from `AsRawFd`. The memory pointed to by `dst` is + // valid for writes of length `buf.len() by the invariants upheld by the constructor + // of `VolatileSlice`. + let bytes_read = unsafe { libc::read(fd, dst, buf.len()) }; + + if bytes_read < 0 { + let err = std::io::Error::last_os_error(); + if err.kind() != ErrorKind::WouldBlock { + // We don't know if a partial read might have happened, so mark everything as dirty + buf.bitmap().mark_dirty(0, buf.len()); + } + + Err(err) + } else { + let bytes_read = bytes_read.try_into().unwrap(); + buf.bitmap().mark_dirty(0, bytes_read); + Ok(bytes_read) + } + } + + fn wait_until_readable(&self) { + let mut poll_fds = [PollFd::new(self.as_raw_fd(), PollFlags::POLLIN)]; + poll(&mut poll_fds, -1).expect("Failed to poll"); + } +} + +struct PortOutputFd(OwnedFd); + +impl AsRawFd for PortOutputFd { + fn as_raw_fd(&self) -> RawFd { + self.0.as_raw_fd() + } +} + +impl PortOutput for PortOutputFd { + fn write_volatile(&mut self, buf: &VolatileSlice) -> Result { + self.0.write_volatile(buf).map_err(|e| match e { + VolatileMemoryError::IOError(e) => e, + e => { + log::error!("Unsuported error from write_volatile: {e:?}"); + io::Error::new(ErrorKind::Other, e) + } + }) + } + + fn wait_until_writable(&self) { + let mut poll_fds = [PollFd::new(self.as_raw_fd(), PollFlags::POLLOUT)]; + poll(&mut poll_fds, -1).expect("Failed to poll"); + } +} + +fn dup_raw_fd_into_owned(raw_fd: RawFd) -> Result { + let fd = dup(raw_fd)?; + // SAFETY: the fd is valid because dup succeeded + Ok(unsafe { OwnedFd::from_raw_fd(fd) }) +} + +fn make_non_blocking(as_rw_fd: &impl AsRawFd) -> Result<(), nix::Error> { + let fd = as_rw_fd.as_raw_fd(); + unsafe { + let flags = fcntl(fd, F_GETFL, 0); + if flags < 0 { + return Err(Errno::last()); + } + + if fcntl(fd, F_SETFL, flags | O_NONBLOCK) < 0 { + return Err(Errno::last()); + } + } + Ok(()) +} + +// Utility to relay log from the VM (the kernel boot log and messages from init) +// to the rust log +#[derive(Default)] +pub struct PortOutputLog { + buf: Vec, +} + +impl PortOutputLog { + const FORCE_FLUSH_TRESHOLD: usize = 512; + const LOG_TARGET: &'static str = "init_or_kernel"; + + fn new() -> Self { + Self::default() + } + + fn force_flush(&mut self) { + log::log!(target: PortOutputLog::LOG_TARGET, Level::Error, "[missing newline]{}", String::from_utf8_lossy(&self.buf)); + self.buf.clear(); + } +} + +impl PortOutput for PortOutputLog { + fn write_volatile(&mut self, buf: &VolatileSlice) -> Result { + self.buf + .write_volatile(buf) + .map_err(|e| io::Error::new(ErrorKind::Other, e))?; + + let mut start = 0; + for (i, ch) in self.buf.iter().cloned().enumerate() { + if ch == b'\n' { + log::log!(target: PortOutputLog::LOG_TARGET, Level::Error, "{}", String::from_utf8_lossy(&self.buf[start..i])); + start = i + 1; + } + } + self.buf.drain(0..start); + // Make sure to not grow the internal buffer forever! + if self.buf.len() > PortOutputLog::FORCE_FLUSH_TRESHOLD { + self.force_flush() + } + Ok(buf.len()) + } + + fn wait_until_writable(&self) {} +} + +pub struct PortInputSigInt { + sigint_evt: EventFd, +} + +impl PortInputSigInt { + pub fn new() -> Self { + PortInputSigInt { + sigint_evt: EventFd::new(EFD_NONBLOCK) + .expect("Failed to create EventFd for SIGINT signaling"), + } + } + + pub fn sigint_evt(&self) -> &EventFd { + &self.sigint_evt + } +} + +impl Default for PortInputSigInt { + fn default() -> Self { + Self::new() + } +} + +impl PortInput for PortInputSigInt { + fn read_volatile(&mut self, buf: &mut VolatileSlice) -> Result { + self.sigint_evt.read()?; + log::trace!("SIGINT received"); + buf.copy_from(&[3u8]); //ASCII 'ETX' -> generates SIGINIT in a terminal + Ok(1) + } + + fn wait_until_readable(&self) { + let mut poll_fds = [PollFd::new(self.sigint_evt.as_raw_fd(), PollFlags::POLLIN)]; + poll(&mut poll_fds, -1).expect("Failed to poll"); + } +} diff --git a/src/devices/src/virtio/console/port_queue_mapping.rs b/src/devices/src/virtio/console/port_queue_mapping.rs new file mode 100644 index 000000000..cd90884cd --- /dev/null +++ b/src/devices/src/virtio/console/port_queue_mapping.rs @@ -0,0 +1,74 @@ +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum QueueDirection { + Rx, + Tx, +} + +#[must_use] +pub(crate) fn port_id_to_queue_idx(queue_direction: QueueDirection, port_id: usize) -> usize { + match queue_direction { + QueueDirection::Rx if port_id == 0 => 0, + QueueDirection::Rx => 2 + 2 * port_id, + QueueDirection::Tx if port_id == 0 => 1, + QueueDirection::Tx => 2 + 2 * port_id + 1, + } +} + +#[must_use] +pub(crate) fn queue_idx_to_port_id(queue_index: usize) -> (QueueDirection, usize) { + let port_id = match queue_index { + 0 | 1 => 0, + 2 | 3 => { + panic!("Invalid argument: {queue_index} is not a valid receiveq nor transmitq index!") + } + _ => queue_index / 2 - 1, + }; + + let direction = if queue_index % 2 == 0 { + QueueDirection::Rx + } else { + QueueDirection::Tx + }; + + (direction, port_id) +} + +pub(crate) fn num_queues(num_ports: usize) -> usize { + // 2 control queues and then an rx and tx queue for each port + 2 + 2 * num_ports +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_port_id_to_queue_idx() { + assert_eq!(port_id_to_queue_idx(QueueDirection::Rx, 0), 0); + assert_eq!(port_id_to_queue_idx(QueueDirection::Tx, 0), 1); + assert_eq!(port_id_to_queue_idx(QueueDirection::Rx, 1), 4); + assert_eq!(port_id_to_queue_idx(QueueDirection::Tx, 1), 5); + } + + #[test] + fn test_queue_idx_to_port_id_ok() { + assert_eq!(queue_idx_to_port_id(0), (QueueDirection::Rx, 0)); + assert_eq!(queue_idx_to_port_id(1), (QueueDirection::Tx, 0)); + assert_eq!(queue_idx_to_port_id(4), (QueueDirection::Rx, 1)); + assert_eq!(queue_idx_to_port_id(5), (QueueDirection::Tx, 1)); + assert_eq!(queue_idx_to_port_id(6), (QueueDirection::Rx, 2)); + assert_eq!(queue_idx_to_port_id(7), (QueueDirection::Tx, 2)); + } + + #[test] + #[should_panic] + fn test_queue_idx_to_port_id_panic_rx_control() { + let _ = queue_idx_to_port_id(2); + } + + #[test] + #[should_panic] + fn test_queue_idx_to_port_id_panic_tx_control() { + let _ = queue_idx_to_port_id(3); + } +} diff --git a/src/devices/src/virtio/console/process_rx.rs b/src/devices/src/virtio/console/process_rx.rs new file mode 100644 index 000000000..06560e778 --- /dev/null +++ b/src/devices/src/virtio/console/process_rx.rs @@ -0,0 +1,97 @@ +use std::sync::Arc; +use std::{io, thread}; + +use vm_memory::{GuestMemory, GuestMemoryError, GuestMemoryMmap, GuestMemoryRegion}; + +use crate::virtio::console::console_control::ConsoleControl; +use crate::virtio::console::irq_signaler::IRQSignaler; +use crate::virtio::console::port_io::PortInput; +use crate::virtio::{DescriptorChain, Queue}; + +pub(crate) fn process_rx( + mem: GuestMemoryMmap, + mut queue: Queue, + irq: IRQSignaler, + mut input: Box, + control: Arc, + port_id: u32, +) { + let mem = &mem; + let mut eof = false; + + loop { + let head = pop_head_blocking(&mut queue, mem, &irq); + + let head_index = head.index; + let mut bytes_read = 0; + for chain in head.into_iter().writable() { + match read_to_desc(chain, input.as_mut(), &mut eof) { + Ok(0) => { + break; + } + Ok(len) => { + bytes_read += len; + } + Err(e) => { + log::error!("Failed to read: {e:?}") + } + } + } + + if bytes_read != 0 { + log::trace!("Rx {bytes_read} bytes queue len{}", queue.len(mem)); + queue.add_used(mem, head_index, bytes_read as u32); + } + + // We signal_used_queue only when we get WouldBlock or EOF + if eof { + irq.signal_used_queue("rx EOF"); + log::trace!("signaling EOF on port {port_id}"); + control.port_open(port_id, false); + return; + } else if bytes_read == 0 { + queue.undo_pop(); + irq.signal_used_queue("rx WouldBlock"); + input.wait_until_readable(); + } + } +} + +fn pop_head_blocking<'mem>( + queue: &mut Queue, + mem: &'mem GuestMemoryMmap, + irq: &IRQSignaler, +) -> DescriptorChain<'mem> { + loop { + match queue.pop(mem) { + Some(descriptor) => break descriptor, + None => { + irq.signal_used_queue("rx queue empty, parking"); + thread::park(); + log::trace!("rx unparked, queue len {}", queue.len(mem)) + } + } + } +} + +fn read_to_desc( + desc: DescriptorChain, + input: &mut (dyn PortInput + Send), + eof: &mut bool, +) -> Result { + desc.mem + .try_access(desc.len as usize, desc.addr, |_, len, addr, region| { + let mut target = region.get_slice(addr, len).unwrap(); + match input.read_volatile(&mut target) { + Ok(n) => { + if n == 0 { + *eof = true + } + Ok(n) + } + // We can't return an error otherwise we would not know how many bytes were processed before WouldBlock + Err(e) if e.kind() == io::ErrorKind::WouldBlock => Ok(0), + Err(e) => Err(GuestMemoryError::IOError(e)), + } + }) +} diff --git a/src/devices/src/virtio/console/process_tx.rs b/src/devices/src/virtio/console/process_tx.rs new file mode 100644 index 000000000..ad89b5140 --- /dev/null +++ b/src/devices/src/virtio/console/process_tx.rs @@ -0,0 +1,96 @@ +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; +use std::{io, thread}; + +use vm_memory::{GuestMemory, GuestMemoryError, GuestMemoryMmap, GuestMemoryRegion}; + +use crate::virtio::console::irq_signaler::IRQSignaler; +use crate::virtio::console::port_io::PortOutput; +use crate::virtio::{DescriptorChain, Queue}; + +pub(crate) fn process_tx( + mem: GuestMemoryMmap, + mut queue: Queue, + irq: IRQSignaler, + mut output: Box, + stop: Arc, +) { + loop { + let Some(head) = pop_head_blocking(&mut queue, &mem, &irq, &stop) else { + return; + }; + + let head_index = head.index; + let mut bytes_written = 0; + + for desc in head.into_iter().readable() { + let desc_len = desc.len as usize; + match write_desc_to_output(desc, output.as_mut(), &irq) { + Ok(0) => { + break; + } + Ok(n) => { + assert_eq!(n, desc_len); + bytes_written += n; + } + Err(e) => { + log::error!("Failed to write output: {e}"); + } + } + } + + if bytes_written == 0 { + log::trace!("Tx Add used {bytes_written}"); + queue.undo_pop(); + } else { + log::trace!("Tx add used {bytes_written}"); + queue.add_used(&mem, head_index, bytes_written as u32); + } + } +} + +fn pop_head_blocking<'mem>( + queue: &mut Queue, + mem: &'mem GuestMemoryMmap, + irq: &IRQSignaler, + stop: &AtomicBool, +) -> Option> { + loop { + match queue.pop(mem) { + Some(descriptor) => break Some(descriptor), + None => { + irq.signal_used_queue("tx queue empty, parking"); + thread::park(); + if stop.load(Ordering::Acquire) { + break None; + } + log::trace!("tx unparked, queue len {}", queue.len(mem)) + } + } + } +} + +fn write_desc_to_output( + desc: DescriptorChain, + output: &mut (dyn PortOutput + Send), + irq: &IRQSignaler, +) -> Result { + desc.mem + .try_access(desc.len as usize, desc.addr, |_, len, addr, region| { + let src = region.get_slice(addr, len).unwrap(); + loop { + log::trace!("Tx {:?}, write_volatile {len} bytes", src); + match output.write_volatile(&src) { + // try_access seem to handle partial write for us (we will be invoked again with an offset) + Ok(n) => break Ok(n), + // We can't return an error otherwise we would not know how many bytes were processed before WouldBlock + Err(e) if e.kind() == io::ErrorKind::WouldBlock => { + log::trace!("Tx wait for output (would block)"); + irq.signal_used_queue("tx waiting for output"); + output.wait_until_writable(); + } + Err(e) => break Err(GuestMemoryError::IOError(e)), + } + } + }) +} diff --git a/src/devices/src/virtio/device.rs b/src/devices/src/virtio/device.rs index 21134acce..73e4ed9ac 100644 --- a/src/devices/src/virtio/device.rs +++ b/src/devices/src/virtio/device.rs @@ -125,6 +125,17 @@ pub trait VirtioDevice: AsAny + Send { } } +pub trait VmmExitObserver { + /// Callback to finish processing or cleanup the device resources + fn on_vmm_exit(&mut self) {} +} + +impl VmmExitObserver for F { + fn on_vmm_exit(&mut self) { + self() + } +} + impl std::fmt::Debug for dyn VirtioDevice { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "VirtioDevice type {}", self.device_type()) diff --git a/src/devices/src/virtio/fs/descriptor_utils.rs b/src/devices/src/virtio/fs/descriptor_utils.rs index 1a5a4cb43..8af40cece 100644 --- a/src/devices/src/virtio/fs/descriptor_utils.rs +++ b/src/devices/src/virtio/fs/descriptor_utils.rs @@ -545,7 +545,7 @@ mod tests { use DescriptorType::*; let memory_start_addr = GuestAddress(0x0); - let memory = GuestMemoryMmap::from_ranges(&vec![(memory_start_addr, 0x10000)]).unwrap(); + let memory = GuestMemoryMmap::from_ranges(&[(memory_start_addr, 0x10000)]).unwrap(); let chain = create_descriptor_chain( &memory, @@ -564,7 +564,7 @@ mod tests { assert_eq!(reader.available_bytes(), 106); assert_eq!(reader.bytes_read(), 0); - let mut buffer = [0 as u8; 64]; + let mut buffer = [0_u8; 64]; if let Err(_) = reader.read_exact(&mut buffer) { panic!("read_exact should not fail here"); } @@ -586,7 +586,7 @@ mod tests { use DescriptorType::*; let memory_start_addr = GuestAddress(0x0); - let memory = GuestMemoryMmap::from_ranges(&vec![(memory_start_addr, 0x10000)]).unwrap(); + let memory = GuestMemoryMmap::from_ranges(&[(memory_start_addr, 0x10000)]).unwrap(); let chain = create_descriptor_chain( &memory, @@ -605,7 +605,7 @@ mod tests { assert_eq!(writer.available_bytes(), 106); assert_eq!(writer.bytes_written(), 0); - let mut buffer = [0 as u8; 64]; + let mut buffer = [0_u8; 64]; if let Err(_) = writer.write_all(&mut buffer) { panic!("write_all should not fail here"); } @@ -627,7 +627,7 @@ mod tests { use DescriptorType::*; let memory_start_addr = GuestAddress(0x0); - let memory = GuestMemoryMmap::from_ranges(&vec![(memory_start_addr, 0x10000)]).unwrap(); + let memory = GuestMemoryMmap::from_ranges(&[(memory_start_addr, 0x10000)]).unwrap(); let chain = create_descriptor_chain( &memory, @@ -652,7 +652,7 @@ mod tests { use DescriptorType::*; let memory_start_addr = GuestAddress(0x0); - let memory = GuestMemoryMmap::from_ranges(&vec![(memory_start_addr, 0x10000)]).unwrap(); + let memory = GuestMemoryMmap::from_ranges(&[(memory_start_addr, 0x10000)]).unwrap(); let chain = create_descriptor_chain( &memory, @@ -677,7 +677,7 @@ mod tests { use DescriptorType::*; let memory_start_addr = GuestAddress(0x0); - let memory = GuestMemoryMmap::from_ranges(&vec![(memory_start_addr, 0x10000)]).unwrap(); + let memory = GuestMemoryMmap::from_ranges(&[(memory_start_addr, 0x10000)]).unwrap(); let chain = create_descriptor_chain( &memory, @@ -725,7 +725,7 @@ mod tests { use DescriptorType::*; let memory_start_addr = GuestAddress(0x0); - let memory = GuestMemoryMmap::from_ranges(&vec![(memory_start_addr, 0x10000)]).unwrap(); + let memory = GuestMemoryMmap::from_ranges(&[(memory_start_addr, 0x10000)]).unwrap(); let secret: Le32 = 0x12345678.into(); @@ -764,7 +764,7 @@ mod tests { use DescriptorType::*; let memory_start_addr = GuestAddress(0x0); - let memory = GuestMemoryMmap::from_ranges(&vec![(memory_start_addr, 0x10000)]).unwrap(); + let memory = GuestMemoryMmap::from_ranges(&[(memory_start_addr, 0x10000)]).unwrap(); let chain = create_descriptor_chain( &memory, @@ -794,7 +794,7 @@ mod tests { use DescriptorType::*; let memory_start_addr = GuestAddress(0x0); - let memory = GuestMemoryMmap::from_ranges(&vec![(memory_start_addr, 0x10000)]).unwrap(); + let memory = GuestMemoryMmap::from_ranges(&[(memory_start_addr, 0x10000)]).unwrap(); let chain = create_descriptor_chain( &memory, @@ -823,7 +823,7 @@ mod tests { use DescriptorType::*; let memory_start_addr = GuestAddress(0x0); - let memory = GuestMemoryMmap::from_ranges(&vec![(memory_start_addr, 0x10000)]).unwrap(); + let memory = GuestMemoryMmap::from_ranges(&[(memory_start_addr, 0x10000)]).unwrap(); let chain = create_descriptor_chain( &memory, @@ -852,7 +852,7 @@ mod tests { use DescriptorType::*; let memory_start_addr = GuestAddress(0x0); - let memory = GuestMemoryMmap::from_ranges(&vec![(memory_start_addr, 0x10000)]).unwrap(); + let memory = GuestMemoryMmap::from_ranges(&[(memory_start_addr, 0x10000)]).unwrap(); let chain = create_descriptor_chain( &memory, @@ -881,7 +881,7 @@ mod tests { use DescriptorType::*; let memory_start_addr = GuestAddress(0x0); - let memory = GuestMemoryMmap::from_ranges(&vec![(memory_start_addr, 0x10000)]).unwrap(); + let memory = GuestMemoryMmap::from_ranges(&[(memory_start_addr, 0x10000)]).unwrap(); let chain = create_descriptor_chain( &memory, @@ -910,7 +910,7 @@ mod tests { use DescriptorType::*; let memory_start_addr = GuestAddress(0x0); - let memory = GuestMemoryMmap::from_ranges(&vec![(memory_start_addr, 0x10000)]).unwrap(); + let memory = GuestMemoryMmap::from_ranges(&[(memory_start_addr, 0x10000)]).unwrap(); let chain = create_descriptor_chain( &memory, @@ -939,7 +939,7 @@ mod tests { use DescriptorType::*; let memory_start_addr = GuestAddress(0x0); - let memory = GuestMemoryMmap::from_ranges(&vec![(memory_start_addr, 0x10000)]).unwrap(); + let memory = GuestMemoryMmap::from_ranges(&[(memory_start_addr, 0x10000)]).unwrap(); let chain = create_descriptor_chain( &memory, @@ -951,7 +951,7 @@ mod tests { .expect("create_descriptor_chain failed"); let mut reader = Reader::new(&memory, chain).expect("failed to create Reader"); - let mut buf = vec![0u8; 64]; + let mut buf = [0u8; 64]; assert_eq!( reader.read(&mut buf[..]).expect("failed to read to buffer"), 48 @@ -963,7 +963,7 @@ mod tests { use DescriptorType::*; let memory_start_addr = GuestAddress(0x0); - let memory = GuestMemoryMmap::from_ranges(&vec![(memory_start_addr, 0x10000)]).unwrap(); + let memory = GuestMemoryMmap::from_ranges(&[(memory_start_addr, 0x10000)]).unwrap(); let chain = create_descriptor_chain( &memory, @@ -975,7 +975,7 @@ mod tests { .expect("create_descriptor_chain failed"); let mut writer = Writer::new(&memory, chain).expect("failed to create Writer"); - let buf = vec![0xdeu8; 64]; + let buf = [0xdeu8; 64]; assert_eq!( writer.write(&buf[..]).expect("failed to write from buffer"), 48 diff --git a/src/devices/src/virtio/fs/filesystem.rs b/src/devices/src/virtio/fs/filesystem.rs index a4081fdcf..2c9c16e89 100644 --- a/src/devices/src/virtio/fs/filesystem.rs +++ b/src/devices/src/virtio/fs/filesystem.rs @@ -16,7 +16,6 @@ pub use super::fuse::FsOptions; pub use fuse::OpenOptions; pub use fuse::RemovemappingOne; pub use fuse::SetattrValid; -pub use fuse::ROOT_ID; /// Information about a path in the filesystem. pub struct Entry { diff --git a/src/devices/src/virtio/mmio.rs b/src/devices/src/virtio/mmio.rs index 04dd8ed30..d2a1cfb39 100644 --- a/src/devices/src/virtio/mmio.rs +++ b/src/devices/src/virtio/mmio.rs @@ -470,7 +470,7 @@ pub(crate) mod tests { } fn set_device_status(d: &mut MmioTransport, status: u32) { - let mut buf = vec![0; 4]; + let mut buf = [0; 4]; write_le_u32(&mut buf[..], status); d.write(0, 0x70, &buf[..]); } @@ -778,7 +778,7 @@ pub(crate) mod tests { device_status::ACKNOWLEDGE | device_status::DRIVER | device_status::FEATURES_OK ); - let mut buf = vec![0; 4]; + let mut buf = [0; 4]; let queue_len = d.locked_device().queues().len(); for q in 0..queue_len { d.queue_select = q as u32; @@ -831,7 +831,7 @@ pub(crate) mod tests { ); // Setup queue data structures - let mut buf = vec![0; 4]; + let mut buf = [0; 4]; let queues_count = d.locked_device().queues().len(); for q in 0..queues_count { d.queue_select = q as u32; @@ -865,7 +865,7 @@ pub(crate) mod tests { fn test_bus_device_reset() { let m = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x1000)]).unwrap(); let mut d = MmioTransport::new(m, Arc::new(Mutex::new(DummyDevice::new()))); - let mut buf = vec![0; 4]; + let mut buf = [0; 4]; assert!(!d.are_queues_valid()); assert!(!d.locked_device().is_activated()); diff --git a/src/devices/src/virtio/net/mod.rs b/src/devices/src/virtio/net/mod.rs index beabdc636..e7d365540 100644 --- a/src/devices/src/virtio/net/mod.rs +++ b/src/devices/src/virtio/net/mod.rs @@ -16,8 +16,6 @@ pub mod event_handler; mod passt; pub use self::device::Net; -pub use self::event_handler::*; - #[derive(Debug)] pub enum Error { /// EventFd error. diff --git a/src/utils/src/linux/epoll.rs b/src/utils/src/linux/epoll.rs index 4d5b39927..74ef9446e 100644 --- a/src/utils/src/linux/epoll.rs +++ b/src/utils/src/linux/epoll.rs @@ -267,20 +267,12 @@ mod tests { // For EPOLL_CTL_ADD behavior we will try to add some fds with different event masks into // the interest list of epoll instance. assert!(epoll - .ctl( - ControlOperation::Add, - event_fd_1.as_raw_fd() as i32, - &event_1 - ) + .ctl(ControlOperation::Add, event_fd_1.as_raw_fd(), &event_1) .is_ok()); // We can't add twice the same fd to epoll interest list. assert!(epoll - .ctl( - ControlOperation::Add, - event_fd_1.as_raw_fd() as i32, - &event_1 - ) + .ctl(ControlOperation::Add, event_fd_1.as_raw_fd(), &event_1) .is_err()); let event_fd_2 = EventFd::new(libc::EFD_NONBLOCK).unwrap(); @@ -288,7 +280,7 @@ mod tests { assert!(epoll .ctl( ControlOperation::Add, - event_fd_2.as_raw_fd() as i32, + event_fd_2.as_raw_fd(), // For this fd, we want an Event instance that has `data` field set to other // value than the value of the fd and `events` without EPOLLIN type set. &EpollEvent::new(EventSet::OUT, 10) @@ -301,11 +293,7 @@ mod tests { let event_fd_3 = EventFd::new(libc::EFD_NONBLOCK).unwrap(); let event_3 = EpollEvent::new(EventSet::OUT | EventSet::IN, event_fd_3.as_raw_fd() as u64); assert!(epoll - .ctl( - ControlOperation::Add, - event_fd_3.as_raw_fd() as i32, - &event_3 - ) + .ctl(ControlOperation::Add, event_fd_3.as_raw_fd(), &event_3) .is_ok()); // Let's check `epoll_wait()` behavior for our epoll instance. @@ -341,11 +329,7 @@ mod tests { // that we want to monitor this time on event_fd_1. event_1 = EpollEvent::new(EventSet::OUT, 20); assert!(epoll - .ctl( - ControlOperation::Modify, - event_fd_1.as_raw_fd() as i32, - &event_1 - ) + .ctl(ControlOperation::Modify, event_fd_1.as_raw_fd(), &event_1) .is_ok()); let event_fd_4 = EventFd::new(libc::EFD_NONBLOCK).unwrap(); @@ -353,7 +337,7 @@ mod tests { assert!(epoll .ctl( ControlOperation::Modify, - event_fd_4.as_raw_fd() as i32, + event_fd_4.as_raw_fd(), &EpollEvent::default() ) .is_err()); @@ -371,7 +355,7 @@ mod tests { assert!(epoll .ctl( ControlOperation::Modify, - event_fd_1.as_raw_fd() as i32, + event_fd_1.as_raw_fd(), &EpollEvent::default() ) .is_ok()); @@ -386,7 +370,7 @@ mod tests { assert!(epoll .ctl( ControlOperation::Delete, - event_fd_2.as_raw_fd() as i32, + event_fd_2.as_raw_fd(), &EpollEvent::default() ) .is_ok()); @@ -404,7 +388,7 @@ mod tests { assert!(epoll .ctl( ControlOperation::Delete, - event_fd_4.as_raw_fd() as i32, + event_fd_4.as_raw_fd(), &EpollEvent::default() ) .is_err()); diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml index 93d7f604f..106acea79 100644 --- a/src/vmm/Cargo.toml +++ b/src/vmm/Cargo.toml @@ -30,6 +30,7 @@ serde = { version = "1.0.125", optional = true } serde_json = { version = "1.0.64", optional = true } sev = { version = "1.2.0", features = ["openssl"], optional = true } curl = { version = "0.4", optional = true } +nix = "0.24.1" [target.'cfg(target_arch = "x86_64")'.dependencies] cpuid = { path = "../cpuid" } diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 918db27f3..bb5d8596d 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -7,7 +7,7 @@ use crossbeam_channel::unbounded; use std::fmt::{Display, Formatter}; use std::io; -use std::os::unix::io::{AsRawFd, RawFd}; +use std::os::fd::AsRawFd; use std::sync::{Arc, Mutex}; use super::{Error, Vmm}; @@ -21,15 +21,19 @@ use devices::legacy::Serial; use devices::virtio::Net; #[cfg(not(feature = "tee"))] use devices::virtio::VirtioShmRegion; -use devices::virtio::{MmioTransport, Vsock}; +use devices::virtio::{port_io, MmioTransport, PortDescription, Vsock}; #[cfg(feature = "tee")] use kbs_types::Tee; +use crate::device_manager; #[cfg(feature = "tee")] use crate::resources::TeeConfig; #[cfg(target_os = "linux")] +use crate::signal_handler::register_sigint_handler; +#[cfg(target_os = "linux")] use crate::signal_handler::register_sigwinch_handler; +use crate::terminal::term_set_raw_mode; #[cfg(feature = "tee")] use crate::vmm_config::block::BlockBuilder; use crate::vmm_config::boot_source::DEFAULT_KERNEL_CMDLINE; @@ -42,15 +46,15 @@ use crate::vstate::KvmContext; #[cfg(all(target_os = "linux", feature = "tee"))] use crate::vstate::MeasuredRegion; use crate::vstate::{Error as VstateError, Vcpu, VcpuConfig, Vm}; -use crate::{device_manager, VmmEventsObserver}; use arch::ArchMemoryInfo; #[cfg(feature = "tee")] use arch::InitrdConfig; #[cfg(feature = "tee")] use kvm_bindings::KVM_MAX_CPUID_ENTRIES; +use libc::{STDERR_FILENO, STDIN_FILENO, STDOUT_FILENO}; +use nix::unistd::isatty; use polly::event_manager::{Error as EventManagerError, EventManager}; use utils::eventfd::EventFd; -use utils::terminal::Terminal; use utils::time::TimestampUs; #[cfg(all(target_os = "linux", target_arch = "x86_64", not(feature = "tee")))] use vm_memory::mmap::GuestRegionMmap; @@ -251,52 +255,6 @@ impl Display for StartMicrovmError { } } -// Wrapper over io::Stdin that implements `Serial::ReadableFd` and `vmm::VmmEventsObserver`. -pub struct SerialStdin(io::Stdin); -impl SerialStdin { - /// Returns a `SerialStdin` wrapper over `io::stdin`. - pub fn get() -> Self { - let stdin = io::stdin(); - stdin.lock().set_raw_mode().unwrap(); - SerialStdin(stdin) - } - - pub fn restore() { - let stdin = io::stdin(); - stdin.lock().set_canon_mode().unwrap(); - } -} - -impl io::Read for SerialStdin { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - self.0.read(buf) - } -} - -impl AsRawFd for SerialStdin { - fn as_raw_fd(&self) -> RawFd { - self.0.as_raw_fd() - } -} - -impl devices::legacy::ReadableFd for SerialStdin {} - -impl VmmEventsObserver for SerialStdin { - fn on_vmm_boot(&mut self) -> std::result::Result<(), utils::errno::Error> { - // Set raw mode for stdin. - self.0.lock().set_raw_mode().map_err(|e| { - warn!("Cannot set raw mode for the terminal. {:?}", e); - e - }) - } - fn on_vmm_stop(&mut self) -> std::result::Result<(), utils::errno::Error> { - self.0.lock().set_canon_mode().map_err(|e| { - warn!("Cannot set canonical mode for the terminal. {:?}", e); - e - }) - } -} - /// Builds and starts a microVM based on the current Firecracker VmResources configuration. /// /// This is the default build recipe, one could build other microVM flavors by using the @@ -558,12 +516,12 @@ pub fn build_microvm( let shm_region = None; let mut vmm = Vmm { - //events_observer: Some(Box::new(SerialStdin::get())), guest_memory, arch_memory_info, kernel_cmdline, vcpus_handles: Vec::new(), exit_evt, + exit_observers: Vec::new(), vm, mmio_device_manager, #[cfg(target_arch = "x86_64")] @@ -646,6 +604,9 @@ pub fn build_microvm( vmm.start_vcpus(vcpus) .map_err(StartMicrovmError::Internal)?; + // Clippy thinks we don't need Arc std::result::Result<(), StartMicrovmError> { use self::StartMicrovmError::*; - let console = Arc::new(Mutex::new( - devices::virtio::Console::new(Box::new(SerialStdin::get()), Box::new(io::stdout())) - .unwrap(), - )); + let stdin_is_terminal = isatty(STDIN_FILENO).unwrap_or(false); + let stdout_is_terminal = isatty(STDOUT_FILENO).unwrap_or(false); + let stderr_is_terminal = isatty(STDERR_FILENO).unwrap_or(false); - if let Some(intc) = intc { - console.lock().unwrap().set_intc(intc); + if let Err(e) = term_set_raw_mode(!stdin_is_terminal) { + log::error!("Failed to set terminal to raw mode: {e}") } - // Stdin may not be pollable (i.e. when running a container without "-i"). If that's - // the case, disable the interactive mode in the console. - if !event_manager.is_pollable(io::stdin().as_raw_fd()) { - console.lock().unwrap().set_interactive(false) + let console_input = if stdin_is_terminal { + Some(port_io::stdin().unwrap()) + } else { + #[cfg(target_os = "linux")] + { + let sigint_input = port_io::PortInputSigInt::new(); + let sigint_input_fd = sigint_input.sigint_evt().as_raw_fd(); + register_sigint_handler(sigint_input_fd).map_err(RegisterFsSigwinch)?; + Some(Box::new(sigint_input) as _) + } + #[cfg(not(target_os = "linux"))] + None + }; + + let console_output = if stdout_is_terminal { + Some(port_io::stdout().unwrap()) + } else { + Some(port_io::output_to_log_as_err()) + }; + + let mut ports = vec![PortDescription::Console { + input: console_input, + output: console_output, + }]; + + if !stdin_is_terminal { + ports.push(PortDescription::InputPipe { + name: "krun-stdin".into(), + input: port_io::stdin().unwrap(), + }) + } + + if !stdout_is_terminal { + ports.push(PortDescription::OutputPipe { + name: "krun-stdout".into(), + output: port_io::stdout().unwrap(), + }) + }; + + if !stderr_is_terminal { + ports.push(PortDescription::OutputPipe { + name: "krun-stderr".into(), + output: port_io::stderr().unwrap(), + }); + } + + let console = Arc::new(Mutex::new(devices::virtio::Console::new(ports).unwrap())); + + vmm.exit_observers.push(console.clone()); + + if let Some(intc) = intc { + console.lock().unwrap().set_intc(intc); } event_manager @@ -1279,12 +1287,6 @@ pub mod tests { create_guest_memory(mem_size_mib, kernel_region, kernel_guest_addr, kernel_size) } - #[test] - fn test_stdin_wrapper() { - let wrapper = SerialStdin::get(); - assert_eq!(wrapper.as_raw_fd(), io::stdin().as_raw_fd()) - } - #[test] #[cfg(target_arch = "x86_64")] fn test_create_vcpus_x86_64() { diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index df18c18d6..dcf7d43fb 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -30,25 +30,29 @@ mod linux; use crate::linux::vstate; #[cfg(target_os = "macos")] mod macos; +mod terminal; + #[cfg(target_os = "macos")] use macos::vstate; use std::fmt::{Display, Formatter}; use std::io; use std::os::unix::io::AsRawFd; -use std::sync::Mutex; +use std::sync::{Arc, Mutex}; #[cfg(target_os = "linux")] use std::time::Duration; #[cfg(target_arch = "x86_64")] use crate::device_manager::legacy::PortIODeviceManager; use crate::device_manager::mmio::MMIODeviceManager; +use crate::terminal::term_set_canonical_mode; #[cfg(target_os = "linux")] use crate::vstate::VcpuEvent; use crate::vstate::{Vcpu, VcpuHandle, VcpuResponse, Vm}; use arch::ArchMemoryInfo; use arch::DeviceType; use arch::InitrdConfig; +use devices::virtio::VmmExitObserver; use devices::BusDevice; use kernel::cmdline::Cmdline as KernelCmdline; use polly::event_manager::{self, EventManager, Subscriber}; @@ -178,8 +182,6 @@ pub type Result = std::result::Result; /// Contains the state and associated methods required for the Firecracker VMM. pub struct Vmm { - //events_observer: Option>, - // Guest VM core resources. guest_memory: GuestMemoryMmap, arch_memory_info: ArchMemoryInfo, @@ -189,6 +191,7 @@ pub struct Vmm { vcpus_handles: Vec, exit_evt: EventFd, vm: Vm, + exit_observers: Vec>>, // Guest VM devices. mmio_device_manager: MMIODeviceManager, @@ -210,10 +213,6 @@ impl Vmm { pub fn start_vcpus(&mut self, mut vcpus: Vec) -> Result<()> { let vcpu_count = vcpus.len(); - //if let Some(observer) = self.events_observer.as_mut() { - // observer.on_vmm_boot().map_err(Error::VmmObserverInit)?; - //} - Vcpu::register_kick_signal_handler(); self.vcpus_handles.reserve(vcpu_count); @@ -329,13 +328,16 @@ impl Vmm { pub fn stop(&mut self, exit_code: i32) { info!("Vmm is stopping."); - //if let Some(observer) = self.events_observer.as_mut() { - // if let Err(e) = observer.on_vmm_stop() { - // warn!("{}", Error::VmmObserverTeardown(e)); - // } - //} + if let Err(e) = term_set_canonical_mode() { + log::error!("Failed to restore terminal to canonical mode: {e}") + } - builder::SerialStdin::restore(); + for observer in &self.exit_observers { + observer + .lock() + .expect("Poisoned mutex for exit observer") + .on_vmm_exit(); + } // Exit from Firecracker using the provided exit code. Safe because we're terminating // the process anyway. diff --git a/src/vmm/src/signal_handler.rs b/src/vmm/src/signal_handler.rs index d87afbc6e..9a6067bfc 100644 --- a/src/vmm/src/signal_handler.rs +++ b/src/vmm/src/signal_handler.rs @@ -4,7 +4,7 @@ use std::os::unix::io::RawFd; use std::sync::atomic::{AtomicI32, Ordering}; -use libc::{_exit, c_int, c_void, siginfo_t, SIGBUS, SIGSEGV, SIGSYS, SIGWINCH}; +use libc::{_exit, c_int, c_void, siginfo_t, SIGBUS, SIGINT, SIGSEGV, SIGSYS, SIGWINCH}; use utils::signal::register_signal_handler; // The offset of `si_syscall` (offending syscall identifier) within the siginfo structure @@ -17,6 +17,7 @@ const SI_OFF_SYSCALL: isize = 6; const SYS_SECCOMP_CODE: i32 = 1; static CONSOLE_SIGWINCH_FD: AtomicI32 = AtomicI32::new(-1); +static CONSOLE_SIGINT_FD: AtomicI32 = AtomicI32::new(-1); /// Signal handler for `SIGSYS`. /// @@ -94,6 +95,21 @@ extern "C" fn sigwinch_handler(num: c_int, info: *mut siginfo_t, _unused: *mut c let _ = unsafe { libc::write(console_fd, &val as *const _ as *const c_void, 8) }; } +extern "C" fn sigint_handler(num: c_int, info: *mut siginfo_t, _unused: *mut c_void) { + // Safe because we're just reading some fields from a supposedly valid argument. + let si_signo = unsafe { (*info).si_signo }; + + // Sanity check. The condition should never be true. + if num != si_signo || num != SIGINT { + // Safe because we're terminating the process anyway. + unsafe { _exit(i32::from(super::FC_EXIT_CODE_UNEXPECTED_ERROR)) }; + } + + let val: u64 = 1; + let console_fd = CONSOLE_SIGINT_FD.load(Ordering::Relaxed); + let _ = unsafe { libc::write(console_fd, &val as *const _ as *const c_void, 8) }; +} + pub fn register_sigwinch_handler(console_fd: RawFd) -> utils::errno::Result<()> { CONSOLE_SIGWINCH_FD.store(console_fd, Ordering::Relaxed); @@ -102,6 +118,14 @@ pub fn register_sigwinch_handler(console_fd: RawFd) -> utils::errno::Result<()> Ok(()) } +pub fn register_sigint_handler(sigint_fd: RawFd) -> utils::errno::Result<()> { + CONSOLE_SIGINT_FD.store(sigint_fd, Ordering::Relaxed); + + register_signal_handler(SIGINT, sigint_handler)?; + + Ok(()) +} + /// Registers all the required signal handlers. /// /// Custom handlers are installed for: `SIGBUS`, `SIGSEGV`, `SIGSYS`. diff --git a/src/vmm/src/terminal.rs b/src/vmm/src/terminal.rs new file mode 100644 index 000000000..da6035b7c --- /dev/null +++ b/src/vmm/src/terminal.rs @@ -0,0 +1,55 @@ +use libc::{STDERR_FILENO, STDIN_FILENO, STDOUT_FILENO}; +use nix::sys::termios::{tcgetattr, tcsetattr, LocalFlags, SetArg}; +use nix::unistd::isatty; +use std::os::fd::RawFd; + +pub fn term_set_raw_mode(handle_signals_by_terminal: bool) -> Result<(), nix::Error> { + if let Some(fd) = get_connected_term_fd() { + term_fd_set_raw_mode(fd, handle_signals_by_terminal) + } else { + Ok(()) + } +} + +pub fn term_set_canonical_mode() -> Result<(), nix::Error> { + if let Some(fd) = get_connected_term_fd() { + term_fd_set_canonical_mode(fd) + } else { + Ok(()) + } +} + +pub fn term_fd_set_raw_mode( + term: RawFd, + handle_signals_by_terminal: bool, +) -> Result<(), nix::Error> { + let mut termios = tcgetattr(term)?; + + let mut mask = LocalFlags::ECHO | LocalFlags::ICANON; + if !handle_signals_by_terminal { + mask |= LocalFlags::ISIG + } + + termios.local_flags &= !mask; + tcsetattr(term, SetArg::TCSANOW, &termios)?; + Ok(()) +} + +pub fn term_fd_set_canonical_mode(term: RawFd) -> Result<(), nix::Error> { + let mut termios = tcgetattr(term)?; + termios.local_flags |= LocalFlags::ECHO | LocalFlags::ICANON | LocalFlags::ISIG; + tcsetattr(term, SetArg::TCSANOW, &termios)?; + Ok(()) +} + +pub fn get_connected_term_fd() -> Option { + if isatty(STDIN_FILENO).unwrap_or(false) { + Some(STDIN_FILENO) + } else if isatty(STDOUT_FILENO).unwrap_or(false) { + Some(STDOUT_FILENO) + } else if isatty(STDERR_FILENO).unwrap_or(false) { + Some(STDERR_FILENO) + } else { + None + } +}