diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 4838ab0eb51..71780aef4e5 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -5,6 +5,7 @@ use std::fmt::Debug; use std::io::{self}; +use std::os::fd::AsFd; use std::os::unix::fs::MetadataExt; #[cfg(feature = "gdb")] use std::sync::mpsc; @@ -65,7 +66,7 @@ use crate::vmm_config::instance_info::InstanceInfo; use crate::vmm_config::machine_config::MachineConfigError; use crate::vmm_config::snapshot::{MemBackendConfig, MemBackendType}; use crate::vstate::kvm::Kvm; -use crate::vstate::memory::Bounce; +use crate::vstate::memory::MaybeBounce; use crate::vstate::vcpu::{Vcpu, VcpuError}; use crate::vstate::vm::{KVM_GMEM_NO_DIRECT_MAP, Vm}; use crate::{EventManager, Vmm, VmmError, device_manager}; @@ -235,6 +236,11 @@ pub fn build_microvm_for_boot( false => None, }; + #[cfg(target_arch = "x86_64")] + if secret_free { + boot_cmdline.insert_str("no-kvmclock")?; + } + let (mut vmm, mut vcpus) = create_vmm_and_vcpus( instance_info, event_manager, @@ -274,7 +280,7 @@ pub fn build_microvm_for_boot( } let entry_point = load_kernel( - Bounce(&boot_config.kernel_file, secret_free), + MaybeBounce::new(boot_config.kernel_file.try_clone().unwrap(), secret_free), vmm.vm.guest_memory(), )?; let initrd = match &boot_config.initrd_file { @@ -286,7 +292,7 @@ pub fn build_microvm_for_boot( Some(InitrdConfig::from_reader( vmm.vm.guest_memory(), - Bounce(initrd_file, secret_free), + MaybeBounce::new(initrd_file.as_fd(), secret_free), u64_to_usize(size), )?) } @@ -321,16 +327,24 @@ pub fn build_microvm_for_boot( &mut boot_cmdline, vm_resources.block.devices.iter(), event_manager, + secret_free, )?; attach_net_devices( &mut vmm, &mut boot_cmdline, vm_resources.net_builder.iter(), event_manager, + secret_free, )?; if let Some(unix_vsock) = vm_resources.vsock.get() { - attach_unixsock_vsock_device(&mut vmm, &mut boot_cmdline, unix_vsock, event_manager)?; + attach_unixsock_vsock_device( + &mut vmm, + &mut boot_cmdline, + unix_vsock, + event_manager, + secret_free, + )?; } if let Some(entropy) = vm_resources.entropy.get() { @@ -707,6 +721,7 @@ fn attach_virtio_device( device: Arc>, cmdline: &mut LoaderKernelCmdline, is_vhost_user: bool, + needs_bouncing: bool, ) -> Result<(), MmioError> { event_manager.add_subscriber(device.clone()); @@ -714,7 +729,11 @@ fn attach_virtio_device( // themselves are created when the corresponding PUT API calls are made, and at that // point we don't know yet whether swiotlb should be enabled or not. if vmm.vm.has_swiotlb() { + // If swiotlb was requested via the API, enable it independently of whether + // bouncing is actually required (e.g. of whether secret hiding is enabled) device.lock().unwrap().force_swiotlb(); + } else if needs_bouncing { + device.lock().unwrap().force_userspace_bounce_buffers(); } // The device mutex mustn't be locked here otherwise it will deadlock. @@ -772,6 +791,7 @@ fn attach_entropy_device( entropy_device.clone(), cmdline, false, + false, ) } @@ -780,6 +800,7 @@ fn attach_block_devices<'a, I: Iterator>> + Debug>( cmdline: &mut LoaderKernelCmdline, blocks: I, event_manager: &mut EventManager, + needs_bouncing: bool, ) -> Result<(), StartMicrovmError> { for block in blocks { let (id, is_vhost_user) = { @@ -804,6 +825,7 @@ fn attach_block_devices<'a, I: Iterator>> + Debug>( block.clone(), cmdline, is_vhost_user, + needs_bouncing, )?; } Ok(()) @@ -814,11 +836,20 @@ fn attach_net_devices<'a, I: Iterator>> + Debug>( cmdline: &mut LoaderKernelCmdline, net_devices: I, event_manager: &mut EventManager, + needs_bouncing: bool, ) -> Result<(), StartMicrovmError> { for net_device in net_devices { let id = net_device.lock().expect("Poisoned lock").id().clone(); // The device mutex mustn't be locked here otherwise it will deadlock. - attach_virtio_device(event_manager, vmm, id, net_device.clone(), cmdline, false)?; + attach_virtio_device( + event_manager, + vmm, + id, + net_device.clone(), + cmdline, + false, + needs_bouncing, + )?; } Ok(()) } @@ -828,10 +859,19 @@ fn attach_unixsock_vsock_device( cmdline: &mut LoaderKernelCmdline, unix_vsock: &Arc>>, event_manager: &mut EventManager, + needs_bouncing: bool, ) -> Result<(), MmioError> { let id = String::from(unix_vsock.lock().expect("Poisoned lock").id()); // The device mutex mustn't be locked here otherwise it will deadlock. - attach_virtio_device(event_manager, vmm, id, unix_vsock.clone(), cmdline, false) + attach_virtio_device( + event_manager, + vmm, + id, + unix_vsock.clone(), + cmdline, + false, + needs_bouncing, + ) } fn attach_balloon_device( @@ -842,7 +882,15 @@ fn attach_balloon_device( ) -> Result<(), MmioError> { let id = String::from(balloon.lock().expect("Poisoned lock").id()); // The device mutex mustn't be locked here otherwise it will deadlock. - attach_virtio_device(event_manager, vmm, id, balloon.clone(), cmdline, false) + attach_virtio_device( + event_manager, + vmm, + id, + balloon.clone(), + cmdline, + false, + false, + ) } // Adds `O_NONBLOCK` to the stdout flags. @@ -1018,6 +1066,7 @@ pub(crate) mod tests { cmdline, block_dev_configs.devices.iter(), event_manager, + false, ) .unwrap(); block_files @@ -1032,7 +1081,7 @@ pub(crate) mod tests { let mut net_builder = NetBuilder::new(); net_builder.build(net_config).unwrap(); - let res = attach_net_devices(vmm, cmdline, net_builder.iter(), event_manager); + let res = attach_net_devices(vmm, cmdline, net_builder.iter(), event_manager, false); res.unwrap(); } @@ -1053,7 +1102,7 @@ pub(crate) mod tests { Arc::new(Mutex::new(mmds)), ); - attach_net_devices(vmm, cmdline, net_builder.iter(), event_manager).unwrap(); + attach_net_devices(vmm, cmdline, net_builder.iter(), event_manager, false).unwrap(); } pub(crate) fn insert_vsock_device( @@ -1066,7 +1115,7 @@ pub(crate) mod tests { let vsock = VsockBuilder::create_unixsock_vsock(vsock_config).unwrap(); let vsock = Arc::new(Mutex::new(vsock)); - attach_unixsock_vsock_device(vmm, cmdline, &vsock, event_manager).unwrap(); + attach_unixsock_vsock_device(vmm, cmdline, &vsock, event_manager, false).unwrap(); assert!( vmm.mmio_device_manager diff --git a/src/vmm/src/device_manager/mmio.rs b/src/vmm/src/device_manager/mmio.rs index 60c4c74e20d..33482136047 100644 --- a/src/vmm/src/device_manager/mmio.rs +++ b/src/vmm/src/device_manager/mmio.rs @@ -604,6 +604,14 @@ mod tests { unimplemented!() } + fn force_userspace_bounce_buffers(&mut self) { + todo!() + } + + fn userspace_bounce_buffers(&self) -> bool { + todo!() + } + fn device_type(&self) -> u32 { 0 } diff --git a/src/vmm/src/devices/virtio/balloon/device.rs b/src/vmm/src/devices/virtio/balloon/device.rs index 195f78fb6c6..1f7a128a9be 100644 --- a/src/vmm/src/devices/virtio/balloon/device.rs +++ b/src/vmm/src/devices/virtio/balloon/device.rs @@ -563,6 +563,14 @@ impl VirtioDevice for Balloon { self.avail_features |= 1 << VIRTIO_F_ACCESS_PLATFORM; } + fn force_userspace_bounce_buffers(&mut self) { + // balloon device doesn't have a need for bounce buffers + } + + fn userspace_bounce_buffers(&self) -> bool { + false + } + fn device_type(&self) -> u32 { TYPE_BALLOON } diff --git a/src/vmm/src/devices/virtio/block/device.rs b/src/vmm/src/devices/virtio/block/device.rs index f777d876001..98976194cd1 100644 --- a/src/vmm/src/devices/virtio/block/device.rs +++ b/src/vmm/src/devices/virtio/block/device.rs @@ -155,6 +155,20 @@ impl VirtioDevice for Block { } } + fn force_userspace_bounce_buffers(&mut self) { + match self { + Block::Virtio(b) => b.force_userspace_bounce_buffers(), + Block::VhostUser(b) => b.force_userspace_bounce_buffers(), + } + } + + fn userspace_bounce_buffers(&self) -> bool { + match self { + Block::Virtio(b) => b.userspace_bounce_buffers(), + Block::VhostUser(b) => b.userspace_bounce_buffers(), + } + } + fn device_type(&self) -> u32 { TYPE_BLOCK } diff --git a/src/vmm/src/devices/virtio/block/vhost_user/device.rs b/src/vmm/src/devices/virtio/block/vhost_user/device.rs index 864d38d90ac..142fb8c3cbb 100644 --- a/src/vmm/src/devices/virtio/block/vhost_user/device.rs +++ b/src/vmm/src/devices/virtio/block/vhost_user/device.rs @@ -300,6 +300,15 @@ impl VirtioDevice for VhostUserBlock self.avail_features |= 1 << VIRTIO_F_ACCESS_PLATFORM; } + fn force_userspace_bounce_buffers(&mut self) { + // Nothing Firecracker can do about this, the backend would need to do the bouncing + panic!("vhost-user-blk is incompatible with userspace bounce buffers") + } + + fn userspace_bounce_buffers(&self) -> bool { + false + } + fn device_type(&self) -> u32 { TYPE_BLOCK } diff --git a/src/vmm/src/devices/virtio/block/virtio/device.rs b/src/vmm/src/devices/virtio/block/virtio/device.rs index d867892bccc..701b5767555 100644 --- a/src/vmm/src/devices/virtio/block/virtio/device.rs +++ b/src/vmm/src/devices/virtio/block/virtio/device.rs @@ -584,6 +584,22 @@ impl VirtioDevice for VirtioBlock { self.avail_features |= 1 << VIRTIO_F_ACCESS_PLATFORM; } + fn force_userspace_bounce_buffers(&mut self) { + match self.disk.file_engine { + FileEngine::Async(_) => { + panic!("async engine is incompatible with userspace bounce buffers") + } + FileEngine::Sync(ref mut engine) => engine.start_bouncing(), + } + } + + fn userspace_bounce_buffers(&self) -> bool { + match self.disk.file_engine { + FileEngine::Async(_) => false, + FileEngine::Sync(ref engine) => engine.is_bouncing(), + } + } + fn device_type(&self) -> u32 { TYPE_BLOCK } diff --git a/src/vmm/src/devices/virtio/block/virtio/io/sync_io.rs b/src/vmm/src/devices/virtio/block/virtio/io/sync_io.rs index eec3b3d8b8d..576a0a5b1f2 100644 --- a/src/vmm/src/devices/virtio/block/virtio/io/sync_io.rs +++ b/src/vmm/src/devices/virtio/block/virtio/io/sync_io.rs @@ -6,7 +6,7 @@ use std::io::{Seek, SeekFrom, Write}; use vm_memory::{GuestMemoryError, ReadVolatile, WriteVolatile}; -use crate::vstate::memory::{GuestAddress, GuestMemory, GuestMemoryMmap}; +use crate::vstate::memory::{GuestAddress, GuestMemory, GuestMemoryMmap, MaybeBounce}; #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum SyncIoError { @@ -22,7 +22,12 @@ pub enum SyncIoError { #[derive(Debug)] pub struct SyncFileEngine { - file: File, + // 65536 is the largest buffer a linux guest will give us, empirically. Determined by + // having `MaybeBounce` logging scenarios where the fixed size bounce buffer isn't sufficient. + // Note that even if this assumption ever changes, the worse that'll happen is that we do + // multiple roundtrips between guest memory and the bounce buffer, as MaybeBounce would + // just chop larger reads/writes into chunks of 65k. + file: MaybeBounce, } // SAFETY: `File` is send and ultimately a POD. @@ -30,17 +35,27 @@ unsafe impl Send for SyncFileEngine {} impl SyncFileEngine { pub fn from_file(file: File) -> SyncFileEngine { - SyncFileEngine { file } + SyncFileEngine { + file: MaybeBounce::new_persistent(file, false), + } } #[cfg(test)] pub fn file(&self) -> &File { - &self.file + &self.file.target + } + + pub fn start_bouncing(&mut self) { + self.file.activate() + } + + pub fn is_bouncing(&self) -> bool { + self.file.is_activated() } /// Update the backing file of the engine pub fn update_file(&mut self, file: File) { - self.file = file + self.file.target = file } pub fn read( @@ -77,8 +92,8 @@ impl SyncFileEngine { pub fn flush(&mut self) -> Result<(), SyncIoError> { // flush() first to force any cached data out of rust buffers. - self.file.flush().map_err(SyncIoError::Flush)?; + self.file.target.flush().map_err(SyncIoError::Flush)?; // Sync data out to physical media on host. - self.file.sync_all().map_err(SyncIoError::SyncAll) + self.file.target.sync_all().map_err(SyncIoError::SyncAll) } } diff --git a/src/vmm/src/devices/virtio/block/virtio/persist.rs b/src/vmm/src/devices/virtio/block/virtio/persist.rs index 8c6f2c2453d..a52f901ebab 100644 --- a/src/vmm/src/devices/virtio/block/virtio/persist.rs +++ b/src/vmm/src/devices/virtio/block/virtio/persist.rs @@ -16,7 +16,7 @@ use crate::devices::virtio::TYPE_BLOCK; use crate::devices::virtio::block::persist::BlockConstructorArgs; use crate::devices::virtio::block::virtio::device::FileEngineType; use crate::devices::virtio::block::virtio::metrics::BlockMetricsPerDevice; -use crate::devices::virtio::device::{DeviceState, IrqTrigger}; +use crate::devices::virtio::device::{DeviceState, IrqTrigger, VirtioDevice}; use crate::devices::virtio::generated::virtio_blk::VIRTIO_BLK_F_RO; use crate::devices::virtio::persist::VirtioDeviceState; use crate::rate_limiter::RateLimiter; @@ -127,7 +127,7 @@ impl Persist<'_> for VirtioBlock { capacity: disk_properties.nsectors.to_le(), }; - Ok(VirtioBlock { + let mut dev = VirtioBlock { avail_features, acked_features, config_space, @@ -148,7 +148,13 @@ impl Persist<'_> for VirtioBlock { rate_limiter, is_io_engine_throttled: false, metrics: BlockMetricsPerDevice::alloc(state.id.clone()), - }) + }; + + if state.virtio_state.bounce_in_userspace { + dev.force_userspace_bounce_buffers() + } + + Ok(dev) } } diff --git a/src/vmm/src/devices/virtio/device.rs b/src/vmm/src/devices/virtio/device.rs index f7dbc9d73ef..c14ea6965b9 100644 --- a/src/vmm/src/devices/virtio/device.rs +++ b/src/vmm/src/devices/virtio/device.rs @@ -105,6 +105,12 @@ pub trait VirtioDevice: AsAny + Send { /// Make the virtio device offer the VIRTIO_F_ACCESS_PLATFORM feature fn force_swiotlb(&mut self); + /// Make the virtio device user userspace bounce buffers + fn force_userspace_bounce_buffers(&mut self); + + /// Whether this device is using userspace bounce buffers + fn userspace_bounce_buffers(&self) -> bool; + /// Check if virtio device has negotiated given feature. fn has_feature(&self, feature: u64) -> bool { (self.acked_features() & (1 << feature)) != 0 @@ -266,6 +272,14 @@ pub(crate) mod tests { unimplemented!() } + fn force_userspace_bounce_buffers(&mut self) { + todo!() + } + + fn userspace_bounce_buffers(&self) -> bool { + todo!() + } + fn device_type(&self) -> u32 { todo!() } diff --git a/src/vmm/src/devices/virtio/mmio.rs b/src/vmm/src/devices/virtio/mmio.rs index 3f75d6d35fd..0734d41ef2b 100644 --- a/src/vmm/src/devices/virtio/mmio.rs +++ b/src/vmm/src/devices/virtio/mmio.rs @@ -427,6 +427,14 @@ pub(crate) mod tests { unimplemented!() } + fn force_userspace_bounce_buffers(&mut self) { + unimplemented!() + } + + fn userspace_bounce_buffers(&self) -> bool { + false + } + fn device_type(&self) -> u32 { 123 } diff --git a/src/vmm/src/devices/virtio/net/device.rs b/src/vmm/src/devices/virtio/net/device.rs index f127fe9a0e4..c3598076f02 100755 --- a/src/vmm/src/devices/virtio/net/device.rs +++ b/src/vmm/src/devices/virtio/net/device.rs @@ -6,12 +6,14 @@ // found in the THIRD-PARTY file. use std::collections::VecDeque; +use std::io::{Read, Write}; use std::mem::{self}; use std::net::Ipv4Addr; use std::sync::{Arc, Mutex}; use libc::{EAGAIN, iovec}; use log::error; +use vm_memory::VolatileSlice; use vmm_sys_util::eventfd::EventFd; use super::NET_QUEUE_MAX_SIZE; @@ -247,7 +249,9 @@ pub struct Net { pub(crate) rx_rate_limiter: RateLimiter, pub(crate) tx_rate_limiter: RateLimiter, - rx_frame_buf: [u8; MAX_BUFFER_SIZE], + /// Used both for bounce buffering and for relaying frames to MMDS + userspace_buffer: [u8; MAX_BUFFER_SIZE], + pub(crate) userspace_bouncing: bool, tx_frame_headers: [u8; frame_hdr_len()], @@ -313,8 +317,9 @@ impl Net { queue_evts, rx_rate_limiter, tx_rate_limiter, - rx_frame_buf: [0u8; MAX_BUFFER_SIZE], + userspace_buffer: [0u8; MAX_BUFFER_SIZE], tx_frame_headers: [0u8; frame_hdr_len()], + userspace_bouncing: false, irq_trigger: IrqTrigger::new().map_err(NetError::EventFd)?, config_space, guest_mac, @@ -498,6 +503,7 @@ impl Net { // Tries to detour the frame to MMDS and if MMDS doesn't accept it, sends it on the host TAP. // // Returns whether MMDS consumed the frame. + #[allow(clippy::too_many_arguments)] fn write_to_mmds_or_tap( mmds_ns: Option<&mut MmdsNetworkStack>, rate_limiter: &mut RateLimiter, @@ -506,6 +512,7 @@ impl Net { tap: &mut Tap, guest_mac: Option, net_metrics: &NetDeviceMetrics, + bb: Option<&mut [u8]>, ) -> Result { // Read the frame headers from the IoVecBuffer let max_header_len = headers.len(); @@ -553,7 +560,7 @@ impl Net { } let _metric = net_metrics.tap_write_agg.record_latency_metrics(); - match Self::write_tap(tap, frame_iovec) { + match Self::write_tap(tap, frame_iovec, bb) { Ok(_) => { let len = u64::from(frame_iovec.len()); net_metrics.tx_bytes_count.add(len); @@ -587,15 +594,15 @@ impl Net { if let Some(ns) = self.mmds_ns.as_mut() { if let Some(len) = - ns.write_next_frame(frame_bytes_from_buf_mut(&mut self.rx_frame_buf)?) + ns.write_next_frame(frame_bytes_from_buf_mut(&mut self.userspace_buffer)?) { let len = len.get(); METRICS.mmds.tx_frames.inc(); METRICS.mmds.tx_bytes.add(len as u64); - init_vnet_hdr(&mut self.rx_frame_buf); + init_vnet_hdr(&mut self.userspace_buffer); self.rx_buffer .iovec - .write_all_volatile_at(&self.rx_frame_buf[..vnet_hdr_len() + len], 0)?; + .write_all_volatile_at(&self.userspace_buffer[..vnet_hdr_len() + len], 0)?; // SAFETY: // * len will never be bigger that u32::MAX because mmds is bound // by the size of `self.rx_frame_buf` which is MAX_BUFFER_SIZE size. @@ -736,6 +743,8 @@ impl Net { &mut self.tap, self.guest_mac, &self.metrics, + self.userspace_bouncing + .then_some(self.userspace_buffer.as_mut_slice()), ) .unwrap_or(false); if frame_consumed_by_mmds && self.rx_buffer.used_bytes == 0 { @@ -828,11 +837,57 @@ impl Net { } else { self.rx_buffer.single_chain_slice_mut() }; - self.tap.read_iovec(slice) + + if self.userspace_bouncing { + let how_many = self + .tap + .tap_file + .read(self.userspace_buffer.as_mut_slice())?; + + assert!(how_many <= MAX_BUFFER_SIZE); + + let mut offset = 0; + for iov in slice { + assert!( + offset <= how_many, + "copied more bytes into guest memory than read from tap" + ); + + let to_copy = (how_many - offset).min(iov.iov_len); + + if to_copy == 0 { + break; + } + + // SAFETY: the iovec comes from an `IoVecBufferMut`, which upholds the invariant + // that all contained iovecs are covering valid ranges of guest memory. + // Particularly, to_copy <= iov.iov_len + let vslice = unsafe { VolatileSlice::new(iov.iov_base.cast(), to_copy) }; + + vslice.copy_from(&self.userspace_buffer[offset..]); + + offset += to_copy; + } + + Ok(how_many) + } else { + self.tap.read_iovec(slice) + } } - fn write_tap(tap: &mut Tap, buf: &IoVecBuffer) -> std::io::Result { - tap.write_iovec(buf) + fn write_tap( + tap: &mut Tap, + buf: &IoVecBuffer, + bounce_buffer: Option<&mut [u8]>, + ) -> std::io::Result { + if let Some(bb) = bounce_buffer { + let how_many = buf.len() as usize; + let copied = buf.read_volatile_at(&mut &mut *bb, 0, how_many).unwrap(); + assert_eq!(copied, how_many); + tap.tap_file.write(&bb[..copied]) + } else { + tap.write_iovec(buf) + } } /// Process a single RX queue event. @@ -952,6 +1007,14 @@ impl VirtioDevice for Net { self.avail_features |= 1 << VIRTIO_F_ACCESS_PLATFORM; } + fn force_userspace_bounce_buffers(&mut self) { + self.userspace_bouncing = true + } + + fn userspace_bounce_buffers(&self) -> bool { + self.userspace_bouncing + } + fn device_type(&self) -> u32 { TYPE_NET } @@ -1937,6 +2000,7 @@ pub mod tests { &mut net.tap, Some(src_mac), &net.metrics, + None ) .unwrap() ) @@ -1976,6 +2040,7 @@ pub mod tests { &mut net.tap, Some(guest_mac), &net.metrics, + None ) ); @@ -1991,6 +2056,7 @@ pub mod tests { &mut net.tap, Some(not_guest_mac), &net.metrics, + None ) ); } diff --git a/src/vmm/src/devices/virtio/net/persist.rs b/src/vmm/src/devices/virtio/net/persist.rs index 5f2d6f560b4..cbb5c8f52a7 100644 --- a/src/vmm/src/devices/virtio/net/persist.rs +++ b/src/vmm/src/devices/virtio/net/persist.rs @@ -152,6 +152,8 @@ impl Persist<'_> for Net { net.avail_features = state.virtio_state.avail_features; net.acked_features = state.virtio_state.acked_features; + net.userspace_bouncing = state.virtio_state.bounce_in_userspace; + if state.virtio_state.activated { let supported_flags: u32 = Net::build_tap_offload_features(net.acked_features); net.tap diff --git a/src/vmm/src/devices/virtio/net/tap.rs b/src/vmm/src/devices/virtio/net/tap.rs index c516705af31..30a499489b0 100644 --- a/src/vmm/src/devices/virtio/net/tap.rs +++ b/src/vmm/src/devices/virtio/net/tap.rs @@ -49,7 +49,7 @@ ioctl_iow_nr!(TUNSETVNETHDRSZ, TUNTAP, 216, ::std::os::raw::c_int); /// Tap goes out of scope, and the kernel will clean up the interface automatically. #[derive(Debug)] pub struct Tap { - tap_file: File, + pub(crate) tap_file: File, pub(crate) if_name: [u8; IFACE_NAME_MAX_LEN], } diff --git a/src/vmm/src/devices/virtio/persist.rs b/src/vmm/src/devices/virtio/persist.rs index 7c861352317..ba365617abf 100644 --- a/src/vmm/src/devices/virtio/persist.rs +++ b/src/vmm/src/devices/virtio/persist.rs @@ -125,11 +125,13 @@ pub struct VirtioDeviceState { pub interrupt_status: u32, /// Flag for activated status. pub activated: bool, + /// Whether this device has to use userspace bounce buffers + pub bounce_in_userspace: bool, } impl VirtioDeviceState { /// Construct the virtio state of a device. - pub fn from_device(device: &dyn VirtioDevice) -> Self { + pub fn from_device(device: &impl VirtioDevice) -> Self { VirtioDeviceState { device_type: device.device_type(), avail_features: device.avail_features(), @@ -137,6 +139,7 @@ impl VirtioDeviceState { queues: device.queues().iter().map(Persist::save).collect(), interrupt_status: device.interrupt_status().load(Ordering::Relaxed), activated: device.is_activated(), + bounce_in_userspace: device.userspace_bounce_buffers(), } } diff --git a/src/vmm/src/devices/virtio/rng/device.rs b/src/vmm/src/devices/virtio/rng/device.rs index 719365e8ce8..32b009c08b0 100644 --- a/src/vmm/src/devices/virtio/rng/device.rs +++ b/src/vmm/src/devices/virtio/rng/device.rs @@ -309,6 +309,14 @@ impl VirtioDevice for Entropy { fn force_swiotlb(&mut self) { self.avail_features |= 1 << VIRTIO_F_ACCESS_PLATFORM; } + + fn force_userspace_bounce_buffers(&mut self) { + // rng device works with only userspace accesses + } + + fn userspace_bounce_buffers(&self) -> bool { + false + } } #[cfg(test)] diff --git a/src/vmm/src/devices/virtio/vsock/csm/connection.rs b/src/vmm/src/devices/virtio/vsock/csm/connection.rs index c9bd5b2c0f7..6f39f8b3079 100644 --- a/src/vmm/src/devices/virtio/vsock/csm/connection.rs +++ b/src/vmm/src/devices/virtio/vsock/csm/connection.rs @@ -95,6 +95,7 @@ use crate::devices::virtio::vsock::metrics::METRICS; use crate::devices::virtio::vsock::packet::{VsockPacketHeader, VsockPacketRx, VsockPacketTx}; use crate::logger::IncMetric; use crate::utils::wrap_usize_to_u32; +use crate::vstate::memory::MaybeBounce; /// Trait that vsock connection backends need to implement. /// @@ -118,7 +119,7 @@ pub struct VsockConnection { /// The peer (guest) port. peer_port: u32, /// The (connected) host-side stream. - stream: S, + pub(crate) stream: MaybeBounce, /// The TX buffer for this connection. tx_buf: TxBuf, /// Total number of bytes that have been successfully written to `self.stream`, either @@ -414,7 +415,7 @@ where /// The connection is interested in being notified about EPOLLIN / EPOLLOUT events on the /// host stream. fn as_raw_fd(&self) -> RawFd { - self.stream.as_raw_fd() + self.stream.target.as_raw_fd() } } @@ -509,13 +510,14 @@ where local_port: u32, peer_port: u32, peer_buf_alloc: u32, + bounce: bool, ) -> Self { Self { local_cid, peer_cid, local_port, peer_port, - stream, + stream: MaybeBounce::new_persistent(stream, bounce), state: ConnState::PeerInit, tx_buf: TxBuf::new(), fwd_cnt: Wrapping(0), @@ -535,13 +537,14 @@ where peer_cid: u64, local_port: u32, peer_port: u32, + bounce: bool, ) -> Self { Self { local_cid, peer_cid, local_port, peer_port, - stream, + stream: MaybeBounce::new_persistent(stream, bounce), state: ConnState::LocalInit, tx_buf: TxBuf::new(), fwd_cnt: Wrapping(0), @@ -882,9 +885,10 @@ mod tests { LOCAL_PORT, PEER_PORT, PEER_BUF_ALLOC, + false, ), ConnState::LocalInit => VsockConnection::::new_local_init( - stream, LOCAL_CID, PEER_CID, LOCAL_PORT, PEER_PORT, + stream, LOCAL_CID, PEER_CID, LOCAL_PORT, PEER_PORT, false, ), ConnState::Established => { let mut conn = VsockConnection::::new_peer_init( @@ -894,6 +898,7 @@ mod tests { LOCAL_PORT, PEER_PORT, PEER_BUF_ALLOC, + false, ); assert!(conn.has_pending_rx()); conn.recv_pkt(&mut rx_pkt).unwrap(); @@ -912,7 +917,7 @@ mod tests { } fn set_stream(&mut self, stream: TestStream) { - self.conn.stream = stream; + self.conn.stream = MaybeBounce::new_persistent(stream, false); } fn set_peer_credit(&mut self, credit: u32) { @@ -1014,7 +1019,7 @@ mod tests { let mut ctx = CsmTestContext::new_established(); let data = &[1, 2, 3, 4]; ctx.set_stream(TestStream::new_with_read_buf(data)); - assert_eq!(ctx.conn.as_raw_fd(), ctx.conn.stream.as_raw_fd()); + assert_eq!(ctx.conn.as_raw_fd(), ctx.conn.stream.target.as_raw_fd()); ctx.notify_epollin(); ctx.recv(); assert_eq!(ctx.rx_pkt.hdr.op(), uapi::VSOCK_OP_RW); @@ -1098,7 +1103,7 @@ mod tests { ctx.init_data_tx_pkt(data); ctx.send(); - assert_eq!(ctx.conn.stream.write_buf.len(), 0); + assert_eq!(ctx.conn.stream.target.write_buf.len(), 0); assert!(ctx.conn.tx_buf.is_empty()); } @@ -1113,7 +1118,7 @@ mod tests { let data = &[1, 2, 3, 4]; ctx.init_data_tx_pkt(data); ctx.send(); - assert_eq!(ctx.conn.stream.write_buf, data.to_vec()); + assert_eq!(ctx.conn.stream.target.write_buf, data.to_vec()); ctx.notify_epollin(); ctx.recv(); @@ -1233,7 +1238,7 @@ mod tests { ctx.set_stream(TestStream::new()); ctx.conn.notify(EventSet::OUT); assert!(ctx.conn.tx_buf.is_empty()); - assert_eq!(ctx.conn.stream.write_buf, data); + assert_eq!(ctx.conn.stream.target.write_buf, data); } } diff --git a/src/vmm/src/devices/virtio/vsock/device.rs b/src/vmm/src/devices/virtio/vsock/device.rs index e3b1bdae5cd..350d538c79b 100644 --- a/src/vmm/src/devices/virtio/vsock/device.rs +++ b/src/vmm/src/devices/virtio/vsock/device.rs @@ -286,6 +286,14 @@ where self.avail_features |= 1 << VIRTIO_F_ACCESS_PLATFORM; } + fn force_userspace_bounce_buffers(&mut self) { + self.backend.start_bouncing() + } + + fn userspace_bounce_buffers(&self) -> bool { + self.backend.is_bouncing() + } + fn device_type(&self) -> u32 { uapi::VIRTIO_ID_VSOCK } diff --git a/src/vmm/src/devices/virtio/vsock/mod.rs b/src/vmm/src/devices/virtio/vsock/mod.rs index 859e198860b..54c9eeef3b9 100644 --- a/src/vmm/src/devices/virtio/vsock/mod.rs +++ b/src/vmm/src/devices/virtio/vsock/mod.rs @@ -185,4 +185,7 @@ pub trait VsockChannel { /// The vsock backend, which is basically an epoll-event-driven vsock channel. /// Currently, the only implementation we have is `crate::devices::virtio::unix::muxer::VsockMuxer`, /// which translates guest-side vsock connections to host-side Unix domain socket connections. -pub trait VsockBackend: VsockChannel + VsockEpollListener + Send {} +pub trait VsockBackend: VsockChannel + VsockEpollListener + Send { + fn start_bouncing(&mut self); + fn is_bouncing(&self) -> bool; +} diff --git a/src/vmm/src/devices/virtio/vsock/persist.rs b/src/vmm/src/devices/virtio/vsock/persist.rs index fce6affae69..6128090b601 100644 --- a/src/vmm/src/devices/virtio/vsock/persist.rs +++ b/src/vmm/src/devices/virtio/vsock/persist.rs @@ -10,7 +10,7 @@ use std::sync::atomic::AtomicU32; use serde::{Deserialize, Serialize}; use super::*; -use crate::devices::virtio::device::DeviceState; +use crate::devices::virtio::device::{DeviceState, VirtioDevice}; use crate::devices::virtio::persist::VirtioDeviceState; use crate::devices::virtio::queue::FIRECRACKER_MAX_QUEUE_SIZE; use crate::devices::virtio::vsock::TYPE_VSOCK; @@ -128,6 +128,11 @@ where } else { DeviceState::Inactive }; + + if state.virtio_state.bounce_in_userspace { + vsock.force_userspace_bounce_buffers(); + } + Ok(vsock) } } diff --git a/src/vmm/src/devices/virtio/vsock/test_utils.rs b/src/vmm/src/devices/virtio/vsock/test_utils.rs index 804f0442559..391d543537f 100644 --- a/src/vmm/src/devices/virtio/vsock/test_utils.rs +++ b/src/vmm/src/devices/virtio/vsock/test_utils.rs @@ -111,7 +111,15 @@ impl VsockEpollListener for TestBackend { self.evset = Some(evset); } } -impl VsockBackend for TestBackend {} +impl VsockBackend for TestBackend { + fn start_bouncing(&mut self) { + unimplemented!() + } + + fn is_bouncing(&self) -> bool { + false + } +} #[derive(Debug)] pub struct TestContext { diff --git a/src/vmm/src/devices/virtio/vsock/unix/muxer.rs b/src/vmm/src/devices/virtio/vsock/unix/muxer.rs index 478d5c7318d..5585761af8f 100644 --- a/src/vmm/src/devices/virtio/vsock/unix/muxer.rs +++ b/src/vmm/src/devices/virtio/vsock/unix/muxer.rs @@ -108,6 +108,7 @@ pub struct VsockMuxer { local_port_set: HashSet, /// The last used host-side port. local_port_last: u32, + bounce: bool, } impl VsockChannel for VsockMuxer { @@ -299,7 +300,19 @@ impl VsockEpollListener for VsockMuxer { } } -impl VsockBackend for VsockMuxer {} +impl VsockBackend for VsockMuxer { + fn start_bouncing(&mut self) { + self.bounce = true; + + for conn in self.conn_map.values_mut() { + conn.stream.activate() + } + } + + fn is_bouncing(&self) -> bool { + self.bounce + } +} impl VsockMuxer { /// Muxer constructor. @@ -321,6 +334,7 @@ impl VsockMuxer { killq: MuxerKillQ::new(), local_port_last: (1u32 << 30) - 1, local_port_set: HashSet::with_capacity(defs::MAX_CONNECTIONS), + bounce: false, }; // Listen on the host initiated socket, for incoming connections. @@ -402,6 +416,7 @@ impl VsockMuxer { self.cid, local_port, peer_port, + self.bounce, ), ) }) @@ -629,6 +644,7 @@ impl VsockMuxer { pkt.hdr.dst_port(), pkt.hdr.src_port(), pkt.hdr.buf_alloc(), + self.bounce, ), ) }) diff --git a/src/vmm/src/resources.rs b/src/vmm/src/resources.rs index 51e0fd524d3..bdb71fb4ec5 100644 --- a/src/vmm/src/resources.rs +++ b/src/vmm/src/resources.rs @@ -10,6 +10,7 @@ use serde::{Deserialize, Serialize}; use crate::cpu_config::templates::CustomCpuTemplate; use crate::device_manager::persist::SharedDeviceType; +use crate::devices::virtio::block::device::Block; use crate::logger::info; use crate::mmds; use crate::mmds::data_store::{Mmds, MmdsVersion}; @@ -111,14 +112,6 @@ pub struct VmResources { } impl VmResources { - /// Whether this [`VmResources`] object contains any devices that require host kernel access - /// into guest memory. - pub fn has_any_io_devices(&self) -> bool { - !self.block.devices.is_empty() - || self.vsock.get().is_some() - || self.net_builder.iter().next().is_some() - } - /// Configures Vmm resources as described by the `config_json` param. pub fn from_json( config_json: &str, @@ -270,16 +263,6 @@ impl VmResources { return Err(MachineConfigError::IncompatibleBalloonSize); } - if self.has_any_io_devices() - && self.machine_config.mem_config.secret_free - && !self.swiotlb_used() - { - return Err(MachineConfigError::Incompatible( - "secret freedom", - "I/O without swiotlb", - )); - } - if self.balloon.get().is_some() && updated.huge_pages != HugePageConfig::None { return Err(MachineConfigError::Incompatible( "balloon device", @@ -292,6 +275,22 @@ impl VmResources { "secret freedom", )); } + if !self.swiotlb_used() && updated.mem_config.secret_free { + if self.vhost_user_devices_used() { + return Err(MachineConfigError::Incompatible( + "vhost-user devices", + "userspace bounce buffers", + )); + } + + if self.async_block_engine_used() { + return Err(MachineConfigError::Incompatible( + "async block engine", + "userspace bounce buffers", + )); + } + } + self.machine_config = updated; Ok(()) @@ -377,13 +376,18 @@ impl VmResources { &mut self, block_device_config: BlockDeviceConfig, ) -> Result<(), DriveError> { - if self.has_any_io_devices() - && self.machine_config.mem_config.secret_free - && !self.swiotlb_used() - { - return Err(DriveError::SecretFreeWithoutSwiotlb); + if self.machine_config.mem_config.secret_free && !self.swiotlb_used() { + if block_device_config.file_engine_type == Some(FileEngineType::Async) { + return Err(DriveError::IncompatibleWithUserspaceBounceBuffers( + "async file engine", + )); + } + if block_device_config.socket.is_some() { + return Err(DriveError::IncompatibleWithUserspaceBounceBuffers( + "vhost-user-blk", + )); + } } - self.block.insert(block_device_config) } @@ -392,26 +396,12 @@ impl VmResources { &mut self, body: NetworkInterfaceConfig, ) -> Result<(), NetworkInterfaceError> { - if self.has_any_io_devices() - && self.machine_config.mem_config.secret_free - && !self.swiotlb_used() - { - return Err(NetworkInterfaceError::SecretFreeWithoutSwiotlb); - } - let _ = self.net_builder.build(body)?; Ok(()) } /// Sets a vsock device to be attached when the VM starts. pub fn set_vsock_device(&mut self, config: VsockDeviceConfig) -> Result<(), VsockConfigError> { - if self.has_any_io_devices() - && self.machine_config.mem_config.secret_free - && !self.swiotlb_used() - { - return Err(VsockConfigError::SecretFreeWithoutSwiotlb); - } - self.vsock.insert(config) } @@ -505,6 +495,16 @@ impl VmResources { .any(|b| b.lock().expect("Poisoned lock").is_vhost_user()) } + fn async_block_engine_used(&self) -> bool { + self.block + .devices + .iter() + .any(|b| match &*b.lock().unwrap() { + Block::Virtio(b) => b.file_engine_type() == FileEngineType::Async, + Block::VhostUser(_) => false, + }) + } + /// The size of the swiotlb region requested, in MiB #[cfg(target_arch = "aarch64")] pub fn swiotlb_size(&self) -> usize { diff --git a/src/vmm/src/vmm_config/drive.rs b/src/vmm/src/vmm_config/drive.rs index 4c456c95097..248838fa343 100644 --- a/src/vmm/src/vmm_config/drive.rs +++ b/src/vmm/src/vmm_config/drive.rs @@ -24,8 +24,8 @@ pub enum DriveError { DeviceUpdate(VmmError), /// A root block device already exists! RootBlockDeviceAlreadyAdded, - /// A block device was added to a secret free VM that doesnt have a swiotlb region. - SecretFreeWithoutSwiotlb, + /// {0} is incompatible with userspace bounce buffers. + IncompatibleWithUserspaceBounceBuffers(&'static str), } /// Use this structure to set up the Block Device before booting the kernel. diff --git a/src/vmm/src/vmm_config/net.rs b/src/vmm/src/vmm_config/net.rs index fd83328109e..f1413368090 100644 --- a/src/vmm/src/vmm_config/net.rs +++ b/src/vmm/src/vmm_config/net.rs @@ -71,8 +71,6 @@ pub enum NetworkInterfaceError { GuestMacAddressInUse(String), /// Cannot open/create the tap device: {0} OpenTap(#[from] TapError), - /// A net device was added to a secret free VM that doesnt have a swiotlb region. - SecretFreeWithoutSwiotlb, } /// Builder for a list of network devices. diff --git a/src/vmm/src/vmm_config/vsock.rs b/src/vmm/src/vmm_config/vsock.rs index 6793bac572e..920e4a4d217 100644 --- a/src/vmm/src/vmm_config/vsock.rs +++ b/src/vmm/src/vmm_config/vsock.rs @@ -17,8 +17,6 @@ pub enum VsockConfigError { CreateVsockBackend(VsockUnixBackendError), /// Cannot create vsock device: {0} CreateVsockDevice(VsockError), - /// A vsock device was added to a secret free VM that doesnt have a swiotlb region. - SecretFreeWithoutSwiotlb, } /// This struct represents the strongly typed equivalent of the json body diff --git a/src/vmm/src/vstate/memory.rs b/src/vmm/src/vstate/memory.rs index 22c1935512f..9421722f11d 100644 --- a/src/vmm/src/vstate/memory.rs +++ b/src/vmm/src/vstate/memory.rs @@ -8,7 +8,7 @@ use std::fs::File; use std::io::{Read, Seek, SeekFrom, Write}; use std::mem::ManuallyDrop; -use std::os::fd::{AsFd, AsRawFd}; +use std::os::fd::AsRawFd; use std::ptr::null_mut; use std::sync::Arc; @@ -63,55 +63,137 @@ pub enum MemoryError { /// Newtype that implements [`ReadVolatile`] and [`WriteVolatile`] if `T` implements `Read` or /// `Write` respectively, by reading/writing using a bounce buffer, and memcpy-ing into the /// [`VolatileSlice`]. +/// +/// Bounce buffers are allocated on the heap, as on-stack bounce buffers could cause stack +/// overflows. If `N == 0` then bounce buffers will be allocated on demand. #[derive(Debug)] -pub struct Bounce(pub T, pub bool); +pub struct MaybeBounce { + pub(crate) target: T, + persistent_buffer: Option>, +} + +impl MaybeBounce { + /// Creates a new `MaybeBounce` that always allocates a bounce + /// buffer on-demand + pub fn new(target: T, should_bounce: bool) -> Self { + MaybeBounce::new_persistent(target, should_bounce) + } +} + +impl MaybeBounce { + /// Creates a new `MaybeBounce` that uses a persistent, fixed size bounce buffer + /// of size `N`. If a read/write request exceeds the size of this bounce buffer, it + /// is split into multiple, `<= N`-size read/writes. + pub fn new_persistent(target: T, should_bounce: bool) -> Self { + let mut bounce = MaybeBounce { + target, + persistent_buffer: None, + }; + + if should_bounce { + bounce.activate() + } + + bounce + } + + /// Activates this [`MaybeBounce`] to start doing reads/writes via a bounce buffer, + /// which is allocated on the heap by this function (e.g. if `activate()` is never called, + /// no bounce buffer is ever allocated). + pub fn activate(&mut self) { + self.persistent_buffer = Some(vec![0u8; N].into_boxed_slice().try_into().unwrap()) + } + + /// Returns `true` if this `MaybeBounce` is actually bouncing buffers. + pub fn is_activated(&self) -> bool { + self.persistent_buffer.is_some() + } +} // FIXME: replace AsFd with ReadVolatile once &File: ReadVolatile in vm-memory. -impl ReadVolatile for Bounce { +impl ReadVolatile for MaybeBounce { fn read_volatile( &mut self, buf: &mut VolatileSlice, ) -> Result { - if self.1 { - let mut bbuf = vec![0; buf.len()]; - let n = self - .0 - .read(bbuf.as_mut_slice()) - .map_err(VolatileMemoryError::IOError)?; - buf.copy_from(&bbuf[..n]); - Ok(n) + if let Some(ref mut persistent) = self.persistent_buffer { + let mut bbuf = (N == 0).then(|| vec![0u8; buf.len()]); + let bbuf = bbuf.as_deref_mut().unwrap_or(persistent.as_mut_slice()); + + let mut buf = buf.offset(0)?; + let mut total = 0; + while !buf.is_empty() { + let how_much = buf.len().min(bbuf.len()); + let n = self + .target + .read_volatile(&mut VolatileSlice::from(&mut bbuf[..how_much]))?; + buf.copy_from(&bbuf[..n]); + + buf = buf.offset(n)?; + total += n; + + if n < how_much { + break; + } + } + + Ok(total) } else { - self.0.as_fd().read_volatile(buf) + self.target.read_volatile(buf) } } } -impl WriteVolatile for Bounce { +impl WriteVolatile for MaybeBounce { fn write_volatile( &mut self, buf: &VolatileSlice, ) -> Result { - if self.1 { - let mut bbuf = vec![0; buf.len()]; - buf.copy_to(bbuf.as_mut_slice()); - self.0 - .write(bbuf.as_slice()) - .map_err(VolatileMemoryError::IOError) + if let Some(ref mut persistent) = self.persistent_buffer { + let mut bbuf = (N == 0).then(|| vec![0u8; buf.len()]); + let bbuf = bbuf.as_deref_mut().unwrap_or(persistent.as_mut_slice()); + + let mut buf = buf.offset(0)?; + let mut total = 0; + while !buf.is_empty() { + let how_much = buf.copy_to(bbuf); + let n = self + .target + .write_volatile(&VolatileSlice::from(&mut bbuf[..how_much]))?; + buf = buf.offset(n)?; + total += n; + + if n < how_much { + break; + } + } + + Ok(total) } else { - self.0.as_fd().write_volatile(buf) + self.target.write_volatile(buf) } } } -impl Read for Bounce { +impl Read for MaybeBounce { fn read(&mut self, buf: &mut [u8]) -> std::io::Result { - self.0.read(buf) + self.target.read(buf) } } -impl Seek for Bounce { +impl Write for MaybeBounce { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + self.target.write(buf) + } + + fn flush(&mut self) -> std::io::Result<()> { + self.target.flush() + } +} + +impl Seek for MaybeBounce { fn seek(&mut self, pos: SeekFrom) -> std::io::Result { - self.0.seek(pos) + self.target.seek(pos) } } @@ -573,6 +655,7 @@ mod tests { use std::collections::HashMap; use std::io::{Read, Seek}; + use std::os::fd::AsFd; use itertools::Itertools; use vmm_sys_util::tempfile::TempFile; @@ -947,30 +1030,45 @@ mod tests { fn test_bounce() { let file_direct = TempFile::new().unwrap(); let file_bounced = TempFile::new().unwrap(); + let file_persistent_bounced = TempFile::new().unwrap(); let mut data = (0..=255).collect_vec(); - Bounce(file_direct.as_file(), false) + MaybeBounce::new(file_direct.as_file().as_fd(), false) + .write_all_volatile(&VolatileSlice::from(data.as_mut_slice())) + .unwrap(); + MaybeBounce::new(file_bounced.as_file().as_fd(), true) .write_all_volatile(&VolatileSlice::from(data.as_mut_slice())) .unwrap(); - Bounce(file_bounced.as_file(), true) + MaybeBounce::<_, 7>::new_persistent(file_persistent_bounced.as_file().as_fd(), true) .write_all_volatile(&VolatileSlice::from(data.as_mut_slice())) .unwrap(); let mut data_direct = vec![0u8; 256]; let mut data_bounced = vec![0u8; 256]; + let mut data_persistent_bounced = vec![0u8; 256]; file_direct.as_file().seek(SeekFrom::Start(0)).unwrap(); file_bounced.as_file().seek(SeekFrom::Start(0)).unwrap(); + file_persistent_bounced + .as_file() + .seek(SeekFrom::Start(0)) + .unwrap(); - Bounce(file_direct.as_file(), false) + MaybeBounce::new(file_direct.as_file().as_fd(), false) .read_exact_volatile(&mut VolatileSlice::from(data_direct.as_mut_slice())) .unwrap(); - Bounce(file_bounced.as_file(), true) + MaybeBounce::new(file_bounced.as_file().as_fd(), true) .read_exact_volatile(&mut VolatileSlice::from(data_bounced.as_mut_slice())) .unwrap(); + MaybeBounce::<_, 7>::new_persistent(file_persistent_bounced.as_file().as_fd(), true) + .read_exact_volatile(&mut VolatileSlice::from( + data_persistent_bounced.as_mut_slice(), + )) + .unwrap(); assert_eq!(data_direct, data_bounced); assert_eq!(data_direct, data); + assert_eq!(data_persistent_bounced, data); } } diff --git a/src/vmm/src/vstate/vm.rs b/src/vmm/src/vstate/vm.rs index dfce69ce5c6..43df7038ace 100644 --- a/src/vmm/src/vstate/vm.rs +++ b/src/vmm/src/vstate/vm.rs @@ -8,7 +8,7 @@ use std::collections::HashMap; use std::fs::{File, OpenOptions}; use std::io::Write; -use std::os::fd::FromRawFd; +use std::os::fd::{AsFd, FromRawFd}; use std::path::Path; use std::sync::Arc; @@ -27,8 +27,8 @@ use crate::persist::{CreateSnapshotError, GuestRegionUffdMapping}; use crate::utils::u64_to_usize; use crate::vmm_config::snapshot::SnapshotType; use crate::vstate::memory::{ - Bounce, GuestMemory, GuestMemoryExtension, GuestMemoryMmap, GuestMemoryRegion, GuestRegionMmap, - KvmRegion, + GuestMemory, GuestMemoryExtension, GuestMemoryMmap, GuestMemoryRegion, GuestRegionMmap, + KvmRegion, MaybeBounce, }; use crate::vstate::vcpu::VcpuError; use crate::{DirtyBitmap, Vcpu, mem_size_mib}; @@ -465,9 +465,10 @@ impl Vm { .guest_memory() .iter() .any(|r| r.inner().guest_memfd != 0); + let mut maybe_bounce = MaybeBounce::new(file.as_fd(), secret_hidden); self.guest_memory() - .dump(&mut Bounce(&file, secret_hidden)) - .and_then(|_| self.swiotlb_regions().dump(&mut file))?; + .dump(&mut maybe_bounce) + .and_then(|_| self.swiotlb_regions().dump(&mut maybe_bounce))?; self.reset_dirty_bitmap(); self.guest_memory().reset_dirty(); } diff --git a/tests/conftest.py b/tests/conftest.py index 8dc03828764..9e27d202fab 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -376,16 +376,12 @@ def io_engine(request): return request.param -# 4MiB is the smallest swiotlb size that seems to generally work for all -# perf tests. 2MiB makes the vsock throughput test consistently fail, and 1MiB makes -# the network throughput test occasionally fail (both due to connection issues). -# The block test passes even with the minimum of 1MiB. We pick 8 to have enough -# buffer to the failing cases. secret_free_test_cases = [None] -if platform.machine() == "aarch64": - secret_free_test_cases.append((8, False)) - if global_props.instance != "m6g.metal": - secret_free_test_cases.append((8, True)) +if ( + global_props.host_linux_version_metrics == "next" + and global_props.instance != "m6g.metal" +): + secret_free_test_cases.append((0, True)) @pytest.fixture(params=secret_free_test_cases) diff --git a/tests/framework/microvm.py b/tests/framework/microvm.py index cc39b984607..6231360c2d2 100644 --- a/tests/framework/microvm.py +++ b/tests/framework/microvm.py @@ -330,7 +330,7 @@ def kill(self): # filter ps results for the jailer's unique id _, stdout, stderr = utils.check_output( - f"ps aux | grep {self.jailer.jailer_id}" + f"ps ax -o cmd -ww | grep {self.jailer.jailer_id}" ) # make sure firecracker was killed assert ( diff --git a/tests/framework/utils_uffd.py b/tests/framework/utils_uffd.py index b25ca6cd928..5f0b358fd21 100644 --- a/tests/framework/utils_uffd.py +++ b/tests/framework/utils_uffd.py @@ -8,6 +8,9 @@ import time from pathlib import Path +import pytest + +from framework.properties import global_props from framework.utils import chroot from host_tools import cargo_build @@ -102,4 +105,7 @@ def spawn_pf_handler(vm, handler_path, snapshot): def uffd_handler(handler_name, **kwargs): """Retrieves the uffd handler with the given name""" + if global_props.host_linux_version_metrics != "next": + pytest.skip("UFFD is currently broken on this host kernel") + return cargo_build.get_example(f"uffd_{handler_name}_handler", **kwargs) diff --git a/tests/integration_tests/functional/test_secret_freedom.py b/tests/integration_tests/functional/test_secret_freedom.py index 520cb381203..2b5ca5430f2 100644 --- a/tests/integration_tests/functional/test_secret_freedom.py +++ b/tests/integration_tests/functional/test_secret_freedom.py @@ -2,8 +2,6 @@ # SPDX-License-Identifier: Apache-2.0 """Test secret-freedom related functionality.""" -import platform - import pytest from framework import defs @@ -23,17 +21,15 @@ ] -@pytest.mark.skipif( - platform.machine() != "aarch64", - reason="only ARM can boot secret free VMs with I/O devices", -) -def test_secret_free_boot(microvm_factory, guest_kernel_linux_6_1, rootfs): +def test_secret_free_boot(microvm_factory, guest_kernel, rootfs): """Tests that a VM can boot if all virtio devices are bound to a swiotlb region, and that this swiotlb region is actually discovered by the guest.""" - vm = microvm_factory.build(guest_kernel_linux_6_1, rootfs) + vm = microvm_factory.build(guest_kernel, rootfs) vm.spawn() vm.memory_monitor = None - vm.basic_config(memory_config={"initial_swiotlb_size": 8, "secret_free": True}) + vm.basic_config( + memory_config={"secret_free": True}, + ) vm.add_net_iface() vm.start() @@ -52,9 +48,8 @@ def test_secret_free_initrd(microvm_factory, guest_kernel): uvm.basic_config( add_root_device=False, vcpu_count=1, - boot_args="console=ttyS0 reboot=k panic=1 pci=off no-kvmclock", use_initrd=True, - memory_config={"initial_swiotlb_size": 64, "secret_free": True}, + memory_config={"secret_free": True}, ) uvm.start() @@ -65,16 +60,14 @@ def test_secret_free_initrd(microvm_factory, guest_kernel): serial.rx(token=f"rootfs on / type {INITRD_FILESYSTEM}") -@pytest.mark.skipif( - platform.machine() != "aarch64", - reason="only ARM can boot secret free VMs with I/O devices", -) -def test_secret_free_snapshot_creation(microvm_factory, guest_kernel_linux_6_1, rootfs): +def test_secret_free_snapshot_creation(microvm_factory, guest_kernel, rootfs): """Test that snapshot creation works for secret hidden VMs""" - vm = microvm_factory.build(guest_kernel_linux_6_1, rootfs) + vm = microvm_factory.build(guest_kernel, rootfs) vm.spawn() vm.memory_monitor = None - vm.basic_config(memory_config={"initial_swiotlb_size": 8, "secret_free": True}) + vm.basic_config( + memory_config={"secret_free": True}, + ) vm.add_net_iface() vm.start() diff --git a/tests/integration_tests/performance/test_block_ab.py b/tests/integration_tests/performance/test_block_ab.py index 94d7ce39350..250dad9aed8 100644 --- a/tests/integration_tests/performance/test_block_ab.py +++ b/tests/integration_tests/performance/test_block_ab.py @@ -154,13 +154,22 @@ def test_block_performance( """ Execute block device emulation benchmarking scenarios. """ - if memory_config is not None and "6.1" not in guest_kernel_acpi.name: + if ( + memory_config is not None + and memory_config["initial_swiotlb_size"] != 0 + and "6.1" not in guest_kernel_acpi.name + ): pytest.skip("swiotlb only supported on aarch64/6.1") + if memory_config is not None and io_engine == "Async": + pytest.skip("userspace bounce buffers not supported with async block engine") + vm = microvm_factory.build(guest_kernel_acpi, rootfs, monitor_memory=False) vm.spawn(log_level="Info", emit_metrics=True) vm.basic_config( - vcpu_count=vcpus, mem_size_mib=GUEST_MEM_MIB, memory_config=memory_config + vcpu_count=vcpus, + mem_size_mib=GUEST_MEM_MIB, + memory_config=memory_config, ) vm.add_net_iface() # Add a secondary block device for benchmark tests. diff --git a/tests/integration_tests/performance/test_boottime.py b/tests/integration_tests/performance/test_boottime.py index bd2eb3ba9bc..c7ef5fba3fb 100644 --- a/tests/integration_tests/performance/test_boottime.py +++ b/tests/integration_tests/performance/test_boottime.py @@ -75,7 +75,11 @@ def test_boottime( ): """Test boot time with different guest configurations""" - if memory_config is not None and "6.1" not in guest_kernel_acpi.name: + if ( + memory_config is not None + and memory_config["initial_swiotlb_size"] != 0 + and "6.1" not in guest_kernel_acpi.name + ): pytest.skip("swiotlb only supported on aarch64/6.1") for _ in range(10): diff --git a/tests/integration_tests/performance/test_network_ab.py b/tests/integration_tests/performance/test_network_ab.py index a77697d4b6e..9a3ea2d1d0c 100644 --- a/tests/integration_tests/performance/test_network_ab.py +++ b/tests/integration_tests/performance/test_network_ab.py @@ -40,7 +40,11 @@ def network_microvm(request, microvm_factory, guest_kernel_acpi, rootfs, memory_ """Creates a microvm with the networking setup used by the performance tests in this file. This fixture receives its vcpu count via indirect parameterization""" - if memory_config is not None and "6.1" not in guest_kernel_acpi.name: + if ( + memory_config is not None + and memory_config["initial_swiotlb_size"] != 0 + and "6.1" not in guest_kernel_acpi.name + ): pytest.skip("swiotlb only supported on aarch64/6.1") guest_mem_mib = 1024 @@ -49,7 +53,9 @@ def network_microvm(request, microvm_factory, guest_kernel_acpi, rootfs, memory_ vm = microvm_factory.build(guest_kernel_acpi, rootfs, monitor_memory=False) vm.spawn(log_level="Info", emit_metrics=True) vm.basic_config( - vcpu_count=guest_vcpus, mem_size_mib=guest_mem_mib, memory_config=memory_config + vcpu_count=guest_vcpus, + mem_size_mib=guest_mem_mib, + memory_config=memory_config, ) vm.add_net_iface() vm.start() diff --git a/tests/integration_tests/performance/test_vsock_ab.py b/tests/integration_tests/performance/test_vsock_ab.py index 0ad44160df7..a20bd923d56 100644 --- a/tests/integration_tests/performance/test_vsock_ab.py +++ b/tests/integration_tests/performance/test_vsock_ab.py @@ -91,14 +91,20 @@ def test_vsock_throughput( if mode == "bd" and vcpus < 2: pytest.skip("bidrectional test only done with at least 2 vcpus") - if memory_config is not None and "6.1" not in guest_kernel_acpi.name: + if ( + memory_config is not None + and memory_config["initial_swiotlb_size"] != 0 + and "6.1" not in guest_kernel_acpi.name + ): pytest.skip("swiotlb only supported on aarch64/6.1") mem_size_mib = 1024 vm = microvm_factory.build(guest_kernel_acpi, rootfs, monitor_memory=False) vm.spawn(log_level="Info", emit_metrics=True) vm.basic_config( - vcpu_count=vcpus, mem_size_mib=mem_size_mib, memory_config=memory_config + vcpu_count=vcpus, + mem_size_mib=mem_size_mib, + memory_config=memory_config, ) vm.add_net_iface() # Create a vsock device