From 91e642ae906ac88a51cbf65b473e53e15b56e40f Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Tue, 29 Jul 2025 14:42:02 +0200 Subject: [PATCH 1/6] fix: check in Cargo.lock changes Probably missed during a rebase of `feature/pcie` on top of `main` branch. Signed-off-by: Babis Chalios --- Cargo.lock | 9 --------- 1 file changed, 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index de81eb1751a..078b75a2f76 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -761,15 +761,6 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" -[[package]] -name = "itertools" -version = "0.10.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" -dependencies = [ - "either", -] - [[package]] name = "itertools" version = "0.12.1" From 536ecd3f599ccca721900fb614a0035e5410b723 Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Mon, 28 Jul 2025 13:40:39 +0200 Subject: [PATCH 2/6] msix: relax assertion on data accesses from guest It is true that writes/reads of an MSI-X table are either 32 or 64 bits long. However, we do check for this invariant in the `match` expression just after the assertion. If the invariant is not held (the guest tried to read/write with an invalid length) we just print an error and continue. This branch of the `match` block is never reached due to the assertion itself. To simplify things, just remove the assertion and let the `match` block logic handle invalid memory accesses. This should also help us better fuzz the bus accesses. Do add a check that the data access is up to 8 bytes long. These are all MMIO or Port IO accesses and they can't be bigger than 8 bytes. So this assertion should never fail in production (unless there's a KVM bug or we try to run Firecracker in some architecture that allows more than 64bit memory accesses). Signed-off-by: Babis Chalios --- src/pci/src/msix.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pci/src/msix.rs b/src/pci/src/msix.rs index be5aa3b8cf1..82f851322b4 100644 --- a/src/pci/src/msix.rs +++ b/src/pci/src/msix.rs @@ -219,7 +219,7 @@ impl MsixConfig { } pub fn read_table(&self, offset: u64, data: &mut [u8]) { - assert!((data.len() == 4 || data.len() == 8)); + assert!(data.len() <= 8); let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize; let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO; @@ -272,7 +272,7 @@ impl MsixConfig { } pub fn write_table(&mut self, offset: u64, data: &[u8]) { - assert!((data.len() == 4 || data.len() == 8)); + assert!(data.len() <= 8); let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize; let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO; @@ -368,7 +368,7 @@ impl MsixConfig { } pub fn read_pba(&mut self, offset: u64, data: &mut [u8]) { - assert!((data.len() == 4 || data.len() == 8)); + assert!(data.len() <= 8); let index: usize = (offset / MSIX_PBA_ENTRIES_MODULO) as usize; let modulo_offset = offset % MSIX_PBA_ENTRIES_MODULO; From 37cfc20b1a936b53a103c5a6e9be0e103d3e9abf Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Mon, 28 Jul 2025 17:35:47 +0200 Subject: [PATCH 3/6] msi: use Vec to store GSIs for MSI vectors We were using a Hashmap to store the GSIs that were used by the vectors of an MSI-X group. These vectors were always indexed starting by 0, so we can just use a simple Vec. Signed-off-by: Babis Chalios --- .../devices/virtio/transport/pci/device.rs | 2 +- src/vmm/src/vstate/vm.rs | 44 +++++++++---------- 2 files changed, 22 insertions(+), 24 deletions(-) diff --git a/src/vmm/src/devices/virtio/transport/pci/device.rs b/src/vmm/src/devices/virtio/transport/pci/device.rs index 384ad0358dd..7ee580fc6a1 100644 --- a/src/vmm/src/devices/virtio/transport/pci/device.rs +++ b/src/vmm/src/devices/virtio/transport/pci/device.rs @@ -305,7 +305,7 @@ pub struct VirtioPciDeviceState { pub pci_configuration_state: PciConfigurationState, pub pci_dev_state: VirtioPciCommonConfigState, pub msix_state: MsixConfigState, - pub msi_vector_group: HashMap, + pub msi_vector_group: Vec, pub bar_configuration: Vec, } diff --git a/src/vmm/src/vstate/vm.rs b/src/vmm/src/vstate/vm.rs index f4a18484cbc..8d037db96fa 100644 --- a/src/vmm/src/vstate/vm.rs +++ b/src/vmm/src/vstate/vm.rs @@ -110,7 +110,7 @@ impl MsiVector { /// MSI interrupts created for a VirtIO device pub struct MsiVectorGroup { vm: Arc, - irq_routes: HashMap, + irq_routes: Vec, } impl MsiVectorGroup { @@ -123,7 +123,7 @@ impl MsiVectorGroup { } impl<'a> Persist<'a> for MsiVectorGroup { - type State = HashMap; + type State = Vec; type ConstructorArgs = Arc; type Error = InterruptError; @@ -131,20 +131,17 @@ impl<'a> Persist<'a> for MsiVectorGroup { // We don't save the "enabled" state of the MSI interrupt. PCI devices store the MSI-X // configuration and make sure that the vector is enabled during the restore path if it was // initially enabled - self.irq_routes - .iter() - .map(|(id, route)| (*id, route.gsi)) - .collect() + self.irq_routes.iter().map(|route| route.gsi).collect() } fn restore( constructor_args: Self::ConstructorArgs, state: &Self::State, ) -> std::result::Result { - let mut irq_routes = HashMap::new(); + let mut irq_routes = Vec::with_capacity(state.len()); - for (id, gsi) in state { - irq_routes.insert(*id, MsiVector::new(*gsi, false)?); + for gsi in state { + irq_routes.push(MsiVector::new(*gsi, false)?); } Ok(MsiVectorGroup { @@ -156,7 +153,7 @@ impl<'a> Persist<'a> for MsiVectorGroup { impl InterruptSourceGroup for MsiVectorGroup { fn enable(&self) -> vm_device::interrupt::Result<()> { - for route in self.irq_routes.values() { + for route in &self.irq_routes { route.enable(&self.vm.common.fd)?; } @@ -164,7 +161,7 @@ impl InterruptSourceGroup for MsiVectorGroup { } fn disable(&self) -> vm_device::interrupt::Result<()> { - for route in self.irq_routes.values() { + for route in &self.irq_routes { route.disable(&self.vm.common.fd)?; } @@ -180,7 +177,9 @@ impl InterruptSourceGroup for MsiVectorGroup { } fn notifier(&self, index: InterruptIndex) -> Option<&EventFd> { - self.irq_routes.get(&index).map(|route| &route.event_fd) + self.irq_routes + .get(index as usize) + .map(|route| &route.event_fd) } fn update( @@ -199,7 +198,7 @@ impl InterruptSourceGroup for MsiVectorGroup { InterruptSourceConfig::MsiIrq(config) => config, }; - if let Some(route) = self.irq_routes.get(&index) { + if let Some(route) = self.irq_routes.get(index as usize) { // When an interrupt is masked the GSI will not be passed to KVM through // KVM_SET_GSI_ROUTING. So, call [`disable()`] to unregister the interrupt file // descriptor before passing the interrupt routes to KVM @@ -593,14 +592,13 @@ impl Vm { /// Create a group of MSI-X interrupts pub fn create_msix_group(vm: Arc, count: u16) -> Result { debug!("Creating new MSI group with {count} vectors"); - let mut irq_routes = HashMap::with_capacity(count as usize); - for (gsi, i) in vm + let mut irq_routes = Vec::with_capacity(count as usize); + for gsi in vm .resource_allocator() .allocate_gsi_msi(count as u32)? .iter() - .zip(0u32..) { - irq_routes.insert(i, MsiVector::new(*gsi, false)?); + irq_routes.push(MsiVector::new(*gsi, false)?); } Ok(MsiVectorGroup { vm, irq_routes }) @@ -821,13 +819,13 @@ pub(crate) mod tests { let msix_group = create_msix_group(&vm); // Initially all vectors are disabled - for route in msix_group.irq_routes.values() { + for route in &msix_group.irq_routes { assert!(!route.enabled.load(Ordering::Acquire)) } // Enable works msix_group.enable().unwrap(); - for route in msix_group.irq_routes.values() { + for route in &msix_group.irq_routes { assert!(route.enabled.load(Ordering::Acquire)); } // Enabling an enabled group doesn't error out @@ -835,7 +833,7 @@ pub(crate) mod tests { // Disable works msix_group.disable().unwrap(); - for route in msix_group.irq_routes.values() { + for route in &msix_group.irq_routes { assert!(!route.enabled.load(Ordering::Acquire)) } // Disabling a disabled group doesn't error out @@ -921,7 +919,7 @@ pub(crate) mod tests { } // All vectors should be disabled - for vector in msix_group.irq_routes.values() { + for vector in &msix_group.irq_routes { assert!(!vector.enabled.load(Ordering::Acquire)); } @@ -1018,8 +1016,8 @@ pub(crate) mod tests { // Even if an MSI group is enabled, we don't save it as such. During restoration, the PCI // transport will make sure the correct config is set for the vectors and enable them // accordingly. - for (id, vector) in msix_group.irq_routes { - let new_vector = restored_group.irq_routes.get(&id).unwrap(); + for (id, vector) in msix_group.irq_routes.iter().enumerate() { + let new_vector = &restored_group.irq_routes[id]; assert_eq!(vector.gsi, new_vector.gsi); assert!(!new_vector.enabled.load(Ordering::Acquire)); } From 4fc4d8ce6d4fba3e9fd911f569e4931044d73bba Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Tue, 29 Jul 2025 11:25:16 +0200 Subject: [PATCH 4/6] msi: fix size of interrupts HashMap We are using a HashMap to track the interrupt routes we use in the system. The index to the HashMap is the GSI of the interrupt route. We know the maximum number of GSIs we have available so pre-allocate the space for the HashMap to avoid reallocations at runtime. Signed-off-by: Babis Chalios --- src/vmm/src/vstate/vm.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/vmm/src/vstate/vm.rs b/src/vmm/src/vstate/vm.rs index 8d037db96fa..aeb61f88c56 100644 --- a/src/vmm/src/vstate/vm.rs +++ b/src/vmm/src/vstate/vm.rs @@ -28,8 +28,8 @@ use vm_device::interrupt::{ use vmm_sys_util::errno; use vmm_sys_util::eventfd::EventFd; -use crate::arch::host_page_size; pub use crate::arch::{ArchVm as Vm, ArchVmError, VmState}; +use crate::arch::{GSI_MSI_END, host_page_size}; use crate::logger::info; use crate::persist::CreateSnapshotError; use crate::snapshot::Persist; @@ -322,7 +322,7 @@ impl Vm { fd, max_memslots: kvm.max_nr_memslots(), guest_memory: GuestMemoryMmap::default(), - interrupts: Mutex::new(HashMap::new()), + interrupts: Mutex::new(HashMap::with_capacity(GSI_MSI_END as usize + 1)), resource_allocator: Mutex::new(ResourceAllocator::new()), mmio_bus: Arc::new(vm_device::Bus::new()), }) From 72842085e0ed5f2acdc188f16b6c69e906a1302c Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Tue, 29 Jul 2025 14:29:11 +0200 Subject: [PATCH 5/6] fix: return NO_VECTOR when reading MSI vector for invalid queue Fix a bug in the common VirtIO configuration for PCI transport where we would use `queue_select` to read the queue's MSI vector without validating it matches a valid queue. This could lead in panics when accessing the `msix_queue` array. The spec states that in such cases we should return `NO_VECTOR` (0xffff), so do that. Signed-off-by: Babis Chalios --- .../virtio/transport/pci/common_config.rs | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/vmm/src/devices/virtio/transport/pci/common_config.rs b/src/vmm/src/devices/virtio/transport/pci/common_config.rs index 6e52a1ca007..ae66d54f927 100644 --- a/src/vmm/src/devices/virtio/transport/pci/common_config.rs +++ b/src/vmm/src/devices/virtio/transport/pci/common_config.rs @@ -230,7 +230,19 @@ impl VirtioPciCommonConfig { 0x12 => queues.len().try_into().unwrap(), // num_queues 0x16 => self.queue_select, 0x18 => self.with_queue(queues, |q| q.size).unwrap_or(0), - 0x1a => self.msix_queues.lock().unwrap()[self.queue_select as usize], + // If `queue_select` points to an invalid queue we should return NO_VECTOR. + // Reading from here + // https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.html#x1-1280005: + // + // > The device MUST return vector mapped to a given event, (NO_VECTOR if unmapped) on + // > read of config_msix_vector/queue_msix_vector. + 0x1a => self + .msix_queues + .lock() + .unwrap() + .get(self.queue_select as usize) + .copied() + .unwrap_or(0xffff), 0x1c => u16::from(self.with_queue(queues, |q| q.ready).unwrap_or(false)), 0x1e => self.queue_select, // notify_off _ => { @@ -408,8 +420,13 @@ mod tests { // 'queue_select' can be read and written. regs.write(0x16, &[0xaa, 0x55], dev.clone()); let mut read_back = vec![0x00, 0x00]; - regs.read(0x16, &mut read_back, dev); + regs.read(0x16, &mut read_back, dev.clone()); assert_eq!(read_back[0], 0xaa); assert_eq!(read_back[1], 0x55); + + // Getting the MSI vector when `queue_select` points to an invalid queue should return + // NO_VECTOR (0xffff) + regs.read(0x1a, &mut read_back, dev); + assert_eq!(read_back, [0xff, 0xff]); } } From d93aecfd6b52b374222df29fef86a911bf5df5ab Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Tue, 29 Jul 2025 15:19:03 +0200 Subject: [PATCH 6/6] fix(pci): correct shift size when setting config address We were shifting by the wrong number of bits for 2-byte accesses. Signed-off-by: Babis Chalios --- src/pci/src/bus.rs | 120 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 118 insertions(+), 2 deletions(-) diff --git a/src/pci/src/bus.rs b/src/pci/src/bus.rs index 775238edff9..adfac6c12fb 100644 --- a/src/pci/src/bus.rs +++ b/src/pci/src/bus.rs @@ -291,8 +291,8 @@ impl PciConfigIo { u32::from(data[0]) << (offset * 8), ), 2 => ( - 0x0000_ffff << (offset * 16), - ((u32::from(data[1]) << 8) | u32::from(data[0])) << (offset * 16), + 0x0000_ffff << (offset * 8), + ((u32::from(data[1]) << 8) | u32::from(data[0])) << (offset * 8), ), 4 => (0xffff_ffff, LittleEndian::read_u32(data)), _ => return, @@ -475,3 +475,119 @@ fn parse_io_config_address(config_address: u32) -> (usize, usize, usize, usize) shift_and_mask(config_address, REGISTER_NUMBER_OFFSET, REGISTER_NUMBER_MASK), ) } + +#[cfg(test)] +mod tests { + use std::sync::{Arc, Mutex}; + + use vm_device::BusDevice; + + use super::{PciBus, PciConfigIo, PciRoot}; + use crate::DeviceRelocation; + + struct RelocationMock; + + impl DeviceRelocation for RelocationMock { + fn move_bar( + &self, + _old_base: u64, + _new_base: u64, + _len: u64, + _pci_dev: &mut dyn crate::PciDevice, + _region_type: crate::PciBarRegionType, + ) -> std::result::Result<(), std::io::Error> { + Ok(()) + } + } + + #[test] + fn test_writing_config_address() { + let mock = Arc::new(RelocationMock); + let root = PciRoot::new(None); + let mut bus = PciConfigIo::new(Arc::new(Mutex::new(PciBus::new(root, mock)))); + + assert_eq!(bus.config_address, 0); + // Writing more than 32 bits will should fail + bus.write(0, 0, &[0x42; 8]); + assert_eq!(bus.config_address, 0); + // Write all the address at once + bus.write(0, 0, &[0x13, 0x12, 0x11, 0x10]); + assert_eq!(bus.config_address, 0x10111213); + // Not writing 32bits at offset 0 should have no effect + bus.write(0, 1, &[0x0; 4]); + assert_eq!(bus.config_address, 0x10111213); + + // Write two bytes at a time + bus.write(0, 0, &[0x42, 0x42]); + assert_eq!(bus.config_address, 0x10114242); + bus.write(0, 1, &[0x43, 0x43]); + assert_eq!(bus.config_address, 0x10434342); + bus.write(0, 2, &[0x44, 0x44]); + assert_eq!(bus.config_address, 0x44444342); + // Writing two bytes at offset 3 should overflow, so it shouldn't have any effect + bus.write(0, 3, &[0x45, 0x45]); + assert_eq!(bus.config_address, 0x44444342); + + // Write one byte at a time + bus.write(0, 0, &[0x0]); + assert_eq!(bus.config_address, 0x44444300); + bus.write(0, 1, &[0x0]); + assert_eq!(bus.config_address, 0x44440000); + bus.write(0, 2, &[0x0]); + assert_eq!(bus.config_address, 0x44000000); + bus.write(0, 3, &[0x0]); + assert_eq!(bus.config_address, 0x00000000); + // Writing past 4 bytes should have no effect + bus.write(0, 4, &[0x13]); + assert_eq!(bus.config_address, 0x0); + } + + #[test] + fn test_reading_config_address() { + let mock = Arc::new(RelocationMock); + let root = PciRoot::new(None); + let mut bus = PciConfigIo::new(Arc::new(Mutex::new(PciBus::new(root, mock)))); + + let mut buffer = [0u8; 4]; + + bus.config_address = 0x13121110; + + // First 4 bytes are the config address + // Next 4 bytes are the values read from the configuration space. + // + // Reading past offset 7 should not return nothing (all 1s) + bus.read(0, 8, &mut buffer); + assert_eq!(buffer, [0xff; 4]); + + // offset + buffer.len() needs to be smaller or equal than 4 + bus.read(0, 1, &mut buffer); + assert_eq!(buffer, [0xff; 4]); + bus.read(0, 2, &mut buffer[..3]); + assert_eq!(buffer, [0xff; 4]); + bus.read(0, 3, &mut buffer[..2]); + assert_eq!(buffer, [0xff; 4]); + + // reading one byte at a time + bus.read(0, 0, &mut buffer[0..1]); + assert_eq!(buffer, [0x10, 0xff, 0xff, 0xff]); + bus.read(0, 1, &mut buffer[1..2]); + assert_eq!(buffer, [0x10, 0x11, 0xff, 0xff]); + bus.read(0, 2, &mut buffer[2..3]); + assert_eq!(buffer, [0x10, 0x11, 0x12, 0xff]); + bus.read(0, 3, &mut buffer[3..4]); + assert_eq!(buffer, [0x10, 0x11, 0x12, 0x13]); + + // reading two bytes at a time + bus.config_address = 0x42434445; + bus.read(0, 0, &mut buffer[..2]); + assert_eq!(buffer, [0x45, 0x44, 0x12, 0x13]); + bus.read(0, 1, &mut buffer[..2]); + assert_eq!(buffer, [0x44, 0x43, 0x12, 0x13]); + bus.read(0, 2, &mut buffer[..2]); + assert_eq!(buffer, [0x43, 0x42, 0x12, 0x13]); + + // reading all of it at once + bus.read(0, 0, &mut buffer); + assert_eq!(buffer, [0x45, 0x44, 0x43, 0x42]); + } +}