From 239bdd22b566cf0497a70b07eb521ae509254161 Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Mon, 22 Sep 2025 15:02:07 +0200 Subject: [PATCH 1/6] refactor(pci): drop programming interface argument We don't really use the programming interface field of the PCI header. It's always zero for the devices we support. Don't expose it as an argument for constructing PciConfiguration objects. Signed-off-by: Babis Chalios --- src/pci/src/bus.rs | 2 -- src/pci/src/configuration.rs | 35 +------------------ src/pci/src/lib.rs | 4 +-- .../devices/virtio/transport/pci/device.rs | 1 - 4 files changed, 3 insertions(+), 39 deletions(-) diff --git a/src/pci/src/bus.rs b/src/pci/src/bus.rs index 7739e8d9ef8..c0a5a45b58c 100644 --- a/src/pci/src/bus.rs +++ b/src/pci/src/bus.rs @@ -58,7 +58,6 @@ impl PciRoot { 0, PciClassCode::BridgeDevice, &PciBridgeSubclass::HostBridge, - None, PciHeaderType::Device, 0, 0, @@ -482,7 +481,6 @@ mod tests { 0x0, PciClassCode::MassStorage, &PciMassStorageSubclass::SerialScsiController, - None, PciHeaderType::Device, 0x13, 0x12, diff --git a/src/pci/src/configuration.rs b/src/pci/src/configuration.rs index b9607d76611..9159da9fb32 100644 --- a/src/pci/src/configuration.rs +++ b/src/pci/src/configuration.rs @@ -176,17 +176,6 @@ impl PciSubclass for PciNetworkControllerSubclass { } } -/// Trait to define a PCI class programming interface -/// -/// Each combination of `PciClassCode` and `PciSubclass` can specify a -/// set of register-level programming interfaces. -/// This trait is implemented by each programming interface. -/// It allows use of a trait object to generate configurations. -pub trait PciProgrammingInterface { - /// Convert this programming interface to the value used in the PCI specification. - fn get_register_value(&self) -> u8; -} - /// Types of PCI capabilities. #[derive(Debug, PartialEq, Eq, Copy, Clone)] #[allow(dead_code)] @@ -451,7 +440,6 @@ impl PciConfiguration { revision_id: u8, class_code: PciClassCode, subclass: &dyn PciSubclass, - programming_interface: Option<&dyn PciProgrammingInterface>, header_type: PciHeaderType, subsystem_vendor_id: u16, subsystem_id: u16, @@ -473,14 +461,8 @@ impl PciConfiguration { registers[0] = (u32::from(device_id) << 16) | u32::from(vendor_id); // TODO(dverkamp): Status should be write-1-to-clear writable_bits[1] = 0x0000_ffff; // Status (r/o), command (r/w) - let pi = if let Some(pi) = programming_interface { - pi.get_register_value() - } else { - 0 - }; registers[2] = (u32::from(class_code.get_register_value()) << 24) | (u32::from(subclass.get_register_value()) << 16) - | (u32::from(pi) << 8) | u32::from(revision_id); writable_bits[3] = 0x0000_00ff; // Cacheline size (r/w) @@ -1016,17 +998,6 @@ mod tests { assert_eq!(cfg.read_reg(cap_reg + 2), pba_offset); } - #[derive(Copy, Clone)] - enum TestPi { - Test = 0x5a, - } - - impl PciProgrammingInterface for TestPi { - fn get_register_value(&self) -> u8 { - *self as u8 - } - } - #[test] fn class_code() { let cfg = PciConfiguration::new( @@ -1035,7 +1006,6 @@ mod tests { 0x1, PciClassCode::MultimediaController, &PciMultimediaSubclass::AudioController, - Some(&TestPi::Test), PciHeaderType::Device, 0xABCD, 0x2468, @@ -1049,7 +1019,7 @@ mod tests { let prog_if = (class_reg >> 8) & 0xFF; assert_eq!(class_code, 0x04); assert_eq!(subclass, 0x01); - assert_eq!(prog_if, 0x5a); + assert_eq!(prog_if, 0x0); } #[test] @@ -1092,7 +1062,6 @@ mod tests { 0x0, PciClassCode::MassStorage, &PciMassStorageSubclass::SerialScsiController, - None, PciHeaderType::Device, 0x13, 0x12, @@ -1174,7 +1143,6 @@ mod tests { 0x0, PciClassCode::MassStorage, &PciMassStorageSubclass::SerialScsiController, - None, PciHeaderType::Device, 0x13, 0x12, @@ -1220,7 +1188,6 @@ mod tests { 0x0, PciClassCode::MassStorage, &PciMassStorageSubclass::SerialScsiController, - None, PciHeaderType::Device, 0x13, 0x12, diff --git a/src/pci/src/lib.rs b/src/pci/src/lib.rs index 83f5a7a5dcf..ec93f34e58e 100644 --- a/src/pci/src/lib.rs +++ b/src/pci/src/lib.rs @@ -24,8 +24,8 @@ pub use self::bus::{PciBus, PciConfigIo, PciConfigMmio, PciRoot, PciRootError}; pub use self::configuration::{ PciBarConfiguration, PciBarPrefetchable, PciBarRegionType, PciCapability, PciCapabilityId, PciClassCode, PciConfiguration, PciConfigurationState, PciExpressCapabilityId, PciHeaderType, - PciMassStorageSubclass, PciNetworkControllerSubclass, PciProgrammingInterface, - PciSerialBusSubClass, PciSubclass, PCI_CONFIGURATION_ID, + PciMassStorageSubclass, PciNetworkControllerSubclass, PciSerialBusSubClass, PciSubclass, + PCI_CONFIGURATION_ID, }; pub use self::device::{ BarReprogrammingParams, DeviceRelocation, Error as PciDeviceError, PciDevice, diff --git a/src/vmm/src/devices/virtio/transport/pci/device.rs b/src/vmm/src/devices/virtio/transport/pci/device.rs index f60ca43ac14..3bf7c62d026 100644 --- a/src/vmm/src/devices/virtio/transport/pci/device.rs +++ b/src/vmm/src/devices/virtio/transport/pci/device.rs @@ -335,7 +335,6 @@ impl VirtioPciDevice { 0x1, // For modern virtio-PCI devices class, subclass, - None, PciHeaderType::Device, VIRTIO_PCI_VENDOR_ID, pci_device_id, From 92bef823c398046166cbc0fa4cc9096f509088d2 Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Tue, 23 Sep 2025 12:43:06 +0200 Subject: [PATCH 2/6] refactor(pci): remove unused type Drop PciBarConfiguration type which wasn't used anywhere any more. Signed-off-by: Babis Chalios --- src/pci/src/configuration.rs | 21 ------------------- src/pci/src/lib.rs | 4 ++-- .../devices/virtio/transport/pci/device.rs | 8 +++---- 3 files changed, 6 insertions(+), 27 deletions(-) diff --git a/src/pci/src/configuration.rs b/src/pci/src/configuration.rs index 9159da9fb32..bcd78ba463d 100644 --- a/src/pci/src/configuration.rs +++ b/src/pci/src/configuration.rs @@ -423,15 +423,6 @@ impl From for bool { } } -#[derive(Debug, Copy, Clone, Serialize, Deserialize)] -pub struct PciBarConfiguration { - pub addr: u64, - pub size: u64, - pub idx: usize, - pub region_type: PciBarRegionType, - pub prefetchable: PciBarPrefetchable, -} - impl PciConfiguration { #[allow(clippy::too_many_arguments)] pub fn new( @@ -793,18 +784,6 @@ impl PciConfiguration { } } -impl Default for PciBarConfiguration { - fn default() -> Self { - PciBarConfiguration { - idx: 0, - addr: 0, - size: 0, - region_type: PciBarRegionType::Memory64BitRegion, - prefetchable: PciBarPrefetchable::NotPrefetchable, - } - } -} - #[cfg(test)] mod tests { diff --git a/src/pci/src/lib.rs b/src/pci/src/lib.rs index ec93f34e58e..d31d806ac62 100644 --- a/src/pci/src/lib.rs +++ b/src/pci/src/lib.rs @@ -22,8 +22,8 @@ use serde::de::Visitor; pub use self::bus::{PciBus, PciConfigIo, PciConfigMmio, PciRoot, PciRootError}; pub use self::configuration::{ - PciBarConfiguration, PciBarPrefetchable, PciBarRegionType, PciCapability, PciCapabilityId, - PciClassCode, PciConfiguration, PciConfigurationState, PciExpressCapabilityId, PciHeaderType, + PciBarPrefetchable, PciBarRegionType, PciCapability, PciCapabilityId, PciClassCode, + PciConfiguration, PciConfigurationState, PciExpressCapabilityId, PciHeaderType, PciMassStorageSubclass, PciNetworkControllerSubclass, PciSerialBusSubClass, PciSubclass, PCI_CONFIGURATION_ID, }; diff --git a/src/vmm/src/devices/virtio/transport/pci/device.rs b/src/vmm/src/devices/virtio/transport/pci/device.rs index 3bf7c62d026..137ef00399c 100644 --- a/src/vmm/src/devices/virtio/transport/pci/device.rs +++ b/src/vmm/src/devices/virtio/transport/pci/device.rs @@ -18,10 +18,10 @@ use anyhow::anyhow; use kvm_ioctls::{IoEventAddress, NoDatamatch}; use log::warn; use pci::{ - BarReprogrammingParams, MsixCap, MsixConfig, MsixConfigState, PciBarConfiguration, - PciBarRegionType, PciBdf, PciCapability, PciCapabilityId, PciClassCode, PciConfiguration, - PciConfigurationState, PciDevice, PciDeviceError, PciHeaderType, PciMassStorageSubclass, - PciNetworkControllerSubclass, PciSubclass, + BarReprogrammingParams, MsixCap, MsixConfig, MsixConfigState, PciBarRegionType, PciBdf, + PciCapability, PciCapabilityId, PciClassCode, PciConfiguration, PciConfigurationState, + PciDevice, PciDeviceError, PciHeaderType, PciMassStorageSubclass, PciNetworkControllerSubclass, + PciSubclass, }; use serde::{Deserialize, Serialize}; use thiserror::Error; From b649245d6e5a399f6a1eee230caa4ed79c76e839 Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Thu, 25 Sep 2025 13:10:12 +0200 Subject: [PATCH 3/6] refactor(pci): move PCI emulation logic in vmm mod The implementation of the PCI emulation components is heavily Firecracker opinionated. Move those inside a module of `vmm` crate and leave in the `pci` module only type definitions which can easily be reused across VMMs. We also take advantage of this refactoring to remove in various places the usage of `std::io::Error` as error types returned by emulation logic. Signed-off-by: Babis Chalios --- Cargo.lock | 3 - src/pci/Cargo.toml | 7 +- src/pci/src/lib.rs | 363 ++++++++++- src/vmm/src/device_manager/pci_mngr.rs | 10 +- src/vmm/src/devices/pci/pci_segment.rs | 6 +- .../devices/virtio/transport/pci/device.rs | 37 +- src/vmm/src/lib.rs | 2 + src/{pci/src => vmm/src/pci}/bus.rs | 153 ++--- src/{pci/src => vmm/src/pci}/configuration.rs | 564 ++++-------------- src/{pci/src/device.rs => vmm/src/pci/mod.rs} | 26 +- src/{pci/src => vmm/src/pci}/msix.rs | 86 ++- src/vmm/src/vstate/vm.rs | 12 +- 12 files changed, 633 insertions(+), 636 deletions(-) rename src/{pci/src => vmm/src/pci}/bus.rs (89%) rename src/{pci/src => vmm/src/pci}/configuration.rs (64%) rename src/{pci/src/device.rs => vmm/src/pci/mod.rs} (83%) rename src/{pci/src => vmm/src/pci}/msix.rs (91%) diff --git a/Cargo.lock b/Cargo.lock index 1f95b9f1bf4..85aefa303dd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1046,15 +1046,12 @@ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" name = "pci" version = "0.1.0" dependencies = [ - "byteorder", "displaydoc", "libc", "log", "serde", "serde_test", "thiserror 2.0.17", - "vm-device", - "vm-memory", "vmm-sys-util", ] diff --git a/src/pci/Cargo.toml b/src/pci/Cargo.toml index 96c84a8b913..47cfe5d3e6b 100644 --- a/src/pci/Cargo.toml +++ b/src/pci/Cargo.toml @@ -12,17 +12,12 @@ bench = false default = [] [dependencies] -byteorder = "1.5.0" + displaydoc = "0.2.5" libc = "0.2.176" log = "0.4.28" serde = { version = "1.0.228", features = ["derive"] } thiserror = "2.0.17" -vm-device = { path = "../vm-device" } -vm-memory = { version = "0.16.1", features = [ - "backend-mmap", - "backend-bitmap", -] } [dev-dependencies] serde_test = "1.0.177" diff --git a/src/pci/src/lib.rs b/src/pci/src/lib.rs index d31d806ac62..c06e4e6dc59 100644 --- a/src/pci/src/lib.rs +++ b/src/pci/src/lib.rs @@ -6,31 +6,15 @@ // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause //! Implements pci devices and busses. -#[macro_use] -extern crate log; -mod bus; -mod configuration; -mod device; -mod msix; +extern crate log; use std::fmt::{self, Debug, Display}; use std::num::ParseIntError; use std::str::FromStr; use serde::de::Visitor; - -pub use self::bus::{PciBus, PciConfigIo, PciConfigMmio, PciRoot, PciRootError}; -pub use self::configuration::{ - PciBarPrefetchable, PciBarRegionType, PciCapability, PciCapabilityId, PciClassCode, - PciConfiguration, PciConfigurationState, PciExpressCapabilityId, PciHeaderType, - PciMassStorageSubclass, PciNetworkControllerSubclass, PciSerialBusSubClass, PciSubclass, - PCI_CONFIGURATION_ID, -}; -pub use self::device::{ - BarReprogrammingParams, DeviceRelocation, Error as PciDeviceError, PciDevice, -}; -pub use self::msix::{Error as MsixError, MsixCap, MsixConfig, MsixConfigState, MsixTableEntry}; +use serde::{Deserialize, Serialize}; /// PCI has four interrupt pins A->D. #[derive(Copy, Clone)] @@ -47,11 +31,6 @@ impl PciInterruptPin { } } -#[cfg(target_arch = "x86_64")] -pub const PCI_CONFIG_IO_PORT: u64 = 0xcf8; -#[cfg(target_arch = "x86_64")] -pub const PCI_CONFIG_IO_PORT_SIZE: u64 = 0x8; - #[derive(Clone, Copy, PartialEq, Eq, PartialOrd)] pub struct PciBdf(u32); @@ -202,6 +181,344 @@ impl FromStr for PciBdf { } } +/// Represents the types of PCI headers allowed in the configuration registers. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum PciHeaderType { + Device, + Bridge, +} + +/// Classes of PCI nodes. +#[allow(dead_code)] +#[derive(Copy, Clone)] +pub enum PciClassCode { + TooOld, + MassStorage, + NetworkController, + DisplayController, + MultimediaController, + MemoryController, + BridgeDevice, + SimpleCommunicationController, + BaseSystemPeripheral, + InputDevice, + DockingStation, + Processor, + SerialBusController, + WirelessController, + IntelligentIoController, + EncryptionController, + DataAcquisitionSignalProcessing, + Other = 0xff, +} + +impl PciClassCode { + pub fn get_register_value(self) -> u8 { + self as u8 + } +} + +/// A PCI subclass. Each class in `PciClassCode` can specify a unique set of subclasses. This trait +/// is implemented by each subclass. It allows use of a trait object to generate configurations. +pub trait PciSubclass { + /// Convert this subclass to the value used in the PCI specification. + fn get_register_value(&self) -> u8; +} + +/// Subclasses of the MultimediaController class. +#[allow(dead_code)] +#[derive(Copy, Clone)] +pub enum PciMultimediaSubclass { + VideoController = 0x00, + AudioController = 0x01, + TelephonyDevice = 0x02, + AudioDevice = 0x03, + Other = 0x80, +} + +impl PciSubclass for PciMultimediaSubclass { + fn get_register_value(&self) -> u8 { + *self as u8 + } +} + +/// Subclasses of the BridgeDevice +#[allow(dead_code)] +#[derive(Copy, Clone)] +pub enum PciBridgeSubclass { + HostBridge = 0x00, + IsaBridge = 0x01, + EisaBridge = 0x02, + McaBridge = 0x03, + PciToPciBridge = 0x04, + PcmciaBridge = 0x05, + NuBusBridge = 0x06, + CardBusBridge = 0x07, + RacEwayBridge = 0x08, + PciToPciSemiTransparentBridge = 0x09, + InfiniBrandToPciHostBridge = 0x0a, + OtherBridgeDevice = 0x80, +} + +impl PciSubclass for PciBridgeSubclass { + fn get_register_value(&self) -> u8 { + *self as u8 + } +} + +/// Subclass of the SerialBus +#[allow(dead_code)] +#[derive(Copy, Clone)] +pub enum PciSerialBusSubClass { + Firewire = 0x00, + Accessbus = 0x01, + Ssa = 0x02, + Usb = 0x03, +} + +impl PciSubclass for PciSerialBusSubClass { + fn get_register_value(&self) -> u8 { + *self as u8 + } +} + +/// Mass Storage Sub Classes +#[allow(dead_code)] +#[derive(Copy, Clone)] +pub enum PciMassStorageSubclass { + ScsiStorage = 0x00, + IdeInterface = 0x01, + FloppyController = 0x02, + IpiController = 0x03, + RaidController = 0x04, + AtaController = 0x05, + SataController = 0x06, + SerialScsiController = 0x07, + NvmController = 0x08, + MassStorage = 0x80, +} + +impl PciSubclass for PciMassStorageSubclass { + fn get_register_value(&self) -> u8 { + *self as u8 + } +} + +/// Network Controller Sub Classes +#[allow(dead_code)] +#[derive(Copy, Clone)] +pub enum PciNetworkControllerSubclass { + EthernetController = 0x00, + TokenRingController = 0x01, + FddiController = 0x02, + AtmController = 0x03, + IsdnController = 0x04, + WorldFipController = 0x05, + PicmgController = 0x06, + InfinibandController = 0x07, + FabricController = 0x08, + NetworkController = 0x80, +} + +impl PciSubclass for PciNetworkControllerSubclass { + fn get_register_value(&self) -> u8 { + *self as u8 + } +} + +/// Types of PCI capabilities. +#[derive(Debug, PartialEq, Eq, Copy, Clone)] +#[allow(dead_code)] +#[allow(non_camel_case_types)] +#[repr(u8)] +pub enum PciCapabilityId { + ListId = 0, + PowerManagement = 0x01, + AcceleratedGraphicsPort = 0x02, + VitalProductData = 0x03, + SlotIdentification = 0x04, + MessageSignalledInterrupts = 0x05, + CompactPciHotSwap = 0x06, + PciX = 0x07, + HyperTransport = 0x08, + VendorSpecific = 0x09, + Debugport = 0x0A, + CompactPciCentralResourceControl = 0x0B, + PciStandardHotPlugController = 0x0C, + BridgeSubsystemVendorDeviceId = 0x0D, + AgpTargetPciPcibridge = 0x0E, + SecureDevice = 0x0F, + PciExpress = 0x10, + MsiX = 0x11, + SataDataIndexConf = 0x12, + PciAdvancedFeatures = 0x13, + PciEnhancedAllocation = 0x14, +} + +impl From for PciCapabilityId { + fn from(c: u8) -> Self { + match c { + 0 => PciCapabilityId::ListId, + 0x01 => PciCapabilityId::PowerManagement, + 0x02 => PciCapabilityId::AcceleratedGraphicsPort, + 0x03 => PciCapabilityId::VitalProductData, + 0x04 => PciCapabilityId::SlotIdentification, + 0x05 => PciCapabilityId::MessageSignalledInterrupts, + 0x06 => PciCapabilityId::CompactPciHotSwap, + 0x07 => PciCapabilityId::PciX, + 0x08 => PciCapabilityId::HyperTransport, + 0x09 => PciCapabilityId::VendorSpecific, + 0x0A => PciCapabilityId::Debugport, + 0x0B => PciCapabilityId::CompactPciCentralResourceControl, + 0x0C => PciCapabilityId::PciStandardHotPlugController, + 0x0D => PciCapabilityId::BridgeSubsystemVendorDeviceId, + 0x0E => PciCapabilityId::AgpTargetPciPcibridge, + 0x0F => PciCapabilityId::SecureDevice, + 0x10 => PciCapabilityId::PciExpress, + 0x11 => PciCapabilityId::MsiX, + 0x12 => PciCapabilityId::SataDataIndexConf, + 0x13 => PciCapabilityId::PciAdvancedFeatures, + 0x14 => PciCapabilityId::PciEnhancedAllocation, + _ => PciCapabilityId::ListId, + } + } +} + +/// Types of PCI Express capabilities. +#[derive(PartialEq, Eq, Copy, Clone, Debug)] +#[allow(dead_code)] +#[repr(u16)] +pub enum PciExpressCapabilityId { + NullCapability = 0x0000, + AdvancedErrorReporting = 0x0001, + VirtualChannelMultiFunctionVirtualChannelNotPresent = 0x0002, + DeviceSerialNumber = 0x0003, + PowerBudgeting = 0x0004, + RootComplexLinkDeclaration = 0x0005, + RootComplexInternalLinkControl = 0x0006, + RootComplexEventCollectorEndpointAssociation = 0x0007, + MultiFunctionVirtualChannel = 0x0008, + VirtualChannelMultiFunctionVirtualChannelPresent = 0x0009, + RootComplexRegisterBlock = 0x000a, + VendorSpecificExtendedCapability = 0x000b, + ConfigurationAccessCorrelation = 0x000c, + AccessControlServices = 0x000d, + AlternativeRoutingIdentificationInterpretation = 0x000e, + AddressTranslationServices = 0x000f, + SingleRootIoVirtualization = 0x0010, + DeprecatedMultiRootIoVirtualization = 0x0011, + Multicast = 0x0012, + PageRequestInterface = 0x0013, + ReservedForAmd = 0x0014, + ResizeableBar = 0x0015, + DynamicPowerAllocation = 0x0016, + ThpRequester = 0x0017, + LatencyToleranceReporting = 0x0018, + SecondaryPciExpress = 0x0019, + ProtocolMultiplexing = 0x001a, + ProcessAddressSpaceId = 0x001b, + LnRequester = 0x001c, + DownstreamPortContainment = 0x001d, + L1PmSubstates = 0x001e, + PrecisionTimeMeasurement = 0x001f, + PciExpressOverMphy = 0x0020, + FRSQueueing = 0x0021, + ReadinessTimeReporting = 0x0022, + DesignatedVendorSpecificExtendedCapability = 0x0023, + VfResizeableBar = 0x0024, + DataLinkFeature = 0x0025, + PhysicalLayerSixteenGts = 0x0026, + LaneMarginingAtTheReceiver = 0x0027, + HierarchyId = 0x0028, + NativePcieEnclosureManagement = 0x0029, + PhysicalLayerThirtyTwoGts = 0x002a, + AlternateProtocol = 0x002b, + SystemFirmwareIntermediary = 0x002c, + ShadowFunctions = 0x002d, + DataObjectExchange = 0x002e, + Reserved = 0x002f, + ExtendedCapabilitiesAbsence = 0xffff, +} + +impl From for PciExpressCapabilityId { + fn from(c: u16) -> Self { + match c { + 0x0000 => PciExpressCapabilityId::NullCapability, + 0x0001 => PciExpressCapabilityId::AdvancedErrorReporting, + 0x0002 => PciExpressCapabilityId::VirtualChannelMultiFunctionVirtualChannelNotPresent, + 0x0003 => PciExpressCapabilityId::DeviceSerialNumber, + 0x0004 => PciExpressCapabilityId::PowerBudgeting, + 0x0005 => PciExpressCapabilityId::RootComplexLinkDeclaration, + 0x0006 => PciExpressCapabilityId::RootComplexInternalLinkControl, + 0x0007 => PciExpressCapabilityId::RootComplexEventCollectorEndpointAssociation, + 0x0008 => PciExpressCapabilityId::MultiFunctionVirtualChannel, + 0x0009 => PciExpressCapabilityId::VirtualChannelMultiFunctionVirtualChannelPresent, + 0x000a => PciExpressCapabilityId::RootComplexRegisterBlock, + 0x000b => PciExpressCapabilityId::VendorSpecificExtendedCapability, + 0x000c => PciExpressCapabilityId::ConfigurationAccessCorrelation, + 0x000d => PciExpressCapabilityId::AccessControlServices, + 0x000e => PciExpressCapabilityId::AlternativeRoutingIdentificationInterpretation, + 0x000f => PciExpressCapabilityId::AddressTranslationServices, + 0x0010 => PciExpressCapabilityId::SingleRootIoVirtualization, + 0x0011 => PciExpressCapabilityId::DeprecatedMultiRootIoVirtualization, + 0x0012 => PciExpressCapabilityId::Multicast, + 0x0013 => PciExpressCapabilityId::PageRequestInterface, + 0x0014 => PciExpressCapabilityId::ReservedForAmd, + 0x0015 => PciExpressCapabilityId::ResizeableBar, + 0x0016 => PciExpressCapabilityId::DynamicPowerAllocation, + 0x0017 => PciExpressCapabilityId::ThpRequester, + 0x0018 => PciExpressCapabilityId::LatencyToleranceReporting, + 0x0019 => PciExpressCapabilityId::SecondaryPciExpress, + 0x001a => PciExpressCapabilityId::ProtocolMultiplexing, + 0x001b => PciExpressCapabilityId::ProcessAddressSpaceId, + 0x001c => PciExpressCapabilityId::LnRequester, + 0x001d => PciExpressCapabilityId::DownstreamPortContainment, + 0x001e => PciExpressCapabilityId::L1PmSubstates, + 0x001f => PciExpressCapabilityId::PrecisionTimeMeasurement, + 0x0020 => PciExpressCapabilityId::PciExpressOverMphy, + 0x0021 => PciExpressCapabilityId::FRSQueueing, + 0x0022 => PciExpressCapabilityId::ReadinessTimeReporting, + 0x0023 => PciExpressCapabilityId::DesignatedVendorSpecificExtendedCapability, + 0x0024 => PciExpressCapabilityId::VfResizeableBar, + 0x0025 => PciExpressCapabilityId::DataLinkFeature, + 0x0026 => PciExpressCapabilityId::PhysicalLayerSixteenGts, + 0x0027 => PciExpressCapabilityId::LaneMarginingAtTheReceiver, + 0x0028 => PciExpressCapabilityId::HierarchyId, + 0x0029 => PciExpressCapabilityId::NativePcieEnclosureManagement, + 0x002a => PciExpressCapabilityId::PhysicalLayerThirtyTwoGts, + 0x002b => PciExpressCapabilityId::AlternateProtocol, + 0x002c => PciExpressCapabilityId::SystemFirmwareIntermediary, + 0x002d => PciExpressCapabilityId::ShadowFunctions, + 0x002e => PciExpressCapabilityId::DataObjectExchange, + 0xffff => PciExpressCapabilityId::ExtendedCapabilitiesAbsence, + _ => PciExpressCapabilityId::Reserved, + } + } +} + +/// See pci_regs.h in kernel +#[derive(Copy, Clone, PartialEq, Eq, Serialize, Deserialize, Debug)] +pub enum PciBarRegionType { + Memory32BitRegion = 0, + IoRegion = 0x01, + Memory64BitRegion = 0x04, +} + +#[derive(Debug, Copy, Clone, Serialize, Deserialize)] +pub enum PciBarPrefetchable { + NotPrefetchable = 0, + Prefetchable = 0x08, +} + +impl From for bool { + fn from(val: PciBarPrefetchable) -> Self { + match val { + PciBarPrefetchable::NotPrefetchable => false, + PciBarPrefetchable::Prefetchable => true, + } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/vmm/src/device_manager/pci_mngr.rs b/src/vmm/src/device_manager/pci_mngr.rs index b8371d82a83..309704be8bf 100644 --- a/src/vmm/src/device_manager/pci_mngr.rs +++ b/src/vmm/src/device_manager/pci_mngr.rs @@ -8,7 +8,6 @@ use std::sync::{Arc, Mutex}; use event_manager::{MutEventSubscriber, SubscriberOps}; use log::{debug, error, warn}; -use pci::{PciDeviceError, PciRootError}; use serde::{Deserialize, Serialize}; use vm_device::BusError; @@ -31,6 +30,7 @@ use crate::devices::virtio::vsock::persist::{ VsockConstructorArgs, VsockState, VsockUdsConstructorArgs, }; use crate::devices::virtio::vsock::{Vsock, VsockUnixBackend}; +use crate::pci::bus::PciRootError; use crate::resources::VmResources; use crate::snapshot::Persist; use crate::vmm_config::mmds::MmdsConfigError; @@ -58,8 +58,6 @@ pub enum PciManagerError { Msi(#[from] InterruptError), /// VirtIO PCI device error: {0} VirtioPciDevice(#[from] VirtioPciDeviceError), - /// PCI device error: {0} - PciDeviceError(#[from] PciDeviceError), /// KVM error: {0} Kvm(#[from] vmm_sys_util::errno::Error), /// MMDS error: {0} @@ -133,14 +131,14 @@ impl PciDevices { let mut resource_allocator_lock = vm.resource_allocator(); let resource_allocator = resource_allocator_lock.deref_mut(); - virtio_device.allocate_bars(&mut resource_allocator.mmio64_memory)?; + virtio_device.allocate_bars(&mut resource_allocator.mmio64_memory); let virtio_device = Arc::new(Mutex::new(virtio_device)); pci_segment .pci_bus .lock() .expect("Poisoned lock") - .add_device(pci_device_bdf.device() as u32, virtio_device.clone())?; + .add_device(pci_device_bdf.device() as u32, virtio_device.clone()); self.virtio_devices .insert((device_type, id.clone()), virtio_device.clone()); @@ -185,7 +183,7 @@ impl PciDevices { .add_device( transport_state.pci_device_bdf.device() as u32, virtio_device.clone(), - )?; + ); self.virtio_devices .insert((device_type, device_id.to_string()), virtio_device.clone()); diff --git a/src/vmm/src/devices/pci/pci_segment.rs b/src/vmm/src/devices/pci/pci_segment.rs index 7deaa027f7b..7e74132dd9a 100644 --- a/src/vmm/src/devices/pci/pci_segment.rs +++ b/src/vmm/src/devices/pci/pci_segment.rs @@ -14,14 +14,16 @@ use std::sync::{Arc, Mutex}; #[cfg(target_arch = "x86_64")] use acpi_tables::{Aml, aml}; use log::info; +use pci::PciBdf; #[cfg(target_arch = "x86_64")] -use pci::{PCI_CONFIG_IO_PORT, PCI_CONFIG_IO_PORT_SIZE, PciConfigIo}; -use pci::{PciBdf, PciBus, PciConfigMmio, PciRoot, PciRootError}; use uuid::Uuid; use vm_allocator::AddressAllocator; use vm_device::{BusDeviceSync, BusError}; use crate::arch::{ArchVm as Vm, PCI_MMCONFIG_START, PCI_MMIO_CONFIG_SIZE_PER_SEGMENT}; +#[cfg(target_arch = "x86_64")] +use crate::pci::bus::{PCI_CONFIG_IO_PORT, PCI_CONFIG_IO_PORT_SIZE}; +use crate::pci::bus::{PciBus, PciConfigIo, PciConfigMmio, PciRoot, PciRootError}; use crate::vstate::resources::ResourceAllocator; pub struct PciSegment { diff --git a/src/vmm/src/devices/virtio/transport/pci/device.rs b/src/vmm/src/devices/virtio/transport/pci/device.rs index 137ef00399c..7fc125af29c 100644 --- a/src/vmm/src/devices/virtio/transport/pci/device.rs +++ b/src/vmm/src/devices/virtio/transport/pci/device.rs @@ -18,9 +18,7 @@ use anyhow::anyhow; use kvm_ioctls::{IoEventAddress, NoDatamatch}; use log::warn; use pci::{ - BarReprogrammingParams, MsixCap, MsixConfig, MsixConfigState, PciBarRegionType, PciBdf, - PciCapability, PciCapabilityId, PciClassCode, PciConfiguration, PciConfigurationState, - PciDevice, PciDeviceError, PciHeaderType, PciMassStorageSubclass, PciNetworkControllerSubclass, + PciBdf, PciCapabilityId, PciClassCode, PciMassStorageSubclass, PciNetworkControllerSubclass, PciSubclass, }; use serde::{Deserialize, Serialize}; @@ -41,6 +39,9 @@ use crate::devices::virtio::transport::pci::common_config::{ }; use crate::devices::virtio::transport::{VirtioInterrupt, VirtioInterruptType}; use crate::logger::{debug, error}; +use crate::pci::configuration::{PciCapability, PciConfiguration, PciConfigurationState}; +use crate::pci::msix::{MsixCap, MsixConfig, MsixConfigState, MsixError}; +use crate::pci::{BarReprogrammingParams, PciDevice}; use crate::snapshot::Persist; use crate::utils::u64_to_usize; use crate::vstate::memory::GuestMemoryMmap; @@ -254,7 +255,7 @@ pub enum VirtioPciDeviceError { /// Failed creating VirtioPciDevice: {0} CreateVirtioPciDevice(#[from] anyhow::Error), /// Error creating MSI configuration: {0} - Msi(#[from] pci::MsixError), + Msi(#[from] MsixError), } pub type Result = std::result::Result; @@ -329,13 +330,12 @@ impl VirtioPciDevice { ), }; - PciConfiguration::new( + PciConfiguration::new_type0( VIRTIO_PCI_VENDOR_ID, pci_device_id, 0x1, // For modern virtio-PCI devices class, subclass, - PciHeaderType::Device, VIRTIO_PCI_VENDOR_ID, pci_device_id, Some(msix_config.clone()), @@ -363,10 +363,7 @@ impl VirtioPciDevice { /// This must happen only during the creation of a brand new VM. When a VM is restored from a /// known state, the BARs are already created with the right content, therefore we don't need /// to go through this codepath. - pub fn allocate_bars( - &mut self, - mmio64_allocator: &mut AddressAllocator, - ) -> std::result::Result<(), PciDeviceError> { + pub fn allocate_bars(&mut self, mmio64_allocator: &mut AddressAllocator) { let device_clone = self.device.clone(); let device = device_clone.lock().unwrap(); @@ -388,10 +385,8 @@ impl VirtioPciDevice { ); // Once the BARs are allocated, the capabilities can be added to the PCI configuration. - self.add_pci_capabilities()?; + self.add_pci_capabilities(); self.bar_address = virtio_pci_bar_addr; - - Ok(()) } /// Constructs a new PCI transport for the given virtio device. @@ -523,7 +518,7 @@ impl VirtioPciDevice { self.configuration.get_bar_addr(VIRTIO_BAR_INDEX as usize) } - fn add_pci_capabilities(&mut self) -> std::result::Result<(), PciDeviceError> { + fn add_pci_capabilities(&mut self) { // Add pointers to the different configuration structures from the PCI capabilities. let common_cap = VirtioPciCap::new( PciCapabilityType::Common, @@ -570,8 +565,6 @@ impl VirtioPciDevice { ); self.configuration.add_capability(&msix_cap); } - - Ok(()) } fn read_cap_pci_cfg(&mut self, offset: usize, mut data: &mut [u8]) { @@ -820,11 +813,7 @@ impl PciDevice for VirtioPciDevice { self.configuration.detect_bar_reprogramming(reg_idx, data) } - fn move_bar( - &mut self, - old_base: u64, - new_base: u64, - ) -> std::result::Result<(), std::io::Error> { + fn move_bar(&mut self, old_base: u64, new_base: u64) -> std::result::Result<(), anyhow::Error> { // We only update our idea of the bar in order to support free_bars() above. // The majority of the reallocation is done inside DeviceManager. if self.bar_address == old_base { @@ -987,9 +976,7 @@ mod tests { use event_manager::MutEventSubscriber; use linux_loader::loader::Cmdline; - use pci::{ - MsixCap, PciBdf, PciCapability, PciCapabilityId, PciClassCode, PciDevice, PciSubclass, - }; + use pci::{PciCapabilityId, PciClassCode, PciSubclass}; use vm_memory::{ByteValued, Le32}; use super::{PciCapabilityType, VirtioPciDevice}; @@ -1006,6 +993,8 @@ mod tests { NOTIFY_OFF_MULTIPLIER, PciVirtioSubclass, VirtioPciCap, VirtioPciCfgCap, VirtioPciNotifyCap, }; + use crate::pci::PciDevice; + use crate::pci::msix::MsixCap; use crate::rate_limiter::RateLimiter; use crate::utils::u64_to_usize; use crate::{Vm, Vmm}; diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index 7a168db0146..ace273bb94c 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -90,6 +90,8 @@ pub mod gdb; pub mod logger; /// microVM Metadata Service MMDS pub mod mmds; +/// PCI specific emulation code. +pub mod pci; /// Save/restore utilities. pub mod persist; /// Resource store for configured microVM resources. diff --git a/src/pci/src/bus.rs b/src/vmm/src/pci/bus.rs similarity index 89% rename from src/pci/src/bus.rs rename to src/vmm/src/pci/bus.rs index c0a5a45b58c..6ae31f84b58 100644 --- a/src/pci/src/bus.rs +++ b/src/vmm/src/pci/bus.rs @@ -6,39 +6,31 @@ // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause use std::collections::HashMap; +use std::fmt::Debug; use std::ops::DerefMut; use std::sync::{Arc, Barrier, Mutex}; use byteorder::{ByteOrder, LittleEndian}; +use pci::{PciBridgeSubclass, PciClassCode}; use vm_device::BusDevice; -use crate::configuration::{PciBridgeSubclass, PciClassCode, PciConfiguration, PciHeaderType}; -use crate::device::{DeviceRelocation, Error as PciDeviceError, PciDevice}; - -const VENDOR_ID_INTEL: u16 = 0x8086; -const DEVICE_ID_INTEL_VIRT_PCIE_HOST: u16 = 0x0d57; -const NUM_DEVICE_IDS: usize = 32; +use crate::logger::error; +use crate::pci::configuration::PciConfiguration; +use crate::pci::{DeviceRelocation, PciDevice}; +use crate::utils::u64_to_usize; /// Errors for device manager. #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum PciRootError { - /// Could not allocate device address space for the device. - AllocateDeviceAddrs(PciDeviceError), - /// Could not allocate an IRQ number. - AllocateIrq, - /// Could not add a device to the port io bus. - PioInsert(vm_device::BusError), - /// Could not add a device to the mmio bus. - MmioInsert(vm_device::BusError), /// Could not find an available device slot on the PCI bus. NoPciDeviceSlotAvailable, - /// Invalid PCI device identifier provided. - InvalidPciDeviceSlot(usize), - /// Valid PCI device identifier but already used. - AlreadyInUsePciDeviceSlot(usize), } -pub type Result = std::result::Result; +const VENDOR_ID_INTEL: u16 = 0x8086; +const DEVICE_ID_INTEL_VIRT_PCIE_HOST: u16 = 0x0d57; +const NUM_DEVICE_IDS: usize = 32; + +#[derive(Debug)] /// Emulates the PCI Root bridge device. pub struct PciRoot { /// Configuration space. @@ -52,13 +44,12 @@ impl PciRoot { PciRoot { config } } else { PciRoot { - config: PciConfiguration::new( + config: PciConfiguration::new_type0( VENDOR_ID_INTEL, DEVICE_ID_INTEL_VIRT_PCIE_HOST, 0, PciClassCode::BridgeDevice, &PciBridgeSubclass::HostBridge, - PciHeaderType::Device, 0, 0, None, @@ -87,16 +78,26 @@ impl PciDevice for PciRoot { } } +/// A PCI bus definition pub struct PciBus { /// Devices attached to this bus. /// Device 0 is host bridge. pub devices: HashMap>>, - device_reloc: Arc, + vm: Arc, device_ids: Vec, } +impl Debug for PciBus { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Root Firecracker PCI Bus") + .field("device_ids", &self.device_ids) + .finish() + } +} + impl PciBus { - pub fn new(pci_root: PciRoot, device_reloc: Arc) -> Self { + /// Create a new PCI bus + pub fn new(pci_root: PciRoot, vm: Arc) -> Self { let mut devices: HashMap>> = HashMap::new(); let mut device_ids: Vec = vec![false; NUM_DEVICE_IDS]; @@ -105,21 +106,22 @@ impl PciBus { PciBus { devices, - device_reloc, + vm, device_ids, } } - pub fn add_device(&mut self, device_id: u32, device: Arc>) -> Result<()> { + /// Insert a device in the bus + pub fn add_device(&mut self, device_id: u32, device: Arc>) { self.devices.insert(device_id, device); - Ok(()) } - pub fn next_device_id(&mut self) -> Result { + /// Get a new device ID + pub fn next_device_id(&mut self) -> Result { for (idx, device_id) in self.device_ids.iter_mut().enumerate() { if !(*device_id) { *device_id = true; - return Ok(idx as u32); + return Ok(idx.try_into().unwrap()); } } @@ -127,6 +129,16 @@ impl PciBus { } } +#[cfg(target_arch = "x86_64")] +/// IO port used for configuring PCI over the legacy bus +pub const PCI_CONFIG_IO_PORT: u64 = 0xcf8; +#[cfg(target_arch = "x86_64")] +/// Size of IO ports we are using to configure PCI over the legacy bus. We have two ports, 0xcf8 +/// and 0xcfc 32bits long. +pub const PCI_CONFIG_IO_PORT_SIZE: u64 = 0x8; + +/// Wrapper that allows handling PCI configuration over the legacy Bus +#[derive(Debug)] pub struct PciConfigIo { /// Config space register. config_address: u32, @@ -134,6 +146,7 @@ pub struct PciConfigIo { } impl PciConfigIo { + /// New Port IO configuration handler pub fn new(pci_bus: Arc>) -> Self { PciConfigIo { config_address: 0, @@ -141,6 +154,7 @@ impl PciConfigIo { } } + /// Handle a configuration space read over Port IO pub fn config_space_read(&self) -> u32 { let enabled = (self.config_address & 0x8000_0000) != 0; if !enabled { @@ -168,14 +182,15 @@ impl PciConfigIo { .lock() .unwrap() .devices - .get(&(device as u32)) + .get(&(device.try_into().unwrap())) .map_or(0xffff_ffff, |d| { d.lock().unwrap().read_config_register(register) }) } + /// Handle a configuration space write over Port IO pub fn config_space_write(&mut self, offset: u64, data: &[u8]) -> Option> { - if offset as usize + data.len() > 4 { + if u64_to_usize(offset) + data.len() > 4 { return None; } @@ -201,23 +216,23 @@ impl PciConfigIo { // be a problem currently, since we mainly access this when we are setting up devices. // We might want to do some profiling to ensure this does not become a bottleneck. let pci_bus = self.pci_bus.as_ref().lock().unwrap(); - if let Some(d) = pci_bus.devices.get(&(device as u32)) { + if let Some(d) = pci_bus.devices.get(&(device.try_into().unwrap())) { let mut device = d.lock().unwrap(); // Find out if one of the device's BAR is being reprogrammed, and // reprogram it if needed. - if let Some(params) = device.detect_bar_reprogramming(register, data) { - if let Err(e) = pci_bus.device_reloc.move_bar( + if let Some(params) = device.detect_bar_reprogramming(register, data) + && let Err(e) = pci_bus.vm.move_bar( params.old_base, params.new_base, params.len, device.deref_mut(), - ) { - error!( - "Failed moving device BAR: {}: 0x{:x}->0x{:x}(0x{:x})", - e, params.old_base, params.new_base, params.len - ); - } + ) + { + error!( + "Failed moving device BAR: {}: 0x{:x}->0x{:x}(0x{:x})", + e, params.old_base, params.new_base, params.len + ); } // Update the register value @@ -228,7 +243,7 @@ impl PciConfigIo { } fn set_config_address(&mut self, offset: u64, data: &[u8]) { - if offset as usize + data.len() > 4 { + if u64_to_usize(offset) + data.len() > 4 { return; } let (mask, value): (u32, u32) = match data.len() { @@ -250,7 +265,7 @@ impl PciConfigIo { impl BusDevice for PciConfigIo { fn read(&mut self, _base: u64, offset: u64, data: &mut [u8]) { // Only allow reads to the register boundary. - let start = offset as usize % 4; + let start = u64_to_usize(offset) % 4; let end = start + data.len(); if end > 4 { for d in data.iter_mut() { @@ -267,7 +282,7 @@ impl BusDevice for PciConfigIo { }; for i in start..end { - data[i - start] = (value >> (i * 8)) as u8; + data[i - start] = ((value >> (i * 8)) & 0xff) as u8; } } @@ -284,12 +299,14 @@ impl BusDevice for PciConfigIo { } } +#[derive(Debug)] /// Emulates PCI memory-mapped configuration access mechanism. pub struct PciConfigMmio { pci_bus: Arc>, } impl PciConfigMmio { + /// New MMIO configuration handler object pub fn new(pci_bus: Arc>) -> Self { PciConfigMmio { pci_bus } } @@ -311,14 +328,14 @@ impl PciConfigMmio { .lock() .unwrap() .devices - .get(&(device as u32)) + .get(&(device.try_into().unwrap())) .map_or(0xffff_ffff, |d| { d.lock().unwrap().read_config_register(register) }) } fn config_space_write(&mut self, config_address: u32, offset: u64, data: &[u8]) { - if offset as usize + data.len() > 4 { + if u64_to_usize(offset) + data.len() > 4 { return; } @@ -335,23 +352,23 @@ impl PciConfigMmio { } let pci_bus = self.pci_bus.lock().unwrap(); - if let Some(d) = pci_bus.devices.get(&(device as u32)) { + if let Some(d) = pci_bus.devices.get(&(device.try_into().unwrap())) { let mut device = d.lock().unwrap(); // Find out if one of the device's BAR is being reprogrammed, and // reprogram it if needed. - if let Some(params) = device.detect_bar_reprogramming(register, data) { - if let Err(e) = pci_bus.device_reloc.move_bar( + if let Some(params) = device.detect_bar_reprogramming(register, data) + && let Err(e) = pci_bus.vm.move_bar( params.old_base, params.new_base, params.len, device.deref_mut(), - ) { - error!( - "Failed moving device BAR: {}: 0x{:x}->0x{:x}(0x{:x})", - e, params.old_base, params.new_base, params.len - ); - } + ) + { + error!( + "Failed moving device BAR: {}: 0x{:x}->0x{:x}(0x{:x})", + e, params.old_base, params.new_base, params.len + ); } // Update the register value @@ -363,7 +380,7 @@ impl PciConfigMmio { impl BusDevice for PciConfigMmio { fn read(&mut self, _base: u64, offset: u64, data: &mut [u8]) { // Only allow reads to the register boundary. - let start = offset as usize % 4; + let start = u64_to_usize(offset) % 4; let end = start + data.len(); if end > 4 || offset > u64::from(u32::MAX) { for d in data { @@ -372,9 +389,9 @@ impl BusDevice for PciConfigMmio { return; } - let value = self.config_space_read(offset as u32); + let value = self.config_space_read(offset.try_into().unwrap()); for i in start..end { - data[i - start] = (value >> (i * 8)) as u8; + data[i - start] = ((value >> (i * 8)) & 0xff) as u8; } } @@ -382,7 +399,7 @@ impl BusDevice for PciConfigMmio { if offset > u64::from(u32::MAX) { return None; } - self.config_space_write(offset as u32, offset % 4, data); + self.config_space_write(offset.try_into().unwrap(), offset % 4, data); None } @@ -437,14 +454,13 @@ mod tests { use std::sync::atomic::AtomicUsize; use std::sync::{Arc, Mutex}; + use pci::{PciClassCode, PciMassStorageSubclass}; use vm_device::BusDevice; use super::{PciBus, PciConfigIo, PciConfigMmio, PciRoot}; - use crate::bus::{DEVICE_ID_INTEL_VIRT_PCIE_HOST, VENDOR_ID_INTEL}; - use crate::{ - DeviceRelocation, PciClassCode, PciConfiguration, PciDevice, PciHeaderType, - PciMassStorageSubclass, - }; + use crate::pci::bus::{DEVICE_ID_INTEL_VIRT_PCIE_HOST, VENDOR_ID_INTEL}; + use crate::pci::configuration::PciConfiguration; + use crate::pci::{BarReprogrammingParams, DeviceRelocation, PciDevice}; #[derive(Debug, Default)] struct RelocationMock { @@ -463,8 +479,8 @@ mod tests { _old_base: u64, _new_base: u64, _len: u64, - _pci_dev: &mut dyn crate::PciDevice, - ) -> std::result::Result<(), std::io::Error> { + _pci_dev: &mut dyn PciDevice, + ) -> Result<(), anyhow::Error> { self.reloc_cnt .fetch_add(1, std::sync::atomic::Ordering::SeqCst); Ok(()) @@ -475,13 +491,12 @@ mod tests { impl PciDevMock { fn new() -> Self { - let mut config = PciConfiguration::new( + let mut config = PciConfiguration::new_type0( 0x42, 0x0, 0x0, PciClassCode::MassStorage, &PciMassStorageSubclass::SerialScsiController, - PciHeaderType::Device, 0x13, 0x12, None, @@ -513,7 +528,7 @@ mod tests { &mut self, reg_idx: usize, data: &[u8], - ) -> Option { + ) -> Option { self.0.detect_bar_reprogramming(reg_idx, data) } } @@ -613,8 +628,8 @@ mod tests { let mock = Arc::new(RelocationMock::default()); let root = PciRoot::new(None); let mut bus = PciBus::new(root, mock.clone()); - bus.add_device(1, Arc::new(Mutex::new(PciDevMock::new()))) - .unwrap(); + bus.add_device(1, Arc::new(Mutex::new(PciDevMock::new()))); + let bus = Arc::new(Mutex::new(bus)); (PciConfigMmio::new(bus.clone()), PciConfigIo::new(bus), mock) } diff --git a/src/pci/src/configuration.rs b/src/vmm/src/pci/configuration.rs similarity index 64% rename from src/pci/src/configuration.rs rename to src/vmm/src/pci/configuration.rs index bcd78ba463d..3d9f588e466 100644 --- a/src/pci/src/configuration.rs +++ b/src/vmm/src/pci/configuration.rs @@ -8,10 +8,13 @@ use std::sync::{Arc, Mutex}; use byteorder::{ByteOrder, LittleEndian}; +use pci::{PciCapabilityId, PciClassCode, PciSubclass}; use serde::{Deserialize, Serialize}; -use crate::device::BarReprogrammingParams; -use crate::MsixConfig; +use super::BarReprogrammingParams; +use super::msix::MsixConfig; +use crate::logger::{info, warn}; +use crate::utils::u64_to_usize; // The number of 32bit registers in the config space, 4096 bytes. const NUM_CONFIGURATION_REGISTERS: usize = 1024; @@ -29,326 +32,11 @@ const CAPABILITY_LIST_HEAD_OFFSET: usize = 0x34; const FIRST_CAPABILITY_OFFSET: usize = 0x40; const CAPABILITY_MAX_OFFSET: usize = 192; -pub const PCI_CONFIGURATION_ID: &str = "pci_configuration"; - -/// Represents the types of PCI headers allowed in the configuration registers. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum PciHeaderType { - Device, - Bridge, -} - -/// Classes of PCI nodes. -#[allow(dead_code)] -#[derive(Copy, Clone)] -pub enum PciClassCode { - TooOld, - MassStorage, - NetworkController, - DisplayController, - MultimediaController, - MemoryController, - BridgeDevice, - SimpleCommunicationController, - BaseSystemPeripheral, - InputDevice, - DockingStation, - Processor, - SerialBusController, - WirelessController, - IntelligentIoController, - EncryptionController, - DataAcquisitionSignalProcessing, - Other = 0xff, -} - -impl PciClassCode { - pub fn get_register_value(self) -> u8 { - self as u8 - } -} - -/// A PCI subclass. Each class in `PciClassCode` can specify a unique set of subclasses. This trait -/// is implemented by each subclass. It allows use of a trait object to generate configurations. -pub trait PciSubclass { - /// Convert this subclass to the value used in the PCI specification. - fn get_register_value(&self) -> u8; -} - -/// Subclasses of the MultimediaController class. -#[allow(dead_code)] -#[derive(Copy, Clone)] -pub enum PciMultimediaSubclass { - VideoController = 0x00, - AudioController = 0x01, - TelephonyDevice = 0x02, - AudioDevice = 0x03, - Other = 0x80, -} - -impl PciSubclass for PciMultimediaSubclass { - fn get_register_value(&self) -> u8 { - *self as u8 - } -} - -/// Subclasses of the BridgeDevice -#[allow(dead_code)] -#[derive(Copy, Clone)] -pub enum PciBridgeSubclass { - HostBridge = 0x00, - IsaBridge = 0x01, - EisaBridge = 0x02, - McaBridge = 0x03, - PciToPciBridge = 0x04, - PcmciaBridge = 0x05, - NuBusBridge = 0x06, - CardBusBridge = 0x07, - RacEwayBridge = 0x08, - PciToPciSemiTransparentBridge = 0x09, - InfiniBrandToPciHostBridge = 0x0a, - OtherBridgeDevice = 0x80, -} - -impl PciSubclass for PciBridgeSubclass { - fn get_register_value(&self) -> u8 { - *self as u8 - } -} - -/// Subclass of the SerialBus -#[allow(dead_code)] -#[derive(Copy, Clone)] -pub enum PciSerialBusSubClass { - Firewire = 0x00, - Accessbus = 0x01, - Ssa = 0x02, - Usb = 0x03, -} - -impl PciSubclass for PciSerialBusSubClass { - fn get_register_value(&self) -> u8 { - *self as u8 - } -} - -/// Mass Storage Sub Classes -#[allow(dead_code)] -#[derive(Copy, Clone)] -pub enum PciMassStorageSubclass { - ScsiStorage = 0x00, - IdeInterface = 0x01, - FloppyController = 0x02, - IpiController = 0x03, - RaidController = 0x04, - AtaController = 0x05, - SataController = 0x06, - SerialScsiController = 0x07, - NvmController = 0x08, - MassStorage = 0x80, -} - -impl PciSubclass for PciMassStorageSubclass { - fn get_register_value(&self) -> u8 { - *self as u8 - } -} - -/// Network Controller Sub Classes -#[allow(dead_code)] -#[derive(Copy, Clone)] -pub enum PciNetworkControllerSubclass { - EthernetController = 0x00, - TokenRingController = 0x01, - FddiController = 0x02, - AtmController = 0x03, - IsdnController = 0x04, - WorldFipController = 0x05, - PicmgController = 0x06, - InfinibandController = 0x07, - FabricController = 0x08, - NetworkController = 0x80, -} - -impl PciSubclass for PciNetworkControllerSubclass { - fn get_register_value(&self) -> u8 { - *self as u8 - } -} - -/// Types of PCI capabilities. -#[derive(Debug, PartialEq, Eq, Copy, Clone)] -#[allow(dead_code)] -#[allow(non_camel_case_types)] -#[repr(u8)] -pub enum PciCapabilityId { - ListId = 0, - PowerManagement = 0x01, - AcceleratedGraphicsPort = 0x02, - VitalProductData = 0x03, - SlotIdentification = 0x04, - MessageSignalledInterrupts = 0x05, - CompactPciHotSwap = 0x06, - PciX = 0x07, - HyperTransport = 0x08, - VendorSpecific = 0x09, - Debugport = 0x0A, - CompactPciCentralResourceControl = 0x0B, - PciStandardHotPlugController = 0x0C, - BridgeSubsystemVendorDeviceId = 0x0D, - AgpTargetPciPcibridge = 0x0E, - SecureDevice = 0x0F, - PciExpress = 0x10, - MsiX = 0x11, - SataDataIndexConf = 0x12, - PciAdvancedFeatures = 0x13, - PciEnhancedAllocation = 0x14, -} - -impl From for PciCapabilityId { - fn from(c: u8) -> Self { - match c { - 0 => PciCapabilityId::ListId, - 0x01 => PciCapabilityId::PowerManagement, - 0x02 => PciCapabilityId::AcceleratedGraphicsPort, - 0x03 => PciCapabilityId::VitalProductData, - 0x04 => PciCapabilityId::SlotIdentification, - 0x05 => PciCapabilityId::MessageSignalledInterrupts, - 0x06 => PciCapabilityId::CompactPciHotSwap, - 0x07 => PciCapabilityId::PciX, - 0x08 => PciCapabilityId::HyperTransport, - 0x09 => PciCapabilityId::VendorSpecific, - 0x0A => PciCapabilityId::Debugport, - 0x0B => PciCapabilityId::CompactPciCentralResourceControl, - 0x0C => PciCapabilityId::PciStandardHotPlugController, - 0x0D => PciCapabilityId::BridgeSubsystemVendorDeviceId, - 0x0E => PciCapabilityId::AgpTargetPciPcibridge, - 0x0F => PciCapabilityId::SecureDevice, - 0x10 => PciCapabilityId::PciExpress, - 0x11 => PciCapabilityId::MsiX, - 0x12 => PciCapabilityId::SataDataIndexConf, - 0x13 => PciCapabilityId::PciAdvancedFeatures, - 0x14 => PciCapabilityId::PciEnhancedAllocation, - _ => PciCapabilityId::ListId, - } - } -} - -/// Types of PCI Express capabilities. -#[derive(PartialEq, Eq, Copy, Clone, Debug)] -#[allow(dead_code)] -#[repr(u16)] -pub enum PciExpressCapabilityId { - NullCapability = 0x0000, - AdvancedErrorReporting = 0x0001, - VirtualChannelMultiFunctionVirtualChannelNotPresent = 0x0002, - DeviceSerialNumber = 0x0003, - PowerBudgeting = 0x0004, - RootComplexLinkDeclaration = 0x0005, - RootComplexInternalLinkControl = 0x0006, - RootComplexEventCollectorEndpointAssociation = 0x0007, - MultiFunctionVirtualChannel = 0x0008, - VirtualChannelMultiFunctionVirtualChannelPresent = 0x0009, - RootComplexRegisterBlock = 0x000a, - VendorSpecificExtendedCapability = 0x000b, - ConfigurationAccessCorrelation = 0x000c, - AccessControlServices = 0x000d, - AlternativeRoutingIdentificationInterpretation = 0x000e, - AddressTranslationServices = 0x000f, - SingleRootIoVirtualization = 0x0010, - DeprecatedMultiRootIoVirtualization = 0x0011, - Multicast = 0x0012, - PageRequestInterface = 0x0013, - ReservedForAmd = 0x0014, - ResizeableBar = 0x0015, - DynamicPowerAllocation = 0x0016, - ThpRequester = 0x0017, - LatencyToleranceReporting = 0x0018, - SecondaryPciExpress = 0x0019, - ProtocolMultiplexing = 0x001a, - ProcessAddressSpaceId = 0x001b, - LnRequester = 0x001c, - DownstreamPortContainment = 0x001d, - L1PmSubstates = 0x001e, - PrecisionTimeMeasurement = 0x001f, - PciExpressOverMphy = 0x0020, - FRSQueueing = 0x0021, - ReadinessTimeReporting = 0x0022, - DesignatedVendorSpecificExtendedCapability = 0x0023, - VfResizeableBar = 0x0024, - DataLinkFeature = 0x0025, - PhysicalLayerSixteenGts = 0x0026, - LaneMarginingAtTheReceiver = 0x0027, - HierarchyId = 0x0028, - NativePcieEnclosureManagement = 0x0029, - PhysicalLayerThirtyTwoGts = 0x002a, - AlternateProtocol = 0x002b, - SystemFirmwareIntermediary = 0x002c, - ShadowFunctions = 0x002d, - DataObjectExchange = 0x002e, - Reserved = 0x002f, - ExtendedCapabilitiesAbsence = 0xffff, -} - -impl From for PciExpressCapabilityId { - fn from(c: u16) -> Self { - match c { - 0x0000 => PciExpressCapabilityId::NullCapability, - 0x0001 => PciExpressCapabilityId::AdvancedErrorReporting, - 0x0002 => PciExpressCapabilityId::VirtualChannelMultiFunctionVirtualChannelNotPresent, - 0x0003 => PciExpressCapabilityId::DeviceSerialNumber, - 0x0004 => PciExpressCapabilityId::PowerBudgeting, - 0x0005 => PciExpressCapabilityId::RootComplexLinkDeclaration, - 0x0006 => PciExpressCapabilityId::RootComplexInternalLinkControl, - 0x0007 => PciExpressCapabilityId::RootComplexEventCollectorEndpointAssociation, - 0x0008 => PciExpressCapabilityId::MultiFunctionVirtualChannel, - 0x0009 => PciExpressCapabilityId::VirtualChannelMultiFunctionVirtualChannelPresent, - 0x000a => PciExpressCapabilityId::RootComplexRegisterBlock, - 0x000b => PciExpressCapabilityId::VendorSpecificExtendedCapability, - 0x000c => PciExpressCapabilityId::ConfigurationAccessCorrelation, - 0x000d => PciExpressCapabilityId::AccessControlServices, - 0x000e => PciExpressCapabilityId::AlternativeRoutingIdentificationInterpretation, - 0x000f => PciExpressCapabilityId::AddressTranslationServices, - 0x0010 => PciExpressCapabilityId::SingleRootIoVirtualization, - 0x0011 => PciExpressCapabilityId::DeprecatedMultiRootIoVirtualization, - 0x0012 => PciExpressCapabilityId::Multicast, - 0x0013 => PciExpressCapabilityId::PageRequestInterface, - 0x0014 => PciExpressCapabilityId::ReservedForAmd, - 0x0015 => PciExpressCapabilityId::ResizeableBar, - 0x0016 => PciExpressCapabilityId::DynamicPowerAllocation, - 0x0017 => PciExpressCapabilityId::ThpRequester, - 0x0018 => PciExpressCapabilityId::LatencyToleranceReporting, - 0x0019 => PciExpressCapabilityId::SecondaryPciExpress, - 0x001a => PciExpressCapabilityId::ProtocolMultiplexing, - 0x001b => PciExpressCapabilityId::ProcessAddressSpaceId, - 0x001c => PciExpressCapabilityId::LnRequester, - 0x001d => PciExpressCapabilityId::DownstreamPortContainment, - 0x001e => PciExpressCapabilityId::L1PmSubstates, - 0x001f => PciExpressCapabilityId::PrecisionTimeMeasurement, - 0x0020 => PciExpressCapabilityId::PciExpressOverMphy, - 0x0021 => PciExpressCapabilityId::FRSQueueing, - 0x0022 => PciExpressCapabilityId::ReadinessTimeReporting, - 0x0023 => PciExpressCapabilityId::DesignatedVendorSpecificExtendedCapability, - 0x0024 => PciExpressCapabilityId::VfResizeableBar, - 0x0025 => PciExpressCapabilityId::DataLinkFeature, - 0x0026 => PciExpressCapabilityId::PhysicalLayerSixteenGts, - 0x0027 => PciExpressCapabilityId::LaneMarginingAtTheReceiver, - 0x0028 => PciExpressCapabilityId::HierarchyId, - 0x0029 => PciExpressCapabilityId::NativePcieEnclosureManagement, - 0x002a => PciExpressCapabilityId::PhysicalLayerThirtyTwoGts, - 0x002b => PciExpressCapabilityId::AlternateProtocol, - 0x002c => PciExpressCapabilityId::SystemFirmwareIntermediary, - 0x002d => PciExpressCapabilityId::ShadowFunctions, - 0x002e => PciExpressCapabilityId::DataObjectExchange, - 0xffff => PciExpressCapabilityId::ExtendedCapabilitiesAbsence, - _ => PciExpressCapabilityId::Reserved, - } - } -} - /// A PCI capability list. Devices can optionally specify capabilities in their configuration space. pub trait PciCapability { + /// Bytes of the PCI capability fn bytes(&self) -> &[u8]; + /// Id of the PCI capability fn id(&self) -> PciCapabilityId; } @@ -377,6 +65,7 @@ struct PciBar { used: bool, } +/// PCI configuration space state for (de)serialization #[derive(Debug, Clone, Serialize, Deserialize)] pub struct PciConfigurationState { registers: Vec, @@ -386,6 +75,7 @@ pub struct PciConfigurationState { msix_cap_reg_idx: Option, } +#[derive(Debug)] /// Contains the configuration space of a PCI node. /// /// See the [specification](https://en.wikipedia.org/wiki/PCI_configuration_space). @@ -400,38 +90,15 @@ pub struct PciConfiguration { msix_config: Option>>, } -/// See pci_regs.h in kernel -#[derive(Copy, Clone, PartialEq, Eq, Serialize, Deserialize, Debug)] -pub enum PciBarRegionType { - Memory32BitRegion = 0, - IoRegion = 0x01, - Memory64BitRegion = 0x04, -} - -#[derive(Debug, Copy, Clone, Serialize, Deserialize)] -pub enum PciBarPrefetchable { - NotPrefetchable = 0, - Prefetchable = 0x08, -} - -impl From for bool { - fn from(val: PciBarPrefetchable) -> Self { - match val { - PciBarPrefetchable::NotPrefetchable => false, - PciBarPrefetchable::Prefetchable => true, - } - } -} - impl PciConfiguration { #[allow(clippy::too_many_arguments)] - pub fn new( + /// Create a new type 0 PCI configuration + pub fn new_type0( vendor_id: u16, device_id: u16, revision_id: u8, class_code: PciClassCode, subclass: &dyn PciSubclass, - header_type: PciHeaderType, subsystem_vendor_id: u16, subsystem_id: u16, msix_config: Option>>, @@ -456,9 +123,6 @@ impl PciConfiguration { | (u32::from(subclass.get_register_value()) << 16) | u32::from(revision_id); writable_bits[3] = 0x0000_00ff; // Cacheline size (r/w) - - // We only every create device types. No bridges used at the moment - assert_eq!(header_type, PciHeaderType::Device); registers[3] = 0x0000_0000; // Header type 0 (device) writable_bits[15] = 0x0000_00ff; // IRQ line (r/w) registers[11] = (u32::from(subsystem_id) << 16) | u32::from(subsystem_vendor_id); @@ -482,6 +146,7 @@ impl PciConfiguration { } } + /// Create PCI configuration space state pub fn state(&self) -> PciConfigurationState { PciConfigurationState { registers: self.registers.to_vec(), @@ -611,9 +276,9 @@ impl PciConfiguration { // // | Bit 3 | Bits 2-1 | Bit 0 | // | Prefetchable | type | Always 0 | - self.registers[reg_idx] = (((addr & 0xffff_ffff) as u32) & BAR_MEM_ADDR_MASK) - | (PciBarPrefetchable::NotPrefetchable as u32) - | (PciBarRegionType::Memory64BitRegion as u32); + // + // Non-prefetchable, 64 bits BAR region + self.registers[reg_idx] = (((addr & 0xffff_ffff) as u32) & BAR_MEM_ADDR_MASK) | 4u32; self.writable_bits[reg_idx] = BAR_MEM_ADDR_MASK; self.bars[bar_idx].addr = self.registers[reg_idx]; self.bars[bar_idx].used = true; @@ -649,7 +314,7 @@ impl PciConfiguration { let end_offset = cap_offset.checked_add(total_len).unwrap(); assert!(end_offset <= CAPABILITY_MAX_OFFSET); self.registers[STATUS_REG] |= STATUS_REG_CAPABILITIES_USED_MASK; - self.write_byte_internal(tail_offset, cap_offset as u8, false); + self.write_byte_internal(tail_offset, cap_offset.try_into().unwrap(), false); self.write_byte_internal(cap_offset, cap_data.id() as u8, false); self.write_byte_internal(cap_offset + 1, 0, false); // Next pointer. for (i, byte) in cap_data.bytes().iter().enumerate() { @@ -677,39 +342,40 @@ impl PciConfiguration { (next + 3) & !3 } + /// Write a PCI configuration register pub fn write_config_register(&mut self, reg_idx: usize, offset: u64, data: &[u8]) { if reg_idx >= NUM_CONFIGURATION_REGISTERS { return; } - if offset as usize + data.len() > 4 { + if u64_to_usize(offset) + data.len() > 4 { return; } // Handle potential write to MSI-X message control register - if let Some(msix_cap_reg_idx) = self.msix_cap_reg_idx { - if let Some(msix_config) = &self.msix_config { - if msix_cap_reg_idx == reg_idx && offset == 2 && data.len() == 2 { - // 2-bytes write in the Message Control field - msix_config - .lock() - .unwrap() - .set_msg_ctl(LittleEndian::read_u16(data)); - } else if msix_cap_reg_idx == reg_idx && offset == 0 && data.len() == 4 { - // 4 bytes write at the beginning. Ignore the first 2 bytes which are the - // capability id and next capability pointer - msix_config - .lock() - .unwrap() - .set_msg_ctl((LittleEndian::read_u32(data) >> 16) as u16); - } + if let Some(msix_cap_reg_idx) = self.msix_cap_reg_idx + && let Some(msix_config) = &self.msix_config + { + if msix_cap_reg_idx == reg_idx && offset == 2 && data.len() == 2 { + // 2-bytes write in the Message Control field + msix_config + .lock() + .unwrap() + .set_msg_ctl(LittleEndian::read_u16(data)); + } else if msix_cap_reg_idx == reg_idx && offset == 0 && data.len() == 4 { + // 4 bytes write at the beginning. Ignore the first 2 bytes which are the + // capability id and next capability pointer + msix_config + .lock() + .unwrap() + .set_msg_ctl((LittleEndian::read_u32(data) >> 16) as u16); } } match data.len() { - 1 => self.write_byte(reg_idx * 4 + offset as usize, data[0]), + 1 => self.write_byte(reg_idx * 4 + u64_to_usize(offset), data[0]), 2 => self.write_word( - reg_idx * 4 + offset as usize, + reg_idx * 4 + u64_to_usize(offset), u16::from(data[0]) | (u16::from(data[1]) << 8), ), 4 => self.write_reg(reg_idx, LittleEndian::read_u32(data)), @@ -717,6 +383,7 @@ impl PciConfiguration { } } + /// Detect whether the guest wants to reprogram the address of a BAR pub fn detect_bar_reprogramming( &mut self, reg_idx: usize, @@ -786,11 +453,11 @@ impl PciConfiguration { #[cfg(test)] mod tests { - + use pci::PciMultimediaSubclass; use vm_memory::ByteValued; use super::*; - use crate::MsixCap; + use crate::pci::msix::MsixCap; #[repr(C, packed)] #[derive(Clone, Copy, Default)] @@ -838,14 +505,14 @@ mod tests { #[test] #[should_panic] fn test_too_big_capability() { - let mut cfg = default_config(); + let mut cfg = default_pci_config(); cfg.add_capability(&BadCap::new(127)); } #[test] #[should_panic] fn test_capability_space_overflow() { - let mut cfg = default_config(); + let mut cfg = default_pci_config(); cfg.add_capability(&BadCap::new(62)); cfg.add_capability(&BadCap::new(62)); cfg.add_capability(&BadCap::new(0)); @@ -853,7 +520,7 @@ mod tests { #[test] fn test_add_capability() { - let mut cfg = default_config(); + let mut cfg = default_pci_config(); // Reset capabilities cfg.last_capability = None; @@ -877,7 +544,7 @@ mod tests { // Verify the contents of the capabilities. let cap1_data = cfg.read_reg(cap1_offset / 4); assert_eq!(cap1_data & 0xFF, 0x09); // capability ID - assert_eq!((cap1_data >> 8) & 0xFF, cap2_offset as u32); // next capability pointer + assert_eq!((cap1_data >> 8) & 0xFF, u32::try_from(cap2_offset).unwrap()); // next capability pointer assert_eq!((cap1_data >> 16) & 0xFF, 0x04); // cap1.len assert_eq!((cap1_data >> 24) & 0xFF, 0xAA); // cap1.foo @@ -890,7 +557,7 @@ mod tests { #[test] fn test_msix_capability() { - let mut cfg = default_config(); + let mut cfg = default_pci_config(); // Information about the MSI-X capability layout: https://wiki.osdev.org/PCI#Enabling_MSI-X let msix_cap = MsixCap::new( @@ -977,21 +644,23 @@ mod tests { assert_eq!(cfg.read_reg(cap_reg + 2), pba_offset); } - #[test] - fn class_code() { - let cfg = PciConfiguration::new( + fn default_pci_config() -> PciConfiguration { + PciConfiguration::new_type0( 0x1234, 0x5678, 0x1, PciClassCode::MultimediaController, &PciMultimediaSubclass::AudioController, - PciHeaderType::Device, 0xABCD, 0x2468, None, None, - ); + ) + } + #[test] + fn class_code() { + let cfg = default_pci_config(); let class_reg = cfg.read_reg(2); let class_code = (class_reg >> 24) & 0xFF; let subclass = (class_reg >> 16) & 0xFF; @@ -1034,53 +703,38 @@ mod tests { assert_eq!(decode_64_bits_bar_size(hi, lo), 0xffff_ffff_ffff_fff0); } - fn default_config() -> PciConfiguration { - PciConfiguration::new( - 0x42, - 0x0, - 0x0, - PciClassCode::MassStorage, - &PciMassStorageSubclass::SerialScsiController, - PciHeaderType::Device, - 0x13, - 0x12, - None, - None, - ) - } - #[test] #[should_panic] fn test_bar_size_no_power_of_two() { - let mut pci_config = default_config(); + let mut pci_config = default_pci_config(); pci_config.add_pci_bar(0, 0x1000, 0x1001); } #[test] #[should_panic] fn test_bad_bar_index() { - let mut pci_config = default_config(); + let mut pci_config = default_pci_config(); pci_config.add_pci_bar(NUM_BAR_REGS, 0x1000, 0x1000); } #[test] #[should_panic] fn test_bad_64bit_bar_index() { - let mut pci_config = default_config(); + let mut pci_config = default_pci_config(); pci_config.add_pci_bar(NUM_BAR_REGS - 1, 0x1000, 0x1000); } #[test] #[should_panic] fn test_bar_size_overflows() { - let mut pci_config = default_config(); + let mut pci_config = default_pci_config(); pci_config.add_pci_bar(0, u64::MAX, 0x2); } #[test] #[should_panic] fn test_lower_bar_free_upper_used() { - let mut pci_config = default_config(); + let mut pci_config = default_pci_config(); pci_config.add_pci_bar(1, 0x1000, 0x1000); pci_config.add_pci_bar(0, 0x1000, 0x1000); } @@ -1088,7 +742,7 @@ mod tests { #[test] #[should_panic] fn test_lower_bar_used() { - let mut pci_config = default_config(); + let mut pci_config = default_pci_config(); pci_config.add_pci_bar(0, 0x1000, 0x1000); pci_config.add_pci_bar(0, 0x1000, 0x1000); } @@ -1096,14 +750,14 @@ mod tests { #[test] #[should_panic] fn test_upper_bar_used() { - let mut pci_config = default_config(); + let mut pci_config = default_pci_config(); pci_config.add_pci_bar(0, 0x1000, 0x1000); pci_config.add_pci_bar(1, 0x1000, 0x1000); } #[test] fn test_add_pci_bar() { - let mut pci_config = default_config(); + let mut pci_config = default_pci_config(); pci_config.add_pci_bar(0, 0x1_0000_0000, 0x1000); @@ -1116,18 +770,7 @@ mod tests { #[test] fn test_access_invalid_reg() { - let mut pci_config = PciConfiguration::new( - 0x42, - 0x0, - 0x0, - PciClassCode::MassStorage, - &PciMassStorageSubclass::SerialScsiController, - PciHeaderType::Device, - 0x13, - 0x12, - None, - None, - ); + let mut pci_config = default_pci_config(); // Can't read past the end of the configuration space assert_eq!( @@ -1161,52 +804,55 @@ mod tests { #[test] fn test_detect_bar_reprogramming() { - let mut pci_config = PciConfiguration::new( - 0x42, - 0x0, - 0x0, - PciClassCode::MassStorage, - &PciMassStorageSubclass::SerialScsiController, - PciHeaderType::Device, - 0x13, - 0x12, - None, - None, - ); + let mut pci_config = default_pci_config(); // Trying to reprogram with something less than 4 bytes (length of the address) should fail - assert!(pci_config - .detect_bar_reprogramming(BAR0_REG, &[0x13]) - .is_none()); - assert!(pci_config - .detect_bar_reprogramming(BAR0_REG, &[0x13, 0x12]) - .is_none()); - assert!(pci_config - .detect_bar_reprogramming(BAR0_REG, &[0x13, 0x12]) - .is_none()); - assert!(pci_config - .detect_bar_reprogramming(BAR0_REG, &[0x13, 0x12, 0x16]) - .is_none()); + assert!( + pci_config + .detect_bar_reprogramming(BAR0_REG, &[0x13]) + .is_none() + ); + assert!( + pci_config + .detect_bar_reprogramming(BAR0_REG, &[0x13, 0x12]) + .is_none() + ); + assert!( + pci_config + .detect_bar_reprogramming(BAR0_REG, &[0x13, 0x12]) + .is_none() + ); + assert!( + pci_config + .detect_bar_reprogramming(BAR0_REG, &[0x13, 0x12, 0x16]) + .is_none() + ); // Writing all 1s is a special case where we're actually asking for the size of the BAR - assert!(pci_config - .detect_bar_reprogramming(BAR0_REG, &u32::to_le_bytes(0xffff_ffff)) - .is_none()); + assert!( + pci_config + .detect_bar_reprogramming(BAR0_REG, &u32::to_le_bytes(0xffff_ffff)) + .is_none() + ); // Trying to reprogram a BAR that hasn't be initialized does nothing for reg_idx in BAR0_REG..BAR0_REG + NUM_BAR_REGS { - assert!(pci_config - .detect_bar_reprogramming(reg_idx, &u32::to_le_bytes(0x1312_4243)) - .is_none()); + assert!( + pci_config + .detect_bar_reprogramming(reg_idx, &u32::to_le_bytes(0x1312_4243)) + .is_none() + ); } // Reprogramming of a 64bit BAR pci_config.add_pci_bar(0, 0x13_1200_0000, 0x8000); // First we write the lower 32 bits and this shouldn't cause any reprogramming - assert!(pci_config - .detect_bar_reprogramming(BAR0_REG, &u32::to_le_bytes(0x4200_0000)) - .is_none()); + assert!( + pci_config + .detect_bar_reprogramming(BAR0_REG, &u32::to_le_bytes(0x4200_0000)) + .is_none() + ); pci_config.write_config_register(BAR0_REG, 0, &u32::to_le_bytes(0x4200_0000)); // Writing the upper 32 bits should trigger the reprogramming @@ -1233,17 +879,21 @@ mod tests { pci_config.write_config_register(BAR0_REG + 1, 0, &u32::to_le_bytes(0x1312)); // Attempting to reprogram the BAR with the same address should not have any effect - assert!(pci_config - .detect_bar_reprogramming(BAR0_REG, &u32::to_le_bytes(0x4200_0000)) - .is_none()); - assert!(pci_config - .detect_bar_reprogramming(BAR0_REG + 1, &u32::to_le_bytes(0x1312)) - .is_none()); + assert!( + pci_config + .detect_bar_reprogramming(BAR0_REG, &u32::to_le_bytes(0x4200_0000)) + .is_none() + ); + assert!( + pci_config + .detect_bar_reprogramming(BAR0_REG + 1, &u32::to_le_bytes(0x1312)) + .is_none() + ); } #[test] fn test_rom_bar() { - let mut pci_config = default_config(); + let mut pci_config = default_pci_config(); // ROM BAR address should always be 0 and writes to it shouldn't do anything assert_eq!(pci_config.read_reg(ROM_BAR_REG), 0); diff --git a/src/pci/src/device.rs b/src/vmm/src/pci/mod.rs similarity index 83% rename from src/pci/src/device.rs rename to src/vmm/src/pci/mod.rs index fe9e676eb03..2d4826349f8 100644 --- a/src/pci/src/device.rs +++ b/src/vmm/src/pci/mod.rs @@ -5,24 +5,28 @@ // // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause -use std::sync::{Arc, Barrier}; -use std::{io, result}; +/// PCI bus logic +pub mod bus; +/// PCI configuration space handling +pub mod configuration; +/// MSI-X logic +pub mod msix; -#[derive(Debug, thiserror::Error, displaydoc::Display)] -pub enum Error { - /// Allocating space for an IO BAR failed, size={0}. - IoAllocationFailed(u64), - /// Expected resource not found. - MissingResource, -} +use std::fmt::Debug; +use std::sync::{Arc, Barrier}; #[derive(Clone, Copy, Debug, PartialEq, Eq)] +/// Parameters for performing a BAR reprogramming operation pub struct BarReprogrammingParams { + /// Previous address of the BAR pub old_base: u64, + /// New address of the BAR pub new_base: u64, + /// Size of the BAR pub len: u64, } +/// Common logic of all PCI devices pub trait PciDevice: Send { /// Sets a register in the configuration space. /// * `reg_idx` - The index of the config register to modify. @@ -55,7 +59,7 @@ pub trait PciDevice: Send { None } /// Relocates the BAR to a different address in guest address space. - fn move_bar(&mut self, _old_base: u64, _new_base: u64) -> result::Result<(), io::Error> { + fn move_bar(&mut self, _old_base: u64, _new_base: u64) -> Result<(), anyhow::Error> { Ok(()) } } @@ -71,5 +75,5 @@ pub trait DeviceRelocation: Send + Sync { new_base: u64, len: u64, pci_dev: &mut dyn PciDevice, - ) -> result::Result<(), io::Error>; + ) -> Result<(), anyhow::Error>; } diff --git a/src/pci/src/msix.rs b/src/vmm/src/pci/msix.rs similarity index 91% rename from src/pci/src/msix.rs rename to src/vmm/src/pci/msix.rs index 50abeaf9737..74854d0029f 100644 --- a/src/pci/src/msix.rs +++ b/src/vmm/src/pci/msix.rs @@ -1,19 +1,20 @@ +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. // Copyright © 2019 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause // +use std::io; use std::sync::Arc; -use std::{io, result}; use byteorder::{ByteOrder, LittleEndian}; +use pci::PciCapabilityId; use serde::{Deserialize, Serialize}; -use vm_device::interrupt::{ - InterruptIndex, InterruptSourceConfig, InterruptSourceGroup, MsiIrqSourceConfig, -}; +use vm_device::interrupt::{InterruptSourceConfig, InterruptSourceGroup, MsiIrqSourceConfig}; use vm_memory::ByteValued; -use crate::{PciCapability, PciCapabilityId}; +use crate::logger::{debug, error, warn}; +use crate::pci::configuration::PciCapability; const MAX_MSIX_VECTORS_PER_DEVICE: u16 = 2048; const MSIX_TABLE_ENTRIES_MODULO: u64 = 16; @@ -23,7 +24,8 @@ const FUNCTION_MASK_BIT: u8 = 14; const MSIX_ENABLE_BIT: u8 = 15; #[derive(Debug, thiserror::Error, displaydoc::Display)] -pub enum Error { +/// Error during handling of MSI-X interrupts +pub enum MsixError { /// Failed enabling the interrupt route. EnableInterruptRoute(io::Error), /// Failed updating the interrupt route. @@ -31,14 +33,20 @@ pub enum Error { } #[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)] +/// MSI-X table entries pub struct MsixTableEntry { + /// Lower 32 bits of the vector address pub msg_addr_lo: u32, + /// Upper 32 bits of the vector address pub msg_addr_hi: u32, + /// Vector data pub msg_data: u32, + /// Enable/Disable and (un)masking control pub vector_ctl: u32, } impl MsixTableEntry { + /// Returns `true` if the vector is masked pub fn masked(&self) -> bool { self.vector_ctl & 0x1 == 0x1 } @@ -56,6 +64,7 @@ impl Default for MsixTableEntry { } #[derive(Debug, Clone, Serialize, Deserialize)] +/// State for (de)serializing MSI-X configuration pub struct MsixConfigState { table_entries: Vec, pba_entries: Vec, @@ -63,12 +72,19 @@ pub struct MsixConfigState { enabled: bool, } +/// MSI-X configuration pub struct MsixConfig { + /// Vector table entries pub table_entries: Vec, + /// Pending bit array pub pba_entries: Vec, + /// Id of the device using this set of vectors pub devid: u32, + /// Interrupts used to drive this set of vectors pub interrupt_source_group: Arc, + /// Whether vectors are masked pub masked: bool, + /// Whether vectors are enabled pub enabled: bool, } @@ -85,12 +101,13 @@ impl std::fmt::Debug for MsixConfig { } impl MsixConfig { + /// Create a new MSI-X configuration pub fn new( msix_vectors: u16, interrupt_source_group: Arc, devid: u32, state: Option, - ) -> result::Result { + ) -> Result { assert!(msix_vectors <= MAX_MSIX_VECTORS_PER_DEVICE); let (table_entries, pba_entries, masked, enabled) = if let Some(state) = state { @@ -109,16 +126,16 @@ impl MsixConfig { interrupt_source_group .update( - idx as InterruptIndex, + idx.try_into().unwrap(), InterruptSourceConfig::MsiIrq(config), state.masked, true, ) - .map_err(Error::UpdateInterruptRoute)?; + .map_err(MsixError::UpdateInterruptRoute)?; interrupt_source_group .enable() - .map_err(Error::EnableInterruptRoute)?; + .map_err(MsixError::EnableInterruptRoute)?; } } @@ -148,6 +165,7 @@ impl MsixConfig { }) } + /// Create the state object for serializing MSI-X vectors pub fn state(&self) -> MsixConfigState { MsixConfigState { table_entries: self.table_entries.clone(), @@ -157,6 +175,7 @@ impl MsixConfig { } } + /// Set the MSI-X control message (enable/disable, (un)mask) pub fn set_msg_ctl(&mut self, reg: u16) { let old_masked = self.masked; let old_enabled = self.enabled; @@ -177,7 +196,7 @@ impl MsixConfig { }; if let Err(e) = self.interrupt_source_group.update( - idx as InterruptIndex, + idx.try_into().unwrap(), InterruptSourceConfig::MsiIrq(config), table_entry.masked(), true, @@ -199,13 +218,14 @@ impl MsixConfig { // masked. if old_masked && !self.masked { for (index, entry) in self.table_entries.clone().iter().enumerate() { - if !entry.masked() && self.get_pba_bit(index as u16) == 1 { + if !entry.masked() && self.get_pba_bit(index.try_into().unwrap()) == 1 { self.inject_msix_and_clear_pba(index); } } } } + /// Read an MSI-X table entry pub fn read_table(&self, offset: u64, data: &mut [u8]) { assert!(data.len() <= 8); @@ -258,6 +278,7 @@ impl MsixConfig { } } + /// Write an MSI-X table entry pub fn write_table(&mut self, offset: u64, data: &[u8]) { assert!(data.len() <= 8); @@ -322,7 +343,7 @@ impl MsixConfig { }; if let Err(e) = self.interrupt_source_group.update( - index as InterruptIndex, + index.try_into().unwrap(), InterruptSourceConfig::MsiIrq(config), table_entry.masked(), true, @@ -344,12 +365,13 @@ impl MsixConfig { && self.enabled && old_entry.masked() && !table_entry.masked() - && self.get_pba_bit(index as u16) == 1 + && self.get_pba_bit(index.try_into().unwrap()) == 1 { self.inject_msix_and_clear_pba(index); } } + /// Read a pending bit array entry pub fn read_pba(&self, offset: u64, data: &mut [u8]) { let index: usize = (offset / MSIX_PBA_ENTRIES_MODULO) as usize; let modulo_offset = offset % MSIX_PBA_ENTRIES_MODULO; @@ -391,10 +413,12 @@ impl MsixConfig { } } + /// Write a pending bit array entry pub fn write_pba(&mut self, _offset: u64, _data: &[u8]) { error!("Pending Bit Array is read only"); } + /// Set PBA bit for a vector pub fn set_pba_bit(&mut self, vector: u16, reset: bool) { assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE); @@ -414,6 +438,7 @@ impl MsixConfig { } } + /// Get the PBA bit for a vector fn get_pba_bit(&self, vector: u16) -> u8 { assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE); @@ -427,38 +452,39 @@ impl MsixConfig { ((self.pba_entries[index] >> shift) & 0x0000_0001u64) as u8 } + /// Inject an MSI-X interrupt and clear the PBA bit for a vector fn inject_msix_and_clear_pba(&mut self, vector: usize) { // Inject the MSI message match self .interrupt_source_group - .trigger(vector as InterruptIndex) + .trigger(vector.try_into().unwrap()) { Ok(_) => debug!("MSI-X injected on vector control flip"), Err(e) => error!("failed to inject MSI-X: {}", e), } // Clear the bit from PBA - self.set_pba_bit(vector as u16, true); + self.set_pba_bit(vector.try_into().unwrap(), true); } } -#[allow(dead_code)] #[repr(C, packed)] -#[derive(Clone, Copy, Default, Serialize, Deserialize)] +#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)] +/// MSI-X PCI capability pub struct MsixCap { - // Message Control Register - // 10-0: MSI-X Table size - // 13-11: Reserved - // 14: Mask. Mask all MSI-X when set. - // 15: Enable. Enable all MSI-X when set. + /// Message Control Register + /// 10-0: MSI-X Table size + /// 13-11: Reserved + /// 14: Mask. Mask all MSI-X when set. + /// 15: Enable. Enable all MSI-X when set. pub msg_ctl: u16, - // Table. Contains the offset and the BAR indicator (BIR) - // 2-0: Table BAR indicator (BIR). Can be 0 to 5. - // 31-3: Table offset in the BAR pointed by the BIR. + /// Table. Contains the offset and the BAR indicator (BIR) + /// 2-0: Table BAR indicator (BIR). Can be 0 to 5. + /// 31-3: Table offset in the BAR pointed by the BIR. pub table: u32, - // Pending Bit Array. Contains the offset and the BAR indicator (BIR) - // 2-0: PBA BAR indicator (BIR). Can be 0 to 5. - // 31-3: PBA offset in the BAR pointed by the BIR. + /// Pending Bit Array. Contains the offset and the BAR indicator (BIR) + /// 2-0: PBA BAR indicator (BIR). Can be 0 to 5. + /// 31-3: PBA offset in the BAR pointed by the BIR. pub pba: u32, } @@ -476,6 +502,7 @@ impl PciCapability for MsixCap { } impl MsixCap { + /// Create a new MSI-X capability object pub fn new( table_pci_bar: u8, table_size: u16, @@ -500,6 +527,7 @@ impl MsixCap { mod tests { use std::sync::atomic::{AtomicUsize, Ordering}; + use vm_device::interrupt::InterruptIndex; use vmm_sys_util::eventfd::EventFd; use super::*; diff --git a/src/vmm/src/vstate/vm.rs b/src/vmm/src/vstate/vm.rs index 27d43176367..7e2b9975601 100644 --- a/src/vmm/src/vstate/vm.rs +++ b/src/vmm/src/vstate/vm.rs @@ -12,6 +12,7 @@ use std::path::Path; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::{Arc, Mutex, MutexGuard}; +use anyhow::anyhow; #[cfg(target_arch = "x86_64")] use kvm_bindings::KVM_IRQCHIP_IOAPIC; use kvm_bindings::{ @@ -19,8 +20,7 @@ use kvm_bindings::{ KvmIrqRouting, kvm_irq_routing_entry, kvm_userspace_memory_region, }; use kvm_ioctls::VmFd; -use log::{debug, error}; -use pci::DeviceRelocation; +use log::debug; use serde::{Deserialize, Serialize}; use vm_device::interrupt::{ InterruptIndex, InterruptSourceConfig, InterruptSourceGroup, MsiIrqSourceConfig, @@ -31,6 +31,7 @@ use vmm_sys_util::eventfd::EventFd; pub use crate::arch::{ArchVm as Vm, ArchVmError, VmState}; use crate::arch::{GSI_MSI_END, host_page_size}; use crate::logger::info; +use crate::pci::{DeviceRelocation, PciDevice}; use crate::persist::CreateSnapshotError; use crate::snapshot::Persist; use crate::utils::u64_to_usize; @@ -668,10 +669,9 @@ impl DeviceRelocation for Vm { _old_base: u64, _new_base: u64, _len: u64, - _pci_dev: &mut dyn pci::PciDevice, - ) -> Result<(), std::io::Error> { - error!("pci: device relocation not supported"); - Err(std::io::Error::from(std::io::ErrorKind::Unsupported)) + _pci_dev: &mut dyn PciDevice, + ) -> Result<(), anyhow::Error> { + Err(anyhow!("pci: device relocation not supported")) } } From 2d190dd60a88431bcf7077f545742efae930ef48 Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Thu, 25 Sep 2025 16:46:52 +0200 Subject: [PATCH 4/6] refactor(pci): separate functions for new and restored types The code we inherited from Cloud Hypervisor was using a single `new` method for creating objects both for booted and snapshot resumed VMs. The `new` method had an optional `state` argument. When this argument was `Some(state)` the construction of the object happened through copying values from `state`. Split this functionality, for various types, in two distinct methods `new` & `from_state` which are only called for booted and snapshotted microVMs respectively. This is similar to the pattern we're using throughout Firecracker, reduces if/else branches, as well as the arguments of distinct methods. Signed-off-by: Babis Chalios --- .../devices/virtio/transport/pci/device.rs | 37 ++---- src/vmm/src/pci/bus.rs | 2 - src/vmm/src/pci/configuration.rs | 66 +++++----- src/vmm/src/pci/msix.rs | 122 +++++++++--------- 4 files changed, 104 insertions(+), 123 deletions(-) diff --git a/src/vmm/src/devices/virtio/transport/pci/device.rs b/src/vmm/src/devices/virtio/transport/pci/device.rs index 7fc125af29c..75fe1da1608 100644 --- a/src/vmm/src/devices/virtio/transport/pci/device.rs +++ b/src/vmm/src/devices/virtio/transport/pci/device.rs @@ -312,7 +312,6 @@ impl VirtioPciDevice { fn pci_configuration( virtio_device_type: u32, msix_config: &Arc>, - pci_config_state: Option, ) -> PciConfiguration { let pci_device_id = VIRTIO_PCI_DEVICE_ID_BASE + u16::try_from(virtio_device_type).unwrap(); let (class, subclass) = match virtio_device_type { @@ -339,25 +338,9 @@ impl VirtioPciDevice { VIRTIO_PCI_VENDOR_ID, pci_device_id, Some(msix_config.clone()), - pci_config_state, ) } - fn msix_config( - pci_device_bdf: u32, - msix_vectors: Arc, - msix_config_state: Option, - ) -> Result>> { - let msix_config = Arc::new(Mutex::new(MsixConfig::new( - msix_vectors.num_vectors(), - msix_vectors, - pci_device_bdf, - msix_config_state, - )?)); - - Ok(msix_config) - } - /// Allocate the PCI BAR for the VirtIO device and its associated capabilities. /// /// This must happen only during the creation of a brand new VM. When a VM is restored from a @@ -399,11 +382,14 @@ impl VirtioPciDevice { ) -> Result { let num_queues = device.lock().expect("Poisoned lock").queues().len(); - let msix_config = Self::msix_config(pci_device_bdf, msi_vectors.clone(), None)?; + let msix_config = Arc::new(Mutex::new(MsixConfig::new( + msi_vectors.num_vectors(), + msi_vectors.clone(), + pci_device_bdf, + )?)); let pci_config = Self::pci_configuration( device.lock().expect("Poisoned lock").device_type(), &msix_config, - None, ); let virtio_common_config = VirtioPciCommonConfig::new(VirtioPciCommonConfigState { @@ -448,16 +434,15 @@ impl VirtioPciDevice { msi_vectors: Arc, state: VirtioPciDeviceState, ) -> Result { - let msix_config = Self::msix_config( + let msix_config = Arc::new(Mutex::new(MsixConfig::from_state( + state.msix_state, state.pci_device_bdf.into(), msi_vectors.clone(), - Some(state.msix_state), - )?; + )?)); - let pci_config = Self::pci_configuration( - device.lock().expect("Poisoned lock").device_type(), - &msix_config, - Some(state.pci_configuration_state), + let pci_config = PciConfiguration::type0_from_state( + state.pci_configuration_state, + Some(msix_config.clone()), ); let virtio_common_config = VirtioPciCommonConfig::new(state.pci_dev_state); let cap_pci_cfg_info = VirtioPciCfgCapInfo { diff --git a/src/vmm/src/pci/bus.rs b/src/vmm/src/pci/bus.rs index 6ae31f84b58..72e9f1c2815 100644 --- a/src/vmm/src/pci/bus.rs +++ b/src/vmm/src/pci/bus.rs @@ -53,7 +53,6 @@ impl PciRoot { 0, 0, None, - None, ), } } @@ -500,7 +499,6 @@ mod tests { 0x13, 0x12, None, - None, ); config.add_pci_bar(0, 0x1000, 0x1000); diff --git a/src/vmm/src/pci/configuration.rs b/src/vmm/src/pci/configuration.rs index 3d9f588e466..83d74e62b94 100644 --- a/src/vmm/src/pci/configuration.rs +++ b/src/vmm/src/pci/configuration.rs @@ -102,46 +102,41 @@ impl PciConfiguration { subsystem_vendor_id: u16, subsystem_id: u16, msix_config: Option>>, - state: Option, ) -> Self { - let (registers, writable_bits, bars, last_capability, msix_cap_reg_idx) = - if let Some(state) = state { - ( - state.registers.try_into().unwrap(), - state.writable_bits.try_into().unwrap(), - state.bars.try_into().unwrap(), - state.last_capability, - state.msix_cap_reg_idx, - ) - } else { - let mut registers = [0u32; NUM_CONFIGURATION_REGISTERS]; - let mut writable_bits = [0u32; NUM_CONFIGURATION_REGISTERS]; - registers[0] = (u32::from(device_id) << 16) | u32::from(vendor_id); - // TODO(dverkamp): Status should be write-1-to-clear - writable_bits[1] = 0x0000_ffff; // Status (r/o), command (r/w) - registers[2] = (u32::from(class_code.get_register_value()) << 24) - | (u32::from(subclass.get_register_value()) << 16) - | u32::from(revision_id); - writable_bits[3] = 0x0000_00ff; // Cacheline size (r/w) - registers[3] = 0x0000_0000; // Header type 0 (device) - writable_bits[15] = 0x0000_00ff; // IRQ line (r/w) - registers[11] = (u32::from(subsystem_id) << 16) | u32::from(subsystem_vendor_id); - - ( - registers, - writable_bits, - [PciBar::default(); NUM_BAR_REGS], - None, - None, - ) - }; + let mut registers = [0u32; NUM_CONFIGURATION_REGISTERS]; + let mut writable_bits = [0u32; NUM_CONFIGURATION_REGISTERS]; + registers[0] = (u32::from(device_id) << 16) | u32::from(vendor_id); + // TODO(dverkamp): Status should be write-1-to-clear + writable_bits[1] = 0x0000_ffff; // Status (r/o), command (r/w) + registers[2] = (u32::from(class_code.get_register_value()) << 24) + | (u32::from(subclass.get_register_value()) << 16) + | u32::from(revision_id); + writable_bits[3] = 0x0000_00ff; // Cacheline size (r/w) + registers[3] = 0x0000_0000; // Header type 0 (device) + writable_bits[15] = 0x0000_00ff; // IRQ line (r/w) + registers[11] = (u32::from(subsystem_id) << 16) | u32::from(subsystem_vendor_id); PciConfiguration { registers, writable_bits, - bars, - last_capability, - msix_cap_reg_idx, + bars: [PciBar::default(); NUM_BAR_REGS], + last_capability: None, + msix_cap_reg_idx: None, + msix_config, + } + } + + /// Create a type 0 PCI configuration from snapshot state + pub fn type0_from_state( + state: PciConfigurationState, + msix_config: Option>>, + ) -> Self { + PciConfiguration { + registers: state.registers.try_into().unwrap(), + writable_bits: state.writable_bits.try_into().unwrap(), + bars: state.bars.try_into().unwrap(), + last_capability: state.last_capability, + msix_cap_reg_idx: state.msix_cap_reg_idx, msix_config, } } @@ -654,7 +649,6 @@ mod tests { 0xABCD, 0x2468, None, - None, ) } diff --git a/src/vmm/src/pci/msix.rs b/src/vmm/src/pci/msix.rs index 74854d0029f..35f56983ed6 100644 --- a/src/vmm/src/pci/msix.rs +++ b/src/vmm/src/pci/msix.rs @@ -106,62 +106,66 @@ impl MsixConfig { msix_vectors: u16, interrupt_source_group: Arc, devid: u32, - state: Option, ) -> Result { assert!(msix_vectors <= MAX_MSIX_VECTORS_PER_DEVICE); - let (table_entries, pba_entries, masked, enabled) = if let Some(state) = state { - if state.enabled && !state.masked { - for (idx, table_entry) in state.table_entries.iter().enumerate() { - if table_entry.masked() { - continue; - } + let mut table_entries: Vec = Vec::new(); + table_entries.resize_with(msix_vectors as usize, Default::default); + let mut pba_entries: Vec = Vec::new(); + let num_pba_entries: usize = (msix_vectors as usize).div_ceil(BITS_PER_PBA_ENTRY); + pba_entries.resize_with(num_pba_entries, Default::default); - let config = MsiIrqSourceConfig { - high_addr: table_entry.msg_addr_hi, - low_addr: table_entry.msg_addr_lo, - data: table_entry.msg_data, - devid, - }; + Ok(MsixConfig { + table_entries, + pba_entries, + devid, + interrupt_source_group, + masked: true, + enabled: false, + }) + } - interrupt_source_group - .update( - idx.try_into().unwrap(), - InterruptSourceConfig::MsiIrq(config), - state.masked, - true, - ) - .map_err(MsixError::UpdateInterruptRoute)?; - - interrupt_source_group - .enable() - .map_err(MsixError::EnableInterruptRoute)?; + /// Create an MSI-X configuration from snapshot state + pub fn from_state( + state: MsixConfigState, + devid: u32, + interrupt_source_group: Arc, + ) -> Result { + if state.enabled && !state.masked { + for (idx, table_entry) in state.table_entries.iter().enumerate() { + if table_entry.masked() { + continue; } - } - ( - state.table_entries, - state.pba_entries, - state.masked, - state.enabled, - ) - } else { - let mut table_entries: Vec = Vec::new(); - table_entries.resize_with(msix_vectors as usize, Default::default); - let mut pba_entries: Vec = Vec::new(); - let num_pba_entries: usize = (msix_vectors as usize).div_ceil(BITS_PER_PBA_ENTRY); - pba_entries.resize_with(num_pba_entries, Default::default); + let config = MsiIrqSourceConfig { + high_addr: table_entry.msg_addr_hi, + low_addr: table_entry.msg_addr_lo, + data: table_entry.msg_data, + devid, + }; - (table_entries, pba_entries, true, false) - }; + interrupt_source_group + .update( + idx.try_into().unwrap(), + InterruptSourceConfig::MsiIrq(config), + state.masked, + true, + ) + .map_err(MsixError::UpdateInterruptRoute)?; + + interrupt_source_group + .enable() + .map_err(MsixError::EnableInterruptRoute)?; + } + } Ok(MsixConfig { - table_entries, - pba_entries, + table_entries: state.table_entries, + pba_entries: state.pba_entries, devid, interrupt_source_group, - masked, - enabled, + masked: state.masked, + enabled: state.enabled, }) } @@ -589,13 +593,13 @@ mod tests { #[test] #[should_panic] fn test_too_many_vectors() { - MsixConfig::new(2049, Arc::new(MockInterrupt::new()), 0x42, None).unwrap(); + MsixConfig::new(2049, Arc::new(MockInterrupt::new()), 0x42).unwrap(); } #[test] fn test_new_msix_config() { let vectors = Arc::new(MockInterrupt::new()); - let config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + let config = MsixConfig::new(2, vectors.clone(), 0x42).unwrap(); assert_eq!(config.devid, 0x42); assert!(config.masked); assert!(!config.enabled); @@ -606,7 +610,7 @@ mod tests { #[test] fn test_enable_msix_vectors() { let vectors = Arc::new(MockInterrupt::new()); - let mut config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + let mut config = MsixConfig::new(2, vectors.clone(), 0x42).unwrap(); assert!(!config.enabled); assert!(config.masked); @@ -634,7 +638,7 @@ mod tests { #[should_panic] fn test_table_access_read_too_big() { let vectors = Arc::new(MockInterrupt::new()); - let config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + let config = MsixConfig::new(2, vectors.clone(), 0x42).unwrap(); let mut buffer = [0u8; 16]; config.read_table(0, &mut buffer); @@ -643,7 +647,7 @@ mod tests { #[test] fn test_read_table_past_end() { let vectors = Arc::new(MockInterrupt::new()); - let config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + let config = MsixConfig::new(2, vectors.clone(), 0x42).unwrap(); let mut buffer = [0u8; 8]; // We have 2 vectors (16 bytes each), so we should be able to read up to 32 bytes. @@ -655,7 +659,7 @@ mod tests { #[test] fn test_read_table_bad_length() { let vectors = Arc::new(MockInterrupt::new()); - let config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + let config = MsixConfig::new(2, vectors.clone(), 0x42).unwrap(); let mut buffer = [0u8; 8]; // We can either read 4 or 8 bytes @@ -682,7 +686,7 @@ mod tests { #[test] fn test_access_table() { let vectors = Arc::new(MockInterrupt::new()); - let mut config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + let mut config = MsixConfig::new(2, vectors.clone(), 0x42).unwrap(); // enabled and not masked config.set_msg_ctl(0x8000); assert_eq!(vectors.update_cnt(0), 1); @@ -788,7 +792,7 @@ mod tests { #[should_panic] fn test_table_access_write_too_big() { let vectors = Arc::new(MockInterrupt::new()); - let mut config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + let mut config = MsixConfig::new(2, vectors.clone(), 0x42).unwrap(); let buffer = [0u8; 16]; config.write_table(0, &buffer); @@ -797,7 +801,7 @@ mod tests { #[test] fn test_pba_read_too_big() { let vectors = Arc::new(MockInterrupt::new()); - let config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + let config = MsixConfig::new(2, vectors.clone(), 0x42).unwrap(); let mut buffer = [0u8; 16]; config.read_pba(0, &mut buffer); @@ -807,7 +811,7 @@ mod tests { #[test] fn test_pba_invalid_offset() { let vectors = Arc::new(MockInterrupt::new()); - let config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + let config = MsixConfig::new(2, vectors.clone(), 0x42).unwrap(); let mut buffer = [0u8; 8]; // Past the end of the PBA array @@ -826,7 +830,7 @@ mod tests { #[should_panic] fn test_set_pba_bit_vector_too_big() { let vectors = Arc::new(MockInterrupt::new()); - let mut config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + let mut config = MsixConfig::new(2, vectors.clone(), 0x42).unwrap(); config.set_pba_bit(2048, false); } @@ -835,7 +839,7 @@ mod tests { #[should_panic] fn test_get_pba_bit_vector_too_big() { let vectors = Arc::new(MockInterrupt::new()); - let config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + let config = MsixConfig::new(2, vectors.clone(), 0x42).unwrap(); config.get_pba_bit(2048); } @@ -843,7 +847,7 @@ mod tests { #[test] fn test_pba_bit_invalid_vector() { let vectors = Arc::new(MockInterrupt::new()); - let mut config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + let mut config = MsixConfig::new(2, vectors.clone(), 0x42).unwrap(); // We have two vectors, so setting the pending bit for the third one // should be ignored @@ -857,7 +861,7 @@ mod tests { #[test] fn test_pba_read() { let vectors = Arc::new(MockInterrupt::new()); - let mut config = MsixConfig::new(128, vectors.clone(), 0x42, None).unwrap(); + let mut config = MsixConfig::new(128, vectors.clone(), 0x42).unwrap(); let mut buffer = [0u8; 8]; config.set_pba_bit(1, false); @@ -879,7 +883,7 @@ mod tests { #[test] fn test_pending_interrupt() { let vectors = Arc::new(MockInterrupt::new()); - let mut config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + let mut config = MsixConfig::new(2, vectors.clone(), 0x42).unwrap(); config.set_pba_bit(1, false); assert_eq!(config.get_pba_bit(1), 1); // Enable MSI-X vector and unmask interrupts From fe094df17493bcf4532730eb3a3e7d3b2ecb10c0 Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Mon, 29 Sep 2025 12:40:31 +0200 Subject: [PATCH 5/6] refactor(pci): simplify MSI-X interrupts implementation Our implementation of MSI-X was taken from the equivalent Cloud Hypervisor one. Cloud Hypervisor supports non-KVM hypervisors, hence they introduced a few abstractions to cater for that. More specifically, they are using a trait (`InterruptSourceGroup`) to describe the underlying hypervisor-specific mechanism used to drive MSI-X interrupts from the device to the guest. Moreover, they use a couple of types to distinguish between MSI, MSI-X and legacy IRQ interrupts and how these can be configured. This is especially useful for MSI/MSI-X interrupts where the interrupt configuration is provided by the guest over PCI bus MMIO accesses. Finally, in order to allow for transparent error handling, `Result` types returned by related trait methods was returning `std::io::Error` types as the error type, which is not very intuitive. We don't need all of that. We clearly distinguish between legacy interrupts and MSI-X (we don't use at all MSI), so we can simply plug in all the definitions directly where we need them without any dynamic dispatch indirection. Moreover, various MSI-X components were stored all over the place creating a lot of unnecessary duplication. This commit refactors our implementation to get rid of the unnecessary bit of dynamic dispatch. We still keep the `VirtioInterrupt` trait which we use to abstract the actual interrupt type (IRQ or MSI-X) from VirtIO devices. Moreover, it removes the duplication of MSI-X related types across various types. We only duplicate the MSI-X configuration type (which is behind an Arc) in a few places to avoid needing to unnecessarily take a lock(). Moreover, we introduce a single interrupt related error type and get rid of the `std::io::Error` usage. Finally, it introduces a couple of metrics for interrupts that are useful information by themselves, but also help us maintain some unit testing assertions for which we were relying on a mock type that implemented `InterruptSourceGroup`. Signed-off-by: Babis Chalios --- src/vmm/src/device_manager/pci_mngr.rs | 20 +- src/vmm/src/devices/mod.rs | 3 +- src/vmm/src/devices/virtio/balloon/mod.rs | 3 +- .../devices/virtio/block/vhost_user/mod.rs | 3 +- src/vmm/src/devices/virtio/transport/mmio.rs | 11 +- src/vmm/src/devices/virtio/transport/mod.rs | 6 +- .../devices/virtio/transport/pci/device.rs | 102 +++--- src/vmm/src/logger/metrics.rs | 22 ++ src/vmm/src/pci/msix.rs | 314 +++++++----------- src/vmm/src/vstate/interrupts.rs | 202 +++++++++++ src/vmm/src/vstate/mod.rs | 2 + src/vmm/src/vstate/vm.rs | 274 ++------------- tests/host_tools/fcmetrics.py | 1 + 13 files changed, 436 insertions(+), 527 deletions(-) create mode 100644 src/vmm/src/vstate/interrupts.rs diff --git a/src/vmm/src/device_manager/pci_mngr.rs b/src/vmm/src/device_manager/pci_mngr.rs index 309704be8bf..83daf7c8b09 100644 --- a/src/vmm/src/device_manager/pci_mngr.rs +++ b/src/vmm/src/device_manager/pci_mngr.rs @@ -34,8 +34,8 @@ use crate::pci::bus::PciRootError; use crate::resources::VmResources; use crate::snapshot::Persist; use crate::vmm_config::mmds::MmdsConfigError; +use crate::vstate::interrupts::InterruptError; use crate::vstate::memory::GuestMemoryMmap; -use crate::vstate::vm::{InterruptError, MsiVectorGroup}; use crate::{EventManager, Vm}; #[derive(Debug, Default)] @@ -121,11 +121,16 @@ impl PciDevices { let msix_num = u16::try_from(device.lock().expect("Poisoned lock").queues().len() + 1).unwrap(); - let msix_vectors = Arc::new(Vm::create_msix_group(vm.clone(), msix_num)?); + let msix_vectors = Vm::create_msix_group(vm.clone(), msix_num)?; // Create the transport - let mut virtio_device = - VirtioPciDevice::new(id.clone(), mem, device, msix_vectors, pci_device_bdf.into())?; + let mut virtio_device = VirtioPciDevice::new( + id.clone(), + mem, + device, + Arc::new(msix_vectors), + pci_device_bdf.into(), + )?; // Allocate bars let mut resource_allocator_lock = vm.resource_allocator(); @@ -162,17 +167,12 @@ impl PciDevices { ) -> Result<(), PciManagerError> { // We should only be reaching this point if PCI is enabled let pci_segment = self.pci_segment.as_ref().unwrap(); - let msi_vector_group = Arc::new(MsiVectorGroup::restore( - vm.clone(), - &transport_state.msi_vector_group, - )?); let device_type: u32 = device.lock().expect("Poisoned lock").device_type(); let virtio_device = Arc::new(Mutex::new(VirtioPciDevice::new_from_state( device_id.to_string(), - vm.guest_memory().clone(), + vm, device.clone(), - msi_vector_group, transport_state.clone(), )?)); diff --git a/src/vmm/src/devices/mod.rs b/src/vmm/src/devices/mod.rs index 371cc2cfa9e..232eabba989 100644 --- a/src/vmm/src/devices/mod.rs +++ b/src/vmm/src/devices/mod.rs @@ -23,6 +23,7 @@ use crate::devices::virtio::net::metrics::NetDeviceMetrics; use crate::devices::virtio::queue::{InvalidAvailIdx, QueueError}; use crate::devices::virtio::vsock::VsockError; use crate::logger::IncMetric; +use crate::vstate::interrupts::InterruptError; // Function used for reporting error in terms of logging // but also in terms of metrics of net event fails. @@ -41,7 +42,7 @@ pub enum DeviceError { /// Failed to read from the TAP device. FailedReadTap, /// Failed to signal irq: {0} - FailedSignalingIrq(io::Error), + FailedSignalingIrq(#[from] InterruptError), /// IO error: {0} IoError(io::Error), /// Device received malformed payload. diff --git a/src/vmm/src/devices/virtio/balloon/mod.rs b/src/vmm/src/devices/virtio/balloon/mod.rs index 91de01a8393..33020e0ddec 100644 --- a/src/vmm/src/devices/virtio/balloon/mod.rs +++ b/src/vmm/src/devices/virtio/balloon/mod.rs @@ -17,6 +17,7 @@ use super::queue::{InvalidAvailIdx, QueueError}; use crate::devices::virtio::balloon::metrics::METRICS; use crate::devices::virtio::queue::FIRECRACKER_MAX_QUEUE_SIZE; use crate::logger::IncMetric; +use crate::vstate::interrupts::InterruptError; /// Device ID used in MMIO device identification. /// Because Balloon is unique per-vm, this ID can be hardcoded. @@ -72,7 +73,7 @@ pub enum BalloonError { /// EventFd error: {0} EventFd(std::io::Error), /// Received error while sending an interrupt: {0} - InterruptError(std::io::Error), + InterruptError(InterruptError), /// Guest gave us a malformed descriptor. MalformedDescriptor, /// Guest gave us a malformed payload. diff --git a/src/vmm/src/devices/virtio/block/vhost_user/mod.rs b/src/vmm/src/devices/virtio/block/vhost_user/mod.rs index 0afaaed3400..0cd6d46f0ae 100644 --- a/src/vmm/src/devices/virtio/block/vhost_user/mod.rs +++ b/src/vmm/src/devices/virtio/block/vhost_user/mod.rs @@ -7,6 +7,7 @@ pub mod persist; use self::device::VhostUserBlock; use crate::devices::virtio::vhost_user::VhostUserError; +use crate::vstate::interrupts::InterruptError; /// Number of queues for the vhost-user block device. pub const NUM_QUEUES: u64 = 1; @@ -28,5 +29,5 @@ pub enum VhostUserBlockError { /// Error opening eventfd: {0} EventFd(std::io::Error), /// Error creating irqfd: {0} - Interrupt(std::io::Error), + Interrupt(InterruptError), } diff --git a/src/vmm/src/devices/virtio/transport/mmio.rs b/src/vmm/src/devices/virtio/transport/mmio.rs index c20928e3c29..319e98b1537 100644 --- a/src/vmm/src/devices/virtio/transport/mmio.rs +++ b/src/vmm/src/devices/virtio/transport/mmio.rs @@ -15,8 +15,9 @@ use super::{VirtioInterrupt, VirtioInterruptType}; use crate::devices::virtio::device::VirtioDevice; use crate::devices::virtio::device_status; use crate::devices::virtio::queue::Queue; -use crate::logger::{error, warn}; +use crate::logger::{IncMetric, METRICS, error, warn}; use crate::utils::byte_order; +use crate::vstate::interrupts::InterruptError; use crate::vstate::memory::{GuestAddress, GuestMemoryMmap}; // TODO crosvm uses 0 here, but IIRC virtio specified some other vendor id that should be used @@ -399,17 +400,19 @@ impl Default for IrqTrigger { } impl VirtioInterrupt for IrqTrigger { - fn trigger(&self, interrupt_type: VirtioInterruptType) -> Result<(), std::io::Error> { + fn trigger(&self, interrupt_type: VirtioInterruptType) -> Result<(), InterruptError> { + METRICS.interrupts.triggers.inc(); match interrupt_type { VirtioInterruptType::Config => self.trigger_irq(IrqType::Config), VirtioInterruptType::Queue(_) => self.trigger_irq(IrqType::Vring), } } - fn trigger_queues(&self, queues: &[u16]) -> Result<(), std::io::Error> { + fn trigger_queues(&self, queues: &[u16]) -> Result<(), InterruptError> { if queues.is_empty() { Ok(()) } else { + METRICS.interrupts.triggers.inc(); self.trigger_irq(IrqType::Vring) } } @@ -449,7 +452,7 @@ impl IrqTrigger { } } - fn trigger_irq(&self, irq_type: IrqType) -> Result<(), std::io::Error> { + fn trigger_irq(&self, irq_type: IrqType) -> Result<(), InterruptError> { let irq = match irq_type { IrqType::Config => VIRTIO_MMIO_INT_CONFIG, IrqType::Vring => VIRTIO_MMIO_INT_VRING, diff --git a/src/vmm/src/devices/virtio/transport/mod.rs b/src/vmm/src/devices/virtio/transport/mod.rs index 39dfe05a4fd..41d0730dfe0 100644 --- a/src/vmm/src/devices/virtio/transport/mod.rs +++ b/src/vmm/src/devices/virtio/transport/mod.rs @@ -6,6 +6,8 @@ use std::sync::atomic::AtomicU32; use vmm_sys_util::eventfd::EventFd; +use crate::vstate::interrupts::InterruptError; + /// MMIO transport for VirtIO devices pub mod mmio; /// PCI transport for VirtIO devices @@ -23,7 +25,7 @@ pub enum VirtioInterruptType { /// API of interrupt types used by VirtIO devices pub trait VirtioInterrupt: std::fmt::Debug + Send + Sync { /// Trigger a VirtIO interrupt. - fn trigger(&self, interrupt_type: VirtioInterruptType) -> Result<(), std::io::Error>; + fn trigger(&self, interrupt_type: VirtioInterruptType) -> Result<(), InterruptError>; /// Trigger multiple Virtio interrupts for selected queues. /// The caller needs to ensure that [`queues`] does not include duplicate entries to @@ -31,7 +33,7 @@ pub trait VirtioInterrupt: std::fmt::Debug + Send + Sync { /// This is to allow sending a single interrupt for implementations that don't /// distinguish different queues, like IrqTrigger, instead of sending multiple same /// interrupts. - fn trigger_queues(&self, queues: &[u16]) -> Result<(), std::io::Error> { + fn trigger_queues(&self, queues: &[u16]) -> Result<(), InterruptError> { queues .iter() .try_for_each(|&qidx| self.trigger(VirtioInterruptType::Queue(qidx))) diff --git a/src/vmm/src/devices/virtio/transport/pci/device.rs b/src/vmm/src/devices/virtio/transport/pci/device.rs index 75fe1da1608..59d02179fd1 100644 --- a/src/vmm/src/devices/virtio/transport/pci/device.rs +++ b/src/vmm/src/devices/virtio/transport/pci/device.rs @@ -40,13 +40,13 @@ use crate::devices::virtio::transport::pci::common_config::{ use crate::devices::virtio::transport::{VirtioInterrupt, VirtioInterruptType}; use crate::logger::{debug, error}; use crate::pci::configuration::{PciCapability, PciConfiguration, PciConfigurationState}; -use crate::pci::msix::{MsixCap, MsixConfig, MsixConfigState, MsixError}; +use crate::pci::msix::{MsixCap, MsixConfig, MsixConfigState}; use crate::pci::{BarReprogrammingParams, PciDevice}; use crate::snapshot::Persist; use crate::utils::u64_to_usize; +use crate::vstate::interrupts::{InterruptError, MsixVectorGroup}; use crate::vstate::memory::GuestMemoryMmap; use crate::vstate::resources::ResourceAllocator; -use crate::vstate::vm::{InterruptError, MsiVectorGroup}; const DEVICE_INIT: u8 = 0x00; const DEVICE_ACKNOWLEDGE: u8 = 0x01; @@ -246,7 +246,6 @@ pub struct VirtioPciDeviceState { pub pci_configuration_state: PciConfigurationState, pub pci_dev_state: VirtioPciCommonConfigState, pub msix_state: MsixConfigState, - pub msi_vector_group: Vec, pub bar_address: u64, } @@ -255,7 +254,7 @@ pub enum VirtioPciDeviceError { /// Failed creating VirtioPciDevice: {0} CreateVirtioPciDevice(#[from] anyhow::Error), /// Error creating MSI configuration: {0} - Msi(#[from] MsixError), + Msi(#[from] InterruptError), } pub type Result = std::result::Result; @@ -271,19 +270,12 @@ pub struct VirtioPciDevice { // virtio PCI common configuration common_config: VirtioPciCommonConfig, - // MSI-X config - msix_config: Option>>, - - // Number of MSI-X vectors - msix_num: u16, - // Virtio device reference and status device: Arc>, device_activated: Arc, // PCI interrupts. - virtio_interrupt: Option>, - interrupt_source_group: Arc, + virtio_interrupt: Option>, // Guest memory memory: GuestMemoryMmap, @@ -377,16 +369,15 @@ impl VirtioPciDevice { id: String, memory: GuestMemoryMmap, device: Arc>, - msi_vectors: Arc, + msix_vectors: Arc, pci_device_bdf: u32, ) -> Result { let num_queues = device.lock().expect("Poisoned lock").queues().len(); let msix_config = Arc::new(Mutex::new(MsixConfig::new( - msi_vectors.num_vectors(), - msi_vectors.clone(), + msix_vectors.clone(), pci_device_bdf, - )?)); + ))); let pci_config = Self::pci_configuration( device.lock().expect("Poisoned lock").device_type(), &msix_config, @@ -405,7 +396,7 @@ impl VirtioPciDevice { msix_config.clone(), virtio_common_config.msix_config.clone(), virtio_common_config.msix_queues.clone(), - msi_vectors.clone(), + msix_vectors, )); let virtio_pci_device = VirtioPciDevice { @@ -413,13 +404,10 @@ impl VirtioPciDevice { pci_device_bdf: pci_device_bdf.into(), configuration: pci_config, common_config: virtio_common_config, - msix_config: Some(msix_config), - msix_num: msi_vectors.num_vectors(), device, device_activated: Arc::new(AtomicBool::new(false)), virtio_interrupt: Some(interrupt), memory, - interrupt_source_group: msi_vectors, cap_pci_cfg_info: VirtioPciCfgCapInfo::default(), bar_address: 0, }; @@ -429,16 +417,14 @@ impl VirtioPciDevice { pub fn new_from_state( id: String, - memory: GuestMemoryMmap, + vm: &Arc, device: Arc>, - msi_vectors: Arc, state: VirtioPciDeviceState, ) -> Result { - let msix_config = Arc::new(Mutex::new(MsixConfig::from_state( - state.msix_state, - state.pci_device_bdf.into(), - msi_vectors.clone(), - )?)); + let msix_config = + MsixConfig::from_state(state.msix_state, vm.clone(), state.pci_device_bdf.into())?; + let vectors = msix_config.vectors.clone(); + let msix_config = Arc::new(Mutex::new(msix_config)); let pci_config = PciConfiguration::type0_from_state( state.pci_configuration_state, @@ -454,7 +440,7 @@ impl VirtioPciDevice { msix_config.clone(), virtio_common_config.msix_config.clone(), virtio_common_config.msix_queues.clone(), - msi_vectors.clone(), + vectors, )); let virtio_pci_device = VirtioPciDevice { @@ -462,13 +448,10 @@ impl VirtioPciDevice { pci_device_bdf: state.pci_device_bdf, configuration: pci_config, common_config: virtio_common_config, - msix_config: Some(msix_config), - msix_num: msi_vectors.num_vectors(), device, device_activated: Arc::new(AtomicBool::new(state.device_activated)), virtio_interrupt: Some(interrupt), - memory: memory.clone(), - interrupt_source_group: msi_vectors, + memory: vm.guest_memory().clone(), cap_pci_cfg_info, bar_address: state.bar_address, }; @@ -479,7 +462,7 @@ impl VirtioPciDevice { .lock() .expect("Poisoned lock") .activate( - memory, + virtio_pci_device.memory.clone(), virtio_pci_device.virtio_interrupt.as_ref().unwrap().clone(), ); } @@ -540,10 +523,15 @@ impl VirtioPciDevice { self.configuration.add_capability(&configuration_cap) + VIRTIO_PCI_CAP_OFFSET; self.cap_pci_cfg_info.cap = configuration_cap; - if self.msix_config.is_some() { + if let Some(interrupt) = &self.virtio_interrupt { let msix_cap = MsixCap::new( VIRTIO_BAR_INDEX, - self.msix_num, + interrupt + .msix_config + .lock() + .expect("Poisoned lock") + .vectors + .num_vectors(), MSIX_TABLE_BAR_OFFSET.try_into().unwrap(), VIRTIO_BAR_INDEX, MSIX_PBA_BAR_OFFSET.try_into().unwrap(), @@ -643,13 +631,13 @@ impl VirtioPciDevice { pci_configuration_state: self.configuration.state(), pci_dev_state: self.common_config.state(), msix_state: self - .msix_config + .virtio_interrupt .as_ref() .unwrap() + .msix_config .lock() .expect("Poisoned lock") .state(), - msi_vector_group: self.interrupt_source_group.save(), bar_address: self.bar_address, } } @@ -659,7 +647,7 @@ pub struct VirtioInterruptMsix { msix_config: Arc>, config_vector: Arc, queues_vectors: Arc>>, - interrupt_source_group: Arc, + vectors: Arc, } impl std::fmt::Debug for VirtioInterruptMsix { @@ -677,19 +665,19 @@ impl VirtioInterruptMsix { msix_config: Arc>, config_vector: Arc, queues_vectors: Arc>>, - interrupt_source_group: Arc, + vectors: Arc, ) -> Self { VirtioInterruptMsix { msix_config, config_vector, queues_vectors, - interrupt_source_group, + vectors, } } } impl VirtioInterrupt for VirtioInterruptMsix { - fn trigger(&self, int_type: VirtioInterruptType) -> std::result::Result<(), std::io::Error> { + fn trigger(&self, int_type: VirtioInterruptType) -> std::result::Result<(), InterruptError> { let vector = match int_type { VirtioInterruptType::Config => self.config_vector.load(Ordering::Acquire), VirtioInterruptType::Queue(queue_index) => *self @@ -697,7 +685,7 @@ impl VirtioInterrupt for VirtioInterruptMsix { .lock() .unwrap() .get(queue_index as usize) - .ok_or(ErrorKind::InvalidInput)?, + .ok_or(InterruptError::InvalidVectorIndex(queue_index as usize))?, }; if vector == VIRTQ_MSI_NO_VECTOR { @@ -716,8 +704,7 @@ impl VirtioInterrupt for VirtioInterruptMsix { return Ok(()); } - self.interrupt_source_group - .trigger(vector as InterruptIndex) + self.vectors.trigger(vector as usize) } fn notifier(&self, int_type: VirtioInterruptType) -> Option<&EventFd> { @@ -730,8 +717,7 @@ impl VirtioInterrupt for VirtioInterruptMsix { .get(queue_index as usize)?, }; - self.interrupt_source_group - .notifier(vector as InterruptIndex) + self.vectors.notifier(vector as usize) } fn status(&self) -> Arc { @@ -832,16 +818,18 @@ impl PciDevice for VirtioPciDevice { warn!("pci: unexpected read to notification BAR. Offset {o:#x}"); } o if (MSIX_TABLE_BAR_OFFSET..MSIX_TABLE_BAR_OFFSET + MSIX_TABLE_SIZE).contains(&o) => { - if let Some(msix_config) = &self.msix_config { - msix_config + if let Some(interrupt) = &self.virtio_interrupt { + interrupt + .msix_config .lock() .unwrap() .read_table(o - MSIX_TABLE_BAR_OFFSET, data); } } o if (MSIX_PBA_BAR_OFFSET..MSIX_PBA_BAR_OFFSET + MSIX_PBA_SIZE).contains(&o) => { - if let Some(msix_config) = &self.msix_config { - msix_config + if let Some(interrupt) = &self.virtio_interrupt { + interrupt + .msix_config .lock() .unwrap() .read_pba(o - MSIX_PBA_BAR_OFFSET, data); @@ -874,16 +862,18 @@ impl PciDevice for VirtioPciDevice { warn!("pci: unexpected write to notification BAR. Offset {o:#x}"); } o if (MSIX_TABLE_BAR_OFFSET..MSIX_TABLE_BAR_OFFSET + MSIX_TABLE_SIZE).contains(&o) => { - if let Some(msix_config) = &self.msix_config { - msix_config + if let Some(interrupt) = &self.virtio_interrupt { + interrupt + .msix_config .lock() .unwrap() .write_table(o - MSIX_TABLE_BAR_OFFSET, data); } } o if (MSIX_PBA_BAR_OFFSET..MSIX_PBA_BAR_OFFSET + MSIX_PBA_SIZE).contains(&o) => { - if let Some(msix_config) = &self.msix_config { - msix_config + if let Some(interrupt) = &self.virtio_interrupt { + interrupt + .msix_config .lock() .unwrap() .write_pba(o - MSIX_PBA_BAR_OFFSET, data); @@ -918,9 +908,9 @@ impl PciDevice for VirtioPciDevice { let mut device = self.device.lock().unwrap(); let reset_result = device.reset(); match reset_result { - Some((virtio_interrupt, mut _queue_evts)) => { + Some(_) => { // Upon reset the device returns its interrupt EventFD - self.virtio_interrupt = Some(virtio_interrupt); + self.virtio_interrupt = None; self.device_activated.store(false, Ordering::SeqCst); // Reset queue readiness (changes queue_enable), queue sizes diff --git a/src/vmm/src/logger/metrics.rs b/src/vmm/src/logger/metrics.rs index 294af948591..4b80cf2f2f5 100644 --- a/src/vmm/src/logger/metrics.rs +++ b/src/vmm/src/logger/metrics.rs @@ -791,6 +791,25 @@ impl VcpuMetrics { } } +/// MicroVM interrupt-related metrics +#[derive(Debug, Default, Serialize)] +pub struct InterruptMetrics { + /// Number of interrupt triggers + pub triggers: SharedIncMetric, + /// Configuration updates + pub config_updates: SharedIncMetric, +} + +impl InterruptMetrics { + /// Const default construction. + pub const fn new() -> Self { + Self { + triggers: SharedIncMetric::new(), + config_updates: SharedIncMetric::new(), + } + } +} + /// Metrics specific to the machine manager as a whole. #[derive(Debug, Default, Serialize)] pub struct VmmMetrics { @@ -899,6 +918,8 @@ pub struct FirecrackerMetrics { #[serde(flatten)] /// Vhost-user device related metrics. pub vhost_user_ser: VhostUserMetricsSerializeProxy, + /// Interrupt related metrics + pub interrupts: InterruptMetrics, } impl FirecrackerMetrics { /// Const default construction. @@ -924,6 +945,7 @@ impl FirecrackerMetrics { vsock_ser: VsockMetricsSerializeProxy {}, entropy_ser: EntropyMetricsSerializeProxy {}, vhost_user_ser: VhostUserMetricsSerializeProxy {}, + interrupts: InterruptMetrics::new(), } } } diff --git a/src/vmm/src/pci/msix.rs b/src/vmm/src/pci/msix.rs index 35f56983ed6..16405fe6217 100644 --- a/src/vmm/src/pci/msix.rs +++ b/src/vmm/src/pci/msix.rs @@ -4,17 +4,18 @@ // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause // -use std::io; use std::sync::Arc; use byteorder::{ByteOrder, LittleEndian}; use pci::PciCapabilityId; use serde::{Deserialize, Serialize}; -use vm_device::interrupt::{InterruptSourceConfig, InterruptSourceGroup, MsiIrqSourceConfig}; use vm_memory::ByteValued; +use crate::Vm; use crate::logger::{debug, error, warn}; use crate::pci::configuration::PciCapability; +use crate::snapshot::Persist; +use crate::vstate::interrupts::{InterruptError, MsixVectorConfig, MsixVectorGroup}; const MAX_MSIX_VECTORS_PER_DEVICE: u16 = 2048; const MSIX_TABLE_ENTRIES_MODULO: u64 = 16; @@ -23,15 +24,6 @@ const BITS_PER_PBA_ENTRY: usize = 64; const FUNCTION_MASK_BIT: u8 = 14; const MSIX_ENABLE_BIT: u8 = 15; -#[derive(Debug, thiserror::Error, displaydoc::Display)] -/// Error during handling of MSI-X interrupts -pub enum MsixError { - /// Failed enabling the interrupt route. - EnableInterruptRoute(io::Error), - /// Failed updating the interrupt route. - UpdateInterruptRoute(io::Error), -} - #[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)] /// MSI-X table entries pub struct MsixTableEntry { @@ -70,6 +62,7 @@ pub struct MsixConfigState { pba_entries: Vec, masked: bool, enabled: bool, + vectors: Vec, } /// MSI-X configuration @@ -80,8 +73,8 @@ pub struct MsixConfig { pub pba_entries: Vec, /// Id of the device using this set of vectors pub devid: u32, - /// Interrupts used to drive this set of vectors - pub interrupt_source_group: Arc, + /// Interrupts vectors used + pub vectors: Arc, /// Whether vectors are masked pub masked: bool, /// Whether vectors are enabled @@ -102,60 +95,47 @@ impl std::fmt::Debug for MsixConfig { impl MsixConfig { /// Create a new MSI-X configuration - pub fn new( - msix_vectors: u16, - interrupt_source_group: Arc, - devid: u32, - ) -> Result { - assert!(msix_vectors <= MAX_MSIX_VECTORS_PER_DEVICE); + pub fn new(vectors: Arc, devid: u32) -> Self { + assert!(vectors.num_vectors() <= MAX_MSIX_VECTORS_PER_DEVICE); let mut table_entries: Vec = Vec::new(); - table_entries.resize_with(msix_vectors as usize, Default::default); + table_entries.resize_with(vectors.num_vectors() as usize, Default::default); let mut pba_entries: Vec = Vec::new(); - let num_pba_entries: usize = (msix_vectors as usize).div_ceil(BITS_PER_PBA_ENTRY); + let num_pba_entries: usize = (vectors.num_vectors() as usize).div_ceil(BITS_PER_PBA_ENTRY); pba_entries.resize_with(num_pba_entries, Default::default); - Ok(MsixConfig { + MsixConfig { table_entries, pba_entries, devid, - interrupt_source_group, + vectors, masked: true, enabled: false, - }) + } } /// Create an MSI-X configuration from snapshot state pub fn from_state( state: MsixConfigState, + vm: Arc, devid: u32, - interrupt_source_group: Arc, - ) -> Result { + ) -> Result { + let vectors = Arc::new(MsixVectorGroup::restore(vm, &state.vectors)?); if state.enabled && !state.masked { for (idx, table_entry) in state.table_entries.iter().enumerate() { if table_entry.masked() { continue; } - let config = MsiIrqSourceConfig { + let config = MsixVectorConfig { high_addr: table_entry.msg_addr_hi, low_addr: table_entry.msg_addr_lo, data: table_entry.msg_data, devid, }; - interrupt_source_group - .update( - idx.try_into().unwrap(), - InterruptSourceConfig::MsiIrq(config), - state.masked, - true, - ) - .map_err(MsixError::UpdateInterruptRoute)?; - - interrupt_source_group - .enable() - .map_err(MsixError::EnableInterruptRoute)?; + vectors.update(idx, config, state.masked, true)?; + vectors.enable()?; } } @@ -163,7 +143,7 @@ impl MsixConfig { table_entries: state.table_entries, pba_entries: state.pba_entries, devid, - interrupt_source_group, + vectors, masked: state.masked, enabled: state.enabled, }) @@ -176,6 +156,7 @@ impl MsixConfig { pba_entries: self.pba_entries.clone(), masked: self.masked, enabled: self.enabled, + vectors: self.vectors.save(), } } @@ -192,25 +173,20 @@ impl MsixConfig { if self.enabled && !self.masked { debug!("MSI-X enabled for device 0x{:x}", self.devid); for (idx, table_entry) in self.table_entries.iter().enumerate() { - let config = MsiIrqSourceConfig { + let config = MsixVectorConfig { high_addr: table_entry.msg_addr_hi, low_addr: table_entry.msg_addr_lo, data: table_entry.msg_data, devid: self.devid, }; - if let Err(e) = self.interrupt_source_group.update( - idx.try_into().unwrap(), - InterruptSourceConfig::MsiIrq(config), - table_entry.masked(), - true, - ) { + if let Err(e) = self.vectors.update(idx, config, table_entry.masked(), true) { error!("Failed updating vector: {:?}", e); } } } else if old_enabled || !old_masked { debug!("MSI-X disabled for device 0x{:x}", self.devid); - if let Err(e) = self.interrupt_source_group.disable() { + if let Err(e) = self.vectors.disable() { error!("Failed disabling irq_fd: {:?}", e); } } @@ -339,19 +315,17 @@ impl MsixConfig { // this is safe because if the entry is masked (starts masked as per spec) // in the table then it won't be triggered. if self.enabled && !self.masked && !table_entry.masked() { - let config = MsiIrqSourceConfig { + let config = MsixVectorConfig { high_addr: table_entry.msg_addr_hi, low_addr: table_entry.msg_addr_lo, data: table_entry.msg_data, devid: self.devid, }; - if let Err(e) = self.interrupt_source_group.update( - index.try_into().unwrap(), - InterruptSourceConfig::MsiIrq(config), - table_entry.masked(), - true, - ) { + if let Err(e) = self + .vectors + .update(index, config, table_entry.masked(), true) + { error!("Failed updating vector: {:?}", e); } } @@ -459,10 +433,7 @@ impl MsixConfig { /// Inject an MSI-X interrupt and clear the PBA bit for a vector fn inject_msix_and_clear_pba(&mut self, vector: usize) { // Inject the MSI message - match self - .interrupt_source_group - .trigger(vector.try_into().unwrap()) - { + match self.vectors.trigger(vector) { Ok(_) => debug!("MSI-X injected on vector control flip"), Err(e) => error!("failed to inject MSI-X: {}", e), } @@ -529,77 +500,25 @@ impl MsixCap { #[cfg(test)] mod tests { - use std::sync::atomic::{AtomicUsize, Ordering}; - - use vm_device::interrupt::InterruptIndex; - use vmm_sys_util::eventfd::EventFd; - use super::*; + use crate::builder::tests::default_vmm; + use crate::logger::{IncMetric, METRICS}; + use crate::{Vm, check_metric_after_block}; - #[derive(Debug)] - struct MockInterrupt { - trigger_cnt: [AtomicUsize; 2], - update_cnt: [AtomicUsize; 2], - event_fd: [EventFd; 2], - } - - impl MockInterrupt { - fn new() -> Self { - MockInterrupt { - trigger_cnt: [AtomicUsize::new(0), AtomicUsize::new(0)], - update_cnt: [AtomicUsize::new(0), AtomicUsize::new(0)], - event_fd: [ - EventFd::new(libc::EFD_NONBLOCK).unwrap(), - EventFd::new(libc::EFD_NONBLOCK).unwrap(), - ], - } - } - - fn interrupt_cnt(&self, index: InterruptIndex) -> usize { - self.trigger_cnt[index as usize].load(Ordering::SeqCst) - } - - fn update_cnt(&self, index: InterruptIndex) -> usize { - self.update_cnt[index as usize].load(Ordering::SeqCst) - } - } - - impl InterruptSourceGroup for MockInterrupt { - fn trigger(&self, index: InterruptIndex) -> vm_device::interrupt::Result<()> { - self.trigger_cnt[index as usize].fetch_add(1, Ordering::SeqCst); - Ok(()) - } - - fn notifier(&self, index: InterruptIndex) -> Option<&EventFd> { - self.event_fd.get(index as usize) - } - - fn update( - &self, - index: InterruptIndex, - _config: InterruptSourceConfig, - _masked: bool, - _set_gsi: bool, - ) -> vm_device::interrupt::Result<()> { - self.update_cnt[index as usize].fetch_add(1, Ordering::SeqCst); - Ok(()) - } - - fn set_gsi(&self) -> vm_device::interrupt::Result<()> { - Ok(()) - } + fn msix_vector_group(nr_vectors: u16) -> Arc { + let vmm = default_vmm(); + Arc::new(Vm::create_msix_group(vmm.vm.clone(), nr_vectors).unwrap()) } #[test] #[should_panic] fn test_too_many_vectors() { - MsixConfig::new(2049, Arc::new(MockInterrupt::new()), 0x42).unwrap(); + MsixConfig::new(msix_vector_group(2049), 0x42); } #[test] fn test_new_msix_config() { - let vectors = Arc::new(MockInterrupt::new()); - let config = MsixConfig::new(2, vectors.clone(), 0x42).unwrap(); + let config = MsixConfig::new(msix_vector_group(2), 0x42); assert_eq!(config.devid, 0x42); assert!(config.masked); assert!(!config.enabled); @@ -609,8 +528,7 @@ mod tests { #[test] fn test_enable_msix_vectors() { - let vectors = Arc::new(MockInterrupt::new()); - let mut config = MsixConfig::new(2, vectors.clone(), 0x42).unwrap(); + let mut config = MsixConfig::new(msix_vector_group(2), 0x42); assert!(!config.enabled); assert!(config.masked); @@ -637,8 +555,7 @@ mod tests { #[test] #[should_panic] fn test_table_access_read_too_big() { - let vectors = Arc::new(MockInterrupt::new()); - let config = MsixConfig::new(2, vectors.clone(), 0x42).unwrap(); + let config = MsixConfig::new(msix_vector_group(2), 0x42); let mut buffer = [0u8; 16]; config.read_table(0, &mut buffer); @@ -646,8 +563,7 @@ mod tests { #[test] fn test_read_table_past_end() { - let vectors = Arc::new(MockInterrupt::new()); - let config = MsixConfig::new(2, vectors.clone(), 0x42).unwrap(); + let config = MsixConfig::new(msix_vector_group(2), 0x42); let mut buffer = [0u8; 8]; // We have 2 vectors (16 bytes each), so we should be able to read up to 32 bytes. @@ -658,8 +574,7 @@ mod tests { #[test] fn test_read_table_bad_length() { - let vectors = Arc::new(MockInterrupt::new()); - let config = MsixConfig::new(2, vectors.clone(), 0x42).unwrap(); + let config = MsixConfig::new(msix_vector_group(2), 0x42); let mut buffer = [0u8; 8]; // We can either read 4 or 8 bytes @@ -685,45 +600,56 @@ mod tests { #[test] fn test_access_table() { - let vectors = Arc::new(MockInterrupt::new()); - let mut config = MsixConfig::new(2, vectors.clone(), 0x42).unwrap(); + let mut config = MsixConfig::new(msix_vector_group(2), 0x42); // enabled and not masked - config.set_msg_ctl(0x8000); - assert_eq!(vectors.update_cnt(0), 1); - assert_eq!(vectors.update_cnt(1), 1); + check_metric_after_block!( + METRICS.interrupts.config_updates, + 2, + config.set_msg_ctl(0x8000) + ); let mut buffer = [0u8; 8]; // Write first vector's address with a single 8-byte write - config.write_table(0, &u64::to_le_bytes(0x0000_1312_0000_1110)); // It's still masked so shouldn't be updated - assert_eq!(vectors.update_cnt(0), 1); - assert_eq!(vectors.update_cnt(1), 1); + check_metric_after_block!( + METRICS.interrupts.config_updates, + 0, + config.write_table(0, &u64::to_le_bytes(0x0000_1312_0000_1110)) + ); + // Same for control and message data - config.write_table(8, &u64::to_le_bytes(0x0_0000_0020)); // Now, we enabled it, so we should see an update - assert_eq!(vectors.update_cnt(0), 2); - assert_eq!(vectors.update_cnt(1), 1); + check_metric_after_block!( + METRICS.interrupts.config_updates, + 1, + config.write_table(8, &u64::to_le_bytes(0x0_0000_0020)) + ); // Write second vector's fields with 4-byte writes - // low 32 bits of the address - config.write_table(16, &u32::to_le_bytes(0x4241)); - assert_eq!(vectors.update_cnt(0), 2); - // Still masked - assert_eq!(vectors.update_cnt(1), 1); - // high 32 bits of the address - config.write_table(20, &u32::to_le_bytes(0x4443)); - assert_eq!(vectors.update_cnt(0), 2); - // Still masked - assert_eq!(vectors.update_cnt(1), 1); - // message data - config.write_table(24, &u32::to_le_bytes(0x21)); - assert_eq!(vectors.update_cnt(0), 2); - // Still masked - assert_eq!(vectors.update_cnt(1), 1); - // vector control - config.write_table(28, &u32::to_le_bytes(0x0)); - assert_eq!(vectors.update_cnt(0), 2); - assert_eq!(vectors.update_cnt(1), 2); + // low 32 bits of the address (still masked) + check_metric_after_block!( + METRICS.interrupts.config_updates, + 0, + config.write_table(16, &u32::to_le_bytes(0x4241)) + ); + // high 32 bits of the address (still masked) + check_metric_after_block!( + METRICS.interrupts.config_updates, + 0, + config.write_table(20, &u32::to_le_bytes(0x4443)) + ); + // message data (still masked) + check_metric_after_block!( + METRICS.interrupts.config_updates, + 0, + config.write_table(24, &u32::to_le_bytes(0x21)) + ); + // vector control (now unmasked) + check_metric_after_block!( + METRICS.interrupts.config_updates, + 1, + config.write_table(28, &u32::to_le_bytes(0x0)) + ); assert_eq!(config.table_entries[0].msg_addr_hi, 0x1312); assert_eq!(config.table_entries[0].msg_addr_lo, 0x1110); @@ -770,29 +696,28 @@ mod tests { assert_eq!(0x0_0000_0020, u64::from_le_bytes(buffer)); // If we mask the interrupts we shouldn't see any update - config.write_table(12, &u32::to_le_bytes(0x1)); - config.write_table(28, &u32::to_le_bytes(0x1)); - assert_eq!(vectors.update_cnt(0), 2); - assert_eq!(vectors.update_cnt(1), 2); + check_metric_after_block!(METRICS.interrupts.config_updates, 0, { + config.write_table(12, &u32::to_le_bytes(0x1)); + config.write_table(28, &u32::to_le_bytes(0x1)); + }); // Un-masking them should update them - config.write_table(12, &u32::to_le_bytes(0x0)); - config.write_table(28, &u32::to_le_bytes(0x0)); - assert_eq!(vectors.update_cnt(0), 3); - assert_eq!(vectors.update_cnt(1), 3); + check_metric_after_block!(METRICS.interrupts.config_updates, 2, { + config.write_table(12, &u32::to_le_bytes(0x0)); + config.write_table(28, &u32::to_le_bytes(0x0)); + }); // Setting up the same config should have no effect - config.write_table(12, &u32::to_le_bytes(0x0)); - config.write_table(28, &u32::to_le_bytes(0x0)); - assert_eq!(vectors.update_cnt(0), 3); - assert_eq!(vectors.update_cnt(1), 3); + check_metric_after_block!(METRICS.interrupts.config_updates, 0, { + config.write_table(12, &u32::to_le_bytes(0x0)); + config.write_table(28, &u32::to_le_bytes(0x0)); + }); } #[test] #[should_panic] fn test_table_access_write_too_big() { - let vectors = Arc::new(MockInterrupt::new()); - let mut config = MsixConfig::new(2, vectors.clone(), 0x42).unwrap(); + let mut config = MsixConfig::new(msix_vector_group(2), 0x42); let buffer = [0u8; 16]; config.write_table(0, &buffer); @@ -800,8 +725,7 @@ mod tests { #[test] fn test_pba_read_too_big() { - let vectors = Arc::new(MockInterrupt::new()); - let config = MsixConfig::new(2, vectors.clone(), 0x42).unwrap(); + let config = MsixConfig::new(msix_vector_group(2), 0x42); let mut buffer = [0u8; 16]; config.read_pba(0, &mut buffer); @@ -810,8 +734,7 @@ mod tests { #[test] fn test_pba_invalid_offset() { - let vectors = Arc::new(MockInterrupt::new()); - let config = MsixConfig::new(2, vectors.clone(), 0x42).unwrap(); + let config = MsixConfig::new(msix_vector_group(2), 0x42); let mut buffer = [0u8; 8]; // Past the end of the PBA array @@ -829,8 +752,7 @@ mod tests { #[test] #[should_panic] fn test_set_pba_bit_vector_too_big() { - let vectors = Arc::new(MockInterrupt::new()); - let mut config = MsixConfig::new(2, vectors.clone(), 0x42).unwrap(); + let mut config = MsixConfig::new(msix_vector_group(2), 0x42); config.set_pba_bit(2048, false); } @@ -838,16 +760,14 @@ mod tests { #[test] #[should_panic] fn test_get_pba_bit_vector_too_big() { - let vectors = Arc::new(MockInterrupt::new()); - let config = MsixConfig::new(2, vectors.clone(), 0x42).unwrap(); + let config = MsixConfig::new(msix_vector_group(2), 0x42); config.get_pba_bit(2048); } #[test] fn test_pba_bit_invalid_vector() { - let vectors = Arc::new(MockInterrupt::new()); - let mut config = MsixConfig::new(2, vectors.clone(), 0x42).unwrap(); + let mut config = MsixConfig::new(msix_vector_group(2), 0x42); // We have two vectors, so setting the pending bit for the third one // should be ignored @@ -860,8 +780,7 @@ mod tests { #[test] fn test_pba_read() { - let vectors = Arc::new(MockInterrupt::new()); - let mut config = MsixConfig::new(128, vectors.clone(), 0x42).unwrap(); + let mut config = MsixConfig::new(msix_vector_group(128), 0x42); let mut buffer = [0u8; 8]; config.set_pba_bit(1, false); @@ -882,40 +801,33 @@ mod tests { #[test] fn test_pending_interrupt() { - let vectors = Arc::new(MockInterrupt::new()); - let mut config = MsixConfig::new(2, vectors.clone(), 0x42).unwrap(); + let mut config = MsixConfig::new(msix_vector_group(2), 0x42); config.set_pba_bit(1, false); assert_eq!(config.get_pba_bit(1), 1); // Enable MSI-X vector and unmask interrupts - config.set_msg_ctl(0x8000); - // Individual vectors are still masked, so no change - assert_eq!(vectors.interrupt_cnt(0), 0); - assert_eq!(vectors.interrupt_cnt(1), 0); + check_metric_after_block!(METRICS.interrupts.triggers, 0, config.set_msg_ctl(0x8000)); // Enable all vectors - config.write_table(8, &u64::to_le_bytes(0x0_0000_0020)); - config.write_table(24, &u64::to_le_bytes(0x0_0000_0020)); - // Vector one had a pending bit, so we must have triggered an interrupt for it // and cleared the pending bit - assert_eq!(vectors.interrupt_cnt(0), 0); - assert_eq!(vectors.interrupt_cnt(1), 1); + check_metric_after_block!(METRICS.interrupts.triggers, 1, { + config.write_table(8, &u64::to_le_bytes(0x0_0000_0020)); + config.write_table(24, &u64::to_le_bytes(0x0_0000_0020)); + }); assert_eq!(config.get_pba_bit(1), 0); // Check that interrupt is sent as well for enabled vectors once we unmask from // Message Control // Mask vectors and set pending bit for vector 0 - config.set_msg_ctl(0xc000); - config.set_pba_bit(0, false); - assert_eq!(vectors.interrupt_cnt(0), 0); - assert_eq!(vectors.interrupt_cnt(1), 1); + check_metric_after_block!(METRICS.interrupts.triggers, 0, { + config.set_msg_ctl(0xc000); + config.set_pba_bit(0, false); + }); // Unmask them - config.set_msg_ctl(0x8000); - assert_eq!(vectors.interrupt_cnt(0), 1); - assert_eq!(vectors.interrupt_cnt(1), 1); + check_metric_after_block!(METRICS.interrupts.triggers, 1, config.set_msg_ctl(0x8000)); assert_eq!(config.get_pba_bit(0), 0); } } diff --git a/src/vmm/src/vstate/interrupts.rs b/src/vmm/src/vstate/interrupts.rs new file mode 100644 index 00000000000..5246144d8f6 --- /dev/null +++ b/src/vmm/src/vstate/interrupts.rs @@ -0,0 +1,202 @@ +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::sync::Arc; +use std::sync::atomic::{AtomicBool, Ordering}; + +use kvm_ioctls::VmFd; +use vmm_sys_util::eventfd::EventFd; + +use crate::Vm; +use crate::logger::{IncMetric, METRICS}; +use crate::snapshot::Persist; + +#[derive(Debug, thiserror::Error, displaydoc::Display)] +/// Errors related with Firecracker interrupts +pub enum InterruptError { + /// Error allocating resources: {0} + Allocator(#[from] vm_allocator::Error), + /// IO error: {0} + Io(#[from] std::io::Error), + /// FamStruct error: {0} + FamStruct(#[from] vmm_sys_util::fam::Error), + /// KVM error: {0} + Kvm(#[from] kvm_ioctls::Error), + /// Invalid vector index: {0} + InvalidVectorIndex(usize), +} + +/// Configuration data for an MSI-X interrupt. +#[derive(Copy, Clone, Debug, Default)] +pub struct MsixVectorConfig { + /// High address to delivery message signaled interrupt. + pub high_addr: u32, + /// Low address to delivery message signaled interrupt. + pub low_addr: u32, + /// Data to write to delivery message signaled interrupt. + pub data: u32, + /// Unique ID of the device to delivery message signaled interrupt. + pub devid: u32, +} + +/// Type that describes an allocated interrupt +#[derive(Debug)] +pub struct MsixVector { + /// GSI used for this vector + pub gsi: u32, + /// EventFd used for this vector + pub event_fd: EventFd, + /// Flag determining whether the vector is enabled + pub enabled: AtomicBool, +} + +impl MsixVector { + /// Create a new [`MsixVector`] of a particular type + pub fn new(gsi: u32, enabled: bool) -> Result { + Ok(MsixVector { + gsi, + event_fd: EventFd::new(libc::EFD_NONBLOCK)?, + enabled: AtomicBool::new(enabled), + }) + } +} + +impl MsixVector { + /// Enable vector + pub fn enable(&self, vmfd: &VmFd) -> Result<(), InterruptError> { + if !self.enabled.load(Ordering::Acquire) { + vmfd.register_irqfd(&self.event_fd, self.gsi)?; + self.enabled.store(true, Ordering::Release); + } + + Ok(()) + } + + /// Disable vector + pub fn disable(&self, vmfd: &VmFd) -> Result<(), InterruptError> { + if self.enabled.load(Ordering::Acquire) { + vmfd.unregister_irqfd(&self.event_fd, self.gsi)?; + self.enabled.store(false, Ordering::Release); + } + + Ok(()) + } +} + +#[derive(Debug)] +/// MSI interrupts created for a VirtIO device +pub struct MsixVectorGroup { + /// Reference to the Vm object, which we'll need for interacting with the underlying KVM Vm + /// file descriptor + pub vm: Arc, + /// A list of all the MSI-X vectors + pub vectors: Vec, +} + +impl MsixVectorGroup { + /// Returns the number of vectors in this group + pub fn num_vectors(&self) -> u16 { + // It is safe to unwrap here. We are creating `MsixVectorGroup` objects through the + // `Vm::create_msix_group` where the argument for the number of `vectors` is a `u16`. + u16::try_from(self.vectors.len()).unwrap() + } + + /// Enable the MSI-X vector group + pub fn enable(&self) -> Result<(), InterruptError> { + for route in &self.vectors { + route.enable(&self.vm.common.fd)?; + } + + Ok(()) + } + + /// Disable the MSI-X vector group + pub fn disable(&self) -> Result<(), InterruptError> { + for route in &self.vectors { + route.disable(&self.vm.common.fd)?; + } + + Ok(()) + } + + /// Trigger an interrupt for a vector in the group + pub fn trigger(&self, index: usize) -> Result<(), InterruptError> { + self.notifier(index) + .ok_or(InterruptError::InvalidVectorIndex(index))? + .write(1)?; + METRICS.interrupts.triggers.inc(); + Ok(()) + } + + /// Get a referece to the underlying `EventFd` used to trigger interrupts for a vector in the + /// group + pub fn notifier(&self, index: usize) -> Option<&EventFd> { + self.vectors.get(index).map(|route| &route.event_fd) + } + + /// Update the MSI-X configuration for a vector in the group + pub fn update( + &self, + index: usize, + msi_config: MsixVectorConfig, + masked: bool, + set_gsi: bool, + ) -> Result<(), InterruptError> { + if let Some(vector) = self.vectors.get(index) { + METRICS.interrupts.config_updates.inc(); + // When an interrupt is masked the GSI will not be passed to KVM through + // KVM_SET_GSI_ROUTING. So, call [`disable()`] to unregister the interrupt file + // descriptor before passing the interrupt routes to KVM + if masked { + vector.disable(&self.vm.common.fd)?; + } + + self.vm.register_msi(vector, masked, msi_config)?; + if set_gsi { + self.vm + .set_gsi_routes() + .map_err(|err| std::io::Error::other(format!("MSI-X update: {err}")))? + } + + // Assign KVM_IRQFD after KVM_SET_GSI_ROUTING to avoid + // panic on kernel which does not have commit a80ced6ea514 + // (KVM: SVM: fix panic on out-of-bounds guest IRQ). + if !masked { + vector.enable(&self.vm.common.fd)?; + } + + return Ok(()); + } + + Err(InterruptError::InvalidVectorIndex(index)) + } +} + +impl<'a> Persist<'a> for MsixVectorGroup { + type State = Vec; + type ConstructorArgs = Arc; + type Error = InterruptError; + + fn save(&self) -> Self::State { + // We don't save the "enabled" state of the MSI interrupt. PCI devices store the MSI-X + // configuration and make sure that the vector is enabled during the restore path if it was + // initially enabled + self.vectors.iter().map(|route| route.gsi).collect() + } + + fn restore( + constructor_args: Self::ConstructorArgs, + state: &Self::State, + ) -> std::result::Result { + let mut vectors = Vec::with_capacity(state.len()); + + for gsi in state { + vectors.push(MsixVector::new(*gsi, false)?); + } + + Ok(MsixVectorGroup { + vm: constructor_args, + vectors, + }) + } +} diff --git a/src/vmm/src/vstate/mod.rs b/src/vmm/src/vstate/mod.rs index f4fa25914d0..da94095bc9e 100644 --- a/src/vmm/src/vstate/mod.rs +++ b/src/vmm/src/vstate/mod.rs @@ -1,6 +1,8 @@ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 +/// VM interrupts implementation. +pub mod interrupts; /// Module with Kvm implementation. pub mod kvm; /// Module with GuestMemory implementation. diff --git a/src/vmm/src/vstate/vm.rs b/src/vmm/src/vstate/vm.rs index 7e2b9975601..6a78beb584f 100644 --- a/src/vmm/src/vstate/vm.rs +++ b/src/vmm/src/vstate/vm.rs @@ -9,7 +9,6 @@ use std::collections::HashMap; use std::fs::OpenOptions; use std::io::Write; use std::path::Path; -use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::{Arc, Mutex, MutexGuard}; use anyhow::anyhow; @@ -22,9 +21,6 @@ use kvm_bindings::{ use kvm_ioctls::VmFd; use log::debug; use serde::{Deserialize, Serialize}; -use vm_device::interrupt::{ - InterruptIndex, InterruptSourceConfig, InterruptSourceGroup, MsiIrqSourceConfig, -}; use vmm_sys_util::errno; use vmm_sys_util::eventfd::EventFd; @@ -33,9 +29,9 @@ use crate::arch::{GSI_MSI_END, host_page_size}; use crate::logger::info; use crate::pci::{DeviceRelocation, PciDevice}; use crate::persist::CreateSnapshotError; -use crate::snapshot::Persist; use crate::utils::u64_to_usize; use crate::vmm_config::snapshot::SnapshotType; +use crate::vstate::interrupts::{InterruptError, MsixVector, MsixVectorConfig, MsixVectorGroup}; use crate::vstate::memory::{ Address, GuestMemory, GuestMemoryExtension, GuestMemoryMmap, GuestMemoryRegion, GuestRegionMmap, }; @@ -43,19 +39,6 @@ use crate::vstate::resources::ResourceAllocator; use crate::vstate::vcpu::VcpuError; use crate::{DirtyBitmap, Vcpu, mem_size_mib}; -#[derive(Debug, thiserror::Error, displaydoc::Display)] -/// Errors related with Firecracker interrupts -pub enum InterruptError { - /// Error allocating resources: {0} - Allocator(#[from] vm_allocator::Error), - /// EventFd error: {0} - EventFd(std::io::Error), - /// FamStruct error: {0} - FamStruct(#[from] vmm_sys_util::fam::Error), - /// KVM error: {0} - Kvm(#[from] kvm_ioctls::Error), -} - #[derive(Debug, Serialize, Deserialize)] /// A struct representing an interrupt line used by some device of the microVM pub struct RoutingEntry { @@ -63,179 +46,6 @@ pub struct RoutingEntry { masked: bool, } -/// Type that describes an allocated interrupt -#[derive(Debug)] -pub struct MsiVector { - /// GSI used for this vector - pub gsi: u32, - /// EventFd used for this vector - pub event_fd: EventFd, - /// Flag determining whether the vector is enabled - pub enabled: AtomicBool, -} - -impl MsiVector { - /// Create a new [`MsiVector`] of a particular type - pub fn new(gsi: u32, enabled: bool) -> Result { - Ok(MsiVector { - gsi, - event_fd: EventFd::new(libc::EFD_NONBLOCK).map_err(InterruptError::EventFd)?, - enabled: AtomicBool::new(enabled), - }) - } -} - -impl MsiVector { - /// Enable vector - fn enable(&self, vmfd: &VmFd) -> Result<(), errno::Error> { - if !self.enabled.load(Ordering::Acquire) { - vmfd.register_irqfd(&self.event_fd, self.gsi)?; - self.enabled.store(true, Ordering::Release); - } - - Ok(()) - } - - /// Disable vector - fn disable(&self, vmfd: &VmFd) -> Result<(), errno::Error> { - if self.enabled.load(Ordering::Acquire) { - vmfd.unregister_irqfd(&self.event_fd, self.gsi)?; - self.enabled.store(false, Ordering::Release); - } - - Ok(()) - } -} - -#[derive(Debug)] -/// MSI interrupts created for a VirtIO device -pub struct MsiVectorGroup { - vm: Arc, - irq_routes: Vec, -} - -impl MsiVectorGroup { - /// Returns the number of vectors in this group - pub fn num_vectors(&self) -> u16 { - // It is safe to unwrap here. We are creating `MsiVectorGroup` objects through the - // `Vm::create_msix_group` where the argument for the number of `irq_routes` is a `u16`. - u16::try_from(self.irq_routes.len()).unwrap() - } -} - -impl<'a> Persist<'a> for MsiVectorGroup { - type State = Vec; - type ConstructorArgs = Arc; - type Error = InterruptError; - - fn save(&self) -> Self::State { - // We don't save the "enabled" state of the MSI interrupt. PCI devices store the MSI-X - // configuration and make sure that the vector is enabled during the restore path if it was - // initially enabled - self.irq_routes.iter().map(|route| route.gsi).collect() - } - - fn restore( - constructor_args: Self::ConstructorArgs, - state: &Self::State, - ) -> std::result::Result { - let mut irq_routes = Vec::with_capacity(state.len()); - - for gsi in state { - irq_routes.push(MsiVector::new(*gsi, false)?); - } - - Ok(MsiVectorGroup { - vm: constructor_args, - irq_routes, - }) - } -} - -impl InterruptSourceGroup for MsiVectorGroup { - fn enable(&self) -> vm_device::interrupt::Result<()> { - for route in &self.irq_routes { - route.enable(&self.vm.common.fd)?; - } - - Ok(()) - } - - fn disable(&self) -> vm_device::interrupt::Result<()> { - for route in &self.irq_routes { - route.disable(&self.vm.common.fd)?; - } - - Ok(()) - } - - fn trigger(&self, index: InterruptIndex) -> vm_device::interrupt::Result<()> { - self.notifier(index) - .ok_or_else(|| { - std::io::Error::other(format!("trigger: invalid interrupt index {index}")) - })? - .write(1) - } - - fn notifier(&self, index: InterruptIndex) -> Option<&EventFd> { - self.irq_routes - .get(index as usize) - .map(|route| &route.event_fd) - } - - fn update( - &self, - index: InterruptIndex, - config: InterruptSourceConfig, - masked: bool, - set_gsi: bool, - ) -> vm_device::interrupt::Result<()> { - let msi_config = match config { - InterruptSourceConfig::LegacyIrq(_) => { - return Err(std::io::Error::other( - "MSI-x update: invalid configuration type", - )); - } - InterruptSourceConfig::MsiIrq(config) => config, - }; - - if let Some(route) = self.irq_routes.get(index as usize) { - // When an interrupt is masked the GSI will not be passed to KVM through - // KVM_SET_GSI_ROUTING. So, call [`disable()`] to unregister the interrupt file - // descriptor before passing the interrupt routes to KVM - if masked { - route.disable(&self.vm.common.fd)?; - } - - self.vm.register_msi(route, masked, msi_config)?; - if set_gsi { - self.vm - .set_gsi_routes() - .map_err(|err| std::io::Error::other(format!("MSI-X update: {err}")))? - } - - // Assign KVM_IRQFD after KVM_SET_GSI_ROUTING to avoid - // panic on kernel which does not have commit a80ced6ea514 - // (KVM: SVM: fix panic on out-of-bounds guest IRQ). - if !masked { - route.enable(&self.vm.common.fd)?; - } - - return Ok(()); - } - - Err(std::io::Error::other(format!( - "MSI-X update: invalid vector index {index}" - ))) - } - - fn set_gsi(&self) -> vm_device::interrupt::Result<()> { - self.vm - .set_gsi_routes() - .map_err(|err| std::io::Error::other(format!("MSI-X update: {err}"))) - } -} - /// Architecture independent parts of a VM. #[derive(Debug)] pub struct VmCommon { @@ -550,9 +360,9 @@ impl Vm { /// Register an MSI device interrupt pub fn register_msi( &self, - route: &MsiVector, + route: &MsixVector, masked: bool, - config: MsiIrqSourceConfig, + config: MsixVectorConfig, ) -> Result<(), errno::Error> { let mut entry = kvm_irq_routing_entry { gsi: route.gsi, @@ -591,18 +401,18 @@ impl Vm { } /// Create a group of MSI-X interrupts - pub fn create_msix_group(vm: Arc, count: u16) -> Result { + pub fn create_msix_group(vm: Arc, count: u16) -> Result { debug!("Creating new MSI group with {count} vectors"); - let mut irq_routes = Vec::with_capacity(count as usize); + let mut vectors = Vec::with_capacity(count as usize); for gsi in vm .resource_allocator() .allocate_gsi_msi(count as u32)? .iter() { - irq_routes.push(MsiVector::new(*gsi, false)?); + vectors.push(MsixVector::new(*gsi, false)?); } - Ok(MsiVectorGroup { vm, irq_routes }) + Ok(MsixVectorGroup { vm, vectors }) } /// Set GSI routes to KVM @@ -677,11 +487,13 @@ impl DeviceRelocation for Vm { #[cfg(test)] pub(crate) mod tests { - use vm_device::interrupt::{InterruptSourceConfig, LegacyIrqSourceConfig}; + use std::sync::atomic::Ordering; + use vm_memory::GuestAddress; use vm_memory::mmap::MmapRegionBuilder; use super::*; + use crate::snapshot::Persist; #[cfg(target_arch = "x86_64")] use crate::snapshot::Snapshot; use crate::test_utils::single_region_mem_raw; @@ -798,7 +610,7 @@ pub(crate) mod tests { vm.setup_irqchip(1).unwrap(); } - fn create_msix_group(vm: &Arc) -> MsiVectorGroup { + fn create_msix_group(vm: &Arc) -> MsixVectorGroup { Vm::create_msix_group(vm.clone(), 4).unwrap() } @@ -818,13 +630,13 @@ pub(crate) mod tests { let msix_group = create_msix_group(&vm); // Initially all vectors are disabled - for route in &msix_group.irq_routes { + for route in &msix_group.vectors { assert!(!route.enabled.load(Ordering::Acquire)) } // Enable works msix_group.enable().unwrap(); - for route in &msix_group.irq_routes { + for route in &msix_group.vectors { assert!(route.enabled.load(Ordering::Acquire)); } // Enabling an enabled group doesn't error out @@ -832,7 +644,7 @@ pub(crate) mod tests { // Disable works msix_group.disable().unwrap(); - for route in &msix_group.irq_routes { + for route in &msix_group.vectors { assert!(!route.enabled.load(Ordering::Acquire)) } // Disabling a disabled group doesn't error out @@ -868,29 +680,18 @@ pub(crate) mod tests { assert!(msix_group.notifier(4).is_none()); } - #[test] - fn test_msi_vector_group_update_wrong_config() { - let (_, vm) = setup_vm_with_memory(mib_to_bytes(128)); - let vm = Arc::new(vm); - let msix_group = create_msix_group(&vm); - let irq_config = LegacyIrqSourceConfig { irqchip: 0, pin: 0 }; - msix_group - .update(0, InterruptSourceConfig::LegacyIrq(irq_config), true, true) - .unwrap_err(); - } - #[test] fn test_msi_vector_group_update_invalid_vector() { let (_, mut vm) = setup_vm_with_memory(mib_to_bytes(128)); enable_irqchip(&mut vm); let vm = Arc::new(vm); let msix_group = create_msix_group(&vm); - let config = InterruptSourceConfig::MsiIrq(MsiIrqSourceConfig { + let config = MsixVectorConfig { high_addr: 0x42, low_addr: 0x12, data: 0x12, devid: 0xafa, - }); + }; msix_group.update(0, config, true, true).unwrap(); msix_group.update(4, config, true, true).unwrap_err(); } @@ -904,7 +705,7 @@ pub(crate) mod tests { let msix_group = create_msix_group(&vm); // Set some configuration for the vectors. Initially all are masked - let mut config = MsiIrqSourceConfig { + let mut config = MsixVectorConfig { high_addr: 0x42, low_addr: 0x13, data: 0x12, @@ -912,13 +713,11 @@ pub(crate) mod tests { }; for i in 0..4 { config.data = 0x12 * i; - msix_group - .update(i, InterruptSourceConfig::MsiIrq(config), true, false) - .unwrap(); + msix_group.update(i as usize, config, true, false).unwrap(); } // All vectors should be disabled - for vector in &msix_group.irq_routes { + for vector in &msix_group.vectors { assert!(!vector.enabled.load(Ordering::Acquire)); } @@ -956,9 +755,7 @@ pub(crate) mod tests { // Updating the config of a vector should enable its route (and only its route) config.data = 0; - msix_group - .update(0, InterruptSourceConfig::MsiIrq(config), false, true) - .unwrap(); + msix_group.update(0, config, false, true).unwrap(); for i in 0..4 { let gsi = crate::arch::GSI_MSI_START + i; let interrupts = vm.common.interrupts.lock().unwrap(); @@ -975,31 +772,6 @@ pub(crate) mod tests { } } - #[cfg(target_arch = "x86_64")] - #[test] - fn test_msi_vector_group_set_gsi_without_ioapic() { - // Setting GSI routes without IOAPIC setup should fail on x86. Apparently, it doesn't fail - // on Aarch64 - let (_, vm) = setup_vm_with_memory(mib_to_bytes(128)); - let vm = Arc::new(vm); - let msix_group = create_msix_group(&vm); - let err = msix_group.set_gsi().unwrap_err(); - assert_eq!( - format!("{err}"), - "MSI-X update: KVM error: Invalid argument (os error 22)" - ); - } - - #[test] - fn test_msi_vector_group_set_gsi() { - let (_, mut vm) = setup_vm_with_memory(mib_to_bytes(128)); - enable_irqchip(&mut vm); - let vm = Arc::new(vm); - let msix_group = create_msix_group(&vm); - - msix_group.set_gsi().unwrap(); - } - #[test] fn test_msi_vector_group_persistence() { let (_, mut vm) = setup_vm_with_memory(mib_to_bytes(128)); @@ -1009,14 +781,14 @@ pub(crate) mod tests { msix_group.enable().unwrap(); let state = msix_group.save(); - let restored_group = MsiVectorGroup::restore(vm, &state).unwrap(); + let restored_group = MsixVectorGroup::restore(vm, &state).unwrap(); assert_eq!(msix_group.num_vectors(), restored_group.num_vectors()); // Even if an MSI group is enabled, we don't save it as such. During restoration, the PCI // transport will make sure the correct config is set for the vectors and enable them // accordingly. - for (id, vector) in msix_group.irq_routes.iter().enumerate() { - let new_vector = &restored_group.irq_routes[id]; + for (id, vector) in msix_group.vectors.iter().enumerate() { + let new_vector = &restored_group.vectors[id]; assert_eq!(vector.gsi, new_vector.gsi); assert!(!new_vector.enabled.load(Ordering::Acquire)); } diff --git a/tests/host_tools/fcmetrics.py b/tests/host_tools/fcmetrics.py index 3a93ced14d3..864481b68e2 100644 --- a/tests/host_tools/fcmetrics.py +++ b/tests/host_tools/fcmetrics.py @@ -292,6 +292,7 @@ def validate_fc_metrics(metrics): "entropy_rate_limiter_throttled", "rate_limiter_event_count", ], + "interrupts": ["triggers", "config_updates"], } # validate timestamp before jsonschema validation which some more time From 6de3060caa7fd1467fb6b628fa05e6c7427fde60 Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Mon, 29 Sep 2025 17:52:22 +0200 Subject: [PATCH 6/6] refactor(pci): remove vm-device crate We are only using the Bus-related definitions from there now. Move those in a module under `vmm` and drop the dependency to `vm-device`. Signed-off-by: Babis Chalios --- Cargo.lock | 9 - src/vm-device/Cargo.toml | 16 -- src/vm-device/src/interrupt/mod.rs | 194 ------------------ src/vm-device/src/lib.rs | 62 ------ src/vmm/Cargo.toml | 1 - src/vmm/src/arch/aarch64/vcpu.rs | 3 +- src/vmm/src/arch/x86_64/vcpu.rs | 7 +- src/vmm/src/arch/x86_64/vm.rs | 5 +- src/vmm/src/device_manager/legacy.rs | 3 +- src/vmm/src/device_manager/mmio.rs | 5 +- src/vmm/src/device_manager/mod.rs | 5 +- src/vmm/src/device_manager/pci_mngr.rs | 2 +- src/vmm/src/device_manager/persist.rs | 3 +- src/vmm/src/devices/legacy/i8042.rs | 4 +- src/vmm/src/devices/legacy/rtc_pl031.rs | 2 +- src/vmm/src/devices/legacy/serial.rs | 4 +- src/vmm/src/devices/pci/pci_segment.rs | 2 +- src/vmm/src/devices/pseudo/boot_timer.rs | 3 +- src/vmm/src/devices/virtio/transport/mmio.rs | 4 +- .../devices/virtio/transport/pci/device.rs | 3 +- src/vmm/src/pci/bus.rs | 4 +- src/{vm-device/src => vmm/src/vstate}/bus.rs | 41 ++-- src/vmm/src/vstate/mod.rs | 2 + src/vmm/src/vstate/vcpu.rs | 9 +- src/vmm/src/vstate/vm.rs | 5 +- src/vmm/tests/devices.rs | 2 +- 26 files changed, 66 insertions(+), 334 deletions(-) delete mode 100644 src/vm-device/Cargo.toml delete mode 100644 src/vm-device/src/interrupt/mod.rs delete mode 100644 src/vm-device/src/lib.rs rename src/{vm-device/src => vmm/src/vstate}/bus.rs (91%) diff --git a/Cargo.lock b/Cargo.lock index 85aefa303dd..10c8af01e65 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1648,14 +1648,6 @@ dependencies = [ "thiserror 2.0.17", ] -[[package]] -name = "vm-device" -version = "0.1.0" -dependencies = [ - "serde", - "vmm-sys-util", -] - [[package]] name = "vm-fdt" version = "0.3.0" @@ -1722,7 +1714,6 @@ dependencies = [ "uuid", "vhost", "vm-allocator", - "vm-device", "vm-fdt", "vm-memory", "vm-superio", diff --git a/src/vm-device/Cargo.toml b/src/vm-device/Cargo.toml deleted file mode 100644 index 53842418b5d..00000000000 --- a/src/vm-device/Cargo.toml +++ /dev/null @@ -1,16 +0,0 @@ -[package] -authors = ["The Cloud Hypervisor Authors"] -edition = "2021" -name = "vm-device" -version = "0.1.0" -license = "Apache-2.0 AND BSD-3-Clause" - -[lib] -bench = false - -[features] -default = [] - -[dependencies] -serde = { version = "1.0.228", features = ["derive", "rc"] } -vmm-sys-util = { version = "0.14.0", features = ["with-serde"] } diff --git a/src/vm-device/src/interrupt/mod.rs b/src/vm-device/src/interrupt/mod.rs deleted file mode 100644 index da5d87a4e1a..00000000000 --- a/src/vm-device/src/interrupt/mod.rs +++ /dev/null @@ -1,194 +0,0 @@ -// Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. -// Copyright (C) 2019 Alibaba Cloud. All rights reserved. -// Copyright © 2019 Intel Corporation -// -// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause - -//! Traits and Structs to manage interrupt sources for devices. -//! -//! In system programming, an interrupt is a signal to the processor emitted by hardware or -//! software indicating an event that needs immediate attention. An interrupt alerts the processor -//! to a high-priority condition requiring the interruption of the current code the processor is -//! executing. The processor responds by suspending its current activities, saving its state, and -//! executing a function called an interrupt handler (or an interrupt service routine, ISR) to deal -//! with the event. This interruption is temporary, and, after the interrupt handler finishes, -//! unless handling the interrupt has emitted a fatal error, the processor resumes normal -//! activities. -//! -//! Hardware interrupts are used by devices to communicate that they require attention from the -//! operating system, or a bare-metal program running on the CPU if there are no OSes. The act of -//! initiating a hardware interrupt is referred to as an interrupt request (IRQ). Different devices -//! are usually associated with different interrupts using a unique value associated with each -//! interrupt. This makes it possible to know which hardware device caused which interrupts. -//! These interrupt values are often called IRQ lines, or just interrupt lines. -//! -//! Nowadays, IRQ lines is not the only mechanism to deliver device interrupts to processors. -//! MSI [(Message Signaled Interrupt)](https://en.wikipedia.org/wiki/Message_Signaled_Interrupts) -//! is another commonly used alternative in-band method of signaling an interrupt, using special -//! in-band messages to replace traditional out-of-band assertion of dedicated interrupt lines. -//! While more complex to implement in a device, message signaled interrupts have some significant -//! advantages over pin-based out-of-band interrupt signaling. Message signaled interrupts are -//! supported in PCI bus since its version 2.2, and in later available PCI Express bus. Some -//! non-PCI architectures also use message signaled interrupts. -//! -//! While IRQ is a term commonly used by Operating Systems when dealing with hardware -//! interrupts, the IRQ numbers managed by OSes are independent of the ones managed by VMM. -//! For simplicity sake, the term `Interrupt Source` is used instead of IRQ to represent both -//! pin-based interrupts and MSI interrupts. -//! -//! A device may support multiple types of interrupts, and each type of interrupt may support one -//! or multiple interrupt sources. For example, a PCI device may support: -//! * Legacy Irq: exactly one interrupt source. -//! * PCI MSI Irq: 1,2,4,8,16,32 interrupt sources. -//! * PCI MSIx Irq: 2^n(n=0-11) interrupt sources. -//! -//! A distinct Interrupt Source Identifier (ISID) will be assigned to each interrupt source. -//! An ID allocator will be used to allocate and free Interrupt Source Identifiers for devices. -//! To decouple the vm-device crate from the ID allocator, the vm-device crate doesn't take the -//! responsibility to allocate/free Interrupt Source IDs but only makes use of assigned IDs. -//! -//! The overall flow to deal with interrupts is: -//! * The VMM creates an interrupt manager -//! * The VMM creates a device manager, passing on an reference to the interrupt manager -//! * The device manager passes on an reference to the interrupt manager to all registered devices -//! * The guest kernel loads drivers for virtual devices -//! * The guest device driver determines the type and number of interrupts needed, and update the -//! device configuration -//! * The virtual device backend requests the interrupt manager to create an interrupt group -//! according to guest configuration information - -use std::sync::Arc; - -use vmm_sys_util::eventfd::EventFd; - -/// Reuse std::io::Result to simplify interoperability among crates. -pub type Result = std::io::Result; - -/// Data type to store an interrupt source identifier. -pub type InterruptIndex = u32; - -/// Configuration data for legacy interrupts. -/// -/// On x86 platforms, legacy interrupts means those interrupts routed through PICs or IOAPICs. -#[derive(Copy, Clone, Debug)] -pub struct LegacyIrqSourceConfig { - pub irqchip: u32, - pub pin: u32, -} - -/// Configuration data for MSI/MSI-X interrupts. -/// -/// On x86 platforms, these interrupts are vectors delivered directly to the LAPIC. -#[derive(Copy, Clone, Debug, Default)] -pub struct MsiIrqSourceConfig { - /// High address to delivery message signaled interrupt. - pub high_addr: u32, - /// Low address to delivery message signaled interrupt. - pub low_addr: u32, - /// Data to write to delivery message signaled interrupt. - pub data: u32, - /// Unique ID of the device to delivery message signaled interrupt. - pub devid: u32, -} - -/// Configuration data for an interrupt source. -#[derive(Copy, Clone, Debug)] -pub enum InterruptSourceConfig { - /// Configuration data for Legacy interrupts. - LegacyIrq(LegacyIrqSourceConfig), - /// Configuration data for PciMsi, PciMsix and generic MSI interrupts. - MsiIrq(MsiIrqSourceConfig), -} - -/// Configuration data for legacy, pin based interrupt groups. -/// -/// A legacy interrupt group only takes one irq number as its configuration. -#[derive(Copy, Clone, Debug)] -pub struct LegacyIrqGroupConfig { - /// Legacy irq number. - pub irq: InterruptIndex, -} - -/// Configuration data for MSI/MSI-X interrupt groups -/// -/// MSI/MSI-X interrupt groups are basically a set of vectors. -#[derive(Copy, Clone, Debug)] -pub struct MsiIrqGroupConfig { - /// First index of the MSI/MSI-X interrupt vectors - pub base: InterruptIndex, - /// Number of vectors in the MSI/MSI-X group. - pub count: InterruptIndex, -} - -/// Trait to manage interrupt sources for virtual device backends. -/// -/// The InterruptManager implementations should protect itself from concurrent accesses internally, -/// so it could be invoked from multi-threaded context. -pub trait InterruptManager: Send + Sync { - type GroupConfig; - - /// Create an [InterruptSourceGroup](trait.InterruptSourceGroup.html) object to manage - /// interrupt sources for a virtual device - /// - /// An [InterruptSourceGroup](trait.InterruptSourceGroup.html) object manages all interrupt - /// sources of the same type for a virtual device. - /// - /// # Arguments - /// * interrupt_type: type of interrupt source. - /// * base: base Interrupt Source ID to be managed by the group object. - /// * count: number of Interrupt Sources to be managed by the group object. - fn create_group(&self, config: Self::GroupConfig) -> Result>; - - /// Destroy an [InterruptSourceGroup](trait.InterruptSourceGroup.html) object created by - /// [create_group()](trait.InterruptManager.html#tymethod.create_group). - /// - /// Assume the caller takes the responsibility to disable all interrupt sources of the group - /// before calling destroy_group(). This assumption helps to simplify InterruptSourceGroup - /// implementations. - fn destroy_group(&self, group: Arc) -> Result<()>; -} - -pub trait InterruptSourceGroup: Send + Sync { - /// Enable the interrupt sources in the group to generate interrupts. - fn enable(&self) -> Result<()> { - // Not all interrupt sources can be enabled. - // To accommodate this, we can have a no-op here. - Ok(()) - } - - /// Disable the interrupt sources in the group to generate interrupts. - fn disable(&self) -> Result<()> { - // Not all interrupt sources can be disabled. - // To accommodate this, we can have a no-op here. - Ok(()) - } - - /// Inject an interrupt from this interrupt source into the guest. - fn trigger(&self, index: InterruptIndex) -> Result<()>; - - /// Returns an interrupt notifier from this interrupt. - /// - /// An interrupt notifier allows for external components and processes - /// to inject interrupts into a guest, by writing to the file returned - /// by this method. - #[allow(unused_variables)] - fn notifier(&self, index: InterruptIndex) -> Option<&EventFd>; - - /// Update the interrupt source group configuration. - /// - /// # Arguments - /// * index: sub-index into the group. - /// * config: configuration data for the interrupt source. - /// * masked: if the interrupt is masked - /// * set_gsi: whether update the GSI routing table. - fn update( - &self, - index: InterruptIndex, - config: InterruptSourceConfig, - masked: bool, - set_gsi: bool, - ) -> Result<()>; - - /// Set the interrupt group GSI routing table. - fn set_gsi(&self) -> Result<()>; -} diff --git a/src/vm-device/src/lib.rs b/src/vm-device/src/lib.rs deleted file mode 100644 index b980b09c4b9..00000000000 --- a/src/vm-device/src/lib.rs +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 -// -// Copyright © 2020 Intel Corporation -// -// SPDX-License-Identifier: Apache-2.0 -// - -use serde::{Deserialize, Serialize}; - -mod bus; -pub mod interrupt; - -pub use self::bus::{Bus, BusDevice, BusDeviceSync, Error as BusError}; - -/// Type of Message Signalled Interrupt -#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] -pub enum MsiIrqType { - /// PCI MSI IRQ numbers. - PciMsi, - /// PCI MSIx IRQ numbers. - PciMsix, - /// Generic MSI IRQ numbers. - GenericMsi, -} - -#[derive(Copy, Clone, PartialEq, Eq, Serialize, Deserialize, Debug)] -pub enum PciBarType { - Io, - Mmio32, - Mmio64, -} - -/// Enumeration for device resources. -#[allow(missing_docs)] -#[derive(Clone, Debug, Serialize, Deserialize)] -pub enum Resource { - /// IO Port address range. - PioAddressRange { base: u16, size: u16 }, - /// Memory Mapped IO address range. - MmioAddressRange { base: u64, size: u64 }, - /// PCI BAR - PciBar { - index: usize, - base: u64, - size: u64, - type_: PciBarType, - prefetchable: bool, - }, - /// Legacy IRQ number. - LegacyIrq(u32), - /// Message Signaled Interrupt - MsiIrq { - ty: MsiIrqType, - base: u32, - size: u32, - }, - /// Network Interface Card MAC address. - MacAddress(String), - /// KVM memslot index. - KvmMemSlot(u32), -} diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml index d71c7b63926..bb035ae0b71 100644 --- a/src/vmm/Cargo.toml +++ b/src/vmm/Cargo.toml @@ -53,7 +53,6 @@ utils = { path = "../utils" } uuid = "1.18.1" vhost = { version = "0.14.0", features = ["vhost-user-frontend"] } vm-allocator = { version = "0.1.3", features = ["serde"] } -vm-device = { path = "../vm-device" } vm-memory = { version = "0.16.2", features = [ "backend-mmap", "backend-bitmap", diff --git a/src/vmm/src/arch/aarch64/vcpu.rs b/src/vmm/src/arch/aarch64/vcpu.rs index 7a591bdee91..39020b6fbb6 100644 --- a/src/vmm/src/arch/aarch64/vcpu.rs +++ b/src/vmm/src/arch/aarch64/vcpu.rs @@ -23,6 +23,7 @@ use crate::cpu_config::aarch64::custom_cpu_template::VcpuFeatures; use crate::cpu_config::templates::CpuConfiguration; use crate::logger::{IncMetric, METRICS, error}; use crate::vcpu::{VcpuConfig, VcpuError}; +use crate::vstate::bus::Bus; use crate::vstate::memory::{Address, GuestMemoryMmap}; use crate::vstate::vcpu::VcpuEmulation; use crate::vstate::vm::Vm; @@ -120,7 +121,7 @@ pub struct KvmVcpu { #[derive(Default, Debug)] pub struct Peripherals { /// mmio bus. - pub mmio_bus: Option>, + pub mmio_bus: Option>, } impl KvmVcpu { diff --git a/src/vmm/src/arch/x86_64/vcpu.rs b/src/vmm/src/arch/x86_64/vcpu.rs index eea1f24ae69..5dd6a2e55e5 100644 --- a/src/vmm/src/arch/x86_64/vcpu.rs +++ b/src/vmm/src/arch/x86_64/vcpu.rs @@ -25,6 +25,7 @@ use crate::arch::x86_64::msr::{MsrError, create_boot_msr_entries}; use crate::arch::x86_64::regs::{SetupFpuError, SetupRegistersError, SetupSpecialRegistersError}; use crate::cpu_config::x86_64::{CpuConfiguration, cpuid}; use crate::logger::{IncMetric, METRICS}; +use crate::vstate::bus::Bus; use crate::vstate::memory::GuestMemoryMmap; use crate::vstate::vcpu::{VcpuConfig, VcpuEmulation, VcpuError}; use crate::vstate::vm::Vm; @@ -160,9 +161,9 @@ pub struct KvmVcpu { #[derive(Default, Debug)] pub struct Peripherals { /// Pio bus. - pub pio_bus: Option>, + pub pio_bus: Option>, /// Mmio bus. - pub mmio_bus: Option>, + pub mmio_bus: Option>, } impl KvmVcpu { @@ -267,7 +268,7 @@ impl KvmVcpu { } /// Sets a Port Mapped IO bus for this vcpu. - pub fn set_pio_bus(&mut self, pio_bus: Arc) { + pub fn set_pio_bus(&mut self, pio_bus: Arc) { self.peripherals.pio_bus = Some(pio_bus); } diff --git a/src/vmm/src/arch/x86_64/vm.rs b/src/vmm/src/arch/x86_64/vm.rs index b71d18ae37b..37d97d8c212 100644 --- a/src/vmm/src/arch/x86_64/vm.rs +++ b/src/vmm/src/arch/x86_64/vm.rs @@ -14,6 +14,7 @@ use serde::{Deserialize, Serialize}; use crate::arch::x86_64::msr::MsrError; use crate::snapshot::Persist; use crate::utils::u64_to_usize; +use crate::vstate::bus::Bus; use crate::vstate::memory::{GuestMemoryExtension, GuestMemoryState}; use crate::vstate::resources::ResourceAllocator; use crate::vstate::vm::{VmCommon, VmError}; @@ -60,7 +61,7 @@ pub struct ArchVm { /// `None` if `KVM_CAP_XSAVE2` not supported. xsave2_size: Option, /// Port IO bus - pub pio_bus: Arc, + pub pio_bus: Arc, } impl ArchVm { @@ -95,7 +96,7 @@ impl ArchVm { .set_tss_address(u64_to_usize(crate::arch::x86_64::layout::KVM_TSS_ADDRESS)) .map_err(ArchVmError::SetTssAddress)?; - let pio_bus = Arc::new(vm_device::Bus::new()); + let pio_bus = Arc::new(Bus::new()); Ok(ArchVm { common, diff --git a/src/vmm/src/device_manager/legacy.rs b/src/vmm/src/device_manager/legacy.rs index 77173433d38..d8ff2aaf41b 100644 --- a/src/vmm/src/device_manager/legacy.rs +++ b/src/vmm/src/device_manager/legacy.rs @@ -18,12 +18,13 @@ use vmm_sys_util::eventfd::EventFd; use crate::Vm; use crate::devices::legacy::serial::SerialOut; use crate::devices::legacy::{EventFdTrigger, I8042Device, SerialDevice, SerialEventsWrapper}; +use crate::vstate::bus::BusError; /// Errors corresponding to the `PortIODeviceManager`. #[derive(Debug, derive_more::From, thiserror::Error, displaydoc::Display)] pub enum LegacyDeviceError { /// Failed to add legacy device to Bus: {0} - BusError(vm_device::BusError), + BusError(BusError), /// Failed to create EventFd: {0} EventFd(std::io::Error), } diff --git a/src/vmm/src/device_manager/mmio.rs b/src/vmm/src/device_manager/mmio.rs index 46accb637b0..f7514ed1e0c 100644 --- a/src/vmm/src/device_manager/mmio.rs +++ b/src/vmm/src/device_manager/mmio.rs @@ -27,6 +27,7 @@ use crate::devices::legacy::{RTCDevice, SerialDevice}; use crate::devices::pseudo::BootTimer; use crate::devices::virtio::device::VirtioDevice; use crate::devices::virtio::transport::mmio::MmioTransport; +use crate::vstate::bus::{Bus, BusError}; #[cfg(target_arch = "x86_64")] use crate::vstate::memory::GuestAddress; use crate::vstate::resources::ResourceAllocator; @@ -37,7 +38,7 @@ pub enum MmioError { /// Failed to allocate requested resource: {0} Allocator(#[from] vm_allocator::Error), /// Failed to insert device on the bus: {0} - BusInsert(#[from] vm_device::BusError), + BusInsert(#[from] BusError), /// Failed to allocate requested resourc: {0} Cmdline(#[from] linux_loader::cmdline::Error), /// Failed to find the device on the bus. @@ -360,7 +361,7 @@ impl MMIODeviceManager { /// Register a boot timer device. pub fn register_mmio_boot_timer( &mut self, - mmio_bus: &vm_device::Bus, + mmio_bus: &Bus, boot_timer: Arc>, ) -> Result<(), MmioError> { // Attach a new boot timer device. diff --git a/src/vmm/src/device_manager/mod.rs b/src/vmm/src/device_manager/mod.rs index d7052422a3a..d11996ad06d 100644 --- a/src/vmm/src/device_manager/mod.rs +++ b/src/vmm/src/device_manager/mod.rs @@ -36,6 +36,7 @@ use crate::devices::virtio::device::VirtioDevice; use crate::devices::virtio::transport::mmio::{IrqTrigger, MmioTransport}; use crate::resources::VmResources; use crate::snapshot::Persist; +use crate::vstate::bus::BusError; use crate::vstate::memory::GuestMemoryMmap; use crate::{EmulateSerialInitError, EventManager, Vm}; @@ -68,7 +69,7 @@ pub enum AttachDeviceError { /// MMIO transport error: {0} MmioTransport(#[from] MmioError), /// Error inserting device in bus: {0} - Bus(#[from] vm_device::BusError), + Bus(#[from] BusError), /// Error creating VMGenID device: {0} CreateVmGenID(#[from] VmGenIdError), /// Error while registering VMGenID with KVM: {0} @@ -427,7 +428,7 @@ pub enum DevicePersistError { /// Error resetting serial console: {0} SerialRestore(#[from] EmulateSerialInitError), /// Error inserting device in bus: {0} - Bus(#[from] vm_device::BusError), + Bus(#[from] BusError), /// Error creating DeviceManager: {0} DeviceManager(#[from] DeviceManagerCreateError), } diff --git a/src/vmm/src/device_manager/pci_mngr.rs b/src/vmm/src/device_manager/pci_mngr.rs index 83daf7c8b09..b9fab1afa3a 100644 --- a/src/vmm/src/device_manager/pci_mngr.rs +++ b/src/vmm/src/device_manager/pci_mngr.rs @@ -9,7 +9,6 @@ use std::sync::{Arc, Mutex}; use event_manager::{MutEventSubscriber, SubscriberOps}; use log::{debug, error, warn}; use serde::{Deserialize, Serialize}; -use vm_device::BusError; use super::persist::{MmdsState, SharedDeviceType}; use crate::devices::pci::PciSegment; @@ -34,6 +33,7 @@ use crate::pci::bus::PciRootError; use crate::resources::VmResources; use crate::snapshot::Persist; use crate::vmm_config::mmds::MmdsConfigError; +use crate::vstate::bus::BusError; use crate::vstate::interrupts::InterruptError; use crate::vstate::memory::GuestMemoryMmap; use crate::{EventManager, Vm}; diff --git a/src/vmm/src/device_manager/persist.rs b/src/vmm/src/device_manager/persist.rs index d6d46fff0f5..b6b2ff7eb70 100644 --- a/src/vmm/src/device_manager/persist.rs +++ b/src/vmm/src/device_manager/persist.rs @@ -43,6 +43,7 @@ use crate::mmds::data_store::MmdsVersion; use crate::resources::{ResourcesError, VmResources}; use crate::snapshot::Persist; use crate::vmm_config::mmds::MmdsConfigError; +use crate::vstate::bus::BusError; use crate::vstate::memory::GuestMemoryMmap; use crate::{EventManager, Vm}; @@ -58,7 +59,7 @@ pub enum DevicePersistError { /// Mmio transport MmioTransport, /// Bus error: {0} - Bus(#[from] vm_device::BusError), + Bus(#[from] BusError), #[cfg(target_arch = "aarch64")] /// Legacy: {0} Legacy(#[from] std::io::Error), diff --git a/src/vmm/src/devices/legacy/i8042.rs b/src/vmm/src/devices/legacy/i8042.rs index eee9957f208..664002637e6 100644 --- a/src/vmm/src/devices/legacy/i8042.rs +++ b/src/vmm/src/devices/legacy/i8042.rs @@ -14,6 +14,7 @@ use serde::Serialize; use vmm_sys_util::eventfd::EventFd; use crate::logger::{IncMetric, SharedIncMetric, error}; +use crate::vstate::bus::BusDevice; /// Errors thrown by the i8042 device. #[derive(Debug, thiserror::Error, displaydoc::Display)] @@ -210,7 +211,7 @@ impl I8042Device { } } -impl vm_device::BusDevice for I8042Device { +impl BusDevice for I8042Device { fn read(&mut self, _base: u64, offset: u64, data: &mut [u8]) { // All our ports are byte-wide. We don't know how to handle any wider data. if data.len() != 1 { @@ -342,7 +343,6 @@ impl vm_device::BusDevice for I8042Device { #[cfg(test)] mod tests { - use vm_device::BusDevice; use super::*; diff --git a/src/vmm/src/devices/legacy/rtc_pl031.rs b/src/vmm/src/devices/legacy/rtc_pl031.rs index b025c1d1512..96b1134eb1c 100644 --- a/src/vmm/src/devices/legacy/rtc_pl031.rs +++ b/src/vmm/src/devices/legacy/rtc_pl031.rs @@ -122,7 +122,7 @@ impl RTCDevice { } #[cfg(target_arch = "aarch64")] -impl vm_device::BusDevice for RTCDevice { +impl crate::vstate::bus::BusDevice for RTCDevice { fn read(&mut self, _base: u64, offset: u64, data: &mut [u8]) { self.bus_read(offset, data) } diff --git a/src/vmm/src/devices/legacy/serial.rs b/src/vmm/src/devices/legacy/serial.rs index 63834af3cb1..6b5c29aabf4 100644 --- a/src/vmm/src/devices/legacy/serial.rs +++ b/src/vmm/src/devices/legacy/serial.rs @@ -23,6 +23,7 @@ use vmm_sys_util::eventfd::EventFd; use crate::devices::legacy::EventFdTrigger; use crate::logger::{IncMetric, SharedIncMetric}; +use crate::vstate::bus::BusDevice; /// Received Data Available interrupt - for letting the driver know that /// there is some pending data to be processed. @@ -363,7 +364,7 @@ fn is_fifo(fd: RawFd) -> bool { (stat.st_mode & libc::S_IFIFO) != 0 } -impl vm_device::BusDevice for SerialWrapper +impl BusDevice for SerialWrapper where I: Read + AsRawFd + Send, { @@ -393,7 +394,6 @@ where mod tests { #![allow(clippy::undocumented_unsafe_blocks)] - use vm_device::BusDevice; use vmm_sys_util::eventfd::EventFd; use super::*; diff --git a/src/vmm/src/devices/pci/pci_segment.rs b/src/vmm/src/devices/pci/pci_segment.rs index 7e74132dd9a..04d902ba252 100644 --- a/src/vmm/src/devices/pci/pci_segment.rs +++ b/src/vmm/src/devices/pci/pci_segment.rs @@ -18,12 +18,12 @@ use pci::PciBdf; #[cfg(target_arch = "x86_64")] use uuid::Uuid; use vm_allocator::AddressAllocator; -use vm_device::{BusDeviceSync, BusError}; use crate::arch::{ArchVm as Vm, PCI_MMCONFIG_START, PCI_MMIO_CONFIG_SIZE_PER_SEGMENT}; #[cfg(target_arch = "x86_64")] use crate::pci::bus::{PCI_CONFIG_IO_PORT, PCI_CONFIG_IO_PORT_SIZE}; use crate::pci::bus::{PciBus, PciConfigIo, PciConfigMmio, PciRoot, PciRootError}; +use crate::vstate::bus::{BusDeviceSync, BusError}; use crate::vstate::resources::ResourceAllocator; pub struct PciSegment { diff --git a/src/vmm/src/devices/pseudo/boot_timer.rs b/src/vmm/src/devices/pseudo/boot_timer.rs index f0cf38977b5..81f9abb34f3 100644 --- a/src/vmm/src/devices/pseudo/boot_timer.rs +++ b/src/vmm/src/devices/pseudo/boot_timer.rs @@ -6,6 +6,7 @@ use std::sync::{Arc, Barrier}; use utils::time::TimestampUs; use crate::logger::info; +use crate::vstate::bus::BusDevice; const MAGIC_VALUE_SIGNAL_GUEST_BOOT_COMPLETE: u8 = 123; @@ -15,7 +16,7 @@ pub struct BootTimer { start_ts: TimestampUs, } -impl vm_device::BusDevice for BootTimer { +impl BusDevice for BootTimer { fn write(&mut self, _base: u64, offset: u64, data: &[u8]) -> Option> { // Only handle byte length instructions at a zero offset. if data.len() != 1 || offset != 0 { diff --git a/src/vmm/src/devices/virtio/transport/mmio.rs b/src/vmm/src/devices/virtio/transport/mmio.rs index 319e98b1537..f5039281f16 100644 --- a/src/vmm/src/devices/virtio/transport/mmio.rs +++ b/src/vmm/src/devices/virtio/transport/mmio.rs @@ -17,6 +17,7 @@ use crate::devices::virtio::device_status; use crate::devices::virtio::queue::Queue; use crate::logger::{IncMetric, METRICS, error, warn}; use crate::utils::byte_order; +use crate::vstate::bus::BusDevice; use crate::vstate::interrupts::InterruptError; use crate::vstate::memory::{GuestAddress, GuestMemoryMmap}; @@ -233,7 +234,7 @@ impl MmioTransport { } } -impl vm_device::BusDevice for MmioTransport { +impl BusDevice for MmioTransport { fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) { match offset { 0x00..=0xff if data.len() == 4 => { @@ -473,7 +474,6 @@ pub(crate) mod tests { use std::ops::Deref; - use vm_device::BusDevice; use vmm_sys_util::eventfd::EventFd; use super::*; diff --git a/src/vmm/src/devices/virtio/transport/pci/device.rs b/src/vmm/src/devices/virtio/transport/pci/device.rs index 59d02179fd1..9bc604fc7c1 100644 --- a/src/vmm/src/devices/virtio/transport/pci/device.rs +++ b/src/vmm/src/devices/virtio/transport/pci/device.rs @@ -24,8 +24,6 @@ use pci::{ use serde::{Deserialize, Serialize}; use thiserror::Error; use vm_allocator::{AddressAllocator, AllocPolicy, RangeInclusive}; -use vm_device::interrupt::{InterruptIndex, InterruptSourceGroup, MsiIrqGroupConfig}; -use vm_device::{BusDevice, PciBarType}; use vm_memory::{Address, ByteValued, GuestAddress, Le32}; use vmm_sys_util::errno; use vmm_sys_util::eventfd::EventFd; @@ -44,6 +42,7 @@ use crate::pci::msix::{MsixCap, MsixConfig, MsixConfigState}; use crate::pci::{BarReprogrammingParams, PciDevice}; use crate::snapshot::Persist; use crate::utils::u64_to_usize; +use crate::vstate::bus::BusDevice; use crate::vstate::interrupts::{InterruptError, MsixVectorGroup}; use crate::vstate::memory::GuestMemoryMmap; use crate::vstate::resources::ResourceAllocator; diff --git a/src/vmm/src/pci/bus.rs b/src/vmm/src/pci/bus.rs index 72e9f1c2815..a88263226de 100644 --- a/src/vmm/src/pci/bus.rs +++ b/src/vmm/src/pci/bus.rs @@ -12,12 +12,12 @@ use std::sync::{Arc, Barrier, Mutex}; use byteorder::{ByteOrder, LittleEndian}; use pci::{PciBridgeSubclass, PciClassCode}; -use vm_device::BusDevice; use crate::logger::error; use crate::pci::configuration::PciConfiguration; use crate::pci::{DeviceRelocation, PciDevice}; use crate::utils::u64_to_usize; +use crate::vstate::bus::BusDevice; /// Errors for device manager. #[derive(Debug, thiserror::Error, displaydoc::Display)] @@ -454,12 +454,12 @@ mod tests { use std::sync::{Arc, Mutex}; use pci::{PciClassCode, PciMassStorageSubclass}; - use vm_device::BusDevice; use super::{PciBus, PciConfigIo, PciConfigMmio, PciRoot}; use crate::pci::bus::{DEVICE_ID_INTEL_VIRT_PCIE_HOST, VENDOR_ID_INTEL}; use crate::pci::configuration::PciConfiguration; use crate::pci::{BarReprogrammingParams, DeviceRelocation, PciDevice}; + use crate::vstate::bus::BusDevice; #[derive(Debug, Default)] struct RelocationMock { diff --git a/src/vm-device/src/bus.rs b/src/vmm/src/vstate/bus.rs similarity index 91% rename from src/vm-device/src/bus.rs rename to src/vmm/src/vstate/bus.rs index 31880d354bb..9c050196323 100644 --- a/src/vm-device/src/bus.rs +++ b/src/vmm/src/vstate/bus.rs @@ -10,7 +10,7 @@ use std::cmp::Ordering; use std::collections::btree_map::BTreeMap; use std::sync::{Arc, Barrier, Mutex, RwLock, Weak}; -use std::{convert, error, fmt, io, result}; +use std::{error, fmt, result}; /// Trait for devices that respond to reads or writes in an arbitrary address space. /// @@ -26,6 +26,7 @@ pub trait BusDevice: Send { } } +/// Trait similar to [`BusDevice`] with the extra requirement that a device is `Send` and `Sync`. #[allow(unused_variables)] pub trait BusDeviceSync: Send + Sync { /// Reads at `offset` from this device @@ -51,8 +52,9 @@ impl BusDeviceSync for Mutex { } } +/// Error type for [`Bus`]-related operations. #[derive(Debug)] -pub enum Error { +pub enum BusError { /// The insertion failed because the new device overlapped with an old device. Overlap, /// Failed to operate on zero sized range. @@ -61,21 +63,16 @@ pub enum Error { MissingAddressRange, } -pub type Result = result::Result; +/// Result type for [`Bus`]-related operations. +pub type Result = result::Result; -impl fmt::Display for Error { +impl fmt::Display for BusError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "bus_error: {self:?}") } } -impl error::Error for Error {} - -impl convert::From for io::Error { - fn from(e: Error) -> Self { - io::Error::other(e) - } -} +impl error::Error for BusError {} /// Holds a base and length representing the address space occupied by a `BusDevice`. /// @@ -83,7 +80,9 @@ impl convert::From for io::Error { /// * len - The length of the range in bytes. #[derive(Debug, Copy, Clone)] pub struct BusRange { + /// base address of a range within a [`Bus`] pub base: u64, + /// length of a range within a [`Bus`] pub len: u64, } @@ -140,6 +139,7 @@ impl Bus { } #[allow(clippy::type_complexity)] + /// Get a reference to a device residing inside the bus at address [`addr`]. pub fn resolve(&self, addr: u64) -> Option<(u64, u64, Arc)> { if let Some((range, dev)) = self.first_before(addr) { let offset = addr - range.base; @@ -150,9 +150,10 @@ impl Bus { None } + /// Insert a device into the [`Bus`] in the range [`addr`, `addr` + `len`]. pub fn insert(&self, device: Arc, base: u64, len: u64) -> Result<()> { if len == 0 { - return Err(Error::ZeroSizedRange); + return Err(BusError::ZeroSizedRange); } // Reject all cases where the new device's range overlaps with an existing device. @@ -163,7 +164,7 @@ impl Bus { .iter() .any(|(range, _dev)| range.overlaps(base, len)) { - return Err(Error::Overlap); + return Err(BusError::Overlap); } if self @@ -173,7 +174,7 @@ impl Bus { .insert(BusRange { base, len }, Arc::downgrade(&device)) .is_some() { - return Err(Error::Overlap); + return Err(BusError::Overlap); } Ok(()) @@ -182,13 +183,13 @@ impl Bus { /// Removes the device at the given address space range. pub fn remove(&self, base: u64, len: u64) -> Result<()> { if len == 0 { - return Err(Error::ZeroSizedRange); + return Err(BusError::ZeroSizedRange); } let bus_range = BusRange { base, len }; if self.devices.write().unwrap().remove(&bus_range).is_none() { - return Err(Error::MissingAddressRange); + return Err(BusError::MissingAddressRange); } Ok(()) @@ -224,7 +225,7 @@ impl Bus { let device = if let Some((_, _, dev)) = self.resolve(old_base) { dev.clone() } else { - return Err(Error::MissingAddressRange); + return Err(BusError::MissingAddressRange); }; // Remove the old address range @@ -243,7 +244,7 @@ impl Bus { dev.read(base, offset, data); Ok(()) } else { - Err(Error::MissingAddressRange) + Err(BusError::MissingAddressRange) } } @@ -255,7 +256,7 @@ impl Bus { // OK to unwrap as lock() failing is a serious error condition and should panic. Ok(dev.write(base, offset, data)) } else { - Err(Error::MissingAddressRange) + Err(BusError::MissingAddressRange) } } } @@ -269,12 +270,14 @@ mod tests { struct ConstantDevice; impl BusDeviceSync for ConstantDevice { + #[allow(clippy::cast_possible_truncation)] fn read(&self, _base: u64, offset: u64, data: &mut [u8]) { for (i, v) in data.iter_mut().enumerate() { *v = (offset as u8) + (i as u8); } } + #[allow(clippy::cast_possible_truncation)] fn write(&self, _base: u64, offset: u64, data: &[u8]) -> Option> { for (i, v) in data.iter().enumerate() { assert_eq!(*v, (offset as u8) + (i as u8)) diff --git a/src/vmm/src/vstate/mod.rs b/src/vmm/src/vstate/mod.rs index da94095bc9e..5e58f09e05a 100644 --- a/src/vmm/src/vstate/mod.rs +++ b/src/vmm/src/vstate/mod.rs @@ -1,6 +1,8 @@ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 +/// Module with the implementation of a Bus that can hold devices. +pub mod bus; /// VM interrupts implementation. pub mod interrupts; /// Module with Kvm implementation. diff --git a/src/vmm/src/vstate/vcpu.rs b/src/vmm/src/vstate/vcpu.rs index b0f12990710..554ff3c9d62 100644 --- a/src/vmm/src/vstate/vcpu.rs +++ b/src/vmm/src/vstate/vcpu.rs @@ -27,6 +27,7 @@ use crate::logger::{IncMetric, METRICS}; use crate::seccomp::{BpfProgram, BpfProgramRef}; use crate::utils::signal::{Killable, register_signal_handler, sigrtmin}; use crate::utils::sm::StateMachine; +use crate::vstate::bus::Bus; use crate::vstate::vm::Vm; /// Signal number (SIGRTMIN) used to kick Vcpus. @@ -142,7 +143,7 @@ impl Vcpu { } /// Sets a MMIO bus for this vcpu. - pub fn set_mmio_bus(&mut self, mmio_bus: Arc) { + pub fn set_mmio_bus(&mut self, mmio_bus: Arc) { self.kvm_vcpu.peripherals.mmio_bus = Some(mmio_bus); } @@ -699,7 +700,6 @@ pub(crate) mod tests { use std::sync::{Arc, Barrier, Mutex}; use linux_loader::loader::KernelLoader; - use vm_device::BusDevice; use vmm_sys_util::errno; use super::*; @@ -708,6 +708,7 @@ pub(crate) mod tests { use crate::seccomp::get_empty_filters; use crate::utils::mib_to_bytes; use crate::utils::signal::validate_signal_num; + use crate::vstate::bus::BusDevice; use crate::vstate::kvm::Kvm; use crate::vstate::memory::{GuestAddress, GuestMemoryMmap}; use crate::vstate::vcpu::VcpuError as EmulationError; @@ -818,7 +819,7 @@ pub(crate) mod tests { ) ); - let bus = Arc::new(vm_device::Bus::new()); + let bus = Arc::new(Bus::new()); let dummy = Arc::new(Mutex::new(DummyDevice)); bus.insert(dummy, 0x10, 0x10).unwrap(); vcpu.set_mmio_bus(bus); @@ -965,7 +966,7 @@ pub(crate) mod tests { fn test_set_mmio_bus() { let (_, _, mut vcpu) = setup_vcpu(0x1000); assert!(vcpu.kvm_vcpu.peripherals.mmio_bus.is_none()); - vcpu.set_mmio_bus(Arc::new(vm_device::Bus::new())); + vcpu.set_mmio_bus(Arc::new(Bus::new())); assert!(vcpu.kvm_vcpu.peripherals.mmio_bus.is_some()); } diff --git a/src/vmm/src/vstate/vm.rs b/src/vmm/src/vstate/vm.rs index 6a78beb584f..8a479348747 100644 --- a/src/vmm/src/vstate/vm.rs +++ b/src/vmm/src/vstate/vm.rs @@ -31,6 +31,7 @@ use crate::pci::{DeviceRelocation, PciDevice}; use crate::persist::CreateSnapshotError; use crate::utils::u64_to_usize; use crate::vmm_config::snapshot::SnapshotType; +use crate::vstate::bus::Bus; use crate::vstate::interrupts::{InterruptError, MsixVector, MsixVectorConfig, MsixVectorGroup}; use crate::vstate::memory::{ Address, GuestMemory, GuestMemoryExtension, GuestMemoryMmap, GuestMemoryRegion, GuestRegionMmap, @@ -59,7 +60,7 @@ pub struct VmCommon { /// Allocator for VM resources pub resource_allocator: Mutex, /// MMIO bus - pub mmio_bus: Arc, + pub mmio_bus: Arc, } /// Errors associated with the wrappers over KVM ioctls. @@ -135,7 +136,7 @@ impl Vm { guest_memory: GuestMemoryMmap::default(), interrupts: Mutex::new(HashMap::with_capacity(GSI_MSI_END as usize + 1)), resource_allocator: Mutex::new(ResourceAllocator::new()), - mmio_bus: Arc::new(vm_device::Bus::new()), + mmio_bus: Arc::new(Bus::new()), }) } diff --git a/src/vmm/tests/devices.rs b/src/vmm/tests/devices.rs index a1ddf124cf7..5a228b96f96 100644 --- a/src/vmm/tests/devices.rs +++ b/src/vmm/tests/devices.rs @@ -12,10 +12,10 @@ use std::sync::{Arc, Mutex}; use event_manager::{EventManager, SubscriberOps}; use libc::EFD_NONBLOCK; -use vm_device::BusDevice; use vm_superio::Serial; use vmm::devices::legacy::serial::SerialOut; use vmm::devices::legacy::{EventFdTrigger, SerialEventsWrapper, SerialWrapper}; +use vmm::vstate::bus::BusDevice; use vmm_sys_util::eventfd::EventFd; fn create_serial(