diff --git a/docs/cpu_templates/cpuid-normalization.md b/docs/cpu_templates/cpuid-normalization.md index 2eb743add22..6a15ff4a9e0 100644 --- a/docs/cpu_templates/cpuid-normalization.md +++ b/docs/cpu_templates/cpuid-normalization.md @@ -28,15 +28,16 @@ See also: [boot protocol settings](boot-protocol.md) ## Intel-specific CPUID normalization -| Description | Leaf | Subleaf | Register | Bits | -| -------------------------------------------------------------- | :--------------------------------: | :-----: | :----------------: | :---: | -| Update deterministic cache parameters | 0x4 | all | EAX | 31:14 | -| Disable Intel Turbo Boost technology | 0x6 | - | EAX | 1 | -| Disable frequency selection | 0x6 | - | ECX | 3 | -| Set FDP_EXCPTN_ONLY bit | 0x7 | 0x0 | EBX | 6 | -| Set "Deprecates FPU CS and FPU DS values" bit | 0x7 | 0x0 | EBX | 13 | -| Disable performance monitoring | 0xa | - | EAX, EBX, ECX, EDX | all | -| Update brand string to use a default format and real frequency | 0x80000002, 0x80000003, 0x80000004 | - | EAX, EBX, ECX, EDX | all | +| Description | Leaf | Subleaf | Register | Bits | +| -------------------------------------------------------------- | :--------------------------------: | :-----: | :------: | :---: | +| Update deterministic cache parameters | 0x4 | all | EAX | 31:14 | +| Disable Intel Turbo Boost technology | 0x6 | - | EAX | 1 | +| Disable frequency selection | 0x6 | - | ECX | 3 | +| Set FDP_EXCPTN_ONLY bit | 0x7 | 0x0 | EBX | 6 | +| Set "Deprecates FPU CS and FPU DS values" bit | 0x7 | 0x0 | EBX | 13 | +| Disable performance monitoring | 0xa | - | all | all | +| Fill v2 extended topology enumeration leaf | 0x1f | all | all | all | +| Update brand string to use a default format and real frequency | 0x80000002, 0x80000003, 0x80000004 | - | all | all | ## AMD-specifc CPUID normalization diff --git a/src/vmm/src/cpu_config/x86_64/cpuid/amd/normalize.rs b/src/vmm/src/cpu_config/x86_64/cpuid/amd/normalize.rs index 111960db00a..e481bc17681 100644 
--- a/src/vmm/src/cpu_config/x86_64/cpuid/amd/normalize.rs +++ b/src/vmm/src/cpu_config/x86_64/cpuid/amd/normalize.rs @@ -48,10 +48,10 @@ pub enum PassthroughCacheTopologyError { pub enum FeatureEntryError { /// Missing leaf 0x80000008. MissingLeaf0x80000008, - /// Failed to set `nt` (number of physical threads) due to overflow. - NumberOfPhysicalThreadsOverflow, - /// Failed to set `nt` (number of physical threads). + /// Failed to set number of physical threads (CPUID.80000008H:ECX[7:0]): {0} NumberOfPhysicalThreads(CheckedAssignError), + /// Failed to set number of physical threads (CPUID.80000008H:ECX[7:0]) due to overflow. + NumberOfPhysicalThreadsOverflow, } /// Error type for setting leaf 0x8000001d section of [`super::AmdCpuid::normalize`]. @@ -59,22 +59,24 @@ pub enum FeatureEntryError { pub enum ExtendedCacheTopologyError { /// Missing leaf 0x8000001d. MissingLeaf0x8000001d, - /// Failed to set `num_sharing_cache` due to overflow. - NumSharingCacheOverflow, - /// Failed to set `num_sharing_cache`: {0} - NumSharingCache(CheckedAssignError), + #[rustfmt::skip] + /// Failed to set number of logical processors sharing cache(CPUID.(EAX=8000001DH,ECX={0}):EAX[25:14]): {1} + NumSharingCache(u32, CheckedAssignError), + #[rustfmt::skip] + /// Failed to set number of logical processors sharing cache (CPUID.(EAX=8000001DH,ECX={0}):EAX[25:14]) due to overflow. + NumSharingCacheOverflow(u32), } /// Error type for setting leaf 0x8000001e section of [`super::AmdCpuid::normalize`]. #[derive(Debug, thiserror::Error, displaydoc::Display, Eq, PartialEq)] pub enum ExtendedApicIdError { + /// Failed to set compute unit ID (CPUID.8000001EH:EBX[7:0]): {0} + ComputeUnitId(CheckedAssignError), + /// Failed to set extended APIC ID (CPUID.8000001EH:EAX[31:0]): {0} + ExtendedApicId(CheckedAssignError), /// Missing leaf 0x8000001e. 
MissingLeaf0x8000001e, - /// Failed to set `extended_apic_id`: {0} - ExtendedApicId(CheckedAssignError), - /// Failed to set `compute_unit_id`: {0} - ComputeUnitId(CheckedAssignError), - /// Failed to set `threads_per_compute_unit`: {0} + /// Failed to set threads per core unit (CPUID:8000001EH:EBX[15:8]): {0} ThreadPerComputeUnit(CheckedAssignError), } @@ -153,6 +155,7 @@ impl super::AmdCpuid { // On non-AMD hosts this condition may never be true thus this loop may be // indefinite. + // CPUID Fn8000_0001D_EAX_x[4:0] (Field Name: CacheType) // Cache type. Identifies the type of cache. // ```text // Bits Description @@ -162,8 +165,6 @@ impl super::AmdCpuid { // 03h Unified cache // 1Fh-04h Reserved. // ``` - // - // cache_type: 0..4, let cache_type = result.eax & 15; if cache_type == 0 { break; @@ -186,10 +187,9 @@ impl super::AmdCpuid { let leaf_80000001 = self .get_mut(&CpuidKey::leaf(0x80000001)) .ok_or(NormalizeCpuidError::MissingLeaf0x80000001)?; + // CPUID Fn8000_0001_ECX[22] (Field Name: TopologyExtensions) // Topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID // Fn8000_001E_EDX. - // - // topology_extensions: 22, set_bit(&mut leaf_80000001.result.ecx, 22, true); Ok(()) } @@ -220,6 +220,7 @@ impl super::AmdCpuid { .get_mut(&CpuidKey::leaf(0x80000008)) .ok_or(FeatureEntryError::MissingLeaf0x80000008)?; + // CPUID Fn8000_0008_ECX[15:12] (Field Name: ApicIdSize) // APIC ID size. The number of bits in the initial APIC20[ApicId] value that indicate // logical processor ID within a package. The size of this field determines the // maximum number of logical processors (MNLP) that the package could @@ -228,19 +229,15 @@ impl super::AmdCpuid { // Fn8000_0008_ECX[NC]. A value of zero indicates that legacy methods must be // used to determine the maximum number of logical processors, as indicated by // CPUID Fn8000_0008_ECX[NC]. 
- // - // apic_id_size: 12..16, - set_range(&mut leaf_80000008.result.ecx, 12..16, THREAD_ID_MAX_SIZE).unwrap(); + set_range(&mut leaf_80000008.result.ecx, 12..=15, THREAD_ID_MAX_SIZE).unwrap(); + // CPUID Fn8000_0008_ECX[7:0] (Field Name: NC) // Number of physical threads - 1. The number of threads in the processor is NT+1 // (e.g., if NT = 0, then there is one thread). See “Legacy Method” on page 633. - // - // nt: 0..8, - // let sub = cpu_count .checked_sub(1) .ok_or(FeatureEntryError::NumberOfPhysicalThreadsOverflow)?; - set_range(&mut leaf_80000008.result.ecx, 0..8, u32::from(sub)) + set_range(&mut leaf_80000008.result.ecx, 0..=7, u32::from(sub)) .map_err(FeatureEntryError::NumberOfPhysicalThreads)?; Ok(()) @@ -255,6 +252,7 @@ impl super::AmdCpuid { ) -> Result<(), ExtendedCacheTopologyError> { for i in 0.. { if let Some(subleaf) = self.get_mut(&CpuidKey::subleaf(0x8000001d, i)) { + // CPUID Fn8000_001D_EAX_x[7:5] (Field Name: CacheLevel) // Cache level. Identifies the level of this cache. Note that the enumeration value // is not necessarily equal to the cache level. // ```text @@ -265,10 +263,9 @@ impl super::AmdCpuid { // 011b Level 3 // 111b-100b Reserved. // ``` - // - // cache_level: 5..8 - let cache_level = get_range(subleaf.result.eax, 5..8); + let cache_level = get_range(subleaf.result.eax, 5..=7); + // CPUID Fn8000_001D_EAX_x[25:14] (Field Name: NumSharingCache) // Specifies the number of logical processors sharing the cache enumerated by N, // the value passed to the instruction in ECX. The number of logical processors // sharing this cache is the value of this field incremented by 1. To determine @@ -279,8 +276,6 @@ impl super::AmdCpuid { // // Logical processors with the same ShareId then share a cache. If // NumSharingCache+1 is not a power of two, round it up to the next power of two. 
- // - // num_sharing_cache: 14..26, match cache_level { // L1 & L2 Cache @@ -288,17 +283,17 @@ impl super::AmdCpuid { 1 | 2 => { // SAFETY: We know `cpus_per_core > 0` therefore this is always safe. let sub = u32::from(cpus_per_core.checked_sub(1).unwrap()); - set_range(&mut subleaf.result.eax, 14..26, sub) - .map_err(ExtendedCacheTopologyError::NumSharingCache)?; + set_range(&mut subleaf.result.eax, 14..=25, sub) + .map_err(|err| ExtendedCacheTopologyError::NumSharingCache(i, err))?; } // L3 Cache // The L3 cache is shared among all the logical threads 3 => { let sub = cpu_count .checked_sub(1) - .ok_or(ExtendedCacheTopologyError::NumSharingCacheOverflow)?; - set_range(&mut subleaf.result.eax, 14..26, u32::from(sub)) - .map_err(ExtendedCacheTopologyError::NumSharingCache)?; + .ok_or(ExtendedCacheTopologyError::NumSharingCacheOverflow(i))?; + set_range(&mut subleaf.result.eax, 14..=25, u32::from(sub)) + .map_err(|err| ExtendedCacheTopologyError::NumSharingCache(i, err))?; } _ => (), } @@ -334,16 +329,18 @@ impl super::AmdCpuid { .get_mut(&CpuidKey::leaf(0x8000001e)) .ok_or(ExtendedApicIdError::MissingLeaf0x8000001e)?; + // CPUID Fn8000_001E_EAX[31:0] (Field Name: ExtendedApicId) // Extended APIC ID. If MSR0000_001B[ApicEn] = 0, this field is reserved. - // - // extended_apic_id: 0..32, - set_range(&mut leaf_8000001e.result.eax, 0..32, u32::from(cpu_index)) + set_range(&mut leaf_8000001e.result.eax, 0..=31, u32::from(cpu_index)) .map_err(ExtendedApicIdError::ExtendedApicId)?; - // compute_unit_id: 0..8, - set_range(&mut leaf_8000001e.result.ebx, 0..8, core_id) + // CPUID Fn8000_001E_EBX[7:0] (Field Name: ComputeUnitId) + // Compute unit ID. Identifies a Compute Unit, which may be one or more physical cores that + // each implement one or more logical processors. 
+ set_range(&mut leaf_8000001e.result.ebx, 0..=7, core_id) .map_err(ExtendedApicIdError::ComputeUnitId)?; + // CPUID Fn8000_001E_EBX[15:8] (Field Name: ThreadsPerComputeUnit) // Threads per compute unit (zero-based count). The actual number of threads // per compute unit is the value of this field + 1. To determine which logical // processors (threads) belong to a given Compute Unit, determine a ShareId @@ -355,30 +352,26 @@ impl super::AmdCpuid { // Unit. (If ThreadsPerComputeUnit+1 is not a power of two, round it up to the // next power of two). // - // threads_per_compute_unit: 8..16, - // // SAFETY: We know `cpus_per_core > 0` therefore this is always safe. let sub = u32::from(cpus_per_core.checked_sub(1).unwrap()); - set_range(&mut leaf_8000001e.result.ebx, 8..16, sub) + set_range(&mut leaf_8000001e.result.ebx, 8..=15, sub) .map_err(ExtendedApicIdError::ThreadPerComputeUnit)?; + // CPUID Fn8000_001E_ECX[10:8] (Field Name: NodesPerProcessor) // Specifies the number of nodes in the package/socket in which this logical // processor resides. Node in this context corresponds to a processor die. // Encoding is N-1, where N is the number of nodes present in the socket. // - // nodes_per_processor: 8..11, - // // SAFETY: We know the value always fits within the range and thus is always safe. // Set nodes per processor. - set_range(&mut leaf_8000001e.result.ecx, 8..11, NODES_PER_PROCESSOR).unwrap(); + set_range(&mut leaf_8000001e.result.ecx, 8..=10, NODES_PER_PROCESSOR).unwrap(); + // CPUID Fn8000_001E_ECX[7:0] (Field Name: NodeId) // Specifies the ID of the node containing the current logical processor. NodeId // values are unique across the system. // - // node_id: 0..8, - // // Put all the cpus in the same node. 
- set_range(&mut leaf_8000001e.result.ecx, 0..8, 0).unwrap(); + set_range(&mut leaf_8000001e.result.ecx, 0..=7, 0).unwrap(); Ok(()) } diff --git a/src/vmm/src/cpu_config/x86_64/cpuid/intel/normalize.rs b/src/vmm/src/cpu_config/x86_64/cpuid/intel/normalize.rs index 41bc9d3f3f5..2e22134e882 100644 --- a/src/vmm/src/cpu_config/x86_64/cpuid/intel/normalize.rs +++ b/src/vmm/src/cpu_config/x86_64/cpuid/intel/normalize.rs @@ -34,14 +34,14 @@ pub enum NormalizeCpuidError { #[allow(clippy::enum_variant_names)] #[derive(Debug, thiserror::Error, displaydoc::Display, Eq, PartialEq)] pub enum DeterministicCacheError { - /// Failed to set `Maximum number of addressable IDs for logical processors sharing this cache` due to underflow in cpu count. - MaxCpusPerCoreUnderflow, - /// Failed to set `Maximum number of addressable IDs for logical processors sharing this cache`: {0}. - MaxCpusPerCore(CheckedAssignError), - /// Failed to set `Maximum number of addressable IDs for processor cores in the physical package` due to underflow in cores. - MaxCorePerPackageUnderflow, - /// Failed to set `Maximum number of addressable IDs for processor cores in the physical package`: {0}. + /// Failed to set max addressable core ID in physical package (CPUID.04H:EAX[31:26]): {0}. MaxCorePerPackage(CheckedAssignError), + /// Failed to set max addressable core ID in physical package (CPUID.04H:EAX[31:26]) due to underflow in cores. + MaxCorePerPackageUnderflow, + /// Failed to set max addressable processor ID sharing cache (CPUID.04H:EAX[25:14]): {0}. + MaxCpusPerCore(CheckedAssignError), + /// Failed to set max addressable processor ID sharing cache (CPUID.04H:EAX[25:14]) due to underflow in cpu count. + MaxCpusPerCoreUnderflow, } /// We always use this brand string. 
@@ -73,6 +73,7 @@ impl super::IntelCpuid { self.update_power_management_entry()?; self.update_extended_feature_flags_entry()?; self.update_performance_monitoring_entry()?; + self.update_extended_topology_v2_entry(); self.update_brand_string_entry()?; Ok(()) @@ -97,18 +98,16 @@ impl super::IntelCpuid { break; } + // CPUID.04H:EAX[7:5] // Cache Level (Starts at 1) - // - // cache_level: 5..8 - let cache_level = get_range(subleaf.result.eax, 5..8); + let cache_level = get_range(subleaf.result.eax, 5..=7); + // CPUID.04H:EAX[25:14] // Maximum number of addressable IDs for logical processors sharing this cache. // - Add one to the return value to get the result. // - The nearest power-of-2 integer that is not smaller than (1 + EAX[25:14]) is the // number of unique initial APIC IDs reserved for addressing different logical // processors sharing this cache. - // - // max_num_addressable_ids_for_logical_processors_sharing_this_cache: 14..26, // We know `cpus_per_core > 0` therefore `cpus_per_core.checked_sub(1).unwrap()` is // always safe. @@ -118,7 +117,7 @@ impl super::IntelCpuid { // The L1 & L2 cache is shared by at most 2 hyperthreads 1 | 2 => { let sub = u32::from(cpus_per_core.checked_sub(1).unwrap()); - set_range(&mut subleaf.result.eax, 14..26, sub) + set_range(&mut subleaf.result.eax, 14..=25, sub) .map_err(DeterministicCacheError::MaxCpusPerCore)?; } // L3 Cache @@ -129,7 +128,7 @@ impl super::IntelCpuid { .checked_sub(1) .ok_or(DeterministicCacheError::MaxCpusPerCoreUnderflow)?, ); - set_range(&mut subleaf.result.eax, 14..26, sub) + set_range(&mut subleaf.result.eax, 14..=25, sub) .map_err(DeterministicCacheError::MaxCpusPerCore)?; } _ => (), @@ -139,6 +138,7 @@ impl super::IntelCpuid { #[allow(clippy::unwrap_used)] let cores = cpu_count.checked_div(cpus_per_core).unwrap(); + // CPUID.04H:EAX[31:26] // Maximum number of addressable IDs for processor cores in the physical package. // - Add one to the return value to get the result. 
// - The nearest power-of-2 integer that is not smaller than (1 + EAX[31:26]) is the @@ -146,14 +146,12 @@ impl super::IntelCpuid { // a physical package. Core ID is a subset of bits of the initial APIC ID. // - The returned value is constant for valid initial values in ECX. Valid ECX // values start from 0. - // - // max_num_addressable_ids_for_processor_cores_in_physical_package: 26..32, // Put all the cores in the same socket let sub = u32::from(cores) .checked_sub(1) .ok_or(DeterministicCacheError::MaxCorePerPackageUnderflow)?; - set_range(&mut subleaf.result.eax, 26..32, sub) + set_range(&mut subleaf.result.eax, 26..=31, sub) .map_err(DeterministicCacheError::MaxCorePerPackage)?; } else { break; @@ -168,16 +166,14 @@ impl super::IntelCpuid { .get_mut(&CpuidKey::leaf(0x6)) .ok_or(NormalizeCpuidError::MissingLeaf6)?; + // CPUID.06H:EAX[1] // Intel Turbo Boost Technology available (see description of IA32_MISC_ENABLE[38]). - // - // intel_turbo_boost_technology: 1, set_bit(&mut leaf_6.result.eax, 1, false); + // CPUID.06H:ECX[3] // The processor supports performance-energy bias preference if CPUID.06H:ECX.SETBH[bit 3] // is set and it also implies the presence of a new architectural MSR called // IA32_ENERGY_PERF_BIAS (1B0H). - // - // performance_energy_bias: 3, // Clear X86 EPB feature. No frequency selection in the hypervisor. set_bit(&mut leaf_6.result.ecx, 3, false); @@ -190,8 +186,9 @@ impl super::IntelCpuid { .get_mut(&CpuidKey::subleaf(0x7, 0)) .ok_or(NormalizeCpuidError::MissingLeaf7)?; - // Set FDP_EXCPTN_ONLY bit (bit 6) and ZERO_FCS_FDS bit (bit 13) as recommended in kernel - // doc. These bits are reserved in AMD. + // Set the following bits as recommended in kernel doc. These bits are reserved in AMD. 
+ // - CPUID.07H:EBX[6] (FDP_EXCPTN_ONLY) + // - CPUID.07H:EBX[13] (Deprecates FPU CS and FPU DS values) // https://lore.kernel.org/all/20220322110712.222449-3-pbonzini@redhat.com/ // https://github.com/torvalds/linux/commit/45016721de3c714902c6f475b705e10ae0bdd801 set_bit(&mut leaf_7_0.result.ebx, 6, true); @@ -214,6 +211,29 @@ impl super::IntelCpuid { Ok(()) } + /// Update extended topology v2 entry + /// + /// CPUID leaf 1FH is a preferred superset to leaf 0xB. Intel recommends using leaf 0x1F when + /// available rather than leaf 0xB. + /// + /// Since we don't use any domains other than the ones supported in leaf 0xB, we just copy the contents of + /// leaf 0xB to leaf 0x1F. + fn update_extended_topology_v2_entry(&mut self) { + // Skip if leaf 0x1F does not exist. + if self.get(&CpuidKey::leaf(0x1F)).is_none() { + return; + } + + for index in 0.. { + if let Some(subleaf) = self.get(&CpuidKey::subleaf(0xB, index)) { + self.0 + .insert(CpuidKey::subleaf(0x1F, index), subleaf.clone()); + } else { + break; + } + } + } + fn update_brand_string_entry(&mut self) -> Result<(), NormalizeCpuidError> { // Get host brand string. 
let host_brand_string: [u8; BRAND_STRING_LENGTH] = host_brand_string(); @@ -335,9 +355,12 @@ mod tests { clippy::as_conversions )] + use std::collections::BTreeMap; use std::ffi::CStr; use super::*; + use crate::cpu_config::x86_64::cpuid::{CpuidEntry, IntelCpuid, KvmCpuidFlags}; + #[test] fn default_brand_string_test() { let brand_string = b"Intel(R) Xeon(R) Platinum 8275CL CPU @ 3.00GHz\0\0"; @@ -375,22 +398,21 @@ mod tests { #[test] fn test_update_extended_feature_flags_entry() { - let mut cpuid = - crate::cpu_config::x86_64::cpuid::IntelCpuid(std::collections::BTreeMap::from([( - crate::cpu_config::x86_64::cpuid::CpuidKey { - leaf: 0x7, - subleaf: 0, - }, - crate::cpu_config::x86_64::cpuid::CpuidEntry { - flags: crate::cpu_config::x86_64::cpuid::KvmCpuidFlags::SIGNIFICANT_INDEX, - ..Default::default() - }, - )])); + let mut cpuid = IntelCpuid(BTreeMap::from([( + CpuidKey { + leaf: 0x7, + subleaf: 0, + }, + CpuidEntry { + flags: KvmCpuidFlags::SIGNIFICANT_INDEX, + ..Default::default() + }, + )])); cpuid.update_extended_feature_flags_entry().unwrap(); let leaf_7_0 = cpuid - .get(&crate::cpu_config::x86_64::cpuid::CpuidKey { + .get(&CpuidKey { leaf: 0x7, subleaf: 0, }) @@ -398,4 +420,106 @@ mod tests { assert!((leaf_7_0.result.ebx & (1 << 6)) > 0); assert!((leaf_7_0.result.ebx & (1 << 13)) > 0); } + + #[test] + fn test_update_extended_topology_v2_entry_no_leaf_0x1f() { + let mut cpuid = IntelCpuid(BTreeMap::from([( + CpuidKey { + leaf: 0xB, + subleaf: 0, + }, + CpuidEntry { + flags: KvmCpuidFlags::SIGNIFICANT_INDEX, + ..Default::default() + }, + )])); + + cpuid.update_extended_topology_v2_entry(); + + assert!( + cpuid + .get(&CpuidKey { + leaf: 0x1F, + subleaf: 0, + }) + .is_none() + ); + } + + #[test] + fn test_update_extended_topology_v2_entry() { + let mut cpuid = IntelCpuid(BTreeMap::from([ + ( + CpuidKey { + leaf: 0xB, + subleaf: 0, + }, + CpuidEntry { + flags: KvmCpuidFlags::SIGNIFICANT_INDEX, + result: CpuidRegisters { + eax: 0x1, + ebx: 0x2, + ecx: 0x3, + 
edx: 0x4, + }, + }, + ), + ( + CpuidKey { + leaf: 0xB, + subleaf: 1, + }, + CpuidEntry { + flags: KvmCpuidFlags::SIGNIFICANT_INDEX, + result: CpuidRegisters { + eax: 0xa, + ebx: 0xb, + ecx: 0xc, + edx: 0xd, + }, + }, + ), + ( + CpuidKey { + leaf: 0x1F, + subleaf: 0, + }, + CpuidEntry { + flags: KvmCpuidFlags::SIGNIFICANT_INDEX, + result: CpuidRegisters { + eax: 0xFFFFFFFF, + ebx: 0xFFFFFFFF, + ecx: 0xFFFFFFFF, + edx: 0xFFFFFFFF, + }, + }, + ), + ])); + + cpuid.update_extended_topology_v2_entry(); + + // Check leaf 0x1F, subleaf 0 is updated. + let leaf_1f_0 = cpuid + .get(&CpuidKey { + leaf: 0x1F, + subleaf: 0, + }) + .unwrap(); + assert_eq!(leaf_1f_0.result.eax, 0x1); + assert_eq!(leaf_1f_0.result.ebx, 0x2); + assert_eq!(leaf_1f_0.result.ecx, 0x3); + assert_eq!(leaf_1f_0.result.edx, 0x4); + + // Check leaf 0x1F, subleaf 1 is inserted. + let leaf_1f_1 = cpuid + .get(&CpuidKey { + leaf: 0x1F, + subleaf: 1, + }) + .unwrap(); + assert_eq!(leaf_1f_1.result.eax, 0xa); + assert_eq!(leaf_1f_1.result.ebx, 0xb); + assert_eq!(leaf_1f_1.result.ecx, 0xc); + assert_eq!(leaf_1f_1.result.edx, 0xd); + } } diff --git a/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs b/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs index cec8aad2f4c..f9359036f5f 100644 --- a/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs +++ b/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs @@ -4,6 +4,7 @@ use crate::cpu_config::x86_64::cpuid::{ CpuidEntry, CpuidKey, CpuidRegisters, CpuidTrait, KvmCpuidFlags, cpuid, }; +use crate::vmm_config::machine_config::MAX_SUPPORTED_VCPUS; /// Error type for [`super::Cpuid::normalize`]. 
#[allow(clippy::module_name_repetitions)] @@ -60,16 +61,16 @@ pub enum GetMaxCpusPerPackageError { #[rustfmt::skip] #[derive(Debug, thiserror::Error, displaydoc::Display, Eq, PartialEq)] pub enum ExtendedTopologyError { - /// Failed to set `Number of bits to shift right on x2APIC ID to get a unique topology ID of the next level type`: {0} - ApicId(CheckedAssignError), - /// Failed to set `Number of logical processors at this level type`: {0} - LogicalProcessors(CheckedAssignError), - /// Failed to set `Level Type`: {0} - LevelType(CheckedAssignError), - /// Failed to set `Level Number`: {0} - LevelNumber(CheckedAssignError), - /// Failed to set all leaves, as more than `u32::MAX` sub-leaves are present: {0} - Overflow(<u32 as TryFrom<usize>>::Error), + /// Failed to set domain type (CPUID.(EAX=0xB,ECX={0}):ECX[15:8]): {1} + DomainType(u32, CheckedAssignError), + /// Failed to set input ECX (CPUID.(EAX=0xB,ECX={0}):ECX[7:0]): {1} + InputEcx(u32, CheckedAssignError), + /// Failed to set number of logical processors (CPUID.(EAX=0xB,ECX={0}):EBX[15:0]): {1} + NumLogicalProcs(u32, CheckedAssignError), + /// Failed to set right-shift bits (CPUID.(EAX=0xB,ECX={0}):EAX[4:0]): {1} + RightShiftBits(u32, CheckedAssignError), + /// Unexpected subleaf: {0} + UnexpectedSubleaf(u32) } /// Error type for setting leaf 0x80000006 of Cpuid::normalize(). @@ -94,66 +95,54 @@ pub fn set_bit(x: &mut u32, bit: u8, y: bool) { } /// Sets a given range to a given value. -#[allow(clippy::arithmetic_side_effects)] pub fn set_range( x: &mut u32, - range: std::ops::Range<u8>, + range: std::ops::RangeInclusive<u8>, y: u32, ) -> Result<(), CheckedAssignError> { - debug_assert!(range.end >= range.start); - match range.end - range.start { - z @ 0..=31 => { - if y >= 2u32.pow(u32::from(z)) { - Err(CheckedAssignError) - } else { - let shift = y << range.start; - *x = shift | (*x & !mask(range)); - Ok(()) - } - } - 32 => { - let shift = y << range.start; - *x = shift | (*x & !mask(range)); - Ok(()) - } - 33.. 
=> Err(CheckedAssignError), + let start = *range.start(); + let end = *range.end(); + + debug_assert!(end >= start); + debug_assert!(end < 32); + + // Ensure `y` fits within the number of bits in the specified range. + // Note that + // - 1 <= `num_bits` <= 32 from the above assertion + // - if `num_bits` equals 32, `y` always fits within it since `y` is `u32`. + let num_bits = end - start + 1; + if num_bits < 32 && y >= (1u32 << num_bits) { + return Err(CheckedAssignError); } + + let mask = get_mask(range); + *x = (*x & !mask) | (y << start); + + Ok(()) } + /// Gets a given range within a given value. -#[allow(clippy::arithmetic_side_effects)] -pub fn get_range(x: u32, range: std::ops::Range<u8>) -> u32 { - debug_assert!(range.end >= range.start); - (x & mask(range.clone())) >> range.start +pub fn get_range(x: u32, range: std::ops::RangeInclusive<u8>) -> u32 { + let start = *range.start(); + let end = *range.end(); + + debug_assert!(end >= start); + debug_assert!(end < 32); + + let mask = get_mask(range); + (x & mask) >> start } /// Returns a mask where the given range is ones. -#[allow( - clippy::as_conversions, - clippy::arithmetic_side_effects, - clippy::cast_possible_truncation -)] -const fn mask(range: std::ops::Range<u8>) -> u32 { - /// Returns a value where in the binary representation all bits to the right of the x'th bit - /// from the left are 1. 
- #[allow(clippy::unreachable)] - const fn shift(x: u8) -> u32 { - if x == 0 { - 0 - } else if x < u32::BITS as u8 { - (1 << x) - 1 - } else if x == u32::BITS as u8 { - u32::MAX - } else { - unreachable!() - } +const fn get_mask(range: std::ops::RangeInclusive<u8>) -> u32 { + let num_bits = *range.end() - *range.start() + 1; + let shift = *range.start(); + + if num_bits == 32 { + u32::MAX + } else { + ((1u32 << num_bits) - 1) << shift } - - debug_assert!(range.end >= range.start); - debug_assert!(range.end <= u32::BITS as u8); - - let front = shift(range.start); - let back = shift(range.end); - !front & back } // We use this 2nd implementation so we can conveniently define functions only used within @@ -221,79 +210,54 @@ impl super::Cpuid { cpu_index: u8, cpu_count: u8, ) -> Result<(), FeatureInformationError> { - // Flush a cache line size. - const EBX_CLFLUSH_CACHELINE: u32 = 8; - - // PDCM: Perfmon and Debug Capability. - const ECX_PDCM_BITINDEX: u8 = 15; - - // TSC-Deadline. - const ECX_TSC_DEADLINE_BITINDEX: u8 = 24; - - // CPU is running on a hypervisor. - const ECX_HYPERVISOR_BITINDEX: u8 = 31; - let leaf_1 = self .get_mut(&CpuidKey::leaf(0x1)) .ok_or(FeatureInformationError::MissingLeaf1)?; - // A value of 1 indicates the processor supports the performance and debug feature - // indication MSR IA32_PERF_CAPABILITIES. - // - // pdcm: 15, - set_bit(&mut leaf_1.result.ecx, ECX_PDCM_BITINDEX, false); + // CPUID.01H:EBX[15:08] + // CLFLUSH line size (Value * 8 = cache line size in bytes; used also by CLFLUSHOPT). + set_range(&mut leaf_1.result.ebx, 8..=15, 8).map_err(FeatureInformationError::Clflush)?; - // A value of 1 indicates that the processor’s local APIC timer supports one-shot - // operation using a TSC deadline value. + // CPUID.01H:EBX[23:16] + // Maximum number of addressable IDs for logical processors in this physical package. 
// - // tsc_deadline: 24, - set_bit(&mut leaf_1.result.ecx, ECX_TSC_DEADLINE_BITINDEX, true); - - // Hypervisor bit - set_bit(&mut leaf_1.result.ecx, ECX_HYPERVISOR_BITINDEX, true); + // The nearest power-of-2 integer that is not smaller than EBX[23:16] is the number of + // unique initial APIC IDs reserved for addressing different logical processors in a + // physical package. This field is only valid if CPUID.1.EDX.HTT[bit 28]= 1. + let max_cpus_per_package = u32::from( + get_max_cpus_per_package(cpu_count) + .map_err(FeatureInformationError::GetMaxCpusPerPackage)?, + ); + set_range(&mut leaf_1.result.ebx, 16..=23, max_cpus_per_package) + .map_err(FeatureInformationError::SetMaxCpusPerPackage)?; + // CPUID.01H:EBX[31:24] // Initial APIC ID. // - // The 8-bit initial APIC ID in EBX[31:24] is replaced by the 32-bit x2APIC ID, - // available in Leaf 0BH and Leaf 1FH. - // - // initial_apic_id: 24..32, - set_range(&mut leaf_1.result.ebx, 24..32, u32::from(cpu_index)) + // The 8-bit initial APIC ID in EBX[31:24] is replaced by the 32-bit x2APIC ID, available + // in Leaf 0BH and Leaf 1FH. + set_range(&mut leaf_1.result.ebx, 24..=31, u32::from(cpu_index)) .map_err(FeatureInformationError::InitialApicId)?; - // CLFLUSH line size (Value ∗ 8 = cache line size in bytes; used also by CLFLUSHOPT). - // - // clflush: 8..16, - set_range(&mut leaf_1.result.ebx, 8..16, EBX_CLFLUSH_CACHELINE) - .map_err(FeatureInformationError::Clflush)?; + // CPUID.01H:ECX[15] (Mnemonic: PDCM) + // Performance and Debug Capability: A value of 1 indicates the processor supports the + // performance and debug feature indication MSR IA32_PERF_CAPABILITIES. 
+ set_bit(&mut leaf_1.result.ecx, 15, false); - let max_cpus_per_package = u32::from( - get_max_cpus_per_package(cpu_count) - .map_err(FeatureInformationError::GetMaxCpusPerPackage)?, - ); + // CPUID.01H:ECX[24] (Mnemonic: TSC-Deadline) + // A value of 1 indicates that the processor’s local APIC timer supports one-shot operation + // using a TSC deadline value. + set_bit(&mut leaf_1.result.ecx, 24, true); - // Maximum number of addressable IDs for logical processors in this physical package. - // - // The nearest power-of-2 integer that is not smaller than EBX[23:16] is the number of - // unique initial APIC IDs reserved for addressing different logical - // processors in a physical package. This field is only valid if - // CPUID.1.EDX.HTT[bit 28]= 1. - // - // max_addressable_logical_processor_ids: 16..24, - set_range(&mut leaf_1.result.ebx, 16..24, max_cpus_per_package) - .map_err(FeatureInformationError::SetMaxCpusPerPackage)?; + // CPUID.01H:ECX[31] (Mnemonic: Hypervisor) + set_bit(&mut leaf_1.result.ecx, 31, true); + // CPUID.01H:EDX[28] (Mnemonic: HTT) // Max APIC IDs reserved field is Valid. A value of 0 for HTT indicates there is only a - // single logical processor in the package and software should assume only a - // single APIC ID is reserved. A value of 1 for HTT indicates the value in - // CPUID.1.EBX[23:16] (the Maximum number of addressable IDs for logical - // processors in this package) is valid for the package. - // - // htt: 28, - - // A value of 1 for HTT indicates the value in CPUID.1.EBX[23:16] - // (the Maximum number of addressable IDs for logical processors in this package) - // is valid for the package + // single logical processor in the package and software should assume only a single APIC ID + // is reserved. A value of 1 for HTT indicates the value in CPUID.1.EBX[23:16] (the Maximum + // number of addressable IDs for logical processors in this package) is valid for the + // package. 
set_bit(&mut leaf_1.result.edx, 28, cpu_count > 1); Ok(()) @@ -307,17 +271,8 @@ impl super::Cpuid { cpu_bits: u8, cpus_per_core: u8, ) -> Result<(), ExtendedTopologyError> { - /// Level type used for setting thread level processor topology. - const LEVEL_TYPE_THREAD: u32 = 1; - /// Level type used for setting core level processor topology. - const LEVEL_TYPE_CORE: u32 = 2; - /// The APIC ID shift in leaf 0xBh specifies the number of bits to shit the x2APIC ID to - /// get a unique topology of the next level. This allows 128 logical - /// processors/package. - const LEAFBH_INDEX1_APICID: u32 = 7; - // The following commit changed the behavior of KVM_GET_SUPPORTED_CPUID to no longer - // include leaf 0xB / sub-leaf 1. + // include CPUID.(EAX=0BH,ECX=1). // https://lore.kernel.org/all/20221027092036.2698180-1-pbonzini@redhat.com/ self.inner_mut() .entry(CpuidKey::subleaf(0xB, 0x1)) @@ -333,103 +288,95 @@ impl super::Cpuid { for index in 0.. { if let Some(subleaf) = self.get_mut(&CpuidKey::subleaf(0xB, index)) { - // reset eax, ebx, ecx + // Reset eax, ebx, ecx subleaf.result.eax = 0; subleaf.result.ebx = 0; subleaf.result.ecx = 0; - // EDX bits 31..0 contain x2APIC ID of current logical processor - // x2APIC increases the size of the APIC ID from 8 bits to 32 bits + // CPUID.(EAX=0BH,ECX=N).EDX[31:0] + // x2APIC ID of the current logical processor. subleaf.result.edx = u32::from(cpu_index); subleaf.flags = KvmCpuidFlags::SIGNIFICANT_INDEX; - // "If SMT is not present in a processor implementation but CPUID leaf 0BH is - // supported, CPUID.EAX=0BH, ECX=0 will return EAX = 0, EBX = 1 and - // level type = 1. Number of logical processors at the core level is - // reported at level type = 2." (Intel® 64 Architecture x2APIC - // Specification, Ch. 2.8) match index { - // Number of bits to shift right on x2APIC ID to get a unique topology ID of the - // next level type*. All logical processors with the same - // next level ID share current level. 
+ // CPUID.(EAX=0BH,ECX=N):EAX[4:0] + // The number of bits that the x2APIC ID must be shifted to the right to address + // instances of the next higher-scoped domain. When logical processor is not + // supported by the processor, the value of this field at the Logical Processor + // domain sub-leaf may be returned as either 0 (no allocated bits in the x2APIC + // ID) or 1 (one allocated bit in the x2APIC ID); software should plan + // accordingly. + + // CPUID.(EAX=0BH,ECX=N):EBX[15:0] + // The number of logical processors across all instances of this domain within + // the next-higher scoped domain. (For example, in a processor socket/package + // comprising "M" dies of "N" cores each, where each core has "L" logical + // processors, the "die" domain sub-leaf value of this field would be M*N*L.) + // This number reflects configuration as shipped by Intel. Note, software must + // not use this field to enumerate processor topology. + + // CPUID.(EAX=0BH,ECX=N):ECX[7:0] + // The input ECX sub-leaf index. + + // CPUID.(EAX=0BH,ECX=N):ECX[15:8] + // Domain Type. This field provides an identification value which indicates the + // domain as shown below. Although domains are ordered, their assigned + // identification values are not and software should not depend on it. // - // *Software should use this field (EAX[4:0]) to enumerate processor topology of - // the system. + // Hierarchy Domain Domain Type Identification Value + // ----------------------------------------------------------------- + // Lowest Logical Processor 1 + // Highest Core 2 // - // bit_shifts_right_2x_apic_id_unique_topology_id: 0..5 + // (Note that enumeration values of 0 and 3-255 are reserved.) - // Number of logical processors at this level type. The number reflects - // configuration as shipped by Intel**. - // - // **Software must not use EBX[15:0] to enumerate processor topology of the - // system. This value in this field (EBX[15:0]) is only - // intended for display/diagnostic purposes. 
The actual - // number of logical processors available to BIOS/OS/Applications may be - // different from the value of EBX[15:0], depending on - // software and platform hardware configurations. - // - // logical_processors: 0..16 - - // Level number. Same value in ECX input. - // - // level_number: 0..8, - - // Level type*** - // - // If an input value n in ECX returns the invalid level-type of 0 in ECX[15:8], - // other input values with ECX>n also return 0 in ECX[15:8]. - // - // ***The value of the “level type” field is not related to level numbers in any - // way, higher “level type” values do not mean higher - // levels. Level type field has the following encoding: - // - 0: Invalid. - // - 1: SMT. - // - 2: Core. - // - 3-255: Reserved. - // - // level_type: 8..16 - - // Thread Level Topology; index = 0 + // Logical processor domain 0 => { // To get the next level APIC ID, shift right with at most 1 because we have - // maximum 2 hyperthreads per core that can be represented by 1 bit. - set_range(&mut subleaf.result.eax, 0..5, u32::from(cpu_bits)) - .map_err(ExtendedTopologyError::ApicId)?; + // maximum 2 logical processors per core that can be represented by 1 bit. + set_range(&mut subleaf.result.eax, 0..=4, u32::from(cpu_bits)) + .map_err(|err| ExtendedTopologyError::RightShiftBits(index, err))?; // When cpu_count == 1 or HT is disabled, there is 1 logical core at this - // level Otherwise there are 2 - set_range(&mut subleaf.result.ebx, 0..16, u32::from(cpus_per_core)) - .map_err(ExtendedTopologyError::LogicalProcessors)?; + // domain; otherwise there are 2 + set_range(&mut subleaf.result.ebx, 0..=15, u32::from(cpus_per_core)) + .map_err(|err| ExtendedTopologyError::NumLogicalProcs(index, err))?; + + // Skip setting 0 to ECX[7:0] since it's already reset to 0.
- set_range(&mut subleaf.result.ecx, 8..16, LEVEL_TYPE_THREAD) - .map_err(ExtendedTopologyError::LevelType)?; + // Set the domain type identification value for logical processor. + set_range(&mut subleaf.result.ecx, 8..=15, 1) + .map_err(|err| ExtendedTopologyError::DomainType(index, err))?; } - // Core Level Processor Topology; index = 1 + // Core domain 1 => { - set_range(&mut subleaf.result.eax, 0..5, LEAFBH_INDEX1_APICID) - .map_err(ExtendedTopologyError::ApicId)?; - - set_range(&mut subleaf.result.ebx, 0..16, u32::from(cpu_count)) - .map_err(ExtendedTopologyError::LogicalProcessors)?; - - // We expect here as this is an extremely rare case that is unlikely to ever - // occur. It would require manual editing of the CPUID structure to push - // more than 2^32 subleaves. - let sub = index; - set_range(&mut subleaf.result.ecx, 0..8, sub) - .map_err(ExtendedTopologyError::LevelNumber)?; - - set_range(&mut subleaf.result.ecx, 8..16, LEVEL_TYPE_CORE) - .map_err(ExtendedTopologyError::LevelType)?; + // Configure such that the next higher-scoped domain (i.e. socket) includes + // all logical processors. + // + // The CPUID.(EAX=0BH,ECX=1).EAX[4:0] value must be an integer N such that + // 2^N is greater than or equal to the maximum number of vCPUs. + set_range( + &mut subleaf.result.eax, + 0..=4, + MAX_SUPPORTED_VCPUS.next_power_of_two().ilog2(), + ) + .map_err(|err| ExtendedTopologyError::RightShiftBits(index, err))?; + set_range(&mut subleaf.result.ebx, 0..=15, u32::from(cpu_count)) + .map_err(|err| ExtendedTopologyError::NumLogicalProcs(index, err))?; + + // Setting the input ECX value (i.e. `index`) + set_range(&mut subleaf.result.ecx, 0..=7, index) + .map_err(|err| ExtendedTopologyError::InputEcx(index, err))?; + + // Set the domain type identification value for core.
+ set_range(&mut subleaf.result.ecx, 8..=15, 2) + .map_err(|err| ExtendedTopologyError::DomainType(index, err))?; } - // Core Level Processor Topology; index >=2 - // No other levels available; This should already be set correctly, - // and it is added here as a "re-enforcement" in case we run on - // different hardware _ => { - // We expect here as this is an extremely rare case that is unlikely to ever - // occur. It would require manual editing of the CPUID structure to push - // more than 2^32 subleaves. - subleaf.result.ecx = index; + // KVM no longer returns any subleaf numbers greater than 0. The patch was + // merged in v6.2 and backported to v5.10. Subleaves >= 2 should not be + // included. + // https://github.com/torvalds/linux/commit/45e966fcca03ecdcccac7cb236e16eea38cc18af + return Err(ExtendedTopologyError::UnexpectedSubleaf(index)); } } } else { diff --git a/tests/integration_tests/functional/test_cpu_features_host_vs_guest.py b/tests/integration_tests/functional/test_cpu_features_host_vs_guest.py index c8075faa505..e387018d6d2 100644 --- a/tests/integration_tests/functional/test_cpu_features_host_vs_guest.py +++ b/tests/integration_tests/functional/test_cpu_features_host_vs_guest.py @@ -157,10 +157,12 @@ } -def test_host_vs_guest_cpu_features(uvm_nano): +def test_host_vs_guest_cpu_features(uvm_plain_any): """Check CPU features host vs guest""" - vm = uvm_nano + vm = uvm_plain_any + vm.spawn() + vm.basic_config() vm.add_net_iface() vm.start() host_feats = set(utils.check_output(CPU_FEATURES_CMD).stdout.split()) @@ -231,6 +233,102 @@ def test_host_vs_guest_cpu_features(uvm_nano): assert host_feats - guest_feats == host_guest_diff_6_1 assert guest_feats - host_feats == INTEL_GUEST_ONLY_FEATS - {"umip"} + case CpuModel.INTEL_SAPPHIRE_RAPIDS: + expected_host_minus_guest = INTEL_HOST_ONLY_FEATS.copy() + expected_guest_minus_host = INTEL_GUEST_ONLY_FEATS.copy() + + host_version = global_props.host_linux_version_tpl + guest_version = vm.guest_kernel_version 
+ + # KVM does not support virtualization of the following hardware features yet for several + # reasons (e.g. security, simply difficulty of implementation). + expected_host_minus_guest |= { + # Intel Total Memory Encryption (TME) is the capability to encrypt the entirety of + # physical memory of a system. TME is enabled by system BIOS/hardware and applies to + # the physical memory as a whole. + "tme", + # PCONFIG instruction allows software to configure certain platform features. It + # supports these features with multiple leaf functions, selecting a leaf function + # using the value in EAX. As of this writing, the only defined PCONFIG leaf function + # is for key programming for total memory encryption-multi-key (TME-MK). + "pconfig", + # Architectural Last Branch Record (Arch LBR) is a feature that logs the most + # recently executed branch instructions (e.g. source and destination addresses). + # Traditional LBR implementations have existed in Intel CPUs for years and the MSR + # interface varied by CPU model. Arch LBR is a standardized version. There is a + # kernel patch created in 2022 but didn't get merged due to a mess. + # https://lore.kernel.org/all/20221125040604.5051-1-weijiang.yang@intel.com/ + "arch_lbr", + # ENQCMD/ENQCMDS are instructions that allow software to atomically write 64-byte + # commands to enqueue registers, which are special device registers accessed using + # memory-mapped I/O. + "enqcmd", + # Intel Resource Director Technology (RDT) feature set provides a set of allocation + # (resource control) capabilities including Cache Allocation Technology (CAT) and + # Code and Data Prioritization (CDP). + # L3 variants are listed in INTEL_HOST_ONLY_FEATS. + "cat_l2", + "cdp_l2", + # This is a synthesized bit for split lock detection that raises an Alignment Check + # (#AC) exception if an operand of an atomic operation crosses two cache lines.
It + # is not enumerated on CPUID, instead detected by actually attempting to read from + # MSR address 0x33 (MSR_MEMORY_CTRL in Intel SDM, MSR_TEST_CTRL in Linux kernel). + "split_lock_detect", + } + + # The following features are also not virtualized by KVM yet but are only supported on + # newer kernel versions. + if host_version >= (5, 18): + expected_host_minus_guest |= { + # Hardware Feedback Interface (HFI) is a feature that gives OSes performance + # and energy efficiency capability data for each CPU that can be used to + # influence task placement decisions. + # https://github.com/torvalds/linux/commit/7b8f40b3de75c971a4e5f9308b06deb59118dbac + "hfi", + # Indirect Branch Tracking (IBT) is a feature where the CPU ensures that indirect + # branch targets start with ENDBRANCH instruction (`endbr32` or `endbr64`), + # which executes as a no-op; if anything else is found, a control-protection + # (#CP) fault will be raised. + # https://github.com/torvalds/linux/commit/991625f3dd2cbc4b787deb0213e2bcf8fa264b21 + "ibt", + } + + # AVX512 FP16 is supported and passed through on v5.11+. + # https://github.com/torvalds/linux/commit/e1b35da5e624f8b09d2e98845c2e4c84b179d9a4 + # https://github.com/torvalds/linux/commit/2224fc9efb2d6593fbfb57287e39ba4958b188ba + if host_version >= (5, 11) and guest_version < (5, 11): + expected_host_minus_guest |= {"avx512_fp16"} + + # AVX VNNI is supported and passed through on v5.12+. + # https://github.com/torvalds/linux/commit/b85a0425d8056f3bd8d0a94ecdddf2a39d32a801 + # https://github.com/torvalds/linux/commit/1085a6b585d7d1c441cd10fdb4c7a4d96a22eba7 + if host_version >= (5, 12) and guest_version < (5, 12): + expected_host_minus_guest |= {"avx_vnni"} + + # Bus lock detection is supported on v5.12+ and passed through on v5.13+.
+ # https://github.com/torvalds/linux/commit/f21d4d3b97a8603567e5d4250bd75e8ebbd520af + # https://github.com/torvalds/linux/commit/76ea438b4afcd9ee8da3387e9af4625eaccff58f + if host_version >= (5, 13) and guest_version < (5, 12): + expected_host_minus_guest |= {"bus_lock_detect"} + + # Intel AMX is supported and passed through on v5.17+. + # https://github.com/torvalds/linux/commit/690a757d610e50c2c3acd2e4bc3992cfc63feff2 + if host_version >= (5, 17) and guest_version < (5, 17): + expected_host_minus_guest |= {"amx_bf16", "amx_int8", "amx_tile"} + + expected_guest_minus_host -= { + # UMIP can be emulated by KVM on Intel processors, but is supported in hardware on + # Intel Sapphire Rapids and passed through. + "umip", + # This is a synthesized bit and it is always set on guest thanks to kvm-clock. But + # Intel Sapphire Rapids reports TSC frequency on CPUID leaf 0x15, so the bit is also + # set on host. + "tsc_known_freq", + } + + assert host_feats - guest_feats == expected_host_minus_guest + assert guest_feats - host_feats == expected_guest_minus_host + case CpuModel.ARM_NEOVERSE_N1: expected_guest_minus_host = set() expected_host_minus_guest = set() diff --git a/tests/integration_tests/functional/test_cpu_features_x86_64.py b/tests/integration_tests/functional/test_cpu_features_x86_64.py index 270e3fa12e2..0fb51e5bd37 100644 --- a/tests/integration_tests/functional/test_cpu_features_x86_64.py +++ b/tests/integration_tests/functional/test_cpu_features_x86_64.py @@ -150,6 +150,8 @@ def test_brand_string(uvm_plain_any): * For Intel CPUs, the guest brand string should be: Intel(R) Xeon(R) Processor @ {host frequency} + or + Intel(R) Xeon(R) Processor where {host frequency} is the frequency reported by the host CPUID (e.g. 
4.01GHz) * For AMD CPUs, the guest brand string should be: @@ -184,7 +186,9 @@ def test_brand_string(uvm_plain_any): cif = open("/proc/cpuinfo", "r", encoding="utf-8") cpu_info = cif.read() mo = re.search("model name.*:.* ([0-9]*.[0-9]*[G|M|T]Hz)", cpu_info) - assert mo + # Skip if host frequency is not reported + if mo is None: + return host_frequency = mo.group(1) # Assert the model name matches "Intel(R) Xeon(R) Processor @ " diff --git a/tests/integration_tests/functional/test_cpu_template_helper.py b/tests/integration_tests/functional/test_cpu_template_helper.py index 1b1c2478da3..6ff2db1f7f0 100644 --- a/tests/integration_tests/functional/test_cpu_template_helper.py +++ b/tests/integration_tests/functional/test_cpu_template_helper.py @@ -133,6 +133,10 @@ def build_cpu_config_dict(cpu_config_path): # support it, the userspace cpuid command in ubuntu 22 reports not only # the subleaf 0 but also the subleaf 1. (0x1B, 0x1), + # CPUID.1Fh is a preferred superset to CPUID.0Bh. For the same reason as + # CPUID.Bh, the subleaf 2 should be skipped when the guest userspace cpuid + # enumerates it. + (0x1F, 0x2), # CPUID.20000000h is not documented in Intel SDM and AMD APM. KVM doesn't # report it, but the userspace cpuid command in ubuntu 22 does. (0x20000000, 0x0), @@ -184,6 +188,9 @@ def build_cpu_config_dict(cpu_config_path): 0x48, # MSR_IA32_SMBASE is not accessible outside of System Management Mode. 0x9E, + # MSR_IA32_UMWAIT_CONTROL is R/W MSR that guest OS modifies after boot to + # control UMWAIT feature. + 0xE1, # MSR_IA32_TSX_CTRL is R/W MSR to disable Intel TSX feature as a mitigation # against TAA vulnerability. 0x122, @@ -193,6 +200,10 @@ def build_cpu_config_dict(cpu_config_path): 0x174, 0x175, 0x176, + # MSR_IA32_XFD is R/W MSR for guest OS to control which XSAVE-enabled + # features are temporarily disabled. Guest OS disables TILEDATA by default + # using the MSR. 
+ 0x1C4, # MSR_IA32_TSC_DEADLINE specifies the time at which a timer interrupt # should occur and depends on the elapsed time. 0x6E0, diff --git a/tests/integration_tests/security/test_vulnerabilities.py b/tests/integration_tests/security/test_vulnerabilities.py index 14307e56888..356bdb4370f 100644 --- a/tests/integration_tests/security/test_vulnerabilities.py +++ b/tests/integration_tests/security/test_vulnerabilities.py @@ -85,7 +85,10 @@ def expected_vulnerabilities(self, cpu_template_name): Since we have a test on host and the exception in guest is not valid, we add a check to ignore this exception. """ - if global_props.cpu_codename == "INTEL_ICELAKE" and cpu_template_name is None: + if ( + global_props.cpu_codename in ["INTEL_ICELAKE", "INTEL_SAPPHIRE_RAPIDS"] + and cpu_template_name is None + ): return { '{"NAME": "REPTAR", "CVE": "CVE-2023-23583", "VULNERABLE": true, "INFOS": "Your microcode is too old to mitigate the vulnerability"}' }