From 17b494d127f52c117b5b021fafcecc11cdf4b0ad Mon Sep 17 00:00:00 2001 From: Takahiro Itazuri Date: Thu, 13 Mar 2025 10:33:55 +0000 Subject: [PATCH 01/20] refactor(vmm): Reorder normalization of feature info leaf Reorder normalization from lower bits to higher and from EAX to EDX. No functional change. Signed-off-by: Takahiro Itazuri --- .../src/cpu_config/x86_64/cpuid/normalize.rs | 57 +++++++++---------- 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs b/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs index cec8aad2f4c..be35f3ad757 100644 --- a/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs +++ b/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs @@ -237,41 +237,12 @@ impl super::Cpuid { .get_mut(&CpuidKey::leaf(0x1)) .ok_or(FeatureInformationError::MissingLeaf1)?; - // A value of 1 indicates the processor supports the performance and debug feature - // indication MSR IA32_PERF_CAPABILITIES. - // - // pdcm: 15, - set_bit(&mut leaf_1.result.ecx, ECX_PDCM_BITINDEX, false); - - // A value of 1 indicates that the processor’s local APIC timer supports one-shot - // operation using a TSC deadline value. - // - // tsc_deadline: 24, - set_bit(&mut leaf_1.result.ecx, ECX_TSC_DEADLINE_BITINDEX, true); - - // Hypervisor bit - set_bit(&mut leaf_1.result.ecx, ECX_HYPERVISOR_BITINDEX, true); - - // Initial APIC ID. - // - // The 8-bit initial APIC ID in EBX[31:24] is replaced by the 32-bit x2APIC ID, - // available in Leaf 0BH and Leaf 1FH. - // - // initial_apic_id: 24..32, - set_range(&mut leaf_1.result.ebx, 24..32, u32::from(cpu_index)) - .map_err(FeatureInformationError::InitialApicId)?; - // CLFLUSH line size (Value ∗ 8 = cache line size in bytes; used also by CLFLUSHOPT). 
// // clflush: 8..16, set_range(&mut leaf_1.result.ebx, 8..16, EBX_CLFLUSH_CACHELINE) .map_err(FeatureInformationError::Clflush)?; - let max_cpus_per_package = u32::from( - get_max_cpus_per_package(cpu_count) - .map_err(FeatureInformationError::GetMaxCpusPerPackage)?, - ); - // Maximum number of addressable IDs for logical processors in this physical package. // // The nearest power-of-2 integer that is not smaller than EBX[23:16] is the number of @@ -280,9 +251,37 @@ impl super::Cpuid { // CPUID.1.EDX.HTT[bit 28]= 1. // // max_addressable_logical_processor_ids: 16..24, + let max_cpus_per_package = u32::from( + get_max_cpus_per_package(cpu_count) + .map_err(FeatureInformationError::GetMaxCpusPerPackage)?, + ); set_range(&mut leaf_1.result.ebx, 16..24, max_cpus_per_package) .map_err(FeatureInformationError::SetMaxCpusPerPackage)?; + // Initial APIC ID. + // + // The 8-bit initial APIC ID in EBX[31:24] is replaced by the 32-bit x2APIC ID, + // available in Leaf 0BH and Leaf 1FH. + // + // initial_apic_id: 24..32, + set_range(&mut leaf_1.result.ebx, 24..32, u32::from(cpu_index)) + .map_err(FeatureInformationError::InitialApicId)?; + + // A value of 1 indicates the processor supports the performance and debug feature + // indication MSR IA32_PERF_CAPABILITIES. + // + // pdcm: 15, + set_bit(&mut leaf_1.result.ecx, ECX_PDCM_BITINDEX, false); + + // A value of 1 indicates that the processor’s local APIC timer supports one-shot + // operation using a TSC deadline value. + // + // tsc_deadline: 24, + set_bit(&mut leaf_1.result.ecx, ECX_TSC_DEADLINE_BITINDEX, true); + + // Hypervisor bit + set_bit(&mut leaf_1.result.ecx, ECX_HYPERVISOR_BITINDEX, true); + // Max APIC IDs reserved field is Valid. A value of 0 for HTT indicates there is only a // single logical processor in the package and software should assume only a // single APIC ID is reserved. 
A value of 1 for HTT indicates the value in From 7c8fc68d71a9cdc427b1400029a99507ff442e84 Mon Sep 17 00:00:00 2001 From: Takahiro Itazuri Date: Thu, 13 Mar 2025 10:54:33 +0000 Subject: [PATCH 02/20] refactor(vmm): Use CPUID notation used in Intel SDM The CPUID notation is widely used in various Intel docs including SDM. Signed-off-by: Takahiro Itazuri --- .../x86_64/cpuid/intel/normalize.rs | 20 +++--- .../src/cpu_config/x86_64/cpuid/normalize.rs | 69 ++++++++----------- 2 files changed, 35 insertions(+), 54 deletions(-) diff --git a/src/vmm/src/cpu_config/x86_64/cpuid/intel/normalize.rs b/src/vmm/src/cpu_config/x86_64/cpuid/intel/normalize.rs index 41bc9d3f3f5..0bfc5d89632 100644 --- a/src/vmm/src/cpu_config/x86_64/cpuid/intel/normalize.rs +++ b/src/vmm/src/cpu_config/x86_64/cpuid/intel/normalize.rs @@ -97,18 +97,16 @@ impl super::IntelCpuid { break; } + // CPUID.04H:EAX[7:5] // Cache Level (Starts at 1) - // - // cache_level: 5..8 let cache_level = get_range(subleaf.result.eax, 5..8); + // CPUID.04H:EAX[25:14] // Maximum number of addressable IDs for logical processors sharing this cache. // - Add one to the return value to get the result. // - The nearest power-of-2 integer that is not smaller than (1 + EAX[25:14]) is the // number of unique initial APIC IDs reserved for addressing different logical // processors sharing this cache. - // - // max_num_addressable_ids_for_logical_processors_sharing_this_cache: 14..26, // We know `cpus_per_core > 0` therefore `cpus_per_core.checked_sub(1).unwrap()` is // always safe. @@ -139,6 +137,7 @@ impl super::IntelCpuid { #[allow(clippy::unwrap_used)] let cores = cpu_count.checked_div(cpus_per_core).unwrap(); + // CPUID.04H:EAX[31:26] // Maximum number of addressable IDs for processor cores in the physical package. // - Add one to the return value to get the result. // - The nearest power-of-2 integer that is not smaller than (1 + EAX[31:26]) is the @@ -146,8 +145,6 @@ impl super::IntelCpuid { // a physical package. 
Core ID is a subset of bits of the initial APIC ID. // - The returned value is constant for valid initial values in ECX. Valid ECX // values start from 0. - // - // max_num_addressable_ids_for_processor_cores_in_physical_package: 26..32, // Put all the cores in the same socket let sub = u32::from(cores) @@ -168,16 +165,14 @@ impl super::IntelCpuid { .get_mut(&CpuidKey::leaf(0x6)) .ok_or(NormalizeCpuidError::MissingLeaf6)?; + // CPUID.06H:EAX[1] // Intel Turbo Boost Technology available (see description of IA32_MISC_ENABLE[38]). - // - // intel_turbo_boost_technology: 1, set_bit(&mut leaf_6.result.eax, 1, false); + // CPUID.06H:ECX[3] // The processor supports performance-energy bias preference if CPUID.06H:ECX.SETBH[bit 3] // is set and it also implies the presence of a new architectural MSR called // IA32_ENERGY_PERF_BIAS (1B0H). - // - // performance_energy_bias: 3, // Clear X86 EPB feature. No frequency selection in the hypervisor. set_bit(&mut leaf_6.result.ecx, 3, false); @@ -190,8 +185,9 @@ impl super::IntelCpuid { .get_mut(&CpuidKey::subleaf(0x7, 0)) .ok_or(NormalizeCpuidError::MissingLeaf7)?; - // Set FDP_EXCPTN_ONLY bit (bit 6) and ZERO_FCS_FDS bit (bit 13) as recommended in kernel - // doc. These bits are reserved in AMD. + // Set the following bits as recommended in kernel doc. These bits are reserved in AMD. 
+ // - CPUID.07H:EBX[6] (FDP_EXCPTN_ONLY) + // - CPUID.07H:EBX[13] (Deprecates FPU CS and FPU DS values) // https://lore.kernel.org/all/20220322110712.222449-3-pbonzini@redhat.com/ // https://github.com/torvalds/linux/commit/45016721de3c714902c6f475b705e10ae0bdd801 set_bit(&mut leaf_7_0.result.ebx, 6, true); diff --git a/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs b/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs index be35f3ad757..18963875813 100644 --- a/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs +++ b/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs @@ -237,20 +237,17 @@ impl super::Cpuid { .get_mut(&CpuidKey::leaf(0x1)) .ok_or(FeatureInformationError::MissingLeaf1)?; - // CLFLUSH line size (Value ∗ 8 = cache line size in bytes; used also by CLFLUSHOPT). - // - // clflush: 8..16, + // CPUID.01H:EBX[15:08] + // CLFLUSH line size (Value * 8 = cache line size in bytes; used also by CLFLUSHOPT). set_range(&mut leaf_1.result.ebx, 8..16, EBX_CLFLUSH_CACHELINE) .map_err(FeatureInformationError::Clflush)?; + // CPUID.01H:EBX[23:16] // Maximum number of addressable IDs for logical processors in this physical package. // // The nearest power-of-2 integer that is not smaller than EBX[23:16] is the number of - // unique initial APIC IDs reserved for addressing different logical - // processors in a physical package. This field is only valid if - // CPUID.1.EDX.HTT[bit 28]= 1. - // - // max_addressable_logical_processor_ids: 16..24, + // unique initial APIC IDs reserved for addressing different logical processors in a + // physical package. This field is only valid if CPUID.1.EDX.HTT[bit 28]= 1. let max_cpus_per_package = u32::from( get_max_cpus_per_package(cpu_count) .map_err(FeatureInformationError::GetMaxCpusPerPackage)?, @@ -258,41 +255,33 @@ impl super::Cpuid { set_range(&mut leaf_1.result.ebx, 16..24, max_cpus_per_package) .map_err(FeatureInformationError::SetMaxCpusPerPackage)?; + // CPUID.01H:EBX[31:24] // Initial APIC ID. 
// - // The 8-bit initial APIC ID in EBX[31:24] is replaced by the 32-bit x2APIC ID, - // available in Leaf 0BH and Leaf 1FH. - // - // initial_apic_id: 24..32, + // The 8-bit initial APIC ID in EBX[31:24] is replaced by the 32-bit x2APIC ID, available + // in Leaf 0BH and Leaf 1FH. set_range(&mut leaf_1.result.ebx, 24..32, u32::from(cpu_index)) .map_err(FeatureInformationError::InitialApicId)?; - // A value of 1 indicates the processor supports the performance and debug feature - // indication MSR IA32_PERF_CAPABILITIES. - // - // pdcm: 15, + // CPUID.01H:ECX[15] (Mnemonic: PDCM) + // Performance and Debug Capability: A value of 1 indicates the processor supports the + // performance and debug feature indication MSR IA32_PERF_CAPABILITIES. set_bit(&mut leaf_1.result.ecx, ECX_PDCM_BITINDEX, false); - // A value of 1 indicates that the processor’s local APIC timer supports one-shot - // operation using a TSC deadline value. - // - // tsc_deadline: 24, + // CPUID.01H:ECX[24] (Mnemonic: TSC-Deadline) + // A value of 1 indicates that the processor’s local APIC timer supports one-shot operation + // using a TSC deadline value. set_bit(&mut leaf_1.result.ecx, ECX_TSC_DEADLINE_BITINDEX, true); - // Hypervisor bit + // CPUID.01H:ECX[31] (Mnemonic: Hypervisor) set_bit(&mut leaf_1.result.ecx, ECX_HYPERVISOR_BITINDEX, true); + // CPUID.01H:EDX[28] (Mnemonic: HTT) // Max APIC IDs reserved field is Valid. A value of 0 for HTT indicates there is only a - // single logical processor in the package and software should assume only a - // single APIC ID is reserved. A value of 1 for HTT indicates the value in - // CPUID.1.EBX[23:16] (the Maximum number of addressable IDs for logical - // processors in this package) is valid for the package. 
- // - // htt: 28, - - // A value of 1 for HTT indicates the value in CPUID.1.EBX[23:16] - // (the Maximum number of addressable IDs for logical processors in this package) - // is valid for the package + // single logical processor in the package and software should assume only a single APIC ID + // is reserved. A value of 1 for HTT indicates the value in CPUID.1.EBX[23:16] (the Maximum + // number of addressable IDs for logical processors in this package) is valid for the + // package. set_bit(&mut leaf_1.result.edx, 28, cpu_count > 1); Ok(()) @@ -316,7 +305,7 @@ impl super::Cpuid { const LEAFBH_INDEX1_APICID: u32 = 7; // The following commit changed the behavior of KVM_GET_SUPPORTED_CPUID to no longer - // include leaf 0xB / sub-leaf 1. + // include CPUID.(EAX=0BH,ECX=1). // https://lore.kernel.org/all/20221027092036.2698180-1-pbonzini@redhat.com/ self.inner_mut() .entry(CpuidKey::subleaf(0xB, 0x1)) @@ -336,8 +325,8 @@ impl super::Cpuid { subleaf.result.eax = 0; subleaf.result.ebx = 0; subleaf.result.ecx = 0; - // EDX bits 31..0 contain x2APIC ID of current logical processor - // x2APIC increases the size of the APIC ID from 8 bits to 32 bits + // CPUID.(EAX=0BH,ECX=N).EDX[31:0] + // x2APIC ID of the current logical processor. subleaf.result.edx = u32::from(cpu_index); subleaf.flags = KvmCpuidFlags::SIGNIFICANT_INDEX; @@ -347,15 +336,15 @@ impl super::Cpuid { // reported at level type = 2." (Intel® 64 Architecture x2APIC // Specification, Ch. 2.8) match index { + // CPUID.(EAX=0BH,ECX=N):EAX[4:0] // Number of bits to shift right on x2APIC ID to get a unique topology ID of the // next level type*. All logical processors with the same // next level ID share current level. // // *Software should use this field (EAX[4:0]) to enumerate processor topology of // the system. - // - // bit_shifts_right_2x_apic_id_unique_topology_id: 0..5 + // CPUID.(EAX=0BH,ECX=N):EBX[15:0] // Number of logical processors at this level type. 
The number reflects // configuration as shipped by Intel**. // @@ -365,13 +354,11 @@ impl super::Cpuid { // number of logical processors available to BIOS/OS/Applications may be // different from the value of EBX[15:0], depending on // software and platform hardware configurations. - // - // logical_processors: 0..16 + // CPUID.(EAX=0BH,ECX=N):ECX[7:0] // Level number. Same value in ECX input. - // - // level_number: 0..8, + // CPUID.(EAX=0BH,ECX=N):ECX[15:8] // Level type*** // // If an input value n in ECX returns the invalid level-type of 0 in ECX[15:8], @@ -384,8 +371,6 @@ impl super::Cpuid { // - 1: SMT. // - 2: Core. // - 3-255: Reserved. - // - // level_type: 8..16 // Thread Level Topology; index = 0 0 => { From 7ebf98b08d53a8befc962a0128e4c887e8b6be10 Mon Sep 17 00:00:00 2001 From: Takahiro Itazuri Date: Thu, 13 Mar 2025 20:38:35 +0000 Subject: [PATCH 03/20] refactor(vmm): Use CPUID notation used in AMD APM The CPUID notation is widely used in various AMD docs including APM. Signed-off-by: Takahiro Itazuri --- .../cpu_config/x86_64/cpuid/amd/normalize.rs | 35 +++++++------------ 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/src/vmm/src/cpu_config/x86_64/cpuid/amd/normalize.rs b/src/vmm/src/cpu_config/x86_64/cpuid/amd/normalize.rs index 111960db00a..f0ee9322e12 100644 --- a/src/vmm/src/cpu_config/x86_64/cpuid/amd/normalize.rs +++ b/src/vmm/src/cpu_config/x86_64/cpuid/amd/normalize.rs @@ -153,6 +153,7 @@ impl super::AmdCpuid { // On non-AMD hosts this condition may never be true thus this loop may be // indefinite. + // CPUID Fn8000_0001D_EAX_x[4:0] (Field Name: CacheType) // Cache type. Identifies the type of cache. // ```text // Bits Description @@ -162,8 +163,6 @@ impl super::AmdCpuid { // 03h Unified cache // 1Fh-04h Reserved. 
// ``` - // - // cache_type: 0..4, let cache_type = result.eax & 15; if cache_type == 0 { break; @@ -186,10 +185,9 @@ impl super::AmdCpuid { let leaf_80000001 = self .get_mut(&CpuidKey::leaf(0x80000001)) .ok_or(NormalizeCpuidError::MissingLeaf0x80000001)?; + // CPUID Fn8000_0001_ECX[22] (Field Name: TopologyExtensions) // Topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID // Fn8000_001E_EDX. - // - // topology_extensions: 22, set_bit(&mut leaf_80000001.result.ecx, 22, true); Ok(()) } @@ -220,6 +218,7 @@ impl super::AmdCpuid { .get_mut(&CpuidKey::leaf(0x80000008)) .ok_or(FeatureEntryError::MissingLeaf0x80000008)?; + // CPUID Fn8000_0008_ECX[15:12] (Field Name: ApicIdSize) // APIC ID size. The number of bits in the initial APIC20[ApicId] value that indicate // logical processor ID within a package. The size of this field determines the // maximum number of logical processors (MNLP) that the package could @@ -228,15 +227,11 @@ impl super::AmdCpuid { // Fn8000_0008_ECX[NC]. A value of zero indicates that legacy methods must be // used to determine the maximum number of logical processors, as indicated by // CPUID Fn8000_0008_ECX[NC]. - // - // apic_id_size: 12..16, set_range(&mut leaf_80000008.result.ecx, 12..16, THREAD_ID_MAX_SIZE).unwrap(); + // CPUID Fn8000_0008_ECX[7:0] (Field Name: NC) // Number of physical threads - 1. The number of threads in the processor is NT+1 // (e.g., if NT = 0, then there is one thread). See “Legacy Method” on page 633. - // - // nt: 0..8, - // let sub = cpu_count .checked_sub(1) .ok_or(FeatureEntryError::NumberOfPhysicalThreadsOverflow)?; @@ -255,6 +250,7 @@ impl super::AmdCpuid { ) -> Result<(), ExtendedCacheTopologyError> { for i in 0.. { if let Some(subleaf) = self.get_mut(&CpuidKey::subleaf(0x8000001d, i)) { + // CPUID Fn8000_001D_EAX_x[7:5] (Field Name: CacheLevel) // Cache level. Identifies the level of this cache. Note that the enumeration value // is not necessarily equal to the cache level. 
// ```text @@ -265,10 +261,9 @@ impl super::AmdCpuid { // 011b Level 3 // 111b-100b Reserved. // ``` - // - // cache_level: 5..8 let cache_level = get_range(subleaf.result.eax, 5..8); + // CPUID Fn8000_001D_EAX_x[25:14] (Field Name: NumSharingCache) // Specifies the number of logical processors sharing the cache enumerated by N, // the value passed to the instruction in ECX. The number of logical processors // sharing this cache is the value of this field incremented by 1. To determine @@ -279,8 +274,6 @@ impl super::AmdCpuid { // // Logical processors with the same ShareId then share a cache. If // NumSharingCache+1 is not a power of two, round it up to the next power of two. - // - // num_sharing_cache: 14..26, match cache_level { // L1 & L2 Cache @@ -334,16 +327,18 @@ impl super::AmdCpuid { .get_mut(&CpuidKey::leaf(0x8000001e)) .ok_or(ExtendedApicIdError::MissingLeaf0x8000001e)?; + // CPUID Fn8000_001E_EAX[31:0] (Field Name: ExtendedApicId) // Extended APIC ID. If MSR0000_001B[ApicEn] = 0, this field is reserved. - // - // extended_apic_id: 0..32, set_range(&mut leaf_8000001e.result.eax, 0..32, u32::from(cpu_index)) .map_err(ExtendedApicIdError::ExtendedApicId)?; - // compute_unit_id: 0..8, + // CPUID Fn8000_001E_EBX[7:0] (Field Name: ComputeUnitId) + // Compute unit ID. Identifies a Compute Unit, which may be one or more physical cores that + // each implement one or more logical processors. set_range(&mut leaf_8000001e.result.ebx, 0..8, core_id) .map_err(ExtendedApicIdError::ComputeUnitId)?; + // CPUID Fn8000_001E_EBX[15:8] (Field Name: ThreadsPerComputeUnit) // Threads per compute unit (zero-based count). The actual number of threads // per compute unit is the value of this field + 1. To determine which logical // processors (threads) belong to a given Compute Unit, determine a ShareId @@ -355,28 +350,24 @@ impl super::AmdCpuid { // Unit. (If ThreadsPerComputeUnit+1 is not a power of two, round it up to the // next power of two). 
// - // threads_per_compute_unit: 8..16, - // // SAFETY: We know `cpus_per_core > 0` therefore this is always safe. let sub = u32::from(cpus_per_core.checked_sub(1).unwrap()); set_range(&mut leaf_8000001e.result.ebx, 8..16, sub) .map_err(ExtendedApicIdError::ThreadPerComputeUnit)?; + // CPUID Fn8000_001E_ECX[10:8] (Field Name: NodesPerProcessor) // Specifies the number of nodes in the package/socket in which this logical // processor resides. Node in this context corresponds to a processor die. // Encoding is N-1, where N is the number of nodes present in the socket. // - // nodes_per_processor: 8..11, - // // SAFETY: We know the value always fits within the range and thus is always safe. // Set nodes per processor. set_range(&mut leaf_8000001e.result.ecx, 8..11, NODES_PER_PROCESSOR).unwrap(); + // CPUID Fn8000_001E_ECX[7:0] (Field Name: NodeId) // Specifies the ID of the node containing the current logical processor. NodeId // values are unique across the system. // - // node_id: 0..8, - // // Put all the cpus in the same node. set_range(&mut leaf_8000001e.result.ecx, 0..8, 0).unwrap(); From b805228cb5ac45f5236f72a9ff00794cef703262 Mon Sep 17 00:00:00 2001 From: Takahiro Itazuri Date: Thu, 13 Mar 2025 10:59:57 +0000 Subject: [PATCH 04/20] refactor(vmm): Remove unneeded constants There is no benefit defining constants that are used only once. Rather, it makes the code harder to read since the definition and usage are far apart. 
Signed-off-by: Takahiro Itazuri --- .../src/cpu_config/x86_64/cpuid/normalize.rs | 21 ++++--------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs b/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs index 18963875813..da339e4f7a5 100644 --- a/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs +++ b/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs @@ -221,26 +221,13 @@ impl super::Cpuid { cpu_index: u8, cpu_count: u8, ) -> Result<(), FeatureInformationError> { - // Flush a cache line size. - const EBX_CLFLUSH_CACHELINE: u32 = 8; - - // PDCM: Perfmon and Debug Capability. - const ECX_PDCM_BITINDEX: u8 = 15; - - // TSC-Deadline. - const ECX_TSC_DEADLINE_BITINDEX: u8 = 24; - - // CPU is running on a hypervisor. - const ECX_HYPERVISOR_BITINDEX: u8 = 31; - let leaf_1 = self .get_mut(&CpuidKey::leaf(0x1)) .ok_or(FeatureInformationError::MissingLeaf1)?; // CPUID.01H:EBX[15:08] // CLFLUSH line size (Value * 8 = cache line size in bytes; used also by CLFLUSHOPT). - set_range(&mut leaf_1.result.ebx, 8..16, EBX_CLFLUSH_CACHELINE) - .map_err(FeatureInformationError::Clflush)?; + set_range(&mut leaf_1.result.ebx, 8..16, 8).map_err(FeatureInformationError::Clflush)?; // CPUID.01H:EBX[23:16] // Maximum number of addressable IDs for logical processors in this physical package. @@ -266,15 +253,15 @@ impl super::Cpuid { // CPUID.01H:ECX[15] (Mnemonic: PDCM) // Performance and Debug Capability: A value of 1 indicates the processor supports the // performance and debug feature indication MSR IA32_PERF_CAPABILITIES. - set_bit(&mut leaf_1.result.ecx, ECX_PDCM_BITINDEX, false); + set_bit(&mut leaf_1.result.ecx, 15, false); // CPUID.01H:ECX[24] (Mnemonic: TSC-Deadline) // A value of 1 indicates that the processor’s local APIC timer supports one-shot operation // using a TSC deadline value. 
- set_bit(&mut leaf_1.result.ecx, ECX_TSC_DEADLINE_BITINDEX, true); + set_bit(&mut leaf_1.result.ecx, 24, true); // CPUID.01H:ECX[31] (Mnemonic: Hypervisor) - set_bit(&mut leaf_1.result.ecx, ECX_HYPERVISOR_BITINDEX, true); + set_bit(&mut leaf_1.result.ecx, 31, true); // CPUID.01H:EDX[28] (Mnemonic: HTT) // Max APIC IDs reserved field is Valid. A value of 0 for HTT indicates there is only a From 8bc742b9cb49278eee1905abfc20b2eb4f5f5fc0 Mon Sep 17 00:00:00 2001 From: Takahiro Itazuri Date: Thu, 13 Mar 2025 11:24:37 +0000 Subject: [PATCH 05/20] refactor(vmm): Refer to Intel SDM rather than Intel x2APIC spec As mentioned in [1], Intel 64 Architecture x2APIC Specification has been merged into Volumes 2 and 3 of Intel 64 and IA-32 architectures software developer's manual. [1]: https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sdm.html Signed-off-by: Takahiro Itazuri --- .../src/cpu_config/x86_64/cpuid/normalize.rs | 52 ++++++++----------- 1 file changed, 21 insertions(+), 31 deletions(-) diff --git a/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs b/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs index da339e4f7a5..31fc3415d24 100644 --- a/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs +++ b/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs @@ -317,47 +317,37 @@ impl super::Cpuid { subleaf.result.edx = u32::from(cpu_index); subleaf.flags = KvmCpuidFlags::SIGNIFICANT_INDEX; - // "If SMT is not present in a processor implementation but CPUID leaf 0BH is - // supported, CPUID.EAX=0BH, ECX=0 will return EAX = 0, EBX = 1 and - // level type = 1. Number of logical processors at the core level is - // reported at level type = 2." (Intel® 64 Architecture x2APIC - // Specification, Ch. 2.8) match index { // CPUID.(EAX=0BH,ECX=N):EAX[4:0] - // Number of bits to shift right on x2APIC ID to get a unique topology ID of the - // next level type*. All logical processors with the same - // next level ID share current level. 
- // - // *Software should use this field (EAX[4:0]) to enumerate processor topology of - // the system. + // The number of bits that the x2APIC ID must be shifted to the right to address + // instances of the next higher-scoped domain. When logical processor is not + // supported by the processor, the value of this field at the Logical Processor + // domain sub-leaf may be returned as either 0 (no allocated bits in the x2APIC + // ID) or 1 (one allocated bit in the x2APIC ID); software should plan + // accordingly. // CPUID.(EAX=0BH,ECX=N):EBX[15:0] - // Number of logical processors at this level type. The number reflects - // configuration as shipped by Intel**. - // - // **Software must not use EBX[15:0] to enumerate processor topology of the - // system. This value in this field (EBX[15:0]) is only - // intended for display/diagnostic purposes. The actual - // number of logical processors available to BIOS/OS/Applications may be - // different from the value of EBX[15:0], depending on - // software and platform hardware configurations. + // The number of logical processors across all instances of this domain within + // the next-higher scoped domain. (For example, in a processor socket/package + // comprising "M" dies of "N" cores each, where each core has "L" logical + // processors, the "die" domain sub-leaf value of this field would be M*N*L.) + // This number reflects configuration as shipped by Intel. Note, software must + // not use this field to enumerate processor topology. // CPUID.(EAX=0BH,ECX=N):ECX[7:0] - // Level number. Same value in ECX input. + // The input ECX sub-leaf index. // CPUID.(EAX=0BH,ECX=N):ECX[15:8] - // Level type*** + // Domain Type. This field provides an identification value which indicates the + // domain as shown below. Although domains are ordered, their assigned + // identification values are not and software should not depend on it. 
// - // If an input value n in ECX returns the invalid level-type of 0 in ECX[15:8], - // other input values with ECX>n also return 0 in ECX[15:8]. + // Hierarchy Domain Domain Type Identification Value + // ----------------------------------------------------------------- + // Lowest Logical Processor 1 + // Highest Core 2 // - // ***The value of the “level type” field is not related to level numbers in any - // way, higher “level type” values do not mean higher - // levels. Level type field has the following encoding: - // - 0: Invalid. - // - 1: SMT. - // - 2: Core. - // - 3-255: Reserved. + // (Note that enumeration values of 0 and 3-255 are reserved.) // Thread Level Topology; index = 0 0 => { From 35230df6658fa9c17330d12e66c61582d282fcde Mon Sep 17 00:00:00 2001 From: Takahiro Itazuri Date: Thu, 13 Mar 2025 11:48:40 +0000 Subject: [PATCH 06/20] refactor(vmm): Update variable names and comments with SDM notation The last commit updates quotes of Intel specification from x2APIC spec to Intel SDM. In accrodance with the change, update variable names and comments (e.g. level => domain) and correct some comments appropriately. 
Signed-off-by: Takahiro Itazuri --- .../src/cpu_config/x86_64/cpuid/normalize.rs | 54 +++++++++---------- 1 file changed, 24 insertions(+), 30 deletions(-) diff --git a/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs b/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs index 31fc3415d24..a867981ebfe 100644 --- a/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs +++ b/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs @@ -64,10 +64,10 @@ pub enum ExtendedTopologyError { ApicId(CheckedAssignError), /// Failed to set `Number of logical processors at this level type`: {0} LogicalProcessors(CheckedAssignError), - /// Failed to set `Level Type`: {0} - LevelType(CheckedAssignError), - /// Failed to set `Level Number`: {0} - LevelNumber(CheckedAssignError), + /// Failed to set `Domain Type`: {0} + DomainType(CheckedAssignError), + /// Failed to set `Input ECX`: {0} + InputEcx(CheckedAssignError), /// Failed to set all leaves, as more than `u32::MAX` sub-leaves are present: {0} Overflow(>::Error), } @@ -282,10 +282,6 @@ impl super::Cpuid { cpu_bits: u8, cpus_per_core: u8, ) -> Result<(), ExtendedTopologyError> { - /// Level type used for setting thread level processor topology. - const LEVEL_TYPE_THREAD: u32 = 1; - /// Level type used for setting core level processor topology. - const LEVEL_TYPE_CORE: u32 = 2; /// The APIC ID shift in leaf 0xBh specifies the number of bits to shit the x2APIC ID to /// get a unique topology of the next level. This allows 128 logical /// processors/package. @@ -308,7 +304,7 @@ impl super::Cpuid { for index in 0.. { if let Some(subleaf) = self.get_mut(&CpuidKey::subleaf(0xB, index)) { - // reset eax, ebx, ecx + // Reset eax, ebx, ecx subleaf.result.eax = 0; subleaf.result.ebx = 0; subleaf.result.ecx = 0; @@ -349,47 +345,45 @@ impl super::Cpuid { // // (Note that enumeration values of 0 and 3-255 are reserved.) 
- // Thread Level Topology; index = 0 + // Logical processor domain 0 => { // To get the next level APIC ID, shift right with at most 1 because we have - // maximum 2 hyperthreads per core that can be represented by 1 bit. + // maximum 2 logical procerssors per core that can be represented by 1 bit. set_range(&mut subleaf.result.eax, 0..5, u32::from(cpu_bits)) .map_err(ExtendedTopologyError::ApicId)?; // When cpu_count == 1 or HT is disabled, there is 1 logical core at this - // level Otherwise there are 2 + // domain; otherwise there are 2 set_range(&mut subleaf.result.ebx, 0..16, u32::from(cpus_per_core)) .map_err(ExtendedTopologyError::LogicalProcessors)?; - set_range(&mut subleaf.result.ecx, 8..16, LEVEL_TYPE_THREAD) - .map_err(ExtendedTopologyError::LevelType)?; + // Skip setting 0 to ECX[7:0] since it's already reset to 0. + + // Set the domain type identification value for logical processor, + set_range(&mut subleaf.result.ecx, 8..16, 1) + .map_err(ExtendedTopologyError::DomainType)?; } - // Core Level Processor Topology; index = 1 + // Core domain 1 => { set_range(&mut subleaf.result.eax, 0..5, LEAFBH_INDEX1_APICID) .map_err(ExtendedTopologyError::ApicId)?; + // Configure such that the next higher-scoped domain (i.e. socket) include + // all logical processors. set_range(&mut subleaf.result.ebx, 0..16, u32::from(cpu_count)) .map_err(ExtendedTopologyError::LogicalProcessors)?; - // We expect here as this is an extremely rare case that is unlikely to ever - // occur. It would require manual editing of the CPUID structure to push - // more than 2^32 subleaves. - let sub = index; - set_range(&mut subleaf.result.ecx, 0..8, sub) - .map_err(ExtendedTopologyError::LevelNumber)?; - - set_range(&mut subleaf.result.ecx, 8..16, LEVEL_TYPE_CORE) - .map_err(ExtendedTopologyError::LevelType)?; + // Setting the input ECX value (i.e. 
`index`) + set_range(&mut subleaf.result.ecx, 0..8, index) + .map_err(ExtendedTopologyError::InputEcx)?; + + // Set the domain type identification value for core. + set_range(&mut subleaf.result.ecx, 8..16, 2) + .map_err(ExtendedTopologyError::DomainType)?; } - // Core Level Processor Topology; index >=2 - // No other levels available; This should already be set correctly, - // and it is added here as a "re-enforcement" in case we run on - // different hardware _ => { // We expect here as this is an extremely rare case that is unlikely to ever - // occur. It would require manual editing of the CPUID structure to push - // more than 2^32 subleaves. + // occur. subleaf.result.ecx = index; } } From 7747883ed8ca4abcb41a9c60c9c620c764e30995 Mon Sep 17 00:00:00 2001 From: Takahiro Itazuri Date: Thu, 13 Mar 2025 18:14:07 +0000 Subject: [PATCH 07/20] refactor(vmm): Error if subleaf >= 2 given for leaf 0xB Our supported kernels no longer return any subleaves >= 1. We inserted subleaf 1 intentionally but subleaves >= 2 are not expected there. Signed-off-by: Takahiro Itazuri --- src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs b/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs index a867981ebfe..cddfc2b53b2 100644 --- a/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs +++ b/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs @@ -70,6 +70,8 @@ pub enum ExtendedTopologyError { InputEcx(CheckedAssignError), /// Failed to set all leaves, as more than `u32::MAX` sub-leaves are present: {0} Overflow(>::Error), + /// Unexpected subleaf: {0} + UnexpectedSubleaf(u32) } /// Error type for setting leaf 0x80000006 of Cpuid::normalize(). @@ -382,9 +384,11 @@ impl super::Cpuid { .map_err(ExtendedTopologyError::DomainType)?; } _ => { - // We expect here as this is an extremely rare case that is unlikely to ever - // occur. 
- subleaf.result.ecx = index; + // KVM no longer returns any subleaf numbers greater than 0. The patch was + // merged in v6.2 and backported to v5.10. Subleaves >= 2 should not be + // included. + // https://github.com/torvalds/linux/commit/45e966fcca03ecdcccac7cb236e16eea38cc18af + return Err(ExtendedTopologyError::UnexpectedSubleaf(index)); } } } else { From b486b58fb25ba26311949c7925bb95fc76082ef6 Mon Sep 17 00:00:00 2001 From: Takahiro Itazuri Date: Thu, 13 Mar 2025 12:24:13 +0000 Subject: [PATCH 08/20] fix(vmm): Calc right-shift bits to address socket ID The number of bits that the x2APIC ID must be shifted to the right to address instances of the socket domain, which is the next higher-scoped domain above the core domain, was hardcoded as 7. That value supports up to 128 vCPUs. Currently the max vCPU count is hardcoded as 32, so it is enough, but we might want to increase it in the future. To avoid unexpected issues when increasing the max vCPU count, calculate the value based on the max vCPU count. Signed-off-by: Takahiro Itazuri --- .../src/cpu_config/x86_64/cpuid/normalize.rs | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs b/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs index cddfc2b53b2..bdf0e1c01ae 100644 --- a/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs +++ b/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs @@ -4,6 +4,7 @@ use crate::cpu_config::x86_64::cpuid::{ CpuidEntry, CpuidKey, CpuidRegisters, CpuidTrait, KvmCpuidFlags, cpuid, }; +use crate::vmm_config::machine_config::MAX_SUPPORTED_VCPUS; /// Error type for [`super::Cpuid::normalize`]. #[allow(clippy::module_name_repetitions)] @@ -284,11 +285,6 @@ impl super::Cpuid { cpu_bits: u8, cpus_per_core: u8, ) -> Result<(), ExtendedTopologyError> { - /// The APIC ID shift in leaf 0xBh specifies the number of bits to shit the x2APIC ID to - /// get a unique topology of the next level. 
This allows 128 logical - /// processors/package. - const LEAFBH_INDEX1_APICID: u32 = 7; - // The following commit changed the behavior of KVM_GET_SUPPORTED_CPUID to no longer // include CPUID.(EAX=0BH,ECX=1). // https://lore.kernel.org/all/20221027092036.2698180-1-pbonzini@redhat.com/ @@ -367,11 +363,17 @@ impl super::Cpuid { } // Core domain 1 => { - set_range(&mut subleaf.result.eax, 0..5, LEAFBH_INDEX1_APICID) - .map_err(ExtendedTopologyError::ApicId)?; - // Configure such that the next higher-scoped domain (i.e. socket) include // all logical processors. + // + // The CPUID.(EAX=0BH,ECX=1).EAX[4:0] value must be an integer N such that + // 2^N is greater than or equal to the maximum number of vCPUs. + set_range( + &mut subleaf.result.eax, + 0..5, + MAX_SUPPORTED_VCPUS.next_power_of_two().ilog2(), + ) + .map_err(ExtendedTopologyError::ApicId)?; set_range(&mut subleaf.result.ebx, 0..16, u32::from(cpu_count)) .map_err(ExtendedTopologyError::LogicalProcessors)?; From 1c009b5b714448c990cc8e6cfe38007bb259bde6 Mon Sep 17 00:00:00 2001 From: Takahiro Itazuri Date: Thu, 13 Mar 2025 18:27:13 +0000 Subject: [PATCH 09/20] refactor(vmm): Better error messages for CPUID normalization Show which combination of subleaf, register and bit(s) failed to set, remove unused error variant and reorder error variants in alphabetical order. 
Signed-off-by: Takahiro Itazuri --- .../cpu_config/x86_64/cpuid/amd/normalize.rs | 32 ++++++++++--------- .../x86_64/cpuid/intel/normalize.rs | 14 ++++---- .../src/cpu_config/x86_64/cpuid/normalize.rs | 32 +++++++++---------- 3 files changed, 39 insertions(+), 39 deletions(-) diff --git a/src/vmm/src/cpu_config/x86_64/cpuid/amd/normalize.rs b/src/vmm/src/cpu_config/x86_64/cpuid/amd/normalize.rs index f0ee9322e12..52bb806472d 100644 --- a/src/vmm/src/cpu_config/x86_64/cpuid/amd/normalize.rs +++ b/src/vmm/src/cpu_config/x86_64/cpuid/amd/normalize.rs @@ -48,10 +48,10 @@ pub enum PassthroughCacheTopologyError { pub enum FeatureEntryError { /// Missing leaf 0x80000008. MissingLeaf0x80000008, - /// Failed to set `nt` (number of physical threads) due to overflow. - NumberOfPhysicalThreadsOverflow, - /// Failed to set `nt` (number of physical threads). + /// Failed to set number of physical threads (CPUID.80000008H:ECX[7:0]): {0} NumberOfPhysicalThreads(CheckedAssignError), + /// Failed to set number of physical threads (CPUID.80000008H:ECX[7:0]) due to overflow. + NumberOfPhysicalThreadsOverflow, } /// Error type for setting leaf 0x8000001d section of [`super::AmdCpuid::normalize`]. @@ -59,22 +59,24 @@ pub enum FeatureEntryError { pub enum ExtendedCacheTopologyError { /// Missing leaf 0x8000001d. MissingLeaf0x8000001d, - /// Failed to set `num_sharing_cache` due to overflow. - NumSharingCacheOverflow, - /// Failed to set `num_sharing_cache`: {0} - NumSharingCache(CheckedAssignError), + #[rustfmt::skip] + /// Failed to set number of logical processors sharing cache(CPUID.(EAX=8000001DH,ECX={0}):EAX[25:14]): {1} + NumSharingCache(u32, CheckedAssignError), + #[rustfmt::skip] + /// Failed to set number of logical processors sharing cache (CPUID.(EAX=8000001DH,ECX={0}):EAX[25:14]) due to overflow. + NumSharingCacheOverflow(u32), } /// Error type for setting leaf 0x8000001e section of [`super::AmdCpuid::normalize`]. 
#[derive(Debug, thiserror::Error, displaydoc::Display, Eq, PartialEq)] pub enum ExtendedApicIdError { + /// Failed to set compute unit ID (CPUID.8000001EH:EBX[7:0]): {0} + ComputeUnitId(CheckedAssignError), + /// Failed to set extended APIC ID (CPUID.8000001EH:EAX[31:0]): {0} + ExtendedApicId(CheckedAssignError), /// Missing leaf 0x8000001e. MissingLeaf0x8000001e, - /// Failed to set `extended_apic_id`: {0} - ExtendedApicId(CheckedAssignError), - /// Failed to set `compute_unit_id`: {0} - ComputeUnitId(CheckedAssignError), - /// Failed to set `threads_per_compute_unit`: {0} + /// Failed to set threads per core unit (CPUID:8000001EH:EBX[15:8]): {0} ThreadPerComputeUnit(CheckedAssignError), } @@ -282,16 +284,16 @@ impl super::AmdCpuid { // SAFETY: We know `cpus_per_core > 0` therefore this is always safe. let sub = u32::from(cpus_per_core.checked_sub(1).unwrap()); set_range(&mut subleaf.result.eax, 14..26, sub) - .map_err(ExtendedCacheTopologyError::NumSharingCache)?; + .map_err(|err| ExtendedCacheTopologyError::NumSharingCache(i, err))?; } // L3 Cache // The L3 cache is shared among all the logical threads 3 => { let sub = cpu_count .checked_sub(1) - .ok_or(ExtendedCacheTopologyError::NumSharingCacheOverflow)?; + .ok_or(ExtendedCacheTopologyError::NumSharingCacheOverflow(i))?; set_range(&mut subleaf.result.eax, 14..26, u32::from(sub)) - .map_err(ExtendedCacheTopologyError::NumSharingCache)?; + .map_err(|err| ExtendedCacheTopologyError::NumSharingCache(i, err))?; } _ => (), } diff --git a/src/vmm/src/cpu_config/x86_64/cpuid/intel/normalize.rs b/src/vmm/src/cpu_config/x86_64/cpuid/intel/normalize.rs index 0bfc5d89632..af4058298da 100644 --- a/src/vmm/src/cpu_config/x86_64/cpuid/intel/normalize.rs +++ b/src/vmm/src/cpu_config/x86_64/cpuid/intel/normalize.rs @@ -34,14 +34,14 @@ pub enum NormalizeCpuidError { #[allow(clippy::enum_variant_names)] #[derive(Debug, thiserror::Error, displaydoc::Display, Eq, PartialEq)] pub enum DeterministicCacheError { - /// Failed to set 
`Maximum number of addressable IDs for logical processors sharing this cache` due to underflow in cpu count. - MaxCpusPerCoreUnderflow, - /// Failed to set `Maximum number of addressable IDs for logical processors sharing this cache`: {0}. - MaxCpusPerCore(CheckedAssignError), - /// Failed to set `Maximum number of addressable IDs for processor cores in the physical package` due to underflow in cores. - MaxCorePerPackageUnderflow, - /// Failed to set `Maximum number of addressable IDs for processor cores in the physical package`: {0}. + /// Failed to set max addressable core ID in physical package (CPUID.04H:EAX[31:26]): {0}. MaxCorePerPackage(CheckedAssignError), + /// Failed to set max addressable core ID in physical package (CPUID.04H:EAX[31:26]) due to underflow in cores. + MaxCorePerPackageUnderflow, + /// Failed to set max addressable processor ID sharing cache (CPUID.04H:EAX[25:14]): {0}. + MaxCpusPerCore(CheckedAssignError), + /// Failed to set max addressable processor ID sharing cache (CPUID.04H:EAX[25:14]) due to underflow in cpu count. + MaxCpusPerCoreUnderflow, } /// We always use this brand string. 
diff --git a/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs b/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs index bdf0e1c01ae..73c4a3c793e 100644 --- a/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs +++ b/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs @@ -61,16 +61,14 @@ pub enum GetMaxCpusPerPackageError { #[rustfmt::skip] #[derive(Debug, thiserror::Error, displaydoc::Display, Eq, PartialEq)] pub enum ExtendedTopologyError { - /// Failed to set `Number of bits to shift right on x2APIC ID to get a unique topology ID of the next level type`: {0} - ApicId(CheckedAssignError), - /// Failed to set `Number of logical processors at this level type`: {0} - LogicalProcessors(CheckedAssignError), - /// Failed to set `Domain Type`: {0} - DomainType(CheckedAssignError), - /// Failed to set `Input ECX`: {0} - InputEcx(CheckedAssignError), - /// Failed to set all leaves, as more than `u32::MAX` sub-leaves are present: {0} - Overflow(>::Error), + /// Failed to set domain type (CPUID.(EAX=0xB,ECX={0}):ECX[15:8]): {1} + DomainType(u32, CheckedAssignError), + /// Failed to set input ECX (CPUID.(EAX=0xB,ECX={0}):ECX[7:0]): {1} + InputEcx(u32, CheckedAssignError), + /// Failed to set number of logical processors (CPUID.(EAX=0xB,ECX={0}):EBX[15:0]): {1} + NumLogicalProcs(u32, CheckedAssignError), + /// Failed to set right-shift bits (CPUID.(EAX=0xB,ECX={0}):EAX[4:0]): {1} + RightShiftBits(u32, CheckedAssignError), /// Unexpected subleaf: {0} UnexpectedSubleaf(u32) } @@ -348,18 +346,18 @@ impl super::Cpuid { // To get the next level APIC ID, shift right with at most 1 because we have // maximum 2 logical procerssors per core that can be represented by 1 bit. 
set_range(&mut subleaf.result.eax, 0..5, u32::from(cpu_bits)) - .map_err(ExtendedTopologyError::ApicId)?; + .map_err(|err| ExtendedTopologyError::RightShiftBits(index, err))?; // When cpu_count == 1 or HT is disabled, there is 1 logical core at this // domain; otherwise there are 2 set_range(&mut subleaf.result.ebx, 0..16, u32::from(cpus_per_core)) - .map_err(ExtendedTopologyError::LogicalProcessors)?; + .map_err(|err| ExtendedTopologyError::NumLogicalProcs(index, err))?; // Skip setting 0 to ECX[7:0] since it's already reset to 0. // Set the domain type identification value for logical processor, set_range(&mut subleaf.result.ecx, 8..16, 1) - .map_err(ExtendedTopologyError::DomainType)?; + .map_err(|err| ExtendedTopologyError::DomainType(index, err))?; } // Core domain 1 => { @@ -373,17 +371,17 @@ impl super::Cpuid { 0..5, MAX_SUPPORTED_VCPUS.next_power_of_two().ilog2(), ) - .map_err(ExtendedTopologyError::ApicId)?; + .map_err(|err| ExtendedTopologyError::RightShiftBits(index, err))?; set_range(&mut subleaf.result.ebx, 0..16, u32::from(cpu_count)) - .map_err(ExtendedTopologyError::LogicalProcessors)?; + .map_err(|err| ExtendedTopologyError::NumLogicalProcs(index, err))?; // Setting the input ECX value (i.e. `index`) set_range(&mut subleaf.result.ecx, 0..8, index) - .map_err(ExtendedTopologyError::InputEcx)?; + .map_err(|err| ExtendedTopologyError::InputEcx(index, err))?; // Set the domain type identification value for core. set_range(&mut subleaf.result.ecx, 8..16, 2) - .map_err(ExtendedTopologyError::DomainType)?; + .map_err(|err| ExtendedTopologyError::DomainType(index, err))?; } _ => { // KVM no longer returns any subleaf numbers greater than 0. The patch was From a3bbd80eb98a08bb2c9c41bc31a4d41f0f15adf3 Mon Sep 17 00:00:00 2001 From: Takahiro Itazuri Date: Thu, 13 Mar 2025 19:50:29 +0000 Subject: [PATCH 10/20] refactor(vmm): Bit-range manipulation with RangeInclusive Since CPUID notation uses the upper bound inclusive, use RangeInclusive instead of Range. 
Also, make set_range() and get_range() more readable and reliable by always validating that `y` fits within the given range and by asserting `end` is less than 32. Signed-off-by: Takahiro Itazuri --- .../cpu_config/x86_64/cpuid/amd/normalize.rs | 20 ++-- .../x86_64/cpuid/intel/normalize.rs | 8 +- .../src/cpu_config/x86_64/cpuid/normalize.rs | 106 ++++++++---------- 3 files changed, 61 insertions(+), 73 deletions(-) diff --git a/src/vmm/src/cpu_config/x86_64/cpuid/amd/normalize.rs b/src/vmm/src/cpu_config/x86_64/cpuid/amd/normalize.rs index 52bb806472d..e481bc17681 100644 --- a/src/vmm/src/cpu_config/x86_64/cpuid/amd/normalize.rs +++ b/src/vmm/src/cpu_config/x86_64/cpuid/amd/normalize.rs @@ -229,7 +229,7 @@ impl super::AmdCpuid { // Fn8000_0008_ECX[NC]. A value of zero indicates that legacy methods must be // used to determine the maximum number of logical processors, as indicated by // CPUID Fn8000_0008_ECX[NC]. - set_range(&mut leaf_80000008.result.ecx, 12..16, THREAD_ID_MAX_SIZE).unwrap(); + set_range(&mut leaf_80000008.result.ecx, 12..=15, THREAD_ID_MAX_SIZE).unwrap(); // CPUID Fn8000_0008_ECX[7:0] (Field Name: NC) // Number of physical threads - 1. The number of threads in the processor is NT+1 @@ -237,7 +237,7 @@ impl super::AmdCpuid { let sub = cpu_count .checked_sub(1) .ok_or(FeatureEntryError::NumberOfPhysicalThreadsOverflow)?; - set_range(&mut leaf_80000008.result.ecx, 0..8, u32::from(sub)) + set_range(&mut leaf_80000008.result.ecx, 0..=7, u32::from(sub)) .map_err(FeatureEntryError::NumberOfPhysicalThreads)?; Ok(()) @@ -263,7 +263,7 @@ impl super::AmdCpuid { // 011b Level 3 // 111b-100b Reserved. 
// ``` - let cache_level = get_range(subleaf.result.eax, 5..8); + let cache_level = get_range(subleaf.result.eax, 5..=7); // CPUID Fn8000_001D_EAX_x[25:14] (Field Name: NumSharingCache) // Specifies the number of logical processors sharing the cache enumerated by N, @@ -283,7 +283,7 @@ impl super::AmdCpuid { 1 | 2 => { // SAFETY: We know `cpus_per_core > 0` therefore this is always safe. let sub = u32::from(cpus_per_core.checked_sub(1).unwrap()); - set_range(&mut subleaf.result.eax, 14..26, sub) + set_range(&mut subleaf.result.eax, 14..=25, sub) .map_err(|err| ExtendedCacheTopologyError::NumSharingCache(i, err))?; } // L3 Cache @@ -292,7 +292,7 @@ impl super::AmdCpuid { let sub = cpu_count .checked_sub(1) .ok_or(ExtendedCacheTopologyError::NumSharingCacheOverflow(i))?; - set_range(&mut subleaf.result.eax, 14..26, u32::from(sub)) + set_range(&mut subleaf.result.eax, 14..=25, u32::from(sub)) .map_err(|err| ExtendedCacheTopologyError::NumSharingCache(i, err))?; } _ => (), @@ -331,13 +331,13 @@ impl super::AmdCpuid { // CPUID Fn8000_001E_EAX[31:0] (Field Name: ExtendedApicId) // Extended APIC ID. If MSR0000_001B[ApicEn] = 0, this field is reserved. - set_range(&mut leaf_8000001e.result.eax, 0..32, u32::from(cpu_index)) + set_range(&mut leaf_8000001e.result.eax, 0..=31, u32::from(cpu_index)) .map_err(ExtendedApicIdError::ExtendedApicId)?; // CPUID Fn8000_001E_EBX[7:0] (Field Name: ComputeUnitId) // Compute unit ID. Identifies a Compute Unit, which may be one or more physical cores that // each implement one or more logical processors. - set_range(&mut leaf_8000001e.result.ebx, 0..8, core_id) + set_range(&mut leaf_8000001e.result.ebx, 0..=7, core_id) .map_err(ExtendedApicIdError::ComputeUnitId)?; // CPUID Fn8000_001E_EBX[15:8] (Field Name: ThreadsPerComputeUnit) @@ -354,7 +354,7 @@ impl super::AmdCpuid { // // SAFETY: We know `cpus_per_core > 0` therefore this is always safe. 
let sub = u32::from(cpus_per_core.checked_sub(1).unwrap()); - set_range(&mut leaf_8000001e.result.ebx, 8..16, sub) + set_range(&mut leaf_8000001e.result.ebx, 8..=15, sub) .map_err(ExtendedApicIdError::ThreadPerComputeUnit)?; // CPUID Fn8000_001E_ECX[10:8] (Field Name: NodesPerProcessor) @@ -364,14 +364,14 @@ impl super::AmdCpuid { // // SAFETY: We know the value always fits within the range and thus is always safe. // Set nodes per processor. - set_range(&mut leaf_8000001e.result.ecx, 8..11, NODES_PER_PROCESSOR).unwrap(); + set_range(&mut leaf_8000001e.result.ecx, 8..=10, NODES_PER_PROCESSOR).unwrap(); // CPUID Fn8000_001E_ECX[7:0] (Field Name: NodeId) // Specifies the ID of the node containing the current logical processor. NodeId // values are unique across the system. // // Put all the cpus in the same node. - set_range(&mut leaf_8000001e.result.ecx, 0..8, 0).unwrap(); + set_range(&mut leaf_8000001e.result.ecx, 0..=7, 0).unwrap(); Ok(()) } diff --git a/src/vmm/src/cpu_config/x86_64/cpuid/intel/normalize.rs b/src/vmm/src/cpu_config/x86_64/cpuid/intel/normalize.rs index af4058298da..923fc67543e 100644 --- a/src/vmm/src/cpu_config/x86_64/cpuid/intel/normalize.rs +++ b/src/vmm/src/cpu_config/x86_64/cpuid/intel/normalize.rs @@ -99,7 +99,7 @@ impl super::IntelCpuid { // CPUID.04H:EAX[7:5] // Cache Level (Starts at 1) - let cache_level = get_range(subleaf.result.eax, 5..8); + let cache_level = get_range(subleaf.result.eax, 5..=7); // CPUID.04H:EAX[25:14] // Maximum number of addressable IDs for logical processors sharing this cache. 
@@ -116,7 +116,7 @@ impl super::IntelCpuid { // The L1 & L2 cache is shared by at most 2 hyperthreads 1 | 2 => { let sub = u32::from(cpus_per_core.checked_sub(1).unwrap()); - set_range(&mut subleaf.result.eax, 14..26, sub) + set_range(&mut subleaf.result.eax, 14..=25, sub) .map_err(DeterministicCacheError::MaxCpusPerCore)?; } // L3 Cache @@ -127,7 +127,7 @@ impl super::IntelCpuid { .checked_sub(1) .ok_or(DeterministicCacheError::MaxCpusPerCoreUnderflow)?, ); - set_range(&mut subleaf.result.eax, 14..26, sub) + set_range(&mut subleaf.result.eax, 14..=25, sub) .map_err(DeterministicCacheError::MaxCpusPerCore)?; } _ => (), @@ -150,7 +150,7 @@ impl super::IntelCpuid { let sub = u32::from(cores) .checked_sub(1) .ok_or(DeterministicCacheError::MaxCorePerPackageUnderflow)?; - set_range(&mut subleaf.result.eax, 26..32, sub) + set_range(&mut subleaf.result.eax, 26..=31, sub) .map_err(DeterministicCacheError::MaxCorePerPackage)?; } else { break; diff --git a/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs b/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs index 73c4a3c793e..f9359036f5f 100644 --- a/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs +++ b/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs @@ -95,66 +95,54 @@ pub fn set_bit(x: &mut u32, bit: u8, y: bool) { } /// Sets a given range to a given value. -#[allow(clippy::arithmetic_side_effects)] pub fn set_range( x: &mut u32, - range: std::ops::Range, + range: std::ops::RangeInclusive, y: u32, ) -> Result<(), CheckedAssignError> { - debug_assert!(range.end >= range.start); - match range.end - range.start { - z @ 0..=31 => { - if y >= 2u32.pow(u32::from(z)) { - Err(CheckedAssignError) - } else { - let shift = y << range.start; - *x = shift | (*x & !mask(range)); - Ok(()) - } - } - 32 => { - let shift = y << range.start; - *x = shift | (*x & !mask(range)); - Ok(()) - } - 33.. 
=> Err(CheckedAssignError), + let start = *range.start(); + let end = *range.end(); + + debug_assert!(end >= start); + debug_assert!(end < 32); + + // Ensure `y` fits within the number of bits in the specified range. + // Note that + // - 1 <= `num_bits` <= 32 from the above assertion + // - if `num_bits` equals to 32, `y` always fits within it since `y` is `u32`. + let num_bits = end - start + 1; + if num_bits < 32 && y >= (1u32 << num_bits) { + return Err(CheckedAssignError); } + + let mask = get_mask(range); + *x = (*x & !mask) | (y << start); + + Ok(()) } + /// Gets a given range within a given value. -#[allow(clippy::arithmetic_side_effects)] -pub fn get_range(x: u32, range: std::ops::Range) -> u32 { - debug_assert!(range.end >= range.start); - (x & mask(range.clone())) >> range.start +pub fn get_range(x: u32, range: std::ops::RangeInclusive) -> u32 { + let start = *range.start(); + let end = *range.end(); + + debug_assert!(end >= start); + debug_assert!(end < 32); + + let mask = get_mask(range); + (x & mask) >> start } /// Returns a mask where the given range is ones. -#[allow( - clippy::as_conversions, - clippy::arithmetic_side_effects, - clippy::cast_possible_truncation -)] -const fn mask(range: std::ops::Range) -> u32 { - /// Returns a value where in the binary representation all bits to the right of the x'th bit - /// from the left are 1. 
- #[allow(clippy::unreachable)] - const fn shift(x: u8) -> u32 { - if x == 0 { - 0 - } else if x < u32::BITS as u8 { - (1 << x) - 1 - } else if x == u32::BITS as u8 { - u32::MAX - } else { - unreachable!() - } +const fn get_mask(range: std::ops::RangeInclusive) -> u32 { + let num_bits = *range.end() - *range.start() + 1; + let shift = *range.start(); + + if num_bits == 32 { + u32::MAX + } else { + ((1u32 << num_bits) - 1) << shift } - - debug_assert!(range.end >= range.start); - debug_assert!(range.end <= u32::BITS as u8); - - let front = shift(range.start); - let back = shift(range.end); - !front & back } // We use this 2nd implementation so we can conveniently define functions only used within @@ -228,7 +216,7 @@ impl super::Cpuid { // CPUID.01H:EBX[15:08] // CLFLUSH line size (Value * 8 = cache line size in bytes; used also by CLFLUSHOPT). - set_range(&mut leaf_1.result.ebx, 8..16, 8).map_err(FeatureInformationError::Clflush)?; + set_range(&mut leaf_1.result.ebx, 8..=15, 8).map_err(FeatureInformationError::Clflush)?; // CPUID.01H:EBX[23:16] // Maximum number of addressable IDs for logical processors in this physical package. @@ -240,7 +228,7 @@ impl super::Cpuid { get_max_cpus_per_package(cpu_count) .map_err(FeatureInformationError::GetMaxCpusPerPackage)?, ); - set_range(&mut leaf_1.result.ebx, 16..24, max_cpus_per_package) + set_range(&mut leaf_1.result.ebx, 16..=23, max_cpus_per_package) .map_err(FeatureInformationError::SetMaxCpusPerPackage)?; // CPUID.01H:EBX[31:24] @@ -248,7 +236,7 @@ impl super::Cpuid { // // The 8-bit initial APIC ID in EBX[31:24] is replaced by the 32-bit x2APIC ID, available // in Leaf 0BH and Leaf 1FH. 
- set_range(&mut leaf_1.result.ebx, 24..32, u32::from(cpu_index)) + set_range(&mut leaf_1.result.ebx, 24..=31, u32::from(cpu_index)) .map_err(FeatureInformationError::InitialApicId)?; // CPUID.01H:ECX[15] (Mnemonic: PDCM) @@ -345,18 +333,18 @@ impl super::Cpuid { 0 => { // To get the next level APIC ID, shift right with at most 1 because we have // maximum 2 logical procerssors per core that can be represented by 1 bit. - set_range(&mut subleaf.result.eax, 0..5, u32::from(cpu_bits)) + set_range(&mut subleaf.result.eax, 0..=4, u32::from(cpu_bits)) .map_err(|err| ExtendedTopologyError::RightShiftBits(index, err))?; // When cpu_count == 1 or HT is disabled, there is 1 logical core at this // domain; otherwise there are 2 - set_range(&mut subleaf.result.ebx, 0..16, u32::from(cpus_per_core)) + set_range(&mut subleaf.result.ebx, 0..=15, u32::from(cpus_per_core)) .map_err(|err| ExtendedTopologyError::NumLogicalProcs(index, err))?; // Skip setting 0 to ECX[7:0] since it's already reset to 0. // Set the domain type identification value for logical processor, - set_range(&mut subleaf.result.ecx, 8..16, 1) + set_range(&mut subleaf.result.ecx, 8..=15, 1) .map_err(|err| ExtendedTopologyError::DomainType(index, err))?; } // Core domain @@ -368,19 +356,19 @@ impl super::Cpuid { // 2^N is greater than or equal to the maximum number of vCPUs. set_range( &mut subleaf.result.eax, - 0..5, + 0..=4, MAX_SUPPORTED_VCPUS.next_power_of_two().ilog2(), ) .map_err(|err| ExtendedTopologyError::RightShiftBits(index, err))?; - set_range(&mut subleaf.result.ebx, 0..16, u32::from(cpu_count)) + set_range(&mut subleaf.result.ebx, 0..=15, u32::from(cpu_count)) .map_err(|err| ExtendedTopologyError::NumLogicalProcs(index, err))?; // Setting the input ECX value (i.e. `index`) - set_range(&mut subleaf.result.ecx, 0..8, index) + set_range(&mut subleaf.result.ecx, 0..=7, index) .map_err(|err| ExtendedTopologyError::InputEcx(index, err))?; // Set the domain type identification value for core. 
+ set_range(&mut subleaf.result.ecx, 8..=15, 2) .map_err(|err| ExtendedTopologyError::DomainType(index, err))?; } _ => { From 625c4f9dcbb6923fdf55e641e4a41542ef464919 Mon Sep 17 00:00:00 2001 From: Takahiro Itazuri Date: Thu, 13 Mar 2025 15:47:02 +0000 Subject: [PATCH 11/20] doc: Simplify normalized register list EAX, EBX, ECX and EDX are all the registers returned. Signed-off-by: Takahiro Itazuri --- docs/cpu_templates/cpuid-normalization.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/cpu_templates/cpuid-normalization.md b/docs/cpu_templates/cpuid-normalization.md index 2eb743add22..b2d2238ae69 100644 --- a/docs/cpu_templates/cpuid-normalization.md +++ b/docs/cpu_templates/cpuid-normalization.md @@ -28,15 +28,15 @@ See also: [boot protocol settings](boot-protocol.md) ## Intel-specific CPUID normalization -| Description | Leaf | Subleaf | Register | Bits | -| -------------------------------------------------------------- | :--------------------------------: | :-----: | :----------------: | :---: | -| Update deterministic cache parameters | 0x4 | all | EAX | 31:14 | -| Disable Intel Turbo Boost technology | 0x6 | - | EAX | 1 | -| Disable frequency selection | 0x6 | - | ECX | 3 | -| Set FDP_EXCPTN_ONLY bit | 0x7 | 0x0 | EBX | 6 | -| Set "Deprecates FPU CS and FPU DS values" bit | 0x7 | 0x0 | EBX | 13 | -| Disable performance monitoring | 0xa | - | EAX, EBX, ECX, EDX | all | -| Update brand string to use a default format and real frequency | 0x80000002, 0x80000003, 0x80000004 | - | EAX, EBX, ECX, EDX | all | +| Description | Leaf | Subleaf | Register | Bits | +| -------------------------------------------------------------- | :--------------------------------: | :-----: | :------: | :---: | +| Update deterministic cache parameters | 0x4 | all | EAX | 31:14 | +| Disable Intel Turbo Boost technology | 0x6 | - | EAX | 1 | +| Disable frequency selection | 0x6 | - | ECX | 3 | +| 
Set FDP_EXCPTN_ONLY bit | 0x7 | 0x0 | EBX | 6 | +| Set "Deprecates FPU CS and FPU DS values" bit | 0x7 | 0x0 | EBX | 13 | +| Disable performance monitoring | 0xa | - | all | all | +| Update brand string to use a default format and real frequency | 0x80000002, 0x80000003, 0x80000004 | - | all | all | ## AMD-specifc CPUID normalization From 9d58fc376bb8288d81f6dced4eefcd2ae47e3b5f Mon Sep 17 00:00:00 2001 From: Takahiro Itazuri Date: Thu, 13 Mar 2025 13:51:21 +0000 Subject: [PATCH 12/20] feat(vmm): Normalize CPUID leaf 0x1F Intel Sapphire Rapids has CPUID leaf 0x1F that is a preferred superset of leaf 0xB. Intel recommends using leaf 0x1F when available rather than leaf 0xB. We don't use any domains other than the ones supported by leaf 0xB, so just copy leaf 0xB to leaf 0x1F. Signed-off-by: Takahiro Itazuri --- docs/cpu_templates/cpuid-normalization.md | 1 + .../x86_64/cpuid/intel/normalize.rs | 129 ++++++++++++++++++ 2 files changed, 130 insertions(+) diff --git a/docs/cpu_templates/cpuid-normalization.md b/docs/cpu_templates/cpuid-normalization.md index b2d2238ae69..6a15ff4a9e0 100644 --- a/docs/cpu_templates/cpuid-normalization.md +++ b/docs/cpu_templates/cpuid-normalization.md @@ -36,6 +36,7 @@ See also: [boot protocol settings](boot-protocol.md) | Set FDP_EXCPTN_ONLY bit | 0x7 | 0x0 | EBX | 6 | | Set "Deprecates FPU CS and FPU DS values" bit | 0x7 | 0x0 | EBX | 13 | | Disable performance monitoring | 0xa | - | all | all | +| Fill v2 extended topology enumeration leaf | 0x1f | all | all | all | | Update brand string to use a default format and real frequency | 0x80000002, 0x80000003, 0x80000004 | - | all | all | ## AMD-specifc CPUID normalization diff --git a/src/vmm/src/cpu_config/x86_64/cpuid/intel/normalize.rs b/src/vmm/src/cpu_config/x86_64/cpuid/intel/normalize.rs index 923fc67543e..110cd630e0b 100644 --- a/src/vmm/src/cpu_config/x86_64/cpuid/intel/normalize.rs +++ b/src/vmm/src/cpu_config/x86_64/cpuid/intel/normalize.rs @@ -73,6 +73,7 @@ impl super::IntelCpuid { 
self.update_power_management_entry()?; self.update_extended_feature_flags_entry()?; self.update_performance_monitoring_entry()?; + self.update_extended_topology_v2_entry(); self.update_brand_string_entry()?; Ok(()) @@ -210,6 +211,29 @@ impl super::IntelCpuid { Ok(()) } + /// Update extended topology v2 entry + /// + /// CPUID leaf 1FH is a preferred superset to leaf 0xB. Intel recommends using leaf 0x1F when + /// available rather than leaf 0xB. + /// + /// Since we don't use any domains than ones supported in leaf 0xB, we just copy contents of + /// leaf 0xB to leaf 0x1F. + fn update_extended_topology_v2_entry(&mut self) { + // Skip if leaf 0x1F does not exist. + if self.get(&CpuidKey::leaf(0x1F)).is_none() { + return; + } + + for index in 0.. { + if let Some(subleaf) = self.get(&CpuidKey::subleaf(0xB, index)) { + self.0 + .insert(CpuidKey::subleaf(0x1F, index), subleaf.clone()); + } else { + break; + } + } + } + fn update_brand_string_entry(&mut self) -> Result<(), NormalizeCpuidError> { // Get host brand string. 
let host_brand_string: [u8; BRAND_STRING_LENGTH] = host_brand_string(); @@ -331,9 +355,12 @@ mod tests { clippy::as_conversions )] + use std::collections::BTreeMap; use std::ffi::CStr; use super::*; + use crate::cpu_config::x86_64::cpuid::{CpuidEntry, IntelCpuid, KvmCpuidFlags}; + #[test] fn default_brand_string_test() { let brand_string = b"Intel(R) Xeon(R) Platinum 8275CL CPU @ 3.00GHz\0\0"; @@ -394,4 +421,106 @@ mod tests { assert!((leaf_7_0.result.ebx & (1 << 6)) > 0); assert!((leaf_7_0.result.ebx & (1 << 13)) > 0); } + + #[test] + fn test_update_extended_topology_v2_entry_no_leaf_0x1f() { + let mut cpuid = IntelCpuid(BTreeMap::from([( + CpuidKey { + leaf: 0xB, + subleaf: 0, + }, + CpuidEntry { + flags: KvmCpuidFlags::SIGNIFICANT_INDEX, + ..Default::default() + }, + )])); + + cpuid.update_extended_topology_v2_entry(); + + assert!( + cpuid + .get(&CpuidKey { + leaf: 0x1F, + subleaf: 0, + }) + .is_none() + ); + } + + #[test] + fn test_update_extended_topology_v2_entry() { + let mut cpuid = IntelCpuid(BTreeMap::from([ + ( + CpuidKey { + leaf: 0xB, + subleaf: 0, + }, + CpuidEntry { + flags: KvmCpuidFlags::SIGNIFICANT_INDEX, + result: CpuidRegisters { + eax: 0x1, + ebx: 0x2, + ecx: 0x3, + edx: 0x4, + }, + }, + ), + ( + CpuidKey { + leaf: 0xB, + subleaf: 1, + }, + CpuidEntry { + flags: KvmCpuidFlags::SIGNIFICANT_INDEX, + result: CpuidRegisters { + eax: 0xa, + ebx: 0xb, + ecx: 0xc, + edx: 0xd, + }, + }, + ), + ( + CpuidKey { + leaf: 0x1F, + subleaf: 0, + }, + CpuidEntry { + flags: KvmCpuidFlags::SIGNIFICANT_INDEX, + result: CpuidRegisters { + eax: 0xFFFFFFFF, + ebx: 0xFFFFFFFF, + ecx: 0xFFFFFFFF, + edx: 0xFFFFFFFF, + }, + }, + ), + ])); + + cpuid.update_extended_topology_v2_entry(); + + // Check leaf 0x1F, subleaf 0 is updated. 
+ let leaf_1f_0 = cpuid + .get(&CpuidKey { + leaf: 0x1F, + subleaf: 0, + }) + .unwrap(); + assert_eq!(leaf_1f_0.result.eax, 0x1); + assert_eq!(leaf_1f_0.result.ebx, 0x2); + assert_eq!(leaf_1f_0.result.ecx, 0x3); + assert_eq!(leaf_1f_0.result.edx, 0x4); + + // Check leaf 0x1F, subleaf 1 is inserted. + let leaf_1f_1 = cpuid + .get(&CpuidKey { + leaf: 0x1F, + subleaf: 1, + }) + .unwrap(); + assert_eq!(leaf_1f_1.result.eax, 0xa); + assert_eq!(leaf_1f_1.result.ebx, 0xb); + assert_eq!(leaf_1f_1.result.ecx, 0xc); + assert_eq!(leaf_1f_1.result.edx, 0xd); + } } From e11eb3455cbee03f9b3c92aee508691d09eccc7a Mon Sep 17 00:00:00 2001 From: Takahiro Itazuri Date: Thu, 13 Mar 2025 17:58:05 +0000 Subject: [PATCH 13/20] refactor: Remove unneeded turbo fish (::) in unit test Some structs are imported in the last commit. Let's remove turbo fish! Signed-off-by: Takahiro Itazuri --- .../x86_64/cpuid/intel/normalize.rs | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/vmm/src/cpu_config/x86_64/cpuid/intel/normalize.rs b/src/vmm/src/cpu_config/x86_64/cpuid/intel/normalize.rs index 110cd630e0b..2e22134e882 100644 --- a/src/vmm/src/cpu_config/x86_64/cpuid/intel/normalize.rs +++ b/src/vmm/src/cpu_config/x86_64/cpuid/intel/normalize.rs @@ -398,22 +398,21 @@ mod tests { #[test] fn test_update_extended_feature_flags_entry() { - let mut cpuid = - crate::cpu_config::x86_64::cpuid::IntelCpuid(std::collections::BTreeMap::from([( - crate::cpu_config::x86_64::cpuid::CpuidKey { - leaf: 0x7, - subleaf: 0, - }, - crate::cpu_config::x86_64::cpuid::CpuidEntry { - flags: crate::cpu_config::x86_64::cpuid::KvmCpuidFlags::SIGNIFICANT_INDEX, - ..Default::default() - }, - )])); + let mut cpuid = IntelCpuid(BTreeMap::from([( + CpuidKey { + leaf: 0x7, + subleaf: 0, + }, + CpuidEntry { + flags: KvmCpuidFlags::SIGNIFICANT_INDEX, + ..Default::default() + }, + )])); cpuid.update_extended_feature_flags_entry().unwrap(); let leaf_7_0 = cpuid - 
.get(&crate::cpu_config::x86_64::cpuid::CpuidKey { + .get(&CpuidKey { leaf: 0x7, subleaf: 0, }) From 906e88897156ab8599d6ffaca0548b9022a09b40 Mon Sep 17 00:00:00 2001 From: Takahiro Itazuri Date: Thu, 13 Mar 2025 15:21:47 +0000 Subject: [PATCH 14/20] test: Skip frequency check if host not report it Intel Sapphire Rapids does not report its frequency in the model name string on host ("Intel(R) Xeon(R) Platinum 8488C"). Signed-off-by: Takahiro Itazuri --- .../functional/test_cpu_features_x86_64.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/integration_tests/functional/test_cpu_features_x86_64.py b/tests/integration_tests/functional/test_cpu_features_x86_64.py index 270e3fa12e2..0fb51e5bd37 100644 --- a/tests/integration_tests/functional/test_cpu_features_x86_64.py +++ b/tests/integration_tests/functional/test_cpu_features_x86_64.py @@ -150,6 +150,8 @@ def test_brand_string(uvm_plain_any): * For Intel CPUs, the guest brand string should be: Intel(R) Xeon(R) Processor @ {host frequency} + or + Intel(R) Xeon(R) Processor where {host frequency} is the frequency reported by the host CPUID (e.g. 4.01GHz) * For AMD CPUs, the guest brand string should be: @@ -184,7 +186,9 @@ def test_brand_string(uvm_plain_any): cif = open("/proc/cpuinfo", "r", encoding="utf-8") cpu_info = cif.read() mo = re.search("model name.*:.* ([0-9]*.[0-9]*[G|M|T]Hz)", cpu_info) - assert mo + # Skip if host frequency is not reported + if mo is None: + return host_frequency = mo.group(1) # Assert the model name matches "Intel(R) Xeon(R) Processor @ " From 30199d1f7af149ab9e89c9487fb90ba3afe79324 Mon Sep 17 00:00:00 2001 From: Takahiro Itazuri Date: Thu, 13 Mar 2025 15:38:51 +0000 Subject: [PATCH 15/20] test: Skip CPUID.(EAX=1FH,ECX=2) in test_cpu_config_dump_vs_actual CPUID leaf 1FH is a preferred superset to CPUID leaf 0BH. For the same reason as CPUID leaf 0BH, the subleaf 2 should be skipped if guest userspace cpuid command enumerates it. 
Signed-off-by: Takahiro Itazuri --- .../integration_tests/functional/test_cpu_template_helper.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/integration_tests/functional/test_cpu_template_helper.py b/tests/integration_tests/functional/test_cpu_template_helper.py index 1b1c2478da3..0c0e09d1a60 100644 --- a/tests/integration_tests/functional/test_cpu_template_helper.py +++ b/tests/integration_tests/functional/test_cpu_template_helper.py @@ -133,6 +133,10 @@ def build_cpu_config_dict(cpu_config_path): # support it, the userspace cpuid command in ubuntu 22 reports not only # the subleaf 0 but also the subleaf 1. (0x1B, 0x1), + # CPUID.1Fh is a preferred superset to CPUID.0Bh. For the same reason as + # CPUID.Bh, the subleaf 2 should be skipped when the guest userspace cpuid + # enumerates it. + (0x1F, 0x2), # CPUID.20000000h is not documented in Intel SDM and AMD APM. KVM doesn't # report it, but the userspace cpuid command in ubuntu 22 does. (0x20000000, 0x0), From bd5367e41512e6b685b974cdfda7572f2c84b956 Mon Sep 17 00:00:00 2001 From: Takahiro Itazuri Date: Thu, 13 Mar 2025 16:08:21 +0000 Subject: [PATCH 16/20] test: Skip MSR 0xE1 in test_cpu_config_dump_vs_actual The MSR is R/W and guest OS modifies it after boot to control UMWAIT feature. Signed-off-by: Takahiro Itazuri --- tests/integration_tests/functional/test_cpu_template_helper.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/integration_tests/functional/test_cpu_template_helper.py b/tests/integration_tests/functional/test_cpu_template_helper.py index 0c0e09d1a60..b6b6bf69dc8 100644 --- a/tests/integration_tests/functional/test_cpu_template_helper.py +++ b/tests/integration_tests/functional/test_cpu_template_helper.py @@ -188,6 +188,9 @@ def build_cpu_config_dict(cpu_config_path): 0x48, # MSR_IA32_SMBASE is not accessible outside of System Management Mode. 0x9E, + # MSR_IA32_UMWAIT_CONTROL is R/W MSR that guest OS modifies after boot to + # control UMWAIT feature. 
+ 0xE1, # MSR_IA32_TSX_CTRL is R/W MSR to disable Intel TSX feature as a mitigation # against TAA vulnerability. 0x122, From 71dfa6395b89eea4c50108c42899c1454174d914 Mon Sep 17 00:00:00 2001 From: Takahiro Itazuri Date: Thu, 13 Mar 2025 16:14:10 +0000 Subject: [PATCH 17/20] test: Skip MSR 0x1C4 in test_cpu_config_dump_vs_actual MSR_IA32_XFD is R/W MSR for guest OS to control which XSAVE-enabled features are temporarily disabled. Guest OS disables TILEDATA by default using the MSR. Signed-off-by: Takahiro Itazuri --- .../integration_tests/functional/test_cpu_template_helper.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/integration_tests/functional/test_cpu_template_helper.py b/tests/integration_tests/functional/test_cpu_template_helper.py index b6b6bf69dc8..6ff2db1f7f0 100644 --- a/tests/integration_tests/functional/test_cpu_template_helper.py +++ b/tests/integration_tests/functional/test_cpu_template_helper.py @@ -200,6 +200,10 @@ def build_cpu_config_dict(cpu_config_path): 0x174, 0x175, 0x176, + # MSR_IA32_XFD is R/W MSR for guest OS to control which XSAVE-enabled + # features are temporarily disabled. Guest OS disables TILEDATA by default + # using the MSR. + 0x1C4, # MSR_IA32_TSC_DEADLINE specifies the time at which a timer interrupt # should occur and depends on the elapsed time. 0x6E0, From 4b7b11acffae011308eee66975e345053605c2a0 Mon Sep 17 00:00:00 2001 From: Takahiro Itazuri Date: Tue, 11 Mar 2025 06:42:10 +0000 Subject: [PATCH 18/20] test: Use any guest kernels in test_host_vs_guest_cpu_features CPU features reported by the guest kernel can vary depending on its kernel version. 
Signed-off-by: Takahiro Itazuri --- .../functional/test_cpu_features_host_vs_guest.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/integration_tests/functional/test_cpu_features_host_vs_guest.py b/tests/integration_tests/functional/test_cpu_features_host_vs_guest.py index c8075faa505..8f42ebe02cf 100644 --- a/tests/integration_tests/functional/test_cpu_features_host_vs_guest.py +++ b/tests/integration_tests/functional/test_cpu_features_host_vs_guest.py @@ -157,10 +157,12 @@ } -def test_host_vs_guest_cpu_features(uvm_nano): +def test_host_vs_guest_cpu_features(uvm_plain_any): """Check CPU features host vs guest""" - vm = uvm_nano + vm = uvm_plain_any + vm.spawn() + vm.basic_config() vm.add_net_iface() vm.start() host_feats = set(utils.check_output(CPU_FEATURES_CMD).stdout.split()) From d35479add0374b3b262728e19f75f8d0a5664da2 Mon Sep 17 00:00:00 2001 From: Takahiro Itazuri Date: Tue, 11 Mar 2025 06:40:55 +0000 Subject: [PATCH 19/20] test: test_host_vs_guest_cpu_features on Intel Sapphire Rapids Intel Sapphire Rapids has some new features compared to older Intel processors. Some of them are just not virtualized by KVM, others started to be passed through to guests since specific kernel versions, and the others can be emulated but are now supported by hardware. 
Signed-off-by: Takahiro Itazuri --- .../test_cpu_features_host_vs_guest.py | 96 +++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/tests/integration_tests/functional/test_cpu_features_host_vs_guest.py b/tests/integration_tests/functional/test_cpu_features_host_vs_guest.py index 8f42ebe02cf..e387018d6d2 100644 --- a/tests/integration_tests/functional/test_cpu_features_host_vs_guest.py +++ b/tests/integration_tests/functional/test_cpu_features_host_vs_guest.py @@ -233,6 +233,102 @@ def test_host_vs_guest_cpu_features(uvm_plain_any): assert host_feats - guest_feats == host_guest_diff_6_1 assert guest_feats - host_feats == INTEL_GUEST_ONLY_FEATS - {"umip"} + case CpuModel.INTEL_SAPPHIRE_RAPIDS: + expected_host_minus_guest = INTEL_HOST_ONLY_FEATS.copy() + expected_guest_minus_host = INTEL_GUEST_ONLY_FEATS.copy() + + host_version = global_props.host_linux_version_tpl + guest_version = vm.guest_kernel_version + + # KVM does not support virtualization of the following hardware features yet for several + # reasons (e.g. security, simply difficulty of implementation). + expected_host_minus_guest |= { + # Intel Total Memory Encryption (TME) is the capability to encrypt the entirety of + # physical memory of a system. TME is enabled by system BIOS/hardware and applies to + # the physical memory as a whole. + "tme", + # PCONFIG instruction allows software to configure certain platform features. It + # supports these features with multiple leaf functions, selecting a leaf function + # using the value in EAX. As of this writing, the only defined PCONFIG leaf function + # is for key programming for total memory encryption-multi-key (TME-MK). + "pconfig", + # Architectural Last Branch Record (Arch LBR) is a feature that logs the most + # recently executed branch instructions (e.g. source and destination addresses). + # Traditional LBR implementations have existed in Intel CPUs for years and the MSR + # interface varied by CPU model. 
Arch LBR is a standardized version. There is a + # kernel patch created in 2022 but didn't get merged due to a mess. + # https://lore.kernel.org/all/20221125040604.5051-1-weijiang.yang@intel.com/ + "arch_lbr", + # ENQCMD/ENQCMDS are instructions that allow software to atomically write 64-byte + # commands to enqueue registers, which are special device registers accessed using + # memory-mapped I/O. + "enqcmd", + # Intel Resource Director Technology (RDT) feature set provides a set of allocation + # (resource control) capabilities including Cache Allocation Technology (CAT) and + # Code and Data Prioritization (CDP). + # L3 variants are listed in INTEL_HOST_ONLY_FEATS. + "cat_l2", + "cdp_l2", + # This is a synthesized bit for split lock detection that raises an Alignment Check + # (#AC) exception if an operand of an atomic operation crosses two cache lines. It + # is not enumerated on CPUID, instead detected by actually attempting to read from + # MSR address 0x33 (MSR_MEMORY_CTRL in Intel SDM, MSR_TEST_CTRL in Linux kernel). + "split_lock_detect", + } + + # The following features are also not virtualized by KVM yet but are only supported on + # newer kernel versions. + if host_version >= (5, 18): + expected_host_minus_guest |= { + # Hardware Feedback Interface (HFI) is a feature that gives OSes a performance + # and energy efficiency capability data for each CPU that can be used to + # influence task placement decisions. + # https://github.com/torvalds/linux/commit/7b8f40b3de75c971a4e5f9308b06deb59118dbac + "hfi", + # Indirect Branch Tracking (IBT) is a feature where the CPU ensures indirect + # branch targets start with ENDBRANCH instruction (`endbr32` or `endbr64`), + # which executes as a no-op; if anything else is found, a control-protection + # (#CP) fault will be raised. + # https://github.com/torvalds/linux/commit/991625f3dd2cbc4b787deb0213e2bcf8fa264b21 + "ibt", + } + + # AVX512 FP16 is supported and passed through on v5.11+. 
+ # https://github.com/torvalds/linux/commit/e1b35da5e624f8b09d2e98845c2e4c84b179d9a4 + # https://github.com/torvalds/linux/commit/2224fc9efb2d6593fbfb57287e39ba4958b188ba + if host_version >= (5, 11) and guest_version < (5, 11): + expected_host_minus_guest |= {"avx512_fp16"} + + # AVX VNNI support is supported and passed through on v5.12+. + # https://github.com/torvalds/linux/commit/b85a0425d8056f3bd8d0a94ecdddf2a39d32a801 + # https://github.com/torvalds/linux/commit/1085a6b585d7d1c441cd10fdb4c7a4d96a22eba7 + if host_version >= (5, 12) and guest_version < (5, 12): + expected_host_minus_guest |= {"avx_vnni"} + + # Bus lock detection is supported on v5.12+ and passed through on v5.13+. + # https://github.com/torvalds/linux/commit/f21d4d3b97a8603567e5d4250bd75e8ebbd520af + # https://github.com/torvalds/linux/commit/76ea438b4afcd9ee8da3387e9af4625eaccff58f + if host_version >= (5, 13) and guest_version < (5, 12): + expected_host_minus_guest |= {"bus_lock_detect"} + + # Intel AMX is supported and passed through on v5.17+. + # https://github.com/torvalds/linux/commit/690a757d610e50c2c3acd2e4bc3992cfc63feff2 + if host_version >= (5, 17) and guest_version < (5, 17): + expected_host_minus_guest |= {"amx_bf16", "amx_int8", "amx_tile"} + + expected_guest_minus_host -= { + # UMIP can be emulated by KVM on Intel processors, but is supported in hardware on + # Intel Sapphire Rapids and passed through. + "umip", + # This is a synthesized bit and it is always set on guest thanks to kvm-clock. But + # Intel Sapphire Rapids reports TSC frequency on CPUID leaf 0x15, so the bit is also + # set on host. 
+ "tsc_known_freq", + } + + assert host_feats - guest_feats == expected_host_minus_guest + assert guest_feats - host_feats == expected_guest_minus_host + case CpuModel.ARM_NEOVERSE_N1: expected_guest_minus_host = set() expected_host_minus_guest = set() From 4dd97bb0dd6fa21606c88e2090f5a53036001fea Mon Sep 17 00:00:00 2001 From: Takahiro Itazuri Date: Wed, 12 Mar 2025 11:55:40 +0000 Subject: [PATCH 20/20] test: Ignore REPTAR on Intel Sapphire Rapids REPTAR is reported "vulnerable" inside guest on Intel Sapphire Rapids for the same reason as Intel Ice Lake (microcode version not exposed to guests). Signed-off-by: Takahiro Itazuri --- tests/integration_tests/security/test_vulnerabilities.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/integration_tests/security/test_vulnerabilities.py b/tests/integration_tests/security/test_vulnerabilities.py index 14307e56888..356bdb4370f 100644 --- a/tests/integration_tests/security/test_vulnerabilities.py +++ b/tests/integration_tests/security/test_vulnerabilities.py @@ -85,7 +85,10 @@ def expected_vulnerabilities(self, cpu_template_name): Since we have a test on host and the exception in guest is not valid, we add a check to ignore this exception. """ - if global_props.cpu_codename == "INTEL_ICELAKE" and cpu_template_name is None: + if ( + global_props.cpu_codename in ["INTEL_ICELAKE", "INTEL_SAPPHIRE_RAPIDS"] + and cpu_template_name is None + ): return { '{"NAME": "REPTAR", "CVE": "CVE-2023-23583", "VULNERABLE": true, "INFOS": "Your microcode is too old to mitigate the vulnerability"}' }