diff --git a/Cargo.lock b/Cargo.lock index 9cbb3b518e1..5a3ef6f2da2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11064,7 +11064,7 @@ dependencies = [ [[package]] name = "raw-cpuid" version = "11.5.0" -source = "git+https://github.com/oxidecomputer/rust-cpuid.git?rev=0a8dbd2311263f6a59ea58089e33c8331436ff3a#0a8dbd2311263f6a59ea58089e33c8331436ff3a" +source = "git+https://github.com/oxidecomputer/rust-cpuid.git?rev=a4cf01df76f35430ff5d39dc2fe470bcb953503b#a4cf01df76f35430ff5d39dc2fe470bcb953503b" dependencies = [ "bitflags 2.9.4", ] diff --git a/Cargo.toml b/Cargo.toml index 4ebad957b89..7444d418d8f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -658,7 +658,7 @@ rand_distr = "0.5.1" rand_seeder = "0.4.0" range-requests = { path = "range-requests" } ratatui = "0.29.0" -raw-cpuid = { git = "https://github.com/oxidecomputer/rust-cpuid.git", rev = "0a8dbd2311263f6a59ea58089e33c8331436ff3a" } +raw-cpuid = { git = "https://github.com/oxidecomputer/rust-cpuid.git", rev = "a4cf01df76f35430ff5d39dc2fe470bcb953503b" } rayon = "1.10" rcgen = "0.12.1" reconfigurator-cli = { path = "dev-tools/reconfigurator-cli" } diff --git a/nexus/src/app/instance_platform/cpu_platform.rs b/nexus/src/app/instance_platform/cpu_platform.rs index 65c8b4e787e..345f31c5bb8 100644 --- a/nexus/src/app/instance_platform/cpu_platform.rs +++ b/nexus/src/app/instance_platform/cpu_platform.rs @@ -104,6 +104,12 @@ pub fn functionally_same(base: CpuIdDump, target: CpuIdDump) -> bool { if base_info.has_fp256() != target_info.has_fp256() { return false; } + + // TODO: same as above: we probably just need to require "base" has + // the same or wider FPU datapath than "target" + if base_info.has_fp512() != target_info.has_fp512() { + return false; + } } _ => { // Specific cases here may be acceptable, but for expediency (and @@ -500,8 +506,8 @@ fn milan_ideal() -> CpuIdDump { // Set up processor optimization info (leaf 8000_001Ah) let mut leaf = PerformanceOptimizationInfo::empty(); - leaf.set_movu(true); // TODO: 
BREAKING - leaf.set_fp256(true); // TODO: BREAKINGISH? + leaf.set_movu(true); + leaf.set_fp256(true); cpuid .set_performance_optimization_info(Some(leaf)) .expect("can set leaf 8000_001Ah"); @@ -548,6 +554,232 @@ fn milan_ideal() -> CpuIdDump { dump } +pub fn turin_v1() -> CpuIdDump { + // For VMs, a Turin-like CPU is very much like Milan with AVX-512 features, + // so start from Milan. + let baseline = milan_ideal(); + + let mut cpuid = CpuId::with_cpuid_reader(baseline); + + let mut leaf = + cpuid.get_feature_info().expect("baseline Milan defines leaf 1"); + + // Set up EAX: Family 1Ah model 2h stepping 1. + leaf.set_extended_family_id(0x0B); + leaf.set_base_family_id(0x0F); + leaf.set_base_model_id(0x02); + leaf.set_stepping_id(0x01); + + // EBX, ECX, EDX are all unchanged from Milan (same cache line flush size, + // leaf 1 features are unchanged) + + cpuid.set_feature_info(Some(leaf)).expect("can set leaf 1"); + + let mut leaf = cpuid + .get_extended_feature_info() + .expect("baseline Milan defines leaf 7"); + + // Same as with initial Milan profiles, `rdseed` is not supported by the + // virt stack, so we should hide it from guests for now. + leaf.set_rdseed(false); + + // Turin supports the TSC_ADJUST MSR but guest plumbing is not present for + // it and it's not clear what a guest would productively do with it anyway. + leaf.set_tsc_adjust_msr(false); + + // Turin supports MOVDIR64B and MOVDIRI, so pass them through. + leaf.set_movdir64b(true); + leaf.set_movdiri(true); + + // These AVX512 features are present for all Turin processors. 
+ leaf.set_avx512f(true); + leaf.set_avx512dq(true); + leaf.set_avx512_ifma(true); + leaf.set_avx512cd(true); + leaf.set_avx512bw(true); + leaf.set_avx512vl(true); + + leaf.set_avx512vbmi(true); + leaf.set_avx512vbmi2(true); + leaf.set_gfni(true); + leaf.set_avx512vnni(true); + leaf.set_avx512bitalg(true); + leaf.set_avx512vpopcntdq(true); + // While hardware supports 57-bit virtual addresses, the bhyve support is + // not there yet. + leaf.set_la57(false); + + leaf.set_avx512_vp2intersect(true); + + leaf.set_avx512_bf16(true); + leaf.set_avx_vnni(true); + + cpuid.set_extended_feature_info(Some(leaf)).expect("can set leaf 7h"); + + // This is the same information for leaf D as in Milan, but with the new + // AVX-512 bits in Turin. + // TODO: kind of gross to have to pass an empty `CpuIdDump` here... + let mut state = ExtendedStateInfo::empty(CpuIdDump::new()); + state.set_xcr0_supports_legacy_x87(true); + state.set_xcr0_supports_sse_128(true); + state.set_xcr0_supports_avx_256(true); + // Update leaf D for the larger XCR0 set + state.set_xcr0_supports_avx512_opmask(true); + state.set_xcr0_supports_avx512_zmm_hi256(true); + state.set_xcr0_supports_avx512_zmm_hi16(true); + // Managed dynamically in practice. + state.set_xsave_area_size_enabled_features(0x980); + // `Core::X86::Cpuid::ProcExtStateEnumEcx00`, but minus the MPK support we + // don't make available to guests. 
+ state.set_xsave_area_size_supported_features(0x980); + + state.set_xsaveopt(true); + state.set_xsavec(true); + state.set_xgetbv(true); + state.set_xsave_size(0x980); + + let mut leaves = state.into_leaves().to_vec(); + let mut ymm_state = ExtendedState::empty(); + ymm_state.set_size(0x100); + ymm_state.set_offset(0x240); + leaves.push(Some(ymm_state.into_leaf())); + // level 3 + leaves.push(None); + // level 4 + leaves.push(None); + // levels 5, 6, and 7 are described in the PPR: + // `Core::X86::Cpuid::ProcExtStateEnumEax06` + // + // level 5 + let mut kregs_state = ExtendedState::empty(); + kregs_state.set_size(0x040); + kregs_state.set_offset(0x340); + leaves.push(Some(kregs_state.into_leaf())); + // level 6 + let mut zmmhi_state = ExtendedState::empty(); + zmmhi_state.set_size(0x200); + zmmhi_state.set_offset(0x380); + leaves.push(Some(zmmhi_state.into_leaf())); + // level 7 + let mut zmmhi16_state = ExtendedState::empty(); + zmmhi16_state.set_size(0x400); + zmmhi16_state.set_offset(0x580); + leaves.push(Some(zmmhi16_state.into_leaf())); + + cpuid.set_extended_state_info(Some(&leaves[..])).expect("can set leaf Dh"); + + let mut leaf = cpuid + .get_extended_processor_and_feature_identifiers() + .expect("baseline Milan defines leaf 8000_0001"); + + // This is the same as the leaf 1 EAX configured earlier. + leaf.set_extended_signature(0x00B00F21); + + // Hide topology extensions. We'd want to set this and set + // ThreadsPerComputeUnit to indicate SMT is active, but we'd run afoul of + // https://github.com/oxidecomputer/propolis/issues/940, which in turn + // really needs us to disallow VM shapes with odd vCPU counts. For now, just + // hide topology extensions and we'll get sockets into shape in a later CPU + // platform rev. + leaf.set_topology_extensions(false); + // This is just strange. bhyve supports all six performance counters, so we + // *should* be free to set this bit. Linux is fine with this. 
But + // experimentally I've seen that with this bit set and TopologyExtensions + // *not* set (and leaves 8000_001D,8000_001E zeroed), Windows Server 2022 + // gets into an infinite loop somewhere early in boot. + // + // We want to hide topology extensions for a bit still - we'd like to + // indicate SMT there, but that wants some other changes (see above or + // Propolis#940) + // + // So, if we don't have TopologyExtensions, apparently Windows can't have + // six perf counters? + leaf.set_perf_cntr_extensions(false); + // RDTSCP requires some bhyve and Propolis work to support, so it is masked + // off for now. + leaf.set_rdtscp(false); + cpuid + .set_extended_processor_and_feature_identifiers(Some(leaf)) + .expect("can set leaf 8000_0001h"); + + cpuid + .set_processor_brand_string(Some(b"Oxide Virtual Turin-like Processor")) + .expect("can set vCPU brand string"); + + let mut leaf = cpuid + .get_processor_capacity_feature_info() + .expect("can get leaf 8000_0008h"); + + // Support for `wbnoinvd` is hidden in bhyve for the time being. This would + // probably be fine to pass through, but it is as-yet untested. Continue + // hiding this instruction. + leaf.set_wbnoinvd(false); + + // "Processor is not vulnerable to Branch Type Confusion" + // This is 1 for all Turin processors and does not require particular MSR + // settings or hypervisor support, so pass it along. + leaf.set_btc_no(true); + + // PSFD, SSBD, STIBP, and IBRS are all supported on Turin, but guests + // are not yet allowed to access SPEC_CTRL to enable (or confirm they are + // enabled). 
+ leaf.set_psfd(false); + leaf.set_ssbd(false); + leaf.set_stibp(false); + leaf.set_ibrs(false); + + cpuid + .set_processor_capacity_feature_info(Some(leaf)) + .expect("can set leaf 8000_0008h"); + + let mut leaf = cpuid + .get_performance_optimization_info() + .expect("baseline Milan defines 8000_001Ah"); + leaf.set_fp256(false); + leaf.set_fp512(true); + cpuid + .set_performance_optimization_info(Some(leaf)) + .expect("can set leaf 8000_001Ah"); + + let mut leaf = cpuid + .get_extended_feature_identification_2() + .expect("can get leaf 8000_0021h"); + + // FP512 downgrade is configurable via MSR, but the MSR is not made + // available to guests. The other bits are present on all Turin processors. + leaf.set_fp512_downgrade(false); + leaf.set_fast_rep_scasb(true); + leaf.set_epsf(true); + leaf.set_opcode_0f_017_reclaim(true); + leaf.set_amd_ermsb(true); + leaf.set_fast_short_repe_cmpsb(true); + leaf.set_fast_short_rep_stosb(true); + // The EFER write is permitted in bhyve, so this *should* work? But I'm not + // very familiar with how this is used in practice or where guest OSes would + // find it beneficial. Hide it for now and we'll come back to this for a + // broader speculative controls enablement with SPEC_CTRL/PRED_CMD later. + leaf.set_automatic_ibrs(false); + // The EFER write is permitted in bhyve, so this *should* work? But the + // forward utility of this bit is not as clear, so hide it. + leaf.set_upper_address_ignore(false); + // Architectural behavior, so we should pass this through. + leaf.set_fs_gs_base_write_not_serializing(true); + + cpuid + .set_extended_feature_identification_2(Some(leaf)) + .expect("can set leaf 8000_0021h"); + + // Cache topology leaves are otherwise left zeroed; if we can avoid getting + // into it, let's try! + + let mut source = cpuid.into_source(); + // We've cleared `topology_extensions` above, now remove the leaves so + // Propolis doesn't try specializing these; we don't want them presented yet! 
+ source.set_leaf(0x8000_001D, None); + source.set_leaf(0x8000_001E, None); + source +} + pub fn milan_rfd314() -> CpuIdDump { // This is the Milan we'd "want" to expose, absent any other constraints. let baseline = milan_ideal(); @@ -753,7 +985,7 @@ pub fn dump_to_cpuid_entries(dump: CpuIdDump) -> Vec { #[cfg(test)] mod test { use crate::app::instance_platform::cpu_platform::{ - dump_to_cpuid_entries, milan_rfd314, + dump_to_cpuid_entries, milan_rfd314, turin_v1, }; use raw_cpuid::{ CpuId, CpuIdReader, CpuIdResult, CpuIdWriter, L1CacheTlbInfo, @@ -846,6 +1078,76 @@ mod test { cpuid_leaf!(0x80000021, 0x00000045, 0x00000000, 0x00000000, 0x00000000), ]; + // This CPUID leaf blob is some small tweaks on top of the "ideal Milan", + // maintaining some details that are disabled due to needed bhyve support + // and including Turin-specific features as supported and relevant to + // guests. + const TURIN_V1_CPUID: [CpuidEntry; 25] = [ + cpuid_leaf!(0x0, 0x0000000D, 0x68747541, 0x444D4163, 0x69746E65), + cpuid_leaf!(0x1, 0x00B00F21, 0x00000800, 0xF6D83203, 0x078BFBFF), + cpuid_leaf!(0x5, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x6, 0x00000004, 0x00000000, 0x00000000, 0x00000000), + cpuid_subleaf!( + 0x7, 0x0, 0x00000001, 0xF1BB03A9, 0x18005F42, 0x00000110 + ), + cpuid_subleaf!( + 0x7, 0x1, 0x00000030, 0x00000000, 0x00000000, 0x00000000 + ), + cpuid_subleaf!( + 0xD, 0x0, 0x000000E7, 0x00000980, 0x00000980, 0x00000000 + ), + cpuid_subleaf!( + 0xD, 0x1, 0x00000007, 0x00000980, 0x00000000, 0x00000000 + ), + cpuid_subleaf!( + 0xD, 0x2, 0x00000100, 0x00000240, 0x00000000, 0x00000000 + ), + /* + * subleaves 3 and 4 are all-zero + */ + cpuid_subleaf!( + 0xD, 0x5, 0x00000040, 0x00000340, 0x00000000, 0x00000000 + ), + cpuid_subleaf!( + 0xD, 0x6, 0x00000200, 0x00000380, 0x00000000, 0x00000000 + ), + cpuid_subleaf!( + 0xD, 0x7, 0x00000400, 0x00000580, 0x00000000, 0x00000000 + ), + cpuid_leaf!(0x80000000, 0x80000021, 0x68747541, 0x444D4163, 0x69746E65), + 
cpuid_leaf!(0x80000001, 0x00B00F21, 0x40000000, 0x440001F1, 0x25D3FBFF), + cpuid_leaf!(0x80000002, 0x6469784F, 0x69562065, 0x61757472, 0x7554206C), + cpuid_leaf!(0x80000003, 0x2D6E6972, 0x656B696C, 0x6F725020, 0x73736563), + cpuid_leaf!(0x80000004, 0x2020726F, 0x20202020, 0x20202020, 0x00202020), + cpuid_leaf!(0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000100), + cpuid_leaf!(0x80000008, 0x00003030, 0x20000005, 0x00000000, 0x00000000), + cpuid_leaf!(0x8000000A, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x8000001A, 0x0000000A, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x8000001B, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x8000001C, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x8000001F, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x80000021, 0x000D8C47, 0x00000000, 0x00000000, 0x00000000), + ]; + + // Test that Turin V1 matches the predetermined CPUID leaves written above + // (e.g. that the collection of builders behind `turin_v1` produce this + // profile as used for testing and elsewhere). + // + // This is largely "baseline Milan" with Turin-specific additions. + #[test] + fn turin_v1_is_as_described() { + let computed = dump_to_cpuid_entries(turin_v1()); + + for (l, r) in TURIN_V1_CPUID.iter().zip(computed.as_slice().iter()) { + eprintln!("comparing {:#08x}.{:?}", l.leaf, l.subleaf); + assert_eq!( + l, r, + "leaf 0x{:08x} (subleaf? {:?}) did not match", + l.leaf, l.subleaf + ); + } + } + // Test that the initial RFD 314 definition matches what we compute as the // CPUID profile with that configuration in `milan_rfd314()`. 
#[test] diff --git a/nexus/src/app/instance_platform/mod.rs b/nexus/src/app/instance_platform/mod.rs index 2760e0e5588..5aa3000e8ef 100644 --- a/nexus/src/app/instance_platform/mod.rs +++ b/nexus/src/app/instance_platform/mod.rs @@ -509,13 +509,18 @@ fn cpuid_from_vmm_cpu_platform( ) -> Option { let cpuid = match platform { db::model::VmmCpuPlatform::SledDefault => return None, - db::model::VmmCpuPlatform::AmdMilan - | db::model::VmmCpuPlatform::AmdTurin => Cpuid { + db::model::VmmCpuPlatform::AmdMilan => Cpuid { entries: cpu_platform::dump_to_cpuid_entries( cpu_platform::milan_rfd314(), ), vendor: CpuidVendor::Amd, }, + db::model::VmmCpuPlatform::AmdTurin => Cpuid { + entries: cpu_platform::dump_to_cpuid_entries( + cpu_platform::turin_v1(), + ), + vendor: CpuidVendor::Amd, + }, }; Some(cpuid)