From 4130d307b22dc1e24b2c163845f61dc7ec30a3ac Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Tue, 15 Apr 2025 19:57:57 +0000 Subject: [PATCH 01/42] make sleds report their CPU families to Nexus RFD 505 proposes that instances should be able to set a "minimum hardware platform" or "minimum CPU platform" that allows users to constrain an instance to run on sleds that have a specific set of CPU features available. This allows a user to opt a VM into advanced hardware features (e.g. AVX-512 support) by constraining it to run only on sleds that support those features. For this to work, Nexus needs to understand what CPUs are present in which sleds. Have sled-agent query CPUID to get CPU vendor and family information and report this to Nexus as part of the sled hardware manifest. --- Cargo.lock | 1 + nexus/db-model/src/lib.rs | 2 + nexus/db-model/src/schema_versions.rs | 3 +- nexus/db-model/src/sled.rs | 12 ++ nexus/db-model/src/sled_cpu_family.rs | 57 ++++++++ .../src/db/datastore/crucible_dataset.rs | 2 + .../src/db/datastore/support_bundle.rs | 2 + .../src/db/pub_test_utils/helpers.rs | 9 ++ nexus/db-schema/src/enums.rs | 1 + nexus/db-schema/src/schema.rs | 1 + nexus/inventory/Cargo.toml | 1 + nexus/inventory/src/collector.rs | 2 + .../rendezvous/src/crucible_dataset.rs | 2 + .../background/tasks/blueprint_execution.rs | 4 +- .../background/tasks/inventory_collection.rs | 2 + nexus/src/app/sled.rs | 1 + nexus/test-utils/src/lib.rs | 12 ++ nexus/tests/integration_tests/rack.rs | 4 + nexus/tests/integration_tests/sleds.rs | 63 ++++++-- nexus/types/src/external_api/views.rs | 22 +++ nexus/types/src/internal_api/params.rs | 21 +++ openapi/nexus-internal.json | 35 +++++ openapi/nexus.json | 35 +++++ schema/crdb/dbinit.sql | 22 ++- schema/crdb/sled-cpu-family/up01.sql | 5 + schema/crdb/sled-cpu-family/up02.sql | 2 + schema/crdb/sled-cpu-family/up03.sql | 1 + sled-agent/src/bin/sled-agent-sim.rs | 3 + sled-agent/src/nexus.rs | 20 +++ sled-agent/src/sim/config.rs | 9 ++ sled-agent/src/sim/server.rs | 1 + sled-hardware/src/illumos/mod.rs | 7 +- sled-hardware/src/lib.rs | 135 ++++++++++++++++++ sled-hardware/src/non_illumos/mod.rs | 6 +- sled-hardware/types/src/lib.rs | 7 + 35 files changed, 495 insertions(+), 17 deletions(-) create mode 100644 nexus/db-model/src/sled_cpu_family.rs create mode 100644 schema/crdb/sled-cpu-family/up01.sql create mode 100644 schema/crdb/sled-cpu-family/up02.sql create mode 100644 schema/crdb/sled-cpu-family/up03.sql diff --git a/Cargo.lock b/Cargo.lock index c0351eb6571..ff5e1ac1815 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6555,6 +6555,7 @@ dependencies = [ "id-map", "iddqd", "itertools 0.14.0", + "nexus-client", "nexus-sled-agent-shared", "nexus-types", "ntp-admin-client", diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index d4c2df179d1..ba26c0c2309 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -103,6 +103,7 @@ mod silo_group; mod silo_user; mod silo_user_password_hash; mod sled; +mod sled_cpu_family; mod sled_instance; mod sled_policy; mod sled_resource_vmm; @@ -223,6 +224,7 @@ pub use silo_group::*; pub use silo_user::*; pub use silo_user_password_hash::*; pub use sled::*; +pub use sled_cpu_family::*; pub use sled_instance::*; pub use sled_policy::to_db_sled_policy; // Do not expose DbSledPolicy pub use sled_resource_vmm::*; diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 1507aeab46f..7a42e172291 100644 --- a/nexus/db-model/src/schema_versions.rs +++ 
b/nexus/db-model/src/schema_versions.rs @@ -16,7 +16,7 @@ use std::{collections::BTreeMap, sync::LazyLock}; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: Version = Version::new(173, 0, 0); +pub const SCHEMA_VERSION: Version = Version::new(174, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -28,6 +28,7 @@ static KNOWN_VERSIONS: LazyLock> = LazyLock::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(174, "sled-cpu-family"), KnownVersion::new(173, "inv-internal-dns"), KnownVersion::new(172, "add-zones-with-mupdate-override"), KnownVersion::new(171, "inv-clear-mupdate-override"), diff --git a/nexus/db-model/src/sled.rs b/nexus/db-model/src/sled.rs index 6ed06e20021..f4c8e62f9ae 100644 --- a/nexus/db-model/src/sled.rs +++ b/nexus/db-model/src/sled.rs @@ -6,6 +6,7 @@ use super::{ByteCount, Generation, SledState, SqlU16, SqlU32}; use crate::collection::DatastoreCollectionConfig; use crate::ipv6; use crate::sled::shared::Baseboard; +use crate::sled_cpu_family::SledCpuFamily; use crate::sled_policy::DbSledPolicy; use chrono::{DateTime, Utc}; use db_macros::Asset; @@ -40,6 +41,8 @@ pub struct SledSystemHardware { // current VMM reservoir size pub reservoir_size: ByteCount, + + pub cpu_family: SledCpuFamily, } /// Database representation of a Sled. @@ -84,6 +87,9 @@ pub struct Sled { // ServiceAddress (Repo Depot API). Uses `ip`. pub repo_depot_port: SqlU16, + + /// The family of this sled's CPU. + pub cpu_family: SledCpuFamily, } impl Sled { @@ -141,6 +147,7 @@ impl From for views::Sled { state: sled.state.into(), usable_hardware_threads: sled.usable_hardware_threads.0, usable_physical_ram: *sled.usable_physical_ram, + cpu_family: sled.cpu_family.into(), } } } @@ -185,6 +192,7 @@ impl From for params::SledAgentInfo { usable_physical_ram: sled.usable_physical_ram.into(), reservoir_size: sled.reservoir_size.into(), generation: sled.sled_agent_gen.into(), + cpu_family: sled.cpu_family.into(), decommissioned, } } @@ -229,6 +237,8 @@ pub struct SledUpdate { // ServiceAddress (Repo Depot API). Uses `ip`. pub repo_depot_port: SqlU16, + pub cpu_family: SledCpuFamily, + // Generation number - owned and incremented by sled-agent. pub sled_agent_gen: Generation, } @@ -258,6 +268,7 @@ impl SledUpdate { ip: addr.ip().into(), port: addr.port().into(), repo_depot_port: repo_depot_port.into(), + cpu_family: hardware.cpu_family, sled_agent_gen, } } @@ -296,6 +307,7 @@ impl SledUpdate { repo_depot_port: self.repo_depot_port, last_used_address, sled_agent_gen: self.sled_agent_gen, + cpu_family: self.cpu_family, } } diff --git a/nexus/db-model/src/sled_cpu_family.rs b/nexus/db-model/src/sled_cpu_family.rs new file mode 100644 index 00000000000..8247e1a2506 --- /dev/null +++ b/nexus/db-model/src/sled_cpu_family.rs @@ -0,0 +1,57 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use super::impl_enum_type; +use serde::{Deserialize, Serialize}; + +impl_enum_type!( + SledCpuFamilyEnum: + + #[derive( + Copy, + Clone, + Debug, + PartialEq, + AsExpression, + FromSqlRow, + Serialize, + Deserialize + )] + pub enum SledCpuFamily; + + Unknown => b"unknown" + AmdMilan => b"amd_milan" + AmdTurin => b"amd_turin" +); + +impl From for SledCpuFamily { + fn from(value: nexus_types::internal_api::params::SledCpuFamily) -> Self { + use nexus_types::internal_api::params::SledCpuFamily as InputFamily; + match value { + InputFamily::Unknown => Self::Unknown, + InputFamily::AmdMilan => Self::AmdMilan, + InputFamily::AmdTurin => Self::AmdTurin, + } + } +} + +impl From for nexus_types::internal_api::params::SledCpuFamily { + fn from(value: SledCpuFamily) -> Self { + match value { + SledCpuFamily::Unknown => Self::Unknown, + SledCpuFamily::AmdMilan => Self::AmdMilan, + SledCpuFamily::AmdTurin => Self::AmdTurin, + } + } +} + +impl From for nexus_types::external_api::views::SledCpuFamily { + fn from(value: SledCpuFamily) -> Self { + match value { + SledCpuFamily::Unknown => Self::Unknown, + SledCpuFamily::AmdMilan => Self::AmdMilan, + SledCpuFamily::AmdTurin => Self::AmdTurin, + } + } +} diff --git a/nexus/db-queries/src/db/datastore/crucible_dataset.rs b/nexus/db-queries/src/db/datastore/crucible_dataset.rs index 83b6cd6cb6a..fd9eee898bf 100644 --- a/nexus/db-queries/src/db/datastore/crucible_dataset.rs +++ b/nexus/db-queries/src/db/datastore/crucible_dataset.rs @@ -294,6 +294,7 @@ mod test { use crate::db::pub_test_utils::TestDatabase; use nexus_db_model::Generation; use nexus_db_model::SledBaseboard; + use nexus_db_model::SledCpuFamily; use nexus_db_model::SledSystemHardware; use nexus_db_model::SledUpdate; use omicron_common::api::external::ByteCount; @@ -323,6 +324,7 @@ mod test { usable_hardware_threads: 128, usable_physical_ram: (64 << 30).try_into().unwrap(), reservoir_size: (16 << 30).try_into().unwrap(), + cpu_family: SledCpuFamily::AmdMilan, }, Uuid::new_v4(), Generation::new(), diff --git a/nexus/db-queries/src/db/datastore/support_bundle.rs b/nexus/db-queries/src/db/datastore/support_bundle.rs index b6aaf5b4661..05195def6df 100644 --- a/nexus/db-queries/src/db/datastore/support_bundle.rs +++ b/nexus/db-queries/src/db/datastore/support_bundle.rs @@ -515,6 +515,7 @@ mod test { use crate::db::pub_test_utils::TestDatabase; use nexus_db_model::Generation; use nexus_db_model::SledBaseboard; + use nexus_db_model::SledCpuFamily; use nexus_db_model::SledSystemHardware; use nexus_db_model::SledUpdate; use nexus_db_model::Zpool; @@ -617,6 +618,7 @@ mod test { usable_hardware_threads: 128, usable_physical_ram: (64 << 30).try_into().unwrap(), reservoir_size: (16 << 30).try_into().unwrap(), + cpu_family: SledCpuFamily::AmdMilan, }, rack_id, Generation::new(), diff --git a/nexus/db-queries/src/db/pub_test_utils/helpers.rs b/nexus/db-queries/src/db/pub_test_utils/helpers.rs index c81f6440d0a..9369324e72a 100644 --- a/nexus/db-queries/src/db/pub_test_utils/helpers.rs +++ b/nexus/db-queries/src/db/pub_test_utils/helpers.rs @@ -25,6 +25,7 @@ use nexus_db_model::ProjectImage; use nexus_db_model::ProjectImageIdentity; use nexus_db_model::Resources; use nexus_db_model::SledBaseboard; +use nexus_db_model::SledCpuFamily; use nexus_db_model::SledSystemHardware; use nexus_db_model::SledUpdate; use nexus_db_model::Snapshot; @@ -77,6 +78,7 @@ pub struct SledSystemHardwareBuilder { usable_hardware_threads: u32, usable_physical_ram: i64, reservoir_size: i64, + cpu_family: SledCpuFamily, } impl Default for 
SledSystemHardwareBuilder { @@ -86,6 +88,7 @@ impl Default for SledSystemHardwareBuilder { usable_hardware_threads: 4, usable_physical_ram: 1 << 40, reservoir_size: 1 << 39, + cpu_family: SledCpuFamily::AmdMilan, } } } @@ -121,12 +124,18 @@ impl SledSystemHardwareBuilder { self } + pub fn cpu_family(&mut self, family: SledCpuFamily) -> &mut Self { + self.cpu_family = family; + self + } + pub fn build(&self) -> SledSystemHardware { SledSystemHardware { is_scrimlet: self.is_scrimlet, usable_hardware_threads: self.usable_hardware_threads, usable_physical_ram: self.usable_physical_ram.try_into().unwrap(), reservoir_size: self.reservoir_size.try_into().unwrap(), + cpu_family: self.cpu_family, } } } diff --git a/nexus/db-schema/src/enums.rs b/nexus/db-schema/src/enums.rs index ffee098d6c7..372d42dc40a 100644 --- a/nexus/db-schema/src/enums.rs +++ b/nexus/db-schema/src/enums.rs @@ -72,6 +72,7 @@ define_enums! { RouterRouteKindEnum => "router_route_kind", SagaStateEnum => "saga_state", ServiceKindEnum => "service_kind", + SledCpuFamilyEnum => "sled_cpu_family", SledPolicyEnum => "sled_policy", SledRoleEnum => "sled_role", SledStateEnum => "sled_state", diff --git a/nexus/db-schema/src/schema.rs b/nexus/db-schema/src/schema.rs index 8ef398e44d1..3f1fa67ca70 100644 --- a/nexus/db-schema/src/schema.rs +++ b/nexus/db-schema/src/schema.rs @@ -971,6 +971,7 @@ table! { sled_state -> crate::enums::SledStateEnum, sled_agent_gen -> Int8, repo_depot_port -> Int4, + cpu_family -> crate::enums::SledCpuFamilyEnum, } } diff --git a/nexus/inventory/Cargo.toml b/nexus/inventory/Cargo.toml index f6b90fb6f30..9d42aab4d0a 100644 --- a/nexus/inventory/Cargo.toml +++ b/nexus/inventory/Cargo.toml @@ -48,6 +48,7 @@ omicron-workspace-hack.workspace = true expectorate.workspace = true gateway-test-utils.workspace = true httpmock.workspace = true +nexus-client.workspace = true omicron-sled-agent.workspace = true regex.workspace = true tokio.workspace = true diff --git a/nexus/inventory/src/collector.rs b/nexus/inventory/src/collector.rs index 211f6d8ad83..fddde6f5c07 100644 --- a/nexus/inventory/src/collector.rs +++ b/nexus/inventory/src/collector.rs @@ -666,6 +666,7 @@ mod test { use crate::StaticSledAgentEnumerator; use gateway_messages::SpPort; use id_map::IdMap; + use nexus_client::types::SledCpuFamily; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; use nexus_sled_agent_shared::inventory::HostPhase2DesiredSlots; use nexus_sled_agent_shared::inventory::OmicronSledConfig; @@ -913,6 +914,7 @@ mod test { None, None, sim::ZpoolConfig::None, + SledCpuFamily::AmdMilan, ); let agent = diff --git a/nexus/reconfigurator/rendezvous/src/crucible_dataset.rs b/nexus/reconfigurator/rendezvous/src/crucible_dataset.rs index 0d4fd8a8382..22d68157bf2 100644 --- a/nexus/reconfigurator/rendezvous/src/crucible_dataset.rs +++ b/nexus/reconfigurator/rendezvous/src/crucible_dataset.rs @@ -130,6 +130,7 @@ mod tests { use async_bb8_diesel::AsyncSimpleConnection; use nexus_db_model::Generation; use nexus_db_model::SledBaseboard; + use nexus_db_model::SledCpuFamily; use nexus_db_model::SledSystemHardware; use nexus_db_model::SledUpdate; use nexus_db_model::Zpool; @@ -201,6 +202,7 @@ mod tests { usable_hardware_threads: 128, usable_physical_ram: (64 << 30).try_into().unwrap(), reservoir_size: (16 << 30).try_into().unwrap(), + cpu_family: SledCpuFamily::Unknown, }, Uuid::new_v4(), Generation::new(), diff --git a/nexus/src/app/background/tasks/blueprint_execution.rs b/nexus/src/app/background/tasks/blueprint_execution.rs 
index 88fea70e7a1..3a2c6ff404d 100644 --- a/nexus/src/app/background/tasks/blueprint_execution.rs +++ b/nexus/src/app/background/tasks/blueprint_execution.rs @@ -180,7 +180,8 @@ mod test { use id_map::IdMap; use itertools::Itertools as _; use nexus_db_model::{ - ByteCount, SledBaseboard, SledSystemHardware, SledUpdate, Zpool, + ByteCount, SledBaseboard, SledCpuFamily, SledSystemHardware, + SledUpdate, Zpool, }; use nexus_db_queries::authn; use nexus_db_queries::context::OpContext; @@ -359,6 +360,7 @@ mod test { usable_hardware_threads: 4, usable_physical_ram: ByteCount(1000.into()), reservoir_size: ByteCount(999.into()), + cpu_family: SledCpuFamily::AmdMilan, }, rack_id, nexus_db_model::Generation::new(), diff --git a/nexus/src/app/background/tasks/inventory_collection.rs b/nexus/src/app/background/tasks/inventory_collection.rs index 87c13422bcc..a55d60124d2 100644 --- a/nexus/src/app/background/tasks/inventory_collection.rs +++ b/nexus/src/app/background/tasks/inventory_collection.rs @@ -267,6 +267,7 @@ mod test { use crate::app::background::BackgroundTask; use nexus_db_model::Generation; use nexus_db_model::SledBaseboard; + use nexus_db_model::SledCpuFamily; use nexus_db_model::SledSystemHardware; use nexus_db_model::SledUpdate; use nexus_db_queries::context::OpContext; @@ -443,6 +444,7 @@ mod test { usable_physical_ram: ByteCount::from_gibibytes_u32(16) .into(), reservoir_size: ByteCount::from_gibibytes_u32(8).into(), + cpu_family: SledCpuFamily::AmdMilan, }, rack_id, Generation::new(), diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index 715a1504081..799cb4136f7 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -77,6 +77,7 @@ impl super::Nexus { usable_hardware_threads: info.usable_hardware_threads, usable_physical_ram: info.usable_physical_ram.into(), reservoir_size: info.reservoir_size.into(), + cpu_family: info.cpu_family.into(), }, self.rack_id, info.generation.into(), diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 9a76249fb12..ed42d80ef7c 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -27,6 +27,7 @@ use id_map::IdMap; use internal_dns_types::config::DnsConfigBuilder; use internal_dns_types::names::DNS_ZONE_EXTERNAL_TESTING; use internal_dns_types::names::ServiceName; +use nexus_client::types::SledCpuFamily; use nexus_config::Database; use nexus_config::DpdConfig; use nexus_config::InternalDns; @@ -1902,7 +1903,18 @@ pub async fn start_sled_agent( Some(nexus_address), Some(update_directory), sim::ZpoolConfig::None, + SledCpuFamily::AmdMilan, ); + start_sled_agent_with_config(log, &config, sled_index, simulated_upstairs) + .await +} + +pub async fn start_sled_agent_with_config( + log: Logger, + config: &sim::Config, + sled_index: u16, + simulated_upstairs: &Arc, +) -> Result { let server = sim::Server::start(&config, &log, true, simulated_upstairs, sled_index) .await diff --git a/nexus/tests/integration_tests/rack.rs b/nexus/tests/integration_tests/rack.rs index 9eebe3d2130..b5d63858908 100644 --- a/nexus/tests/integration_tests/rack.rs +++ b/nexus/tests/integration_tests/rack.rs @@ -7,6 +7,7 @@ use http::Method; use http::StatusCode; use nexus_client::types::SledId; use nexus_db_model::SledBaseboard; +use nexus_db_model::SledCpuFamily as DbSledCpuFamily; use nexus_db_model::SledSystemHardware; use nexus_db_model::SledUpdate; use nexus_sled_agent_shared::inventory::SledRole; @@ -20,6 +21,7 @@ use nexus_types::external_api::params; use nexus_types::external_api::shared::UninitializedSled; use 
nexus_types::external_api::views::Rack; use nexus_types::internal_api::params::SledAgentInfo; +use nexus_types::internal_api::params::SledCpuFamily; use omicron_common::api::external::ByteCount; use omicron_common::api::external::Generation; use omicron_uuid_kinds::GenericUuid; @@ -135,6 +137,7 @@ async fn test_sled_list_uninitialized(cptestctx: &ControlPlaneTestContext) { usable_hardware_threads: 32, usable_physical_ram: ByteCount::from_gibibytes_u32(100), reservoir_size: ByteCount::from_mebibytes_u32(100), + cpu_family: SledCpuFamily::Unknown, generation: Generation::new(), decommissioned: false, }; @@ -240,6 +243,7 @@ async fn test_sled_add(cptestctx: &ControlPlaneTestContext) { usable_hardware_threads: 8, usable_physical_ram: (1 << 30).try_into().unwrap(), reservoir_size: (1 << 20).try_into().unwrap(), + cpu_family: DbSledCpuFamily::Unknown, }, nexus.rack_id(), Generation::new().into(), diff --git a/nexus/tests/integration_tests/sleds.rs b/nexus/tests/integration_tests/sleds.rs index 8735bd568e4..d4d5ee825fd 100644 --- a/nexus/tests/integration_tests/sleds.rs +++ b/nexus/tests/integration_tests/sleds.rs @@ -15,10 +15,10 @@ use nexus_test_utils::resource_helpers::create_default_ip_pool; use nexus_test_utils::resource_helpers::create_instance; use nexus_test_utils::resource_helpers::create_project; use nexus_test_utils::resource_helpers::objects_list_page_authz; -use nexus_test_utils::start_sled_agent; +use nexus_test_utils::{start_sled_agent, start_sled_agent_with_config}; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::views::SledInstance; -use nexus_types::external_api::views::{PhysicalDisk, Sled}; +use nexus_types::external_api::views::{PhysicalDisk, Sled, SledCpuFamily}; use omicron_sled_agent::sim; use omicron_test_utils::dev::poll::{CondCheckError, wait_for_condition}; use omicron_uuid_kinds::GenericUuid; @@ -60,34 +60,60 @@ async fn test_sleds_list(cptestctx: &ControlPlaneTestContext) { assert_eq!(sleds_list(&client, &sleds_url).await.len(), 2); // Now start a few more sled agents. - let nsleds = 3; - let mut sas = Vec::with_capacity(nsleds); - for i in 0..nsleds { + let mut sas = Vec::new(); + let nexus_address = + cptestctx.server.get_http_server_internal_address().await; + let update_directory = Utf8Path::new("/should/not/be/used"); + let simulated_upstairs = &cptestctx.first_sled_agent().simulated_upstairs; + + for _ in 0..4 { let sa_id = SledUuid::new_v4(); let log = cptestctx.logctx.log.new(o!( "sled_id" => sa_id.to_string() )); - let addr = cptestctx.server.get_http_server_internal_address().await; - let update_directory = Utf8Path::new("/should/not/be/used"); sas.push( start_sled_agent( log, - addr, + nexus_address, sa_id, // Index starts at 2: the `nexus_test` macro already created two // sled agents as part of the ControlPlaneTestContext setup. 
- 2 + i as u16, + 2 + sas.len() as u16 + 1, &update_directory, sim::SimMode::Explicit, - &cptestctx.first_sled_agent().simulated_upstairs, + &simulated_upstairs, ) .await .unwrap(), ); } + let turin_sled_id = SledUuid::new_v4(); + let turin_sled_agent_log = + cptestctx.logctx.log.new(o!( "sled_id" => turin_sled_id.to_string() )); + + let turin_config = omicron_sled_agent::sim::Config::for_testing( + turin_sled_id, + omicron_sled_agent::sim::SimMode::Explicit, + Some(nexus_address), + Some(&update_directory), + omicron_sled_agent::sim::ZpoolConfig::None, + nexus_client::types::SledCpuFamily::AmdTurin, + ); + + sas.push( + start_sled_agent_with_config( + turin_sled_agent_log, + &turin_config, + 2 + sas.len() as u16 + 1, + &simulated_upstairs, + ) + .await + .unwrap(), + ); + // List sleds again. let sleds_found = sleds_list(&client, &sleds_url).await; - assert_eq!(sleds_found.len(), nsleds + 2); + assert_eq!(sleds_found.len(), sas.len() + 2); let sledids_found = sleds_found.iter().map(|sv| sv.identity.id).collect::>(); @@ -95,6 +121,21 @@ async fn test_sleds_list(cptestctx: &ControlPlaneTestContext) { sledids_found_sorted.sort(); assert_eq!(sledids_found, sledids_found_sorted); + let milans_found = sleds_found + .iter() + .filter(|sv| sv.cpu_family == SledCpuFamily::AmdMilan) + .count(); + // Simulated sled-agents report Milan processors by default. The two fake + // sled-agents created by `#[nexus_test]` as well as the four manually + // created above should be counted here. + assert_eq!(milans_found, 2 + 4); + + let turins_found = sleds_found + .iter() + .filter(|sv| sv.cpu_family == SledCpuFamily::AmdTurin) + .count(); + assert_eq!(turins_found, 1); + // Tear down the agents. for sa in sas { sa.http_server.close().await.unwrap(); diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index 9bd8ab5cc12..1fdd609b366 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -596,6 +596,8 @@ pub struct Sled { pub usable_hardware_threads: u32, /// Amount of RAM which may be used by the Sled's OS pub usable_physical_ram: ByteCount, + /// The family of the sled's CPU(s). + pub cpu_family: SledCpuFamily, } /// The operator-defined provision policy of a sled. @@ -765,6 +767,26 @@ impl fmt::Display for SledState { } } +/// Identifies the kind of CPU present on a sled, determined by reading CPUID. +/// This is the CPU family used in deciding if this sled can support an instance +/// with a particular required CPU platform. +// In lab and development environments in particular, the family reported here +// may differ from the real processor family. `sled-hardware::detect_cpu_family` +// tries to map various CPUs that we would not ship in a rack to their +// greatest-common-denominator family names here. +#[derive(Clone, Serialize, Deserialize, Debug, JsonSchema, PartialEq)] +#[serde(rename_all = "snake_case")] +pub enum SledCpuFamily { + /// The CPU vendor or its model/family numbers were not recognized. + Unknown, + + /// The sled has an AMD Milan (Zen 3) processor. + AmdMilan, + + /// The sled has an AMD Turin (Zen 5) processor. 
+ AmdTurin, +} + /// An operator's view of an instance running on a given sled #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] pub struct SledInstance { diff --git a/nexus/types/src/internal_api/params.rs b/nexus/types/src/internal_api/params.rs index a1a707d12a9..81663f787c0 100644 --- a/nexus/types/src/internal_api/params.rs +++ b/nexus/types/src/internal_api/params.rs @@ -30,6 +30,24 @@ use std::net::SocketAddr; use std::net::SocketAddrV6; use uuid::Uuid; +/// Identifies the kind of CPU present on a sled, determined by reading CPUID. +#[derive(Serialize, Deserialize, Debug, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub enum SledCpuFamily { + /// The CPU vendor or its family number don't correspond to any of the + /// known family variants. + Unknown, + + /// AMD Milan processors (or very close). Could be an actual Milan in a + /// Gimlet, a close-to-Milan client Zen 3 part, or Zen 4 (for which Milan is + /// the greatest common denominator). + AmdMilan, + + /// AMD Turin processors (or very close). Could be an actual Turin in a + /// Cosmo, or a close-to-Turin client Zen 5 part. + AmdTurin, +} + /// Sent by a sled agent to Nexus to inform about resources #[derive(Serialize, Deserialize, Debug, JsonSchema)] pub struct SledAgentInfo { @@ -56,6 +74,9 @@ pub struct SledAgentInfo { /// Must be smaller than "usable_physical_ram" pub reservoir_size: ByteCount, + /// The family of the sled's CPU. + pub cpu_family: SledCpuFamily, + /// The generation number of this request from sled-agent pub generation: Generation, diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 7f927ff8e88..75b9e22e84b 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -7195,6 +7195,14 @@ } ] }, + "cpu_family": { + "description": "The family of the sled's CPU.", + "allOf": [ + { + "$ref": "#/components/schemas/SledCpuFamily" + } + ] + }, "decommissioned": { "description": "Whether the sled-agent has been decommissioned by nexus\n\nThis flag is only set to true by nexus. Setting it on an upsert from sled-agent has no effect.", "type": "boolean" @@ -7250,6 +7258,7 @@ }, "required": [ "baseboard", + "cpu_family", "decommissioned", "generation", "repo_depot_port", @@ -7260,6 +7269,32 @@ "usable_physical_ram" ] }, + "SledCpuFamily": { + "description": "Identifies the kind of CPU present on a sled, determined by reading CPUID.", + "oneOf": [ + { + "description": "The CPU vendor or its family number don't correspond to any of the known family variants.", + "type": "string", + "enum": [ + "unknown" + ] + }, + { + "description": "AMD Milan processors (or very close). Could be an actual Milan in a Gimlet, a close-to-Milan client Zen 3 part, or Zen 4 (for which Milan is the greatest common denominator).", + "type": "string", + "enum": [ + "amd_milan" + ] + }, + { + "description": "AMD Turin processors (or very close). 
Could be an actual Turin in a Cosmo, or a close-to-Turin client Zen 5 part.", + "type": "string", + "enum": [ + "amd_turin" + ] + } + ] + }, "SledId": { "type": "object", "properties": { diff --git a/openapi/nexus.json b/openapi/nexus.json index bd91bcc6534..657d9f5c4d0 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -23719,6 +23719,14 @@ "baseboard": { "$ref": "#/components/schemas/Baseboard" }, + "cpu_family": { + "description": "The family of the sled's CPU(s).", + "allOf": [ + { + "$ref": "#/components/schemas/SledCpuFamily" + } + ] + }, "id": { "description": "unique, immutable, system-controlled identifier for each resource", "type": "string", @@ -23772,6 +23780,7 @@ }, "required": [ "baseboard", + "cpu_family", "id", "policy", "rack_id", @@ -23782,6 +23791,32 @@ "usable_physical_ram" ] }, + "SledCpuFamily": { + "description": "Identifies the kind of CPU present on a sled, determined by reading CPUID. This is the CPU family used in deciding if this sled can support an instance with a particular required CPU platform.", + "oneOf": [ + { + "description": "The CPU vendor or its model/family numbers were not recognized.", + "type": "string", + "enum": [ + "unknown" + ] + }, + { + "description": "The sled has an AMD Milan (Zen 3) processor.", + "type": "string", + "enum": [ + "amd_milan" + ] + }, + { + "description": "The sled has an AMD Turin (Zen 5) processor.", + "type": "string", + "enum": [ + "amd_turin" + ] + } + ] + }, "SledId": { "description": "The unique ID of a sled.", "type": "object", diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 3da3de08a80..c4aa716e9f8 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -187,6 +187,21 @@ CREATE TYPE IF NOT EXISTS omicron.public.sled_state AS ENUM ( 'decommissioned' ); +-- The model of CPU installed in a particular sled, discovered by sled-agent +-- and reported to Nexus. This determines what VMs can run on a sled: instances +-- that require a specific minimum CPU platform can only run on sleds whose +-- CPUs support all the features of that platform. +CREATE TYPE IF NOT EXISTS omicron.public.sled_cpu_family AS ENUM ( + -- Sled-agent didn't recognize the sled's CPU. + 'unknown', + + -- AMD Milan, or lab CPU close enough that sled-agent reported it as one. + 'amd_milan', + + -- AMD Turin, or lab CPU close enough that sled-agent reported it as one. + 'amd_turin' +); + CREATE TABLE IF NOT EXISTS omicron.public.sled ( /* Identity metadata (asset) */ id UUID PRIMARY KEY, @@ -229,7 +244,10 @@ CREATE TABLE IF NOT EXISTS omicron.public.sled ( /* The bound port of the Repo Depot API server, running on the same IP as the sled agent server. */ - repo_depot_port INT4 CHECK (port BETWEEN 0 AND 65535) NOT NULL + repo_depot_port INT4 CHECK (port BETWEEN 0 AND 65535) NOT NULL, + + /* The sled's detected CPU family. 
*/ + cpu_family omicron.public.sled_cpu_family NOT NULL ); -- Add an index that ensures a given physical sled (identified by serial and @@ -6342,7 +6360,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '173.0.0', NULL) + (TRUE, NOW(), NOW(), '174.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/crdb/sled-cpu-family/up01.sql b/schema/crdb/sled-cpu-family/up01.sql new file mode 100644 index 00000000000..9531cec6a7d --- /dev/null +++ b/schema/crdb/sled-cpu-family/up01.sql @@ -0,0 +1,5 @@ +CREATE TYPE IF NOT EXISTS omicron.public.sled_cpu_family AS ENUM ( + 'unknown', + 'amd_milan', + 'amd_turin' +); diff --git a/schema/crdb/sled-cpu-family/up02.sql b/schema/crdb/sled-cpu-family/up02.sql new file mode 100644 index 00000000000..1409e918dae --- /dev/null +++ b/schema/crdb/sled-cpu-family/up02.sql @@ -0,0 +1,2 @@ +ALTER TABLE omicron.public.sled ADD COLUMN IF NOT EXISTS + cpu_family omicron.public.sled_cpu_family NOT NULL DEFAULT 'unknown'; diff --git a/schema/crdb/sled-cpu-family/up03.sql b/schema/crdb/sled-cpu-family/up03.sql new file mode 100644 index 00000000000..612de867e4f --- /dev/null +++ b/schema/crdb/sled-cpu-family/up03.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.sled ALTER COLUMN cpu_family DROP DEFAULT; diff --git a/sled-agent/src/bin/sled-agent-sim.rs b/sled-agent/src/bin/sled-agent-sim.rs index ca96b2513e1..8378dc02a49 100644 --- a/sled-agent/src/bin/sled-agent-sim.rs +++ b/sled-agent/src/bin/sled-agent-sim.rs @@ -12,6 +12,7 @@ use clap::Parser; use dropshot::ConfigDropshot; use dropshot::ConfigLogging; use dropshot::ConfigLoggingLevel; +use nexus_client::types::SledCpuFamily; use omicron_common::api::internal::nexus::Certificate; use omicron_common::cmd::CmdError; use omicron_common::cmd::fatal; @@ -110,6 +111,7 @@ async fn do_run() -> Result<(), CmdError> { hardware_threads: 32, physical_ram: 64 * (1 << 30), reservoir_ram: 32 * (1 << 30), + cpu_family: SledCpuFamily::AmdMilan, baseboard: Baseboard::Gimlet { identifier: format!("sim-{}", args.uuid), model: String::from("sim-gimlet"), @@ -122,6 +124,7 @@ async fn do_run() -> Result<(), CmdError> { Some(args.nexus_addr), Some(tmp.path()), ZpoolConfig::TenVirtualU2s, + SledCpuFamily::AmdMilan, ) }; diff --git a/sled-agent/src/nexus.rs b/sled-agent/src/nexus.rs index 3faeed749bb..52870fe1532 100644 --- a/sled-agent/src/nexus.rs +++ b/sled-agent/src/nexus.rs @@ -77,6 +77,24 @@ impl ConvertInto } } +impl ConvertInto + for sled_hardware_types::CpuFamily +{ + fn convert(self) -> nexus_client::types::SledCpuFamily { + match self { + sled_hardware_types::CpuFamily::Unknown => { + nexus_client::types::SledCpuFamily::Unknown + } + sled_hardware_types::CpuFamily::AmdMilan => { + nexus_client::types::SledCpuFamily::AmdMilan + } + sled_hardware_types::CpuFamily::AmdTurin => { + nexus_client::types::SledCpuFamily::AmdTurin + } + } + } +} + // Somewhat arbitrary bound size, large enough that we should never hit it. 
const QUEUE_SIZE: usize = 256; @@ -275,6 +293,7 @@ impl NexusNotifierTask { .usable_physical_ram_bytes() .into(), reservoir_size: vmm_reservoir_manager.reservoir_size().into(), + cpu_family: hardware.cpu_family().convert(), generation, decommissioned: false, } @@ -654,6 +673,7 @@ mod test { usable_physical_ram: ByteCount::from(1024 * 1024 * 1024u32) .into(), reservoir_size: ByteCount::from(0u32).into(), + cpu_family: nexus_client::types::SledCpuFamily::Unknown, generation: Generation::new(), decommissioned: false, })); diff --git a/sled-agent/src/sim/config.rs b/sled-agent/src/sim/config.rs index dbd9f00c22e..d77d08fc50b 100644 --- a/sled-agent/src/sim/config.rs +++ b/sled-agent/src/sim/config.rs @@ -7,6 +7,7 @@ use crate::updates::ConfigUpdates; use camino::Utf8Path; use dropshot::ConfigDropshot; +use nexus_client::types::SledCpuFamily; use omicron_uuid_kinds::SledUuid; use serde::Deserialize; use serde::Serialize; @@ -56,6 +57,12 @@ pub struct ConfigHardware { pub hardware_threads: u32, pub physical_ram: u64, pub reservoir_ram: u64, + /// The kind of CPU to report the simulated sled as. In reality this is + /// constrained by `baseboard`; a `Baseboard::Gimlet` will only have an + /// `SledCpuFamily::AmdMilan`. A future `Baseboard::Cosmo` will *never* have + /// a `SledCpuFamily::AmdMilan`. Because the baseboard does not imply a + /// specific individual CPU family, though, it's simpler to record here. + pub cpu_family: SledCpuFamily, pub baseboard: Baseboard, } @@ -93,6 +100,7 @@ impl Config { nexus_address: Option, update_directory: Option<&Utf8Path>, zpool_config: ZpoolConfig, + cpu_family: SledCpuFamily, ) -> Config { // This IP range is guaranteed by RFC 6666 to discard traffic. // For tests that don't use a Nexus, we use this address to simulate a @@ -133,6 +141,7 @@ impl Config { hardware_threads: TEST_HARDWARE_THREADS, physical_ram: TEST_PHYSICAL_RAM, reservoir_ram: TEST_RESERVOIR_RAM, + cpu_family, baseboard: Baseboard::Gimlet { identifier: format!("sim-{}", id), model: String::from("sim-gimlet"), diff --git a/sled-agent/src/sim/server.rs b/sled-agent/src/sim/server.rs index 05c75e18c0e..690efdadfe3 100644 --- a/sled-agent/src/sim/server.rs +++ b/sled-agent/src/sim/server.rs @@ -166,6 +166,7 @@ impl Server { config.hardware.reservoir_ram, ) .unwrap(), + cpu_family: config.hardware.cpu_family, generation: Generation::new(), decommissioned: false, }, diff --git a/sled-hardware/src/illumos/mod.rs b/sled-hardware/src/illumos/mod.rs index 057db6012b6..d51ede8a037 100644 --- a/sled-hardware/src/illumos/mod.rs +++ b/sled-hardware/src/illumos/mod.rs @@ -9,7 +9,7 @@ use gethostname::gethostname; use illumos_devinfo::{DevInfo, DevLinkType, DevLinks, Node, Property}; use libnvme::{Nvme, controller::Controller}; use omicron_common::disk::{DiskIdentity, DiskVariant}; -use sled_hardware_types::Baseboard; +use sled_hardware_types::{Baseboard, CpuFamily}; use slog::Logger; use slog::debug; use slog::error; @@ -797,6 +797,11 @@ impl HardwareManager { .unwrap_or_else(|| Baseboard::unknown()) } + pub fn cpu_family(&self) -> CpuFamily { + let log = self.log.new(slog::o!("component" => "detect_cpu_family")); + crate::detect_cpu_family(&log) + } + pub fn online_processor_count(&self) -> u32 { self.inner.lock().unwrap().online_processor_count } diff --git a/sled-hardware/src/lib.rs b/sled-hardware/src/lib.rs index 18c6b4ba3a2..d778d619191 100644 --- a/sled-hardware/src/lib.rs +++ b/sled-hardware/src/lib.rs @@ -149,3 +149,138 @@ impl MemoryReservations { vmm_eligible } } + +/// Detects the current 
sled's CPU family using the CPUID instruction. +#[cfg(target_arch = "x86_64")] +pub fn detect_cpu_family(log: &Logger) -> sled_hardware_types::CpuFamily { + use core::arch::x86_64::__cpuid_count; + use sled_hardware_types::CpuFamily; + + // Read leaf 0 to figure out the processor's vendor and whether leaf 1 + // (which contains family, model, and stepping information) is available. + let leaf_0 = unsafe { __cpuid_count(0, 0) }; + + info!(log, "read CPUID leaf 0 to detect CPU vendor"; "values" => ?leaf_0); + + // If leaf 1 is unavailable, there's no way to figure out what family this + // processor belongs to. + if leaf_0.eax < 1 { + return CpuFamily::Unknown; + } + + // Check the vendor ID string in ebx/ecx/edx. + match (leaf_0.ebx, leaf_0.ecx, leaf_0.edx) { + // "AuthenticAMD"; see AMD APM volume 3 (March 2024) section E.3.1. + (0x68747541, 0x444D4163, 0x69746E65) => {} + _ => return CpuFamily::Unknown, + } + + // Feature detection after this point is AMD-specific - if we find ourselves + // supporting other CPU vendors we'll want to split this out accordingly. + + // Per AMD APM volume 3 (March 2024) section E.3.2, the processor family + // number is computed as follows: + // + // - Read bits 11:8 of leaf 1 eax to get the "base" family value. If this + // value is less than 0xF, the family value is equal to the base family + // value. + // - If the base family value is 0xF, eax[27:20] contains the "extended" + // family value, and the actual family value is the sum of the base and + // the extended values. + let leaf_1 = unsafe { __cpuid_count(1, 0) }; + let mut family = (leaf_1.eax & 0x00000F00) >> 8; + if family == 0xF { + family += (leaf_1.eax & 0x0FF00000) >> 20; + } + + // Also from the APM volume 3 section E.3.2, the processor model number is + // computed as follows: + // + // - Read bits 7:4 of leaf 1 eax to get the "base" model value. + // - If the "base" family value is less than 0xF, the "base" model stands. + // Otherwise, four additional bits of the model come from eax[19:16]. + // + // If the computed family number is 0xF or greater, that implies the "bsae" + // family was 0xF or greater as well. + let mut model = (leaf_1.eax & 0x000000F0) >> 4; + if family >= 0xF { + model |= (leaf_1.eax & 0x000F0000) >> 12; + } + + info!( + log, + "read CPUID leaf 1 to detect CPU family"; + "values" => ?leaf_1, + "family" => family, + "model" => model, + ); + + // Match on the family/model ranges we've detected. Notably client parts are + // reported as if they were their server counterparts; the feature parity is + // close enough that guests probably won't run into issues. This lowers + // friction for testing migrations where the control plane would need to + // tell what hosts could be compatible with a VMM's CPU platform. + // + // TODO(?): Exhaustively check that client parts support all CPU features of + // the corresponding Oxide CPU platform before doing this "as-if" reporting. + // Lab systems built out of client parts may have hardware which support all + // features in the corresponding instance CPU platform, but have individual + // features disabled in the BIOS or by client part microcode. This can + // result in funky situations, like an Oxide CPU platform advertising CPU + // features that lab systems don't support. This is unlikely, but take + // AVX512 as an example: users can often disable AVX512 entirely on Zen 5 + // BIOSes. In this case a VM on a 9000-series Ryzen will be told those + // instructions are available only for the guest to get #UD at runtime. 
+ match family { + 0x19 if model <= 0x0F => { + // This covers both Milan and Zen 3-based Threadrippers. I don't + // have a 5000-series Threadripper on hand to test but I believe + // they are feature-compatible. + CpuFamily::AmdMilan + } + 0x19 if model >= 0x10 && model <= 0x1F => { + // This covers both Genoa and Zen 4-based Threadrippers. Again, + // don't have a comparable Threadripper to test here. + // + // We intend to expose Turin and Milan as families a guest can + // choose, skipping the Zen 4 EPYC parts. So, round this down to + // Milan; if we're here it's a lab system and the alternative is + // "unknown". + CpuFamily::AmdMilan + } + 0x19 if model >= 0x20 && model <= 0x2F => { + // These are client Zen 3 parts aka Vermeer. Feature-wise, they are + // missing INVLPGB from Milan, but are otherwise close, and we don't + // expose INVLPGB to guests currently anyway. + CpuFamily::AmdMilan + } + 0x19 if model >= 0x60 && model <= 0x6F => { + // These are client Zen 4 parts aka Raphael. Similar to the above + // with Genoa and Vermeer, round these down to Milan in support of + // lab clusters instead of calling them unknown. + CpuFamily::AmdMilan + } + 0x1A if model <= 0x0F => CpuFamily::AmdTurin, + 0x1A if model >= 0x10 && model <= 0x1F => { + // These are Turin Dense, but from a CPU feature perspective they're + // equivalently capable to Turin, so for our purposes they're the + // same. + CpuFamily::AmdTurin + } + 0x1A if model >= 0x40 && model <= 0x4F => { + // These are client Zen 5 parts aka Granite Ridge. Won't be in a + // rack, but plausibly in a lab cluster. Like other non-server + // parts, these don't have INVLPGB, which we don't expose to guests. + // They should otherwise be a sufficient stand-in for Turin. + CpuFamily::AmdTurin + } + // Remaining family/model ranges in known families are likely mobile + // parts and intentionally rolled up into "Unknown." There, it's harder + // to predict what features out of the corresponding CPU platform would + // actually be present. It's also less likely that someone has a laptop + // or APU as part of a development cluster! + // + // Other families are, of course, unknown. 
+ _ => CpuFamily::Unknown, + } +} diff --git a/sled-hardware/src/non_illumos/mod.rs b/sled-hardware/src/non_illumos/mod.rs index c54afe87301..448dc59287c 100644 --- a/sled-hardware/src/non_illumos/mod.rs +++ b/sled-hardware/src/non_illumos/mod.rs @@ -6,7 +6,7 @@ use crate::SledMode; use crate::disk::{DiskPaths, Partition, PooledDiskError, UnparsedDisk}; use omicron_common::disk::{DiskIdentity, DiskVariant}; use omicron_uuid_kinds::ZpoolUuid; -use sled_hardware_types::Baseboard; +use sled_hardware_types::{Baseboard, CpuFamily}; use slog::Logger; use std::collections::HashMap; use tokio::sync::broadcast; @@ -41,6 +41,10 @@ impl HardwareManager { unimplemented!("Accessing hardware unsupported on non-illumos"); } + pub fn cpu_family(&self) -> CpuFamily { + unimplemented!("Accessing hardware unsupported on non-illumos"); + } + pub fn online_processor_count(&self) -> u32 { unimplemented!("Accessing hardware unsupported on non-illumos"); } diff --git a/sled-hardware/types/src/lib.rs b/sled-hardware/types/src/lib.rs index b34b5b1f422..1a7047fb076 100644 --- a/sled-hardware/types/src/lib.rs +++ b/sled-hardware/types/src/lib.rs @@ -95,3 +95,10 @@ impl std::fmt::Display for Baseboard { } } } + +#[derive(Clone, Copy, Debug)] +pub enum CpuFamily { + Unknown, + AmdMilan, + AmdTurin, +} From d316a2e1f322dff3f478d4447a196e34db441b04 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 30 Jul 2025 19:03:28 +0000 Subject: [PATCH 02/42] differentiate Turin and Turin Dense for the control plane --- nexus/db-model/src/sled_cpu_family.rs | 1 + nexus/types/src/external_api/views.rs | 3 +++ nexus/types/src/internal_api/params.rs | 4 ++++ schema/crdb/dbinit.sql | 5 ++++- sled-hardware/src/lib.rs | 8 ++++---- sled-hardware/types/src/lib.rs | 1 + 6 files changed, 17 insertions(+), 5 deletions(-) diff --git a/nexus/db-model/src/sled_cpu_family.rs b/nexus/db-model/src/sled_cpu_family.rs index 8247e1a2506..b69ca5f8c17 100644 --- a/nexus/db-model/src/sled_cpu_family.rs +++ b/nexus/db-model/src/sled_cpu_family.rs @@ -23,6 +23,7 @@ impl_enum_type!( Unknown => b"unknown" AmdMilan => b"amd_milan" AmdTurin => b"amd_turin" + AmdTurinDense => b"amd_turin_dense" ); impl From for SledCpuFamily { diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index 1fdd609b366..e9cdcbe97b6 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -785,6 +785,9 @@ pub enum SledCpuFamily { /// The sled has an AMD Turin (Zen 5) processor. AmdTurin, + + /// The sled has an AMD Turin Dense (Zen 5c) processor. + AmdTurinDense, } /// An operator's view of an instance running on a given sled diff --git a/nexus/types/src/internal_api/params.rs b/nexus/types/src/internal_api/params.rs index 81663f787c0..45755187d39 100644 --- a/nexus/types/src/internal_api/params.rs +++ b/nexus/types/src/internal_api/params.rs @@ -46,6 +46,10 @@ pub enum SledCpuFamily { /// AMD Turin processors (or very close). Could be an actual Turin in a /// Cosmo, or a close-to-Turin client Zen 5 part. AmdTurin, + + /// AMD Turin Dense processors. There are no "Turin Dense-like" CPUs unlike + /// other cases, so this means a bona fide Zen 5c Turin Dense part. 
+ AmdTurinDense, } /// Sent by a sled agent to Nexus to inform about resources diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index c4aa716e9f8..0a5587c5939 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -199,7 +199,10 @@ CREATE TYPE IF NOT EXISTS omicron.public.sled_cpu_family AS ENUM ( 'amd_milan', -- AMD Turin, or lab CPU close enough that sled-agent reported it as one. - 'amd_turin' + 'amd_turin', + + -- AMD Turin Dense. There are no "Turin Dense-likes", so this is precise. + 'amd_turin_dense' ); CREATE TABLE IF NOT EXISTS omicron.public.sled ( diff --git a/sled-hardware/src/lib.rs b/sled-hardware/src/lib.rs index d778d619191..3a8c5227c3a 100644 --- a/sled-hardware/src/lib.rs +++ b/sled-hardware/src/lib.rs @@ -262,10 +262,10 @@ pub fn detect_cpu_family(log: &Logger) -> sled_hardware_types::CpuFamily { } 0x1A if model <= 0x0F => CpuFamily::AmdTurin, 0x1A if model >= 0x10 && model <= 0x1F => { - // These are Turin Dense, but from a CPU feature perspective they're - // equivalently capable to Turin, so for our purposes they're the - // same. - CpuFamily::AmdTurin + // These are Turin Dense. From a CPU feature perspective they're + // equivalently capable to Turin, but they are physically distinct + // and sled operators should be able to see that. + CpuFamily::AmdTurinDense } 0x1A if model >= 0x40 && model <= 0x4F => { // These are client Zen 5 parts aka Granite Ridge. Won't be in a diff --git a/sled-hardware/types/src/lib.rs b/sled-hardware/types/src/lib.rs index 1a7047fb076..663ce8de323 100644 --- a/sled-hardware/types/src/lib.rs +++ b/sled-hardware/types/src/lib.rs @@ -101,4 +101,5 @@ pub enum CpuFamily { Unknown, AmdMilan, AmdTurin, + AmdTurinDense, } From 13672898a91ab79c3667416f8294f39b3d0757e0 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 30 Jul 2025 19:04:38 +0000 Subject: [PATCH 03/42] unwind CPU families from the public sled API --- nexus/db-model/src/sled.rs | 8 +++- nexus/db-model/src/sled_cpu_family.rs | 12 +---- nexus/tests/integration_tests/sleds.rs | 63 +++++--------------------- nexus/types/src/external_api/views.rs | 25 ---------- openapi/nexus-internal.json | 7 +++ openapi/nexus.json | 35 -------------- sled-agent/src/nexus.rs | 3 ++ 7 files changed, 30 insertions(+), 123 deletions(-) diff --git a/nexus/db-model/src/sled.rs b/nexus/db-model/src/sled.rs index f4c8e62f9ae..e9967569006 100644 --- a/nexus/db-model/src/sled.rs +++ b/nexus/db-model/src/sled.rs @@ -89,6 +89,13 @@ pub struct Sled { pub repo_depot_port: SqlU16, /// The family of this sled's CPU. + /// + /// This is primarily useful for questions about instance CPU platform + /// compatibility; it is too broad for topology-related sled selection + /// and more precise than a more general report of microarchitecture. We + /// likely should include much more about the sled's CPU alongside this for + /// those broader questions and reporting (see + /// https://github.com/oxidecomputer/omicron/issues/8730 for examples). 
pub cpu_family: SledCpuFamily, } @@ -147,7 +154,6 @@ impl From for views::Sled { state: sled.state.into(), usable_hardware_threads: sled.usable_hardware_threads.0, usable_physical_ram: *sled.usable_physical_ram, - cpu_family: sled.cpu_family.into(), } } } diff --git a/nexus/db-model/src/sled_cpu_family.rs b/nexus/db-model/src/sled_cpu_family.rs index b69ca5f8c17..12c8c4ba5c7 100644 --- a/nexus/db-model/src/sled_cpu_family.rs +++ b/nexus/db-model/src/sled_cpu_family.rs @@ -33,6 +33,7 @@ impl From for SledCpuFamily { InputFamily::Unknown => Self::Unknown, InputFamily::AmdMilan => Self::AmdMilan, InputFamily::AmdTurin => Self::AmdTurin, + InputFamily::AmdTurinDense => Self::AmdTurinDense, } } } @@ -43,16 +44,7 @@ impl From for nexus_types::internal_api::params::SledCpuFamily { SledCpuFamily::Unknown => Self::Unknown, SledCpuFamily::AmdMilan => Self::AmdMilan, SledCpuFamily::AmdTurin => Self::AmdTurin, - } - } -} - -impl From for nexus_types::external_api::views::SledCpuFamily { - fn from(value: SledCpuFamily) -> Self { - match value { - SledCpuFamily::Unknown => Self::Unknown, - SledCpuFamily::AmdMilan => Self::AmdMilan, - SledCpuFamily::AmdTurin => Self::AmdTurin, + SledCpuFamily::AmdTurinDense => Self::AmdTurinDense, } } } diff --git a/nexus/tests/integration_tests/sleds.rs b/nexus/tests/integration_tests/sleds.rs index d4d5ee825fd..8735bd568e4 100644 --- a/nexus/tests/integration_tests/sleds.rs +++ b/nexus/tests/integration_tests/sleds.rs @@ -15,10 +15,10 @@ use nexus_test_utils::resource_helpers::create_default_ip_pool; use nexus_test_utils::resource_helpers::create_instance; use nexus_test_utils::resource_helpers::create_project; use nexus_test_utils::resource_helpers::objects_list_page_authz; -use nexus_test_utils::{start_sled_agent, start_sled_agent_with_config}; +use nexus_test_utils::start_sled_agent; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::views::SledInstance; -use nexus_types::external_api::views::{PhysicalDisk, Sled, SledCpuFamily}; +use nexus_types::external_api::views::{PhysicalDisk, Sled}; use omicron_sled_agent::sim; use omicron_test_utils::dev::poll::{CondCheckError, wait_for_condition}; use omicron_uuid_kinds::GenericUuid; @@ -60,60 +60,34 @@ async fn test_sleds_list(cptestctx: &ControlPlaneTestContext) { assert_eq!(sleds_list(&client, &sleds_url).await.len(), 2); // Now start a few more sled agents. - let mut sas = Vec::new(); - let nexus_address = - cptestctx.server.get_http_server_internal_address().await; - let update_directory = Utf8Path::new("/should/not/be/used"); - let simulated_upstairs = &cptestctx.first_sled_agent().simulated_upstairs; - - for _ in 0..4 { + let nsleds = 3; + let mut sas = Vec::with_capacity(nsleds); + for i in 0..nsleds { let sa_id = SledUuid::new_v4(); let log = cptestctx.logctx.log.new(o!( "sled_id" => sa_id.to_string() )); + let addr = cptestctx.server.get_http_server_internal_address().await; + let update_directory = Utf8Path::new("/should/not/be/used"); sas.push( start_sled_agent( log, - nexus_address, + addr, sa_id, // Index starts at 2: the `nexus_test` macro already created two // sled agents as part of the ControlPlaneTestContext setup. 
- 2 + sas.len() as u16 + 1, + 2 + i as u16, &update_directory, sim::SimMode::Explicit, - &simulated_upstairs, + &cptestctx.first_sled_agent().simulated_upstairs, ) .await .unwrap(), ); } - let turin_sled_id = SledUuid::new_v4(); - let turin_sled_agent_log = - cptestctx.logctx.log.new(o!( "sled_id" => turin_sled_id.to_string() )); - - let turin_config = omicron_sled_agent::sim::Config::for_testing( - turin_sled_id, - omicron_sled_agent::sim::SimMode::Explicit, - Some(nexus_address), - Some(&update_directory), - omicron_sled_agent::sim::ZpoolConfig::None, - nexus_client::types::SledCpuFamily::AmdTurin, - ); - - sas.push( - start_sled_agent_with_config( - turin_sled_agent_log, - &turin_config, - 2 + sas.len() as u16 + 1, - &simulated_upstairs, - ) - .await - .unwrap(), - ); - // List sleds again. let sleds_found = sleds_list(&client, &sleds_url).await; - assert_eq!(sleds_found.len(), sas.len() + 2); + assert_eq!(sleds_found.len(), nsleds + 2); let sledids_found = sleds_found.iter().map(|sv| sv.identity.id).collect::>(); @@ -121,21 +95,6 @@ async fn test_sleds_list(cptestctx: &ControlPlaneTestContext) { sledids_found_sorted.sort(); assert_eq!(sledids_found, sledids_found_sorted); - let milans_found = sleds_found - .iter() - .filter(|sv| sv.cpu_family == SledCpuFamily::AmdMilan) - .count(); - // Simulated sled-agents report Milan processors by default. The two fake - // sled-agents created by `#[nexus_test]` as well as the four manually - // created above should be counted here. - assert_eq!(milans_found, 2 + 4); - - let turins_found = sleds_found - .iter() - .filter(|sv| sv.cpu_family == SledCpuFamily::AmdTurin) - .count(); - assert_eq!(turins_found, 1); - // Tear down the agents. for sa in sas { sa.http_server.close().await.unwrap(); diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index e9cdcbe97b6..9bd8ab5cc12 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -596,8 +596,6 @@ pub struct Sled { pub usable_hardware_threads: u32, /// Amount of RAM which may be used by the Sled's OS pub usable_physical_ram: ByteCount, - /// The family of the sled's CPU(s). - pub cpu_family: SledCpuFamily, } /// The operator-defined provision policy of a sled. @@ -767,29 +765,6 @@ impl fmt::Display for SledState { } } -/// Identifies the kind of CPU present on a sled, determined by reading CPUID. -/// This is the CPU family used in deciding if this sled can support an instance -/// with a particular required CPU platform. -// In lab and development environments in particular, the family reported here -// may differ from the real processor family. `sled-hardware::detect_cpu_family` -// tries to map various CPUs that we would not ship in a rack to their -// greatest-common-denominator family names here. -#[derive(Clone, Serialize, Deserialize, Debug, JsonSchema, PartialEq)] -#[serde(rename_all = "snake_case")] -pub enum SledCpuFamily { - /// The CPU vendor or its model/family numbers were not recognized. - Unknown, - - /// The sled has an AMD Milan (Zen 3) processor. - AmdMilan, - - /// The sled has an AMD Turin (Zen 5) processor. - AmdTurin, - - /// The sled has an AMD Turin Dense (Zen 5c) processor. 
- AmdTurinDense, -} - /// An operator's view of an instance running on a given sled #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] pub struct SledInstance { diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 75b9e22e84b..ff039ee29a6 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -7292,6 +7292,13 @@ "enum": [ "amd_turin" ] + }, + { + "description": "AMD Turin Dense processors. There are no \"Turin Dense-like\" CPUs unlike other cases, so this means a bona fide Zen 5c Turin Dense part.", + "type": "string", + "enum": [ + "amd_turin_dense" + ] } ] }, diff --git a/openapi/nexus.json b/openapi/nexus.json index 657d9f5c4d0..bd91bcc6534 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -23719,14 +23719,6 @@ "baseboard": { "$ref": "#/components/schemas/Baseboard" }, - "cpu_family": { - "description": "The family of the sled's CPU(s).", - "allOf": [ - { - "$ref": "#/components/schemas/SledCpuFamily" - } - ] - }, "id": { "description": "unique, immutable, system-controlled identifier for each resource", "type": "string", @@ -23780,7 +23772,6 @@ }, "required": [ "baseboard", - "cpu_family", "id", "policy", "rack_id", @@ -23791,32 +23782,6 @@ "usable_physical_ram" ] }, - "SledCpuFamily": { - "description": "Identifies the kind of CPU present on a sled, determined by reading CPUID. This is the CPU family used in deciding if this sled can support an instance with a particular required CPU platform.", - "oneOf": [ - { - "description": "The CPU vendor or its model/family numbers were not recognized.", - "type": "string", - "enum": [ - "unknown" - ] - }, - { - "description": "The sled has an AMD Milan (Zen 3) processor.", - "type": "string", - "enum": [ - "amd_milan" - ] - }, - { - "description": "The sled has an AMD Turin (Zen 5) processor.", - "type": "string", - "enum": [ - "amd_turin" - ] - } - ] - }, "SledId": { "description": "The unique ID of a sled.", "type": "object", diff --git a/sled-agent/src/nexus.rs b/sled-agent/src/nexus.rs index 52870fe1532..9c335dc88e4 100644 --- a/sled-agent/src/nexus.rs +++ b/sled-agent/src/nexus.rs @@ -91,6 +91,9 @@ impl ConvertInto sled_hardware_types::CpuFamily::AmdTurin => { nexus_client::types::SledCpuFamily::AmdTurin } + sled_hardware_types::CpuFamily::AmdTurinDense => { + nexus_client::types::SledCpuFamily::AmdTurinDense + } } } } From b5eaf68865d1fc920d819e21084d066921d24fd8 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 30 Jul 2025 21:12:02 +0000 Subject: [PATCH 04/42] review notes --- sled-hardware/src/lib.rs | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/sled-hardware/src/lib.rs b/sled-hardware/src/lib.rs index 3a8c5227c3a..3f57745a81c 100644 --- a/sled-hardware/src/lib.rs +++ b/sled-hardware/src/lib.rs @@ -151,6 +151,15 @@ impl MemoryReservations { } /// Detects the current sled's CPU family using the CPUID instruction. +/// +/// TODO: Ideally we would call into libtopo and pass along the information +/// identified there. See https://github.com/oxidecomputer/omicron/issues/8732. +/// +/// Everything here is duplicative with CPU identification done by the kernel. +/// You'll even find a very similar (but much more comprehensive) AMD family +/// mapping at `amd_revmap` in `usr/src/uts/intel/os/cpuid_subr.c`. But +/// sled-agent does not yet know about libtopo, getting topo snapshots, walking +/// them, or any of that, so the parsing is performed again here. 
#[cfg(target_arch = "x86_64")] pub fn detect_cpu_family(log: &Logger) -> sled_hardware_types::CpuFamily { use core::arch::x86_64::__cpuid_count; @@ -200,7 +209,7 @@ pub fn detect_cpu_family(log: &Logger) -> sled_hardware_types::CpuFamily { // - If the "base" family value is less than 0xF, the "base" model stands. // Otherwise, four additional bits of the model come from eax[19:16]. // - // If the computed family number is 0xF or greater, that implies the "bsae" + // If the computed family number is 0xF or greater, that implies the "base" // family was 0xF or greater as well. let mut model = (leaf_1.eax & 0x000000F0) >> 4; if family >= 0xF { @@ -210,9 +219,12 @@ pub fn detect_cpu_family(log: &Logger) -> sled_hardware_types::CpuFamily { info!( log, "read CPUID leaf 1 to detect CPU family"; - "values" => ?leaf_1, - "family" => family, - "model" => model, + "leaf1.eax" => format_args!("{:#08x}", leaf_1.eax), + "leaf1.ebx" => format_args!("{:#08x}", leaf_1.ebx), + "leaf1.ecx" => format_args!("{:#08x}", leaf_1.ecx), + "leaf1.edx" => format_args!("{:#08x}", leaf_1.edx), + "parsed family" => format_args!("{family:#x}"), + "parsed model" => format_args!("{model:#x}"), ); // Match on the family/model ranges we've detected. Notably client parts are From 114f383266fae89c4447bfac396356197c0a12bc Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 30 Jul 2025 21:51:21 +0000 Subject: [PATCH 05/42] fix links ugh --- nexus/db-model/src/sled.rs | 2 +- sled-hardware/src/lib.rs | 3 ++- sled-hardware/types/src/lib.rs | 10 ++++++++++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/nexus/db-model/src/sled.rs b/nexus/db-model/src/sled.rs index e9967569006..631cc92de0a 100644 --- a/nexus/db-model/src/sled.rs +++ b/nexus/db-model/src/sled.rs @@ -95,7 +95,7 @@ pub struct Sled { /// and more precise than a more general report of microarchitecture. We /// likely should include much more about the sled's CPU alongside this for /// those broader questions and reporting (see - /// https://github.com/oxidecomputer/omicron/issues/8730 for examples). + /// for examples). pub cpu_family: SledCpuFamily, } diff --git a/sled-hardware/src/lib.rs b/sled-hardware/src/lib.rs index 3f57745a81c..bd5d00ac47b 100644 --- a/sled-hardware/src/lib.rs +++ b/sled-hardware/src/lib.rs @@ -153,7 +153,8 @@ impl MemoryReservations { /// Detects the current sled's CPU family using the CPUID instruction. /// /// TODO: Ideally we would call into libtopo and pass along the information -/// identified there. See https://github.com/oxidecomputer/omicron/issues/8732. +/// identified there. See +/// . /// /// Everything here is duplicative with CPU identification done by the kernel. /// You'll even find a very similar (but much more comprehensive) AMD family diff --git a/sled-hardware/types/src/lib.rs b/sled-hardware/types/src/lib.rs index 663ce8de323..5d6ea5c8d3b 100644 --- a/sled-hardware/types/src/lib.rs +++ b/sled-hardware/types/src/lib.rs @@ -96,6 +96,16 @@ impl std::fmt::Display for Baseboard { } } +/// A general description of the CPU family for processor(s) in this sled. +/// +/// This is intended to broadly support the control plane answering the question +/// "can I run this instance on that sled?" given an instance with either no or +/// some CPU platform requirement. It is not enough information for more precise +/// placement questions - for example, is a CPU a high-frequency part or +/// many-core part? 
We don't include Genoa here, but in that CPU family there +/// are high frequency parts, many-core parts, and large-cache parts. To support +/// those questions (or satisfactorily answer #8730) we would need to collect +/// additional information and send it along. #[derive(Clone, Copy, Debug)] pub enum CpuFamily { Unknown, From 4c40d473943d335c079041238900a8c184d83213 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 30 Jul 2025 23:21:38 +0000 Subject: [PATCH 06/42] migration still needs to know about turin dense --- schema/crdb/sled-cpu-family/up01.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/schema/crdb/sled-cpu-family/up01.sql b/schema/crdb/sled-cpu-family/up01.sql index 9531cec6a7d..f1bb76f3389 100644 --- a/schema/crdb/sled-cpu-family/up01.sql +++ b/schema/crdb/sled-cpu-family/up01.sql @@ -1,5 +1,6 @@ CREATE TYPE IF NOT EXISTS omicron.public.sled_cpu_family AS ENUM ( 'unknown', 'amd_milan', - 'amd_turin' + 'amd_turin', + 'amd_turin_dense' ); From 5ec45d3352d0bc52f3a3e432594643dacd87b7c6 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sat, 2 Aug 2025 00:14:35 +0000 Subject: [PATCH 07/42] sled-agent needs to expose cpu_family for inventory collections too the existing plumbing was sufficient for sled-agent to report the CPU family at startup, but did not provide the CPU family when Nexus calls later for inventory collections. when you've upgraded to this version, the database migration sets the sled CPU family to `unknown` expecting that the next inventory collection will figure things out. this doesn't happen, and the initial check-in doesn't update the CPU type either (presumably because the sled is already known and initialized from the control plane's perspective?) this does... most of the plumbing to report a sled's CPU family for inventory collection, but it doesn't actually work. `SledCpuFamily` being both in `omicron-common` and `nexus-client` is kind of unworkable. probably need a `ConvertInto` or something to transform the shared into the `nexus-client` when needed..? i've been trying to figure out what exactly is necessary and what is just building a mess for myself for two hours and this feels like it's going nowhere. --- common/src/api/internal/shared.rs | 33 +++++++++++++++++++ nexus-sled-agent-shared/src/inventory.rs | 3 +- nexus/db-model/src/inventory.rs | 3 ++ nexus/db-model/src/sled_cpu_family.rs | 8 ++--- .../db-queries/src/db/datastore/inventory.rs | 5 +++ nexus/db-schema/src/schema.rs | 1 + nexus/inventory/src/builder.rs | 1 + nexus/inventory/src/examples.rs | 2 ++ nexus/reconfigurator/planning/src/system.rs | 3 ++ nexus/types/src/internal_api/params.rs | 23 +------------ nexus/types/src/inventory.rs | 2 ++ nexus/types/src/inventory/display.rs | 2 ++ sled-agent/src/sim/config.rs | 2 +- sled-agent/src/sim/sled_agent.rs | 1 + sled-agent/src/sled_agent.rs | 3 ++ 15 files changed, 64 insertions(+), 28 deletions(-) diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs index ebc9f6a46ca..f912cba3f30 100644 --- a/common/src/api/internal/shared.rs +++ b/common/src/api/internal/shared.rs @@ -1101,6 +1101,39 @@ pub struct SledIdentifiers { pub serial: String, } +/// Identifies the kind of CPU present on a sled, determined by reading CPUID. +#[derive(Serialize, Deserialize, Copy, Clone, Debug, PartialEq, Eq, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub enum SledCpuFamily { + /// The CPU vendor or its family number don't correspond to any of the + /// known family variants. 
+ Unknown, + + /// AMD Milan processors (or very close). Could be an actual Milan in a + /// Gimlet, a close-to-Milan client Zen 3 part, or Zen 4 (for which Milan is + /// the greatest common denominator). + AmdMilan, + + /// AMD Turin processors (or very close). Could be an actual Turin in a + /// Cosmo, or a close-to-Turin client Zen 5 part. + AmdTurin, + + /// AMD Turin Dense processors. There are no "Turin Dense-like" CPUs unlike + /// other cases, so this means a bona fide Zen 5c Turin Dense part. + AmdTurinDense, +} + +impl fmt::Display for SledCpuFamily { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + SledCpuFamily::Unknown => write!(f, "unknown"), + SledCpuFamily::AmdMilan => write!(f, "milan"), + SledCpuFamily::AmdTurin => write!(f, "turin"), + SledCpuFamily::AmdTurinDense => write!(f, "turin_dense"), + } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/nexus-sled-agent-shared/src/inventory.rs b/nexus-sled-agent-shared/src/inventory.rs index f5e5baa0aef..867512737e0 100644 --- a/nexus-sled-agent-shared/src/inventory.rs +++ b/nexus-sled-agent-shared/src/inventory.rs @@ -26,7 +26,7 @@ use omicron_common::update::OmicronZoneManifestSource; use omicron_common::{ api::{ external::{ByteCount, Generation}, - internal::shared::{NetworkInterface, SourceNatConfig}, + internal::shared::{NetworkInterface, SourceNatConfig, SledCpuFamily}, }, disk::{DatasetConfig, DiskVariant, OmicronPhysicalDiskConfig}, update::ArtifactId, @@ -121,6 +121,7 @@ pub struct Inventory { pub baseboard: Baseboard, pub usable_hardware_threads: u32, pub usable_physical_ram: ByteCount, + pub cpu_family: SledCpuFamily, pub reservoir_size: ByteCount, pub disks: Vec, pub zpools: Vec, diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index 12fdf5aad25..ebebf139805 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -8,6 +8,7 @@ use crate::ArtifactHash; use crate::Generation; use crate::PhysicalDiskKind; use crate::omicron_zone_config::{self, OmicronZoneNic}; +use crate::sled_cpu_family::SledCpuFamily; use crate::typed_uuid::DbTypedUuid; use crate::{ ByteCount, MacAddr, Name, ServiceKind, SqlU8, SqlU16, SqlU32, @@ -887,6 +888,7 @@ pub struct InvSledAgent { pub sled_role: SledRole, pub usable_hardware_threads: SqlU32, pub usable_physical_ram: ByteCount, + pub cpu_family: SledCpuFamily, pub reservoir_size: ByteCount, // Soft foreign key to an `InvOmicronSledConfig` pub ledgered_sled_config: Option>, @@ -1300,6 +1302,7 @@ impl InvSledAgent { usable_physical_ram: ByteCount::from( sled_agent.usable_physical_ram, ), + cpu_family: sled_agent.cpu_family.into(), reservoir_size: ByteCount::from(sled_agent.reservoir_size), ledgered_sled_config: ledgered_sled_config.map(From::from), reconciler_status, diff --git a/nexus/db-model/src/sled_cpu_family.rs b/nexus/db-model/src/sled_cpu_family.rs index 12c8c4ba5c7..13838b89fce 100644 --- a/nexus/db-model/src/sled_cpu_family.rs +++ b/nexus/db-model/src/sled_cpu_family.rs @@ -26,9 +26,9 @@ impl_enum_type!( AmdTurinDense => b"amd_turin_dense" ); -impl From for SledCpuFamily { - fn from(value: nexus_types::internal_api::params::SledCpuFamily) -> Self { - use nexus_types::internal_api::params::SledCpuFamily as InputFamily; +impl From for SledCpuFamily { + fn from(value: omicron_common::api::internal::shared::SledCpuFamily) -> Self { + use omicron_common::api::internal::shared::SledCpuFamily as InputFamily; match value { InputFamily::Unknown => Self::Unknown, InputFamily::AmdMilan => 
Self::AmdMilan, @@ -38,7 +38,7 @@ impl From for SledCpuFamily { } } -impl From for nexus_types::internal_api::params::SledCpuFamily { +impl From for omicron_common::api::internal::shared::SledCpuFamily { fn from(value: SledCpuFamily) -> Self { match value { SledCpuFamily::Unknown => Self::Unknown, diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index 79dc4f443bf..71255dd9552 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -1385,6 +1385,8 @@ impl DataStore { sled_agent.usable_physical_ram, ) .into_sql::(), + nexus_db_model::SledCpuFamily::from(sled_agent.cpu_family) + .into_sql::(), nexus_db_model::ByteCount::from( sled_agent.reservoir_size, ) @@ -1439,6 +1441,7 @@ impl DataStore { sa_dsl::sled_role, sa_dsl::usable_hardware_threads, sa_dsl::usable_physical_ram, + sa_dsl::cpu_family, sa_dsl::reservoir_size, sa_dsl::ledgered_sled_config, sa_dsl::reconciler_status_kind, @@ -1470,6 +1473,7 @@ impl DataStore { _sled_role, _usable_hardware_threads, _usable_physical_ram, + _cpu_family, _reservoir_size, _ledgered_sled_config, _reconciler_status_kind, @@ -3846,6 +3850,7 @@ impl DataStore { sled_role: s.sled_role.into(), usable_hardware_threads: u32::from(s.usable_hardware_threads), usable_physical_ram: s.usable_physical_ram.into(), + cpu_family: s.cpu_family.into(), reservoir_size: s.reservoir_size.into(), // For disks, zpools, and datasets, the map for a sled ID is // only populated if there is at least one disk/zpool/dataset diff --git a/nexus/db-schema/src/schema.rs b/nexus/db-schema/src/schema.rs index 3f1fa67ca70..fb04983dce4 100644 --- a/nexus/db-schema/src/schema.rs +++ b/nexus/db-schema/src/schema.rs @@ -1609,6 +1609,7 @@ table! 
{ sled_role -> crate::enums::SledRoleEnum, usable_hardware_threads -> Int8, usable_physical_ram -> Int8, + cpu_family -> crate::enums::SledCpuFamilyEnum, reservoir_size -> Int8, ledgered_sled_config -> Nullable, diff --git a/nexus/inventory/src/builder.rs b/nexus/inventory/src/builder.rs index c9eb4622df3..76af085af9a 100644 --- a/nexus/inventory/src/builder.rs +++ b/nexus/inventory/src/builder.rs @@ -595,6 +595,7 @@ impl CollectionBuilder { baseboard_id, usable_hardware_threads: inventory.usable_hardware_threads, usable_physical_ram: inventory.usable_physical_ram, + cpu_family: inventory.cpu_family, reservoir_size: inventory.reservoir_size, time_collected, sled_id, diff --git a/nexus/inventory/src/examples.rs b/nexus/inventory/src/examples.rs index fa0bf82d309..86c67a9c652 100644 --- a/nexus/inventory/src/examples.rs +++ b/nexus/inventory/src/examples.rs @@ -40,6 +40,7 @@ use nexus_types::inventory::ZpoolName; use omicron_cockroach_metrics::MetricValue; use omicron_cockroach_metrics::PrometheusMetrics; use omicron_common::api::external::ByteCount; +use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::disk::DatasetConfig; use omicron_common::disk::DatasetKind; use omicron_common::disk::DatasetName; @@ -957,6 +958,7 @@ pub fn sled_agent( sled_id, usable_hardware_threads: 10, usable_physical_ram: ByteCount::from(1024 * 1024), + cpu_family: SledCpuFamily::AmdMilan, disks, zpools, datasets, diff --git a/nexus/reconfigurator/planning/src/system.rs b/nexus/reconfigurator/planning/src/system.rs index 4f745953142..2ad075b4fc1 100644 --- a/nexus/reconfigurator/planning/src/system.rs +++ b/nexus/reconfigurator/planning/src/system.rs @@ -57,6 +57,7 @@ use omicron_common::address::SLED_PREFIX; use omicron_common::address::get_sled_address; use omicron_common::api::external::ByteCount; use omicron_common::api::external::Generation; +use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::disk::DiskIdentity; use omicron_common::disk::DiskVariant; use omicron_common::disk::M2Slot; @@ -1071,6 +1072,7 @@ impl Sled { sled_id, usable_hardware_threads: 10, usable_physical_ram: ByteCount::from(1024 * 1024), + cpu_family: SledCpuFamily::AmdMilan, // Populate disks, appearing like a real device. disks: zpools .values() @@ -1267,6 +1269,7 @@ impl Sled { sled_id, usable_hardware_threads: inv_sled_agent.usable_hardware_threads, usable_physical_ram: inv_sled_agent.usable_physical_ram, + cpu_family: inv_sled_agent.cpu_family, disks: vec![], zpools: vec![], datasets: vec![], diff --git a/nexus/types/src/internal_api/params.rs b/nexus/types/src/internal_api/params.rs index 45755187d39..3362853fe4f 100644 --- a/nexus/types/src/internal_api/params.rs +++ b/nexus/types/src/internal_api/params.rs @@ -18,6 +18,7 @@ use omicron_common::api::internal::nexus::Certificate; use omicron_common::api::internal::shared::AllowedSourceIps; use omicron_common::api::internal::shared::ExternalPortDiscovery; use omicron_common::api::internal::shared::RackNetworkConfig; +use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::api::internal::shared::SourceNatConfig; use omicron_uuid_kinds::DatasetUuid; use omicron_uuid_kinds::PhysicalDiskUuid; @@ -30,28 +31,6 @@ use std::net::SocketAddr; use std::net::SocketAddrV6; use uuid::Uuid; -/// Identifies the kind of CPU present on a sled, determined by reading CPUID. 
-#[derive(Serialize, Deserialize, Debug, JsonSchema)] -#[serde(rename_all = "snake_case")] -pub enum SledCpuFamily { - /// The CPU vendor or its family number don't correspond to any of the - /// known family variants. - Unknown, - - /// AMD Milan processors (or very close). Could be an actual Milan in a - /// Gimlet, a close-to-Milan client Zen 3 part, or Zen 4 (for which Milan is - /// the greatest common denominator). - AmdMilan, - - /// AMD Turin processors (or very close). Could be an actual Turin in a - /// Cosmo, or a close-to-Turin client Zen 5 part. - AmdTurin, - - /// AMD Turin Dense processors. There are no "Turin Dense-like" CPUs unlike - /// other cases, so this means a bona fide Zen 5c Turin Dense part. - AmdTurinDense, -} - /// Sent by a sled agent to Nexus to inform about resources #[derive(Serialize, Deserialize, Debug, JsonSchema)] pub struct SledAgentInfo { diff --git a/nexus/types/src/inventory.rs b/nexus/types/src/inventory.rs index 7acfe51cd6a..285a529b394 100644 --- a/nexus/types/src/inventory.rs +++ b/nexus/types/src/inventory.rs @@ -36,6 +36,7 @@ use omicron_common::api::external::ByteCount; pub use omicron_common::api::internal::shared::NetworkInterface; pub use omicron_common::api::internal::shared::NetworkInterfaceKind; pub use omicron_common::api::internal::shared::SourceNatConfig; +pub use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::disk::M2Slot; pub use omicron_common::zpool_name::ZpoolName; use omicron_uuid_kinds::CollectionUuid; @@ -638,6 +639,7 @@ pub struct SledAgent { pub sled_role: SledRole, pub usable_hardware_threads: u32, pub usable_physical_ram: ByteCount, + pub cpu_family: SledCpuFamily, pub reservoir_size: ByteCount, pub disks: Vec, pub zpools: Vec, diff --git a/nexus/types/src/inventory/display.rs b/nexus/types/src/inventory/display.rs index 2917b762623..cfee239a417 100644 --- a/nexus/types/src/inventory/display.rs +++ b/nexus/types/src/inventory/display.rs @@ -553,6 +553,7 @@ fn display_sleds( sled_role, usable_hardware_threads, usable_physical_ram, + cpu_family, reservoir_size, disks, zpools, @@ -585,6 +586,7 @@ fn display_sleds( )?; writeln!(indented, "address: {}", sled_agent_address)?; writeln!(indented, "usable hw threads: {}", usable_hardware_threads)?; + writeln!(indented, "CPU family: {}", cpu_family)?; writeln!( indented, "usable memory (GiB): {}", diff --git a/sled-agent/src/sim/config.rs b/sled-agent/src/sim/config.rs index d77d08fc50b..4f0851d0dd3 100644 --- a/sled-agent/src/sim/config.rs +++ b/sled-agent/src/sim/config.rs @@ -7,7 +7,7 @@ use crate::updates::ConfigUpdates; use camino::Utf8Path; use dropshot::ConfigDropshot; -use nexus_client::types::SledCpuFamily; +use omicron_common::api::internal::shared::SledCpuFamily; use omicron_uuid_kinds::SledUuid; use serde::Deserialize; use serde::Serialize; diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index 43010c18014..c1609c89358 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -754,6 +754,7 @@ impl SledAgent { self.config.hardware.physical_ram, ) .context("usable_physical_ram")?, + cpu_family: self.config.hardware.cpu_family, reservoir_size: ByteCount::try_from( self.config.hardware.reservoir_ram, ) diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 883140ed8d8..c76d102535f 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -1097,6 +1097,8 @@ impl SledAgent { self.inner.hardware.online_processor_count(); let usable_physical_ram 
= self.inner.hardware.usable_physical_ram_bytes(); + let cpu_family = + self.inner.hardware.cpu_family(); let reservoir_size = self.inner.instances.reservoir_size(); let sled_role = if is_scrimlet { SledRole::Scrimlet } else { SledRole::Gimlet }; @@ -1119,6 +1121,7 @@ impl SledAgent { baseboard, usable_hardware_threads, usable_physical_ram: ByteCount::try_from(usable_physical_ram)?, + cpu_family, reservoir_size, disks, zpools, From e9cbbdd7b3462c0410d801a69166651102368a6e Mon Sep 17 00:00:00 2001 From: iximeow Date: Sat, 2 Aug 2025 01:28:12 +0000 Subject: [PATCH 08/42] it compiles (might work now?) --- common/src/api/internal/shared.rs | 13 +++++++++- nexus-sled-agent-shared/src/inventory.rs | 2 +- nexus/db-model/src/sled_cpu_family.rs | 12 ++++++--- .../src/db/datastore/physical_disk.rs | 2 ++ nexus/inventory/src/collector.rs | 2 +- nexus/test-utils/src/lib.rs | 2 +- nexus/tests/integration_tests/rack.rs | 2 +- nexus/types/src/inventory.rs | 2 +- sled-agent/src/bin/sled-agent-sim.rs | 2 +- sled-agent/src/nexus.rs | 11 ++++---- sled-agent/src/rack_setup/plan/service.rs | 2 ++ sled-agent/src/rack_setup/service.rs | 2 ++ sled-agent/src/sim/server.rs | 4 +-- sled-agent/src/sled_agent.rs | 3 +-- sled-hardware/src/illumos/mod.rs | 5 ++-- sled-hardware/src/lib.rs | 26 ++++++++++--------- sled-hardware/src/non_illumos/mod.rs | 5 ++-- sled-hardware/types/src/lib.rs | 18 ------------- 18 files changed, 62 insertions(+), 53 deletions(-) diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs index f912cba3f30..f23925318c3 100644 --- a/common/src/api/internal/shared.rs +++ b/common/src/api/internal/shared.rs @@ -1102,7 +1102,18 @@ pub struct SledIdentifiers { } /// Identifies the kind of CPU present on a sled, determined by reading CPUID. -#[derive(Serialize, Deserialize, Copy, Clone, Debug, PartialEq, Eq, JsonSchema)] +/// +/// This is intended to broadly support the control plane answering the question +/// "can I run this instance on that sled?" given an instance with either no or +/// some CPU platform requirement. It is not enough information for more precise +/// placement questions - for example, is a CPU a high-frequency part or +/// many-core part? We don't include Genoa here, but in that CPU family there +/// are high frequency parts, many-core parts, and large-cache parts. To support +/// those questions (or satisfactorily answer #8730) we would need to collect +/// additional information and send it along. 
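To make the intended use of this type concrete, here is a purely hypothetical sketch of the kind of placement check the control plane could build on top of it, assuming the SledCpuFamily enum defined here is in scope. The `satisfies_min_platform` helper and the compatibility ordering it assumes (Milan-or-newer, Turin-or-newer, with Turin Dense treated as feature-equivalent to Turin) are illustrative assumptions only; nothing in this series defines that policy:

    // Hypothetical helper, not part of this change. `sled` is the family a
    // sled reported; `min` stands in for an instance's minimum CPU platform.
    fn satisfies_min_platform(sled: SledCpuFamily, min: SledCpuFamily) -> bool {
        use SledCpuFamily::*;
        match min {
            // No recognizable requirement: any sled will do.
            Unknown => true,
            // Milan-or-newer: assume Turin and Turin Dense expose a superset
            // of the guest-visible Milan feature set.
            AmdMilan => matches!(sled, AmdMilan | AmdTurin | AmdTurinDense),
            // Turin-or-newer: Turin Dense is feature-equivalent to Turin.
            AmdTurin | AmdTurinDense => matches!(sled, AmdTurin | AmdTurinDense),
        }
    }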
+#[derive( + Serialize, Deserialize, Copy, Clone, Debug, PartialEq, Eq, JsonSchema, +)] #[serde(rename_all = "snake_case")] pub enum SledCpuFamily { /// The CPU vendor or its family number don't correspond to any of the diff --git a/nexus-sled-agent-shared/src/inventory.rs b/nexus-sled-agent-shared/src/inventory.rs index 867512737e0..ac7856695fd 100644 --- a/nexus-sled-agent-shared/src/inventory.rs +++ b/nexus-sled-agent-shared/src/inventory.rs @@ -26,7 +26,7 @@ use omicron_common::update::OmicronZoneManifestSource; use omicron_common::{ api::{ external::{ByteCount, Generation}, - internal::shared::{NetworkInterface, SourceNatConfig, SledCpuFamily}, + internal::shared::{NetworkInterface, SledCpuFamily, SourceNatConfig}, }, disk::{DatasetConfig, DiskVariant, OmicronPhysicalDiskConfig}, update::ArtifactId, diff --git a/nexus/db-model/src/sled_cpu_family.rs b/nexus/db-model/src/sled_cpu_family.rs index 13838b89fce..700be75946d 100644 --- a/nexus/db-model/src/sled_cpu_family.rs +++ b/nexus/db-model/src/sled_cpu_family.rs @@ -26,8 +26,12 @@ impl_enum_type!( AmdTurinDense => b"amd_turin_dense" ); -impl From for SledCpuFamily { - fn from(value: omicron_common::api::internal::shared::SledCpuFamily) -> Self { +impl From + for SledCpuFamily +{ + fn from( + value: omicron_common::api::internal::shared::SledCpuFamily, + ) -> Self { use omicron_common::api::internal::shared::SledCpuFamily as InputFamily; match value { InputFamily::Unknown => Self::Unknown, @@ -38,7 +42,9 @@ impl From for SledCpuFamil } } -impl From for omicron_common::api::internal::shared::SledCpuFamily { +impl From + for omicron_common::api::internal::shared::SledCpuFamily +{ fn from(value: SledCpuFamily) -> Self { match value { SledCpuFamily::Unknown => Self::Unknown, diff --git a/nexus/db-queries/src/db/datastore/physical_disk.rs b/nexus/db-queries/src/db/datastore/physical_disk.rs index 9409c6c9e1d..2cea054b29d 100644 --- a/nexus/db-queries/src/db/datastore/physical_disk.rs +++ b/nexus/db-queries/src/db/datastore/physical_disk.rs @@ -344,6 +344,7 @@ mod test { }; use nexus_types::identity::Asset; use omicron_common::api::external::ByteCount; + use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::disk::{DiskIdentity, DiskVariant}; use omicron_test_utils::dev; use std::num::NonZeroU32; @@ -693,6 +694,7 @@ mod test { sled_id: SledUuid::from_untyped_uuid(sled.id()), usable_hardware_threads: 10, usable_physical_ram: ByteCount::from(1024 * 1024), + cpu_family: SledCpuFamily::AmdMilan, disks, zpools: vec![], datasets: vec![], diff --git a/nexus/inventory/src/collector.rs b/nexus/inventory/src/collector.rs index fddde6f5c07..ce1988b36de 100644 --- a/nexus/inventory/src/collector.rs +++ b/nexus/inventory/src/collector.rs @@ -666,7 +666,6 @@ mod test { use crate::StaticSledAgentEnumerator; use gateway_messages::SpPort; use id_map::IdMap; - use nexus_client::types::SledCpuFamily; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; use nexus_sled_agent_shared::inventory::HostPhase2DesiredSlots; use nexus_sled_agent_shared::inventory::OmicronSledConfig; @@ -676,6 +675,7 @@ mod test { use nexus_types::inventory::Collection; use omicron_cockroach_metrics::CockroachClusterAdminClient; use omicron_common::api::external::Generation; + use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::zpool_name::ZpoolName; use omicron_sled_agent::sim; use omicron_uuid_kinds::OmicronZoneUuid; diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index ed42d80ef7c..845d09b3a2f 
100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -27,7 +27,6 @@ use id_map::IdMap; use internal_dns_types::config::DnsConfigBuilder; use internal_dns_types::names::DNS_ZONE_EXTERNAL_TESTING; use internal_dns_types::names::ServiceName; -use nexus_client::types::SledCpuFamily; use nexus_config::Database; use nexus_config::DpdConfig; use nexus_config::InternalDns; @@ -75,6 +74,7 @@ use omicron_common::api::internal::nexus::ProducerKind; use omicron_common::api::internal::shared::DatasetKind; use omicron_common::api::internal::shared::NetworkInterface; use omicron_common::api::internal::shared::NetworkInterfaceKind; +use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::api::internal::shared::SourceNatConfig; use omicron_common::api::internal::shared::SwitchLocation; use omicron_common::disk::CompressionAlgorithm; diff --git a/nexus/tests/integration_tests/rack.rs b/nexus/tests/integration_tests/rack.rs index b5d63858908..32610a3c043 100644 --- a/nexus/tests/integration_tests/rack.rs +++ b/nexus/tests/integration_tests/rack.rs @@ -21,9 +21,9 @@ use nexus_types::external_api::params; use nexus_types::external_api::shared::UninitializedSled; use nexus_types::external_api::views::Rack; use nexus_types::internal_api::params::SledAgentInfo; -use nexus_types::internal_api::params::SledCpuFamily; use omicron_common::api::external::ByteCount; use omicron_common::api::external::Generation; +use omicron_common::api::internal::shared::SledCpuFamily; use omicron_uuid_kinds::GenericUuid; use std::time::Duration; use uuid::Uuid; diff --git a/nexus/types/src/inventory.rs b/nexus/types/src/inventory.rs index 285a529b394..70f2451d29b 100644 --- a/nexus/types/src/inventory.rs +++ b/nexus/types/src/inventory.rs @@ -35,8 +35,8 @@ use nexus_sled_agent_shared::inventory::ZoneImageResolverInventory; use omicron_common::api::external::ByteCount; pub use omicron_common::api::internal::shared::NetworkInterface; pub use omicron_common::api::internal::shared::NetworkInterfaceKind; +use omicron_common::api::internal::shared::SledCpuFamily; pub use omicron_common::api::internal::shared::SourceNatConfig; -pub use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::disk::M2Slot; pub use omicron_common::zpool_name::ZpoolName; use omicron_uuid_kinds::CollectionUuid; diff --git a/sled-agent/src/bin/sled-agent-sim.rs b/sled-agent/src/bin/sled-agent-sim.rs index 8378dc02a49..ccb5e0eaf8d 100644 --- a/sled-agent/src/bin/sled-agent-sim.rs +++ b/sled-agent/src/bin/sled-agent-sim.rs @@ -12,8 +12,8 @@ use clap::Parser; use dropshot::ConfigDropshot; use dropshot::ConfigLogging; use dropshot::ConfigLoggingLevel; -use nexus_client::types::SledCpuFamily; use omicron_common::api::internal::nexus::Certificate; +use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::cmd::CmdError; use omicron_common::cmd::fatal; use omicron_sled_agent::sim::RssArgs; diff --git a/sled-agent/src/nexus.rs b/sled-agent/src/nexus.rs index 9c335dc88e4..5ebb086da13 100644 --- a/sled-agent/src/nexus.rs +++ b/sled-agent/src/nexus.rs @@ -78,20 +78,21 @@ impl ConvertInto } impl ConvertInto - for sled_hardware_types::CpuFamily + for omicron_common::api::internal::shared::SledCpuFamily { fn convert(self) -> nexus_client::types::SledCpuFamily { + use omicron_common::api::internal::shared::SledCpuFamily as SharedSledCpuFamily; match self { - sled_hardware_types::CpuFamily::Unknown => { + SharedSledCpuFamily::Unknown => { nexus_client::types::SledCpuFamily::Unknown } - 
sled_hardware_types::CpuFamily::AmdMilan => { + SharedSledCpuFamily::AmdMilan => { nexus_client::types::SledCpuFamily::AmdMilan } - sled_hardware_types::CpuFamily::AmdTurin => { + SharedSledCpuFamily::AmdTurin => { nexus_client::types::SledCpuFamily::AmdTurin } - sled_hardware_types::CpuFamily::AmdTurinDense => { + SharedSledCpuFamily::AmdTurinDense => { nexus_client::types::SledCpuFamily::AmdTurinDense } } diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index 3732bca059a..bc6476f122a 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -1160,6 +1160,7 @@ mod tests { use omicron_common::api::external::ByteCount; use omicron_common::api::internal::shared::AllowedSourceIps; use omicron_common::api::internal::shared::RackNetworkConfig; + use omicron_common::api::internal::shared::SledCpuFamily; use oxnet::Ipv6Net; use sled_agent_types::rack_init::BootstrapAddressDiscovery; use sled_agent_types::rack_init::RecoverySiloConfig; @@ -1372,6 +1373,7 @@ mod tests { baseboard: Baseboard::Unknown, usable_hardware_threads: 32, usable_physical_ram: ByteCount::try_from(1_u64 << 40).unwrap(), + cpu_family: SledCpuFamily::AmdMilan, reservoir_size: ByteCount::try_from(1_u64 << 40).unwrap(), disks, zpools: vec![], diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 508733d4f2e..38684da7a57 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -1754,6 +1754,7 @@ mod test { use omicron_common::{ address::{Ipv6Subnet, SLED_PREFIX, get_sled_address}, api::external::{ByteCount, Generation}, + api::internal::shared::SledCpuFamily, disk::{DiskIdentity, DiskVariant}, }; use omicron_uuid_kinds::SledUuid; @@ -1775,6 +1776,7 @@ mod test { baseboard: Baseboard::Unknown, usable_hardware_threads: 32, usable_physical_ram: ByteCount::from_gibibytes_u32(16), + cpu_family: SledCpuFamily::AmdMilan, reservoir_size: ByteCount::from_gibibytes_u32(0), disks: (0..u2_count) .map(|i| InventoryDisk { diff --git a/sled-agent/src/sim/server.rs b/sled-agent/src/sim/server.rs index 690efdadfe3..f252e327834 100644 --- a/sled-agent/src/sim/server.rs +++ b/sled-agent/src/sim/server.rs @@ -8,7 +8,7 @@ use super::config::Config; use super::http_entrypoints::api as http_api; use super::sled_agent::SledAgent; use super::storage::PantryServer; -use crate::nexus::NexusClient; +use crate::nexus::{ConvertInto, NexusClient}; use crate::rack_setup::SledConfig; use crate::rack_setup::service::build_initial_blueprint_from_sled_configs; use crate::rack_setup::{ @@ -166,7 +166,7 @@ impl Server { config.hardware.reservoir_ram, ) .unwrap(), - cpu_family: config.hardware.cpu_family, + cpu_family: config.hardware.cpu_family.convert(), generation: Generation::new(), decommissioned: false, }, diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index c76d102535f..7124938291c 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -1097,8 +1097,7 @@ impl SledAgent { self.inner.hardware.online_processor_count(); let usable_physical_ram = self.inner.hardware.usable_physical_ram_bytes(); - let cpu_family = - self.inner.hardware.cpu_family(); + let cpu_family = self.inner.hardware.cpu_family(); let reservoir_size = self.inner.instances.reservoir_size(); let sled_role = if is_scrimlet { SledRole::Scrimlet } else { SledRole::Gimlet }; diff --git a/sled-hardware/src/illumos/mod.rs b/sled-hardware/src/illumos/mod.rs index 
d51ede8a037..9e319d3cd8b 100644 --- a/sled-hardware/src/illumos/mod.rs +++ b/sled-hardware/src/illumos/mod.rs @@ -8,8 +8,9 @@ use camino::Utf8PathBuf; use gethostname::gethostname; use illumos_devinfo::{DevInfo, DevLinkType, DevLinks, Node, Property}; use libnvme::{Nvme, controller::Controller}; +use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::disk::{DiskIdentity, DiskVariant}; -use sled_hardware_types::{Baseboard, CpuFamily}; +use sled_hardware_types::Baseboard; use slog::Logger; use slog::debug; use slog::error; @@ -797,7 +798,7 @@ impl HardwareManager { .unwrap_or_else(|| Baseboard::unknown()) } - pub fn cpu_family(&self) -> CpuFamily { + pub fn cpu_family(&self) -> SledCpuFamily { let log = self.log.new(slog::o!("component" => "detect_cpu_family")); crate::detect_cpu_family(&log) } diff --git a/sled-hardware/src/lib.rs b/sled-hardware/src/lib.rs index bd5d00ac47b..97089091539 100644 --- a/sled-hardware/src/lib.rs +++ b/sled-hardware/src/lib.rs @@ -162,9 +162,11 @@ impl MemoryReservations { /// sled-agent does not yet know about libtopo, getting topo snapshots, walking /// them, or any of that, so the parsing is performed again here. #[cfg(target_arch = "x86_64")] -pub fn detect_cpu_family(log: &Logger) -> sled_hardware_types::CpuFamily { +pub fn detect_cpu_family( + log: &Logger, +) -> omicron_common::api::internal::shared::SledCpuFamily { use core::arch::x86_64::__cpuid_count; - use sled_hardware_types::CpuFamily; + use omicron_common::api::internal::shared::SledCpuFamily; // Read leaf 0 to figure out the processor's vendor and whether leaf 1 // (which contains family, model, and stepping information) is available. @@ -175,14 +177,14 @@ pub fn detect_cpu_family(log: &Logger) -> sled_hardware_types::CpuFamily { // If leaf 1 is unavailable, there's no way to figure out what family this // processor belongs to. if leaf_0.eax < 1 { - return CpuFamily::Unknown; + return SledCpuFamily::Unknown; } // Check the vendor ID string in ebx/ecx/edx. match (leaf_0.ebx, leaf_0.ecx, leaf_0.edx) { // "AuthenticAMD"; see AMD APM volume 3 (March 2024) section E.3.1. (0x68747541, 0x444D4163, 0x69746E65) => {} - _ => return CpuFamily::Unknown, + _ => return SledCpuFamily::Unknown, } // Feature detection after this point is AMD-specific - if we find ourselves @@ -249,7 +251,7 @@ pub fn detect_cpu_family(log: &Logger) -> sled_hardware_types::CpuFamily { // This covers both Milan and Zen 3-based Threadrippers. I don't // have a 5000-series Threadripper on hand to test but I believe // they are feature-compatible. - CpuFamily::AmdMilan + SledCpuFamily::AmdMilan } 0x19 if model >= 0x10 && model <= 0x1F => { // This covers both Genoa and Zen 4-based Threadrippers. Again, @@ -259,33 +261,33 @@ pub fn detect_cpu_family(log: &Logger) -> sled_hardware_types::CpuFamily { // choose, skipping the Zen 4 EPYC parts. So, round this down to // Milan; if we're here it's a lab system and the alternative is // "unknown". - CpuFamily::AmdMilan + SledCpuFamily::AmdMilan } 0x19 if model >= 0x20 && model <= 0x2F => { // These are client Zen 3 parts aka Vermeer. Feature-wise, they are // missing INVLPGB from Milan, but are otherwise close, and we don't // expose INVLPGB to guests currently anyway. - CpuFamily::AmdMilan + SledCpuFamily::AmdMilan } 0x19 if model >= 0x60 && model <= 0x6F => { // These are client Zen 4 parts aka Raphael. Similar to the above // with Genoa and Vermeer, round these down to Milan in support of // lab clusters instead of calling them unknown. 
- CpuFamily::AmdMilan + SledCpuFamily::AmdMilan } - 0x1A if model <= 0x0F => CpuFamily::AmdTurin, + 0x1A if model <= 0x0F => SledCpuFamily::AmdTurin, 0x1A if model >= 0x10 && model <= 0x1F => { // These are Turin Dense. From a CPU feature perspective they're // equivalently capable to Turin, but they are physically distinct // and sled operators should be able to see that. - CpuFamily::AmdTurinDense + SledCpuFamily::AmdTurinDense } 0x1A if model >= 0x40 && model <= 0x4F => { // These are client Zen 5 parts aka Granite Ridge. Won't be in a // rack, but plausibly in a lab cluster. Like other non-server // parts, these don't have INVLPGB, which we don't expose to guests. // They should otherwise be a sufficient stand-in for Turin. - CpuFamily::AmdTurin + SledCpuFamily::AmdTurin } // Remaining family/model ranges in known families are likely mobile // parts and intentionally rolled up into "Unknown." There, it's harder @@ -294,6 +296,6 @@ pub fn detect_cpu_family(log: &Logger) -> sled_hardware_types::CpuFamily { // or APU as part of a development cluster! // // Other families are, of course, unknown. - _ => CpuFamily::Unknown, + _ => SledCpuFamily::Unknown, } } diff --git a/sled-hardware/src/non_illumos/mod.rs b/sled-hardware/src/non_illumos/mod.rs index 448dc59287c..c3dd03c61b5 100644 --- a/sled-hardware/src/non_illumos/mod.rs +++ b/sled-hardware/src/non_illumos/mod.rs @@ -4,9 +4,10 @@ use crate::SledMode; use crate::disk::{DiskPaths, Partition, PooledDiskError, UnparsedDisk}; +use omicron_common::api::internal::SledCpuFamily; use omicron_common::disk::{DiskIdentity, DiskVariant}; use omicron_uuid_kinds::ZpoolUuid; -use sled_hardware_types::{Baseboard, CpuFamily}; +use sled_hardware_types::Baseboard; use slog::Logger; use std::collections::HashMap; use tokio::sync::broadcast; @@ -41,7 +42,7 @@ impl HardwareManager { unimplemented!("Accessing hardware unsupported on non-illumos"); } - pub fn cpu_family(&self) -> CpuFamily { + pub fn cpu_family(&self) -> SledCpuFamily { unimplemented!("Accessing hardware unsupported on non-illumos"); } diff --git a/sled-hardware/types/src/lib.rs b/sled-hardware/types/src/lib.rs index 5d6ea5c8d3b..b34b5b1f422 100644 --- a/sled-hardware/types/src/lib.rs +++ b/sled-hardware/types/src/lib.rs @@ -95,21 +95,3 @@ impl std::fmt::Display for Baseboard { } } } - -/// A general description of the CPU family for processor(s) in this sled. -/// -/// This is intended to broadly support the control plane answering the question -/// "can I run this instance on that sled?" given an instance with either no or -/// some CPU platform requirement. It is not enough information for more precise -/// placement questions - for example, is a CPU a high-frequency part or -/// many-core part? We don't include Genoa here, but in that CPU family there -/// are high frequency parts, many-core parts, and large-cache parts. To support -/// those questions (or satisfactorily answer #8730) we would need to collect -/// additional information and send it along. -#[derive(Clone, Copy, Debug)] -pub enum CpuFamily { - Unknown, - AmdMilan, - AmdTurin, - AmdTurinDense, -} From bf7ccae08a50b449b2bc7ff4f7960d8b767df9cc Mon Sep 17 00:00:00 2001 From: iximeow Date: Sat, 2 Aug 2025 02:02:50 +0000 Subject: [PATCH 09/42] migrations need to be... right ... 
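For reference, the family/model ranges handled by `detect_cpu_family` above can be distilled into a pure function so the mapping is easy to exercise without executing CPUID. This is an illustrative sketch, not code from the series, and it assumes the SledCpuFamily enum is in scope; ranges that are not visible in the diff (notably the exact guard on the first Milan arm) are assumptions here:

    // Hypothetical distillation of the CPUID family/model ranges matched in
    // detect_cpu_family; anything not listed falls through to Unknown.
    fn classify(family: u32, model: u32) -> SledCpuFamily {
        use SledCpuFamily::*;
        match (family, model) {
            (0x19, 0x00..=0x0F) => AmdMilan,      // Milan / Zen 3 Threadripper
            (0x19, 0x10..=0x1F) => AmdMilan,      // Genoa, rounded down to Milan
            (0x19, 0x20..=0x2F) => AmdMilan,      // Vermeer (client Zen 3)
            (0x19, 0x60..=0x6F) => AmdMilan,      // Raphael (client Zen 4)
            (0x1A, 0x00..=0x0F) => AmdTurin,      // Turin
            (0x1A, 0x10..=0x1F) => AmdTurinDense, // Turin Dense
            (0x1A, 0x40..=0x4F) => AmdTurin,      // Granite Ridge (client Zen 5)
            _ => Unknown,
        }
    }

    fn main() {
        use SledCpuFamily::*;
        assert_eq!(classify(0x19, 0x01), AmdMilan);      // EPYC 7003 (Milan)
        assert_eq!(classify(0x1A, 0x11), AmdTurinDense); // Turin Dense
    }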
--- schema/crdb/dbinit.sql | 4 ++++ schema/crdb/sled-cpu-family/up04.sql | 2 ++ schema/crdb/sled-cpu-family/up05.sql | 1 + 3 files changed, 7 insertions(+) create mode 100644 schema/crdb/sled-cpu-family/up04.sql create mode 100644 schema/crdb/sled-cpu-family/up05.sql diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 0a5587c5939..6eccc05b831 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3705,6 +3705,10 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_sled_agent ( -- present. mupdate_override_boot_disk_error TEXT, + -- The sled's CPU family. This is also duplicated with the `sled` table, + -- similar to `usable_hardware_threads` and friends above. + cpu_family omicron.public.sled_cpu_family NOT NULL, + CONSTRAINT reconciler_status_sled_config_present_if_running CHECK ( (reconciler_status_kind = 'running' AND reconciler_status_sled_config IS NOT NULL) diff --git a/schema/crdb/sled-cpu-family/up04.sql b/schema/crdb/sled-cpu-family/up04.sql new file mode 100644 index 00000000000..b2fd0b97156 --- /dev/null +++ b/schema/crdb/sled-cpu-family/up04.sql @@ -0,0 +1,2 @@ +ALTER TABLE omicron.public.inv_sled_agent ADD COLUMN IF NOT EXISTS + cpu_family omicron.public.sled_cpu_family NOT NULL DEFAULT 'unknown'; diff --git a/schema/crdb/sled-cpu-family/up05.sql b/schema/crdb/sled-cpu-family/up05.sql new file mode 100644 index 00000000000..61db961a1b5 --- /dev/null +++ b/schema/crdb/sled-cpu-family/up05.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.inv_sled_agent ALTER COLUMN cpu_family DROP DEFAULT; From 0a79d5ed71ce6f07f40df68b4f88aac2bdba241c Mon Sep 17 00:00:00 2001 From: iximeow Date: Sat, 2 Aug 2025 02:10:29 +0000 Subject: [PATCH 10/42] and that's the missing update of cpu_family. --- nexus/db-queries/src/db/datastore/sled.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs index 435a2e9d3b6..39ce5e08c36 100644 --- a/nexus/db-queries/src/db/datastore/sled.rs +++ b/nexus/db-queries/src/db/datastore/sled.rs @@ -309,6 +309,7 @@ impl DataStore { .eq(sled_update.usable_hardware_threads), dsl::usable_physical_ram.eq(sled_update.usable_physical_ram), dsl::reservoir_size.eq(sled_update.reservoir_size), + dsl::cpu_family.eq(sled_update.cpu_family), dsl::sled_agent_gen.eq(sled_update.sled_agent_gen), )) .filter(dsl::sled_agent_gen.lt(sled_update.sled_agent_gen)) From ea59a267a37069a688fbeb719a69d4d08d2ba0a1 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sat, 2 Aug 2025 02:43:07 +0000 Subject: [PATCH 11/42] non-illumos has to build too ofc --- sled-hardware/src/non_illumos/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sled-hardware/src/non_illumos/mod.rs b/sled-hardware/src/non_illumos/mod.rs index c3dd03c61b5..caae6fcf6a1 100644 --- a/sled-hardware/src/non_illumos/mod.rs +++ b/sled-hardware/src/non_illumos/mod.rs @@ -4,7 +4,7 @@ use crate::SledMode; use crate::disk::{DiskPaths, Partition, PooledDiskError, UnparsedDisk}; -use omicron_common::api::internal::SledCpuFamily; +use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::disk::{DiskIdentity, DiskVariant}; use omicron_uuid_kinds::ZpoolUuid; use sled_hardware_types::Baseboard; From 10cd3356e58774fe91ab48fbc9886338f3ec10a8 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sun, 3 Aug 2025 23:34:17 +0000 Subject: [PATCH 12/42] fix expectorated output and, oh, docs are in the openapi spec --- .../tests/output/cmds-example-stdout | 3 ++ .../output/cmds-mupdate-update-flow-stdout | 3 ++ 
openapi/nexus-internal.json | 2 +- openapi/sled-agent.json | 37 +++++++++++++++++++ 4 files changed, 44 insertions(+), 1 deletion(-) diff --git a/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout b/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout index e6f97ae6104..a9b94800388 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout @@ -1101,6 +1101,7 @@ sled 2eb69596-f081-4e2d-9425-9994926e0832 (role = Gimlet, serial serial1) found at: from fake sled agent address: [fd00:1122:3344:102::1]:12345 usable hw threads: 10 + CPU family: milan usable memory (GiB): 0 reservoir (GiB): 0 physical disks: @@ -1210,6 +1211,7 @@ sled 32d8d836-4d8a-4e54-8fa9-f31d79c42646 (role = Gimlet, serial serial2) found at: from fake sled agent address: [fd00:1122:3344:103::1]:12345 usable hw threads: 10 + CPU family: milan usable memory (GiB): 0 reservoir (GiB): 0 physical disks: @@ -1319,6 +1321,7 @@ sled 89d02b1b-478c-401a-8e28-7a26f74fa41b (role = Gimlet, serial serial0) found at: from fake sled agent address: [fd00:1122:3344:101::1]:12345 usable hw threads: 10 + CPU family: milan usable memory (GiB): 0 reservoir (GiB): 0 physical disks: diff --git a/dev-tools/reconfigurator-cli/tests/output/cmds-mupdate-update-flow-stdout b/dev-tools/reconfigurator-cli/tests/output/cmds-mupdate-update-flow-stdout index 77b502ab240..deeb6a9c044 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmds-mupdate-update-flow-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/cmds-mupdate-update-flow-stdout @@ -88,6 +88,7 @@ sled 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c (role = Gimlet, serial serial1) found at: from fake sled agent address: [fd00:1122:3344:102::1]:12345 usable hw threads: 10 + CPU family: milan usable memory (GiB): 0 reservoir (GiB): 0 physical disks: @@ -194,6 +195,7 @@ sled 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 (role = Gimlet, serial serial0) found at: from fake sled agent address: [fd00:1122:3344:101::1]:12345 usable hw threads: 10 + CPU family: milan usable memory (GiB): 0 reservoir (GiB): 0 physical disks: @@ -302,6 +304,7 @@ sled d81c6a84-79b8-4958-ae41-ea46c9b19763 (role = Gimlet, serial serial2) found at: from fake sled agent address: [fd00:1122:3344:103::1]:12345 usable hw threads: 10 + CPU family: milan usable memory (GiB): 0 reservoir (GiB): 0 physical disks: diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index ff039ee29a6..f3954ea20a2 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -7270,7 +7270,7 @@ ] }, "SledCpuFamily": { - "description": "Identifies the kind of CPU present on a sled, determined by reading CPUID.", + "description": "Identifies the kind of CPU present on a sled, determined by reading CPUID.\n\nThis is intended to broadly support the control plane answering the question \"can I run this instance on that sled?\" given an instance with either no or some CPU platform requirement. It is not enough information for more precise placement questions - for example, is a CPU a high-frequency part or many-core part? We don't include Genoa here, but in that CPU family there are high frequency parts, many-core parts, and large-cache parts. 
To support those questions (or satisfactorily answer #8730) we would need to collect additional information and send it along.", "oneOf": [ { "description": "The CPU vendor or its family number don't correspond to any of the known family variants.", diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index ac78c85e472..79fcdddbc6f 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -5059,6 +5059,9 @@ "baseboard": { "$ref": "#/components/schemas/Baseboard" }, + "cpu_family": { + "$ref": "#/components/schemas/SledCpuFamily" + }, "datasets": { "type": "array", "items": { @@ -5122,6 +5125,7 @@ }, "required": [ "baseboard", + "cpu_family", "datasets", "disks", "reconciler_status", @@ -6993,6 +6997,39 @@ "com4" ] }, + "SledCpuFamily": { + "description": "Identifies the kind of CPU present on a sled, determined by reading CPUID.\n\nThis is intended to broadly support the control plane answering the question \"can I run this instance on that sled?\" given an instance with either no or some CPU platform requirement. It is not enough information for more precise placement questions - for example, is a CPU a high-frequency part or many-core part? We don't include Genoa here, but in that CPU family there are high frequency parts, many-core parts, and large-cache parts. To support those questions (or satisfactorily answer #8730) we would need to collect additional information and send it along.", + "oneOf": [ + { + "description": "The CPU vendor or its family number don't correspond to any of the known family variants.", + "type": "string", + "enum": [ + "unknown" + ] + }, + { + "description": "AMD Milan processors (or very close). Could be an actual Milan in a Gimlet, a close-to-Milan client Zen 3 part, or Zen 4 (for which Milan is the greatest common denominator).", + "type": "string", + "enum": [ + "amd_milan" + ] + }, + { + "description": "AMD Turin processors (or very close). Could be an actual Turin in a Cosmo, or a close-to-Turin client Zen 5 part.", + "type": "string", + "enum": [ + "amd_turin" + ] + }, + { + "description": "AMD Turin Dense processors. 
There are no \"Turin Dense-like\" CPUs unlike other cases, so this means a bona fide Zen 5c Turin Dense part.", + "type": "string", + "enum": [ + "amd_turin_dense" + ] + } + ] + }, "SledDiagnosticsQueryOutput": { "oneOf": [ { From 99a37f08ae4163f003b74776870f8be454643b11 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 6 Aug 2025 20:26:48 +0000 Subject: [PATCH 13/42] cleanup --- common/src/api/internal/shared.rs | 18 ++++++++++++------ sled-hardware/src/lib.rs | 4 ++-- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs index f23925318c3..c87e1a81d56 100644 --- a/common/src/api/internal/shared.rs +++ b/common/src/api/internal/shared.rs @@ -1134,17 +1134,23 @@ pub enum SledCpuFamily { AmdTurinDense, } -impl fmt::Display for SledCpuFamily { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl SledCpuFamily { + fn as_str(&self) -> &'static str { match self { - SledCpuFamily::Unknown => write!(f, "unknown"), - SledCpuFamily::AmdMilan => write!(f, "milan"), - SledCpuFamily::AmdTurin => write!(f, "turin"), - SledCpuFamily::AmdTurinDense => write!(f, "turin_dense"), + SledCpuFamily::Unknown => "unknown", + SledCpuFamily::AmdMilan => "amd_milan", + SledCpuFamily::AmdTurin => "amd_turin", + SledCpuFamily::AmdTurinDense => "amd_turin_dense", } } } +impl fmt::Display for SledCpuFamily { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.as_str()) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/sled-hardware/src/lib.rs b/sled-hardware/src/lib.rs index 97089091539..b475e2f28da 100644 --- a/sled-hardware/src/lib.rs +++ b/sled-hardware/src/lib.rs @@ -226,8 +226,8 @@ pub fn detect_cpu_family( "leaf1.ebx" => format_args!("{:#08x}", leaf_1.ebx), "leaf1.ecx" => format_args!("{:#08x}", leaf_1.ecx), "leaf1.edx" => format_args!("{:#08x}", leaf_1.edx), - "parsed family" => format_args!("{family:#x}"), - "parsed model" => format_args!("{model:#x}"), + "parsed_family" => format_args!("{family:#x}"), + "parsed_model" => format_args!("{model:#x}"), ); // Match on the family/model ranges we've detected. 
Notably client parts are From 6846a4ad9f9a47cbbb656e7a8e97bff750ec20a3 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 6 Aug 2025 22:43:09 +0000 Subject: [PATCH 14/42] move SledCpuFamily to a more fitting place --- common/src/api/internal/shared.rs | 50 ------------------- nexus-sled-agent-shared/src/inventory.rs | 6 +-- nexus/db-model/src/sled_cpu_family.rs | 14 ++---- .../src/db/datastore/physical_disk.rs | 3 +- nexus/inventory/src/collector.rs | 2 +- nexus/inventory/src/examples.rs | 2 +- nexus/reconfigurator/planning/src/system.rs | 2 +- nexus/test-utils/src/lib.rs | 2 +- nexus/tests/integration_tests/rack.rs | 2 +- nexus/types/src/internal_api/params.rs | 3 +- nexus/types/src/inventory.rs | 2 +- sled-agent/src/bin/sled-agent-sim.rs | 3 +- sled-agent/src/nexus.rs | 4 +- sled-agent/src/rack_setup/plan/service.rs | 2 +- sled-agent/src/rack_setup/service.rs | 3 +- sled-agent/src/sim/config.rs | 3 +- sled-hardware/src/illumos/mod.rs | 3 +- sled-hardware/src/lib.rs | 6 +-- sled-hardware/src/non_illumos/mod.rs | 3 +- sled-hardware/types/src/lib.rs | 50 +++++++++++++++++++ 20 files changed, 75 insertions(+), 90 deletions(-) diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs index c87e1a81d56..ebc9f6a46ca 100644 --- a/common/src/api/internal/shared.rs +++ b/common/src/api/internal/shared.rs @@ -1101,56 +1101,6 @@ pub struct SledIdentifiers { pub serial: String, } -/// Identifies the kind of CPU present on a sled, determined by reading CPUID. -/// -/// This is intended to broadly support the control plane answering the question -/// "can I run this instance on that sled?" given an instance with either no or -/// some CPU platform requirement. It is not enough information for more precise -/// placement questions - for example, is a CPU a high-frequency part or -/// many-core part? We don't include Genoa here, but in that CPU family there -/// are high frequency parts, many-core parts, and large-cache parts. To support -/// those questions (or satisfactorily answer #8730) we would need to collect -/// additional information and send it along. -#[derive( - Serialize, Deserialize, Copy, Clone, Debug, PartialEq, Eq, JsonSchema, -)] -#[serde(rename_all = "snake_case")] -pub enum SledCpuFamily { - /// The CPU vendor or its family number don't correspond to any of the - /// known family variants. - Unknown, - - /// AMD Milan processors (or very close). Could be an actual Milan in a - /// Gimlet, a close-to-Milan client Zen 3 part, or Zen 4 (for which Milan is - /// the greatest common denominator). - AmdMilan, - - /// AMD Turin processors (or very close). Could be an actual Turin in a - /// Cosmo, or a close-to-Turin client Zen 5 part. - AmdTurin, - - /// AMD Turin Dense processors. There are no "Turin Dense-like" CPUs unlike - /// other cases, so this means a bona fide Zen 5c Turin Dense part. 
- AmdTurinDense, -} - -impl SledCpuFamily { - fn as_str(&self) -> &'static str { - match self { - SledCpuFamily::Unknown => "unknown", - SledCpuFamily::AmdMilan => "amd_milan", - SledCpuFamily::AmdTurin => "amd_turin", - SledCpuFamily::AmdTurinDense => "amd_turin_dense", - } - } -} - -impl fmt::Display for SledCpuFamily { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.as_str()) - } -} - #[cfg(test)] mod tests { use super::*; diff --git a/nexus-sled-agent-shared/src/inventory.rs b/nexus-sled-agent-shared/src/inventory.rs index ac7856695fd..d26d8ad29a1 100644 --- a/nexus-sled-agent-shared/src/inventory.rs +++ b/nexus-sled-agent-shared/src/inventory.rs @@ -26,7 +26,7 @@ use omicron_common::update::OmicronZoneManifestSource; use omicron_common::{ api::{ external::{ByteCount, Generation}, - internal::shared::{NetworkInterface, SledCpuFamily, SourceNatConfig}, + internal::shared::{NetworkInterface, SourceNatConfig}, }, disk::{DatasetConfig, DiskVariant, OmicronPhysicalDiskConfig}, update::ArtifactId, @@ -40,9 +40,9 @@ use omicron_uuid_kinds::{SledUuid, ZpoolUuid}; use schemars::schema::{Schema, SchemaObject}; use schemars::{JsonSchema, SchemaGenerator}; use serde::{Deserialize, Serialize}; -// Export this type for convenience -- this way, dependents don't have to +// Export these types for convenience -- this way, dependents don't have to // depend on sled-hardware-types. -pub use sled_hardware_types::Baseboard; +pub use sled_hardware_types::{Baseboard, SledCpuFamily}; use strum::EnumIter; use tufaceous_artifact::{ArtifactHash, KnownArtifactKind}; diff --git a/nexus/db-model/src/sled_cpu_family.rs b/nexus/db-model/src/sled_cpu_family.rs index 700be75946d..703728eca1d 100644 --- a/nexus/db-model/src/sled_cpu_family.rs +++ b/nexus/db-model/src/sled_cpu_family.rs @@ -26,13 +26,9 @@ impl_enum_type!( AmdTurinDense => b"amd_turin_dense" ); -impl From - for SledCpuFamily -{ - fn from( - value: omicron_common::api::internal::shared::SledCpuFamily, - ) -> Self { - use omicron_common::api::internal::shared::SledCpuFamily as InputFamily; +impl From for SledCpuFamily { + fn from(value: nexus_sled_agent_shared::inventory::SledCpuFamily) -> Self { + use nexus_sled_agent_shared::inventory::SledCpuFamily as InputFamily; match value { InputFamily::Unknown => Self::Unknown, InputFamily::AmdMilan => Self::AmdMilan, @@ -42,9 +38,7 @@ impl From } } -impl From - for omicron_common::api::internal::shared::SledCpuFamily -{ +impl From for nexus_sled_agent_shared::inventory::SledCpuFamily { fn from(value: SledCpuFamily) -> Self { match value { SledCpuFamily::Unknown => Self::Unknown, diff --git a/nexus/db-queries/src/db/datastore/physical_disk.rs b/nexus/db-queries/src/db/datastore/physical_disk.rs index 2cea054b29d..0012ee54f64 100644 --- a/nexus/db-queries/src/db/datastore/physical_disk.rs +++ b/nexus/db-queries/src/db/datastore/physical_disk.rs @@ -340,11 +340,10 @@ mod test { use nexus_db_lookup::LookupPath; use nexus_sled_agent_shared::inventory::{ Baseboard, ConfigReconcilerInventoryStatus, Inventory, InventoryDisk, - SledRole, ZoneImageResolverInventory, + SledCpuFamily, SledRole, ZoneImageResolverInventory, }; use nexus_types::identity::Asset; use omicron_common::api::external::ByteCount; - use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::disk::{DiskIdentity, DiskVariant}; use omicron_test_utils::dev; use std::num::NonZeroU32; diff --git a/nexus/inventory/src/collector.rs b/nexus/inventory/src/collector.rs index ce1988b36de..ec02670684a 100644 --- 
a/nexus/inventory/src/collector.rs +++ b/nexus/inventory/src/collector.rs @@ -671,11 +671,11 @@ mod test { use nexus_sled_agent_shared::inventory::OmicronSledConfig; use nexus_sled_agent_shared::inventory::OmicronZoneConfig; use nexus_sled_agent_shared::inventory::OmicronZoneImageSource; + use nexus_sled_agent_shared::inventory::SledCpuFamily; use nexus_sled_agent_shared::inventory::OmicronZoneType; use nexus_types::inventory::Collection; use omicron_cockroach_metrics::CockroachClusterAdminClient; use omicron_common::api::external::Generation; - use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::zpool_name::ZpoolName; use omicron_sled_agent::sim; use omicron_uuid_kinds::OmicronZoneUuid; diff --git a/nexus/inventory/src/examples.rs b/nexus/inventory/src/examples.rs index 86c67a9c652..81233de5a83 100644 --- a/nexus/inventory/src/examples.rs +++ b/nexus/inventory/src/examples.rs @@ -29,6 +29,7 @@ use nexus_sled_agent_shared::inventory::InventoryZpool; use nexus_sled_agent_shared::inventory::OmicronSledConfig; use nexus_sled_agent_shared::inventory::OmicronZonesConfig; use nexus_sled_agent_shared::inventory::OrphanedDataset; +use nexus_sled_agent_shared::inventory::SledCpuFamily; use nexus_sled_agent_shared::inventory::SledRole; use nexus_sled_agent_shared::inventory::ZoneImageResolverInventory; use nexus_types::inventory::BaseboardId; @@ -40,7 +41,6 @@ use nexus_types::inventory::ZpoolName; use omicron_cockroach_metrics::MetricValue; use omicron_cockroach_metrics::PrometheusMetrics; use omicron_common::api::external::ByteCount; -use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::disk::DatasetConfig; use omicron_common::disk::DatasetKind; use omicron_common::disk::DatasetName; diff --git a/nexus/reconfigurator/planning/src/system.rs b/nexus/reconfigurator/planning/src/system.rs index 2ad075b4fc1..172db99c67a 100644 --- a/nexus/reconfigurator/planning/src/system.rs +++ b/nexus/reconfigurator/planning/src/system.rs @@ -23,6 +23,7 @@ use nexus_sled_agent_shared::inventory::InventoryDisk; use nexus_sled_agent_shared::inventory::InventoryZpool; use nexus_sled_agent_shared::inventory::MupdateOverrideBootInventory; use nexus_sled_agent_shared::inventory::OmicronSledConfig; +use nexus_sled_agent_shared::inventory::SledCpuFamily; use nexus_sled_agent_shared::inventory::SledRole; use nexus_sled_agent_shared::inventory::ZoneImageResolverInventory; use nexus_sled_agent_shared::inventory::ZoneManifestBootInventory; @@ -57,7 +58,6 @@ use omicron_common::address::SLED_PREFIX; use omicron_common::address::get_sled_address; use omicron_common::api::external::ByteCount; use omicron_common::api::external::Generation; -use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::disk::DiskIdentity; use omicron_common::disk::DiskVariant; use omicron_common::disk::M2Slot; diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 845d09b3a2f..f68c5a96573 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -38,6 +38,7 @@ use nexus_db_queries::db::pub_test_utils::crdb; use nexus_sled_agent_shared::inventory::HostPhase2DesiredSlots; use nexus_sled_agent_shared::inventory::OmicronSledConfig; use nexus_sled_agent_shared::inventory::OmicronZoneDataset; +use nexus_sled_agent_shared::inventory::SledCpuFamily; use nexus_sled_agent_shared::recovery_silo::RecoverySiloConfig; use nexus_test_interface::NexusServer; use nexus_types::deployment::Blueprint; @@ -74,7 +75,6 @@ use 
omicron_common::api::internal::nexus::ProducerKind; use omicron_common::api::internal::shared::DatasetKind; use omicron_common::api::internal::shared::NetworkInterface; use omicron_common::api::internal::shared::NetworkInterfaceKind; -use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::api::internal::shared::SourceNatConfig; use omicron_common::api::internal::shared::SwitchLocation; use omicron_common::disk::CompressionAlgorithm; diff --git a/nexus/tests/integration_tests/rack.rs b/nexus/tests/integration_tests/rack.rs index 32610a3c043..5b2fb969433 100644 --- a/nexus/tests/integration_tests/rack.rs +++ b/nexus/tests/integration_tests/rack.rs @@ -10,6 +10,7 @@ use nexus_db_model::SledBaseboard; use nexus_db_model::SledCpuFamily as DbSledCpuFamily; use nexus_db_model::SledSystemHardware; use nexus_db_model::SledUpdate; +use nexus_sled_agent_shared::inventory::SledCpuFamily; use nexus_sled_agent_shared::inventory::SledRole; use nexus_test_utils::TEST_SUITE_PASSWORD; use nexus_test_utils::http_testing::AuthnMode; @@ -23,7 +24,6 @@ use nexus_types::external_api::views::Rack; use nexus_types::internal_api::params::SledAgentInfo; use omicron_common::api::external::ByteCount; use omicron_common::api::external::Generation; -use omicron_common::api::internal::shared::SledCpuFamily; use omicron_uuid_kinds::GenericUuid; use std::time::Duration; use uuid::Uuid; diff --git a/nexus/types/src/internal_api/params.rs b/nexus/types/src/internal_api/params.rs index 3362853fe4f..fdb12c42a26 100644 --- a/nexus/types/src/internal_api/params.rs +++ b/nexus/types/src/internal_api/params.rs @@ -8,7 +8,7 @@ use crate::deployment::Blueprint; use crate::external_api::params::PhysicalDiskKind; use crate::external_api::shared::Baseboard; use crate::external_api::shared::IpRange; -use nexus_sled_agent_shared::inventory::SledRole; +use nexus_sled_agent_shared::inventory::{SledCpuFamily, SledRole}; use nexus_sled_agent_shared::recovery_silo::RecoverySiloConfig; use omicron_common::api::external::ByteCount; use omicron_common::api::external::Generation; @@ -18,7 +18,6 @@ use omicron_common::api::internal::nexus::Certificate; use omicron_common::api::internal::shared::AllowedSourceIps; use omicron_common::api::internal::shared::ExternalPortDiscovery; use omicron_common::api::internal::shared::RackNetworkConfig; -use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::api::internal::shared::SourceNatConfig; use omicron_uuid_kinds::DatasetUuid; use omicron_uuid_kinds::PhysicalDiskUuid; diff --git a/nexus/types/src/inventory.rs b/nexus/types/src/inventory.rs index 70f2451d29b..9c1322d5821 100644 --- a/nexus/types/src/inventory.rs +++ b/nexus/types/src/inventory.rs @@ -30,12 +30,12 @@ use nexus_sled_agent_shared::inventory::InventoryDisk; use nexus_sled_agent_shared::inventory::InventoryZpool; use nexus_sled_agent_shared::inventory::OmicronSledConfig; use nexus_sled_agent_shared::inventory::OmicronZoneConfig; +use nexus_sled_agent_shared::inventory::SledCpuFamily; use nexus_sled_agent_shared::inventory::SledRole; use nexus_sled_agent_shared::inventory::ZoneImageResolverInventory; use omicron_common::api::external::ByteCount; pub use omicron_common::api::internal::shared::NetworkInterface; pub use omicron_common::api::internal::shared::NetworkInterfaceKind; -use omicron_common::api::internal::shared::SledCpuFamily; pub use omicron_common::api::internal::shared::SourceNatConfig; use omicron_common::disk::M2Slot; pub use omicron_common::zpool_name::ZpoolName; diff --git 
a/sled-agent/src/bin/sled-agent-sim.rs b/sled-agent/src/bin/sled-agent-sim.rs index ccb5e0eaf8d..88ca421c555 100644 --- a/sled-agent/src/bin/sled-agent-sim.rs +++ b/sled-agent/src/bin/sled-agent-sim.rs @@ -13,7 +13,6 @@ use dropshot::ConfigDropshot; use dropshot::ConfigLogging; use dropshot::ConfigLoggingLevel; use omicron_common::api::internal::nexus::Certificate; -use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::cmd::CmdError; use omicron_common::cmd::fatal; use omicron_sled_agent::sim::RssArgs; @@ -22,7 +21,7 @@ use omicron_sled_agent::sim::{ run_standalone_server, }; use omicron_uuid_kinds::SledUuid; -use sled_hardware_types::Baseboard; +use sled_hardware_types::{Baseboard, SledCpuFamily}; use std::net::SocketAddr; use std::net::SocketAddrV6; diff --git a/sled-agent/src/nexus.rs b/sled-agent/src/nexus.rs index 5ebb086da13..e9e28b5c606 100644 --- a/sled-agent/src/nexus.rs +++ b/sled-agent/src/nexus.rs @@ -78,10 +78,10 @@ impl ConvertInto } impl ConvertInto - for omicron_common::api::internal::shared::SledCpuFamily + for sled_hardware_types::SledCpuFamily { fn convert(self) -> nexus_client::types::SledCpuFamily { - use omicron_common::api::internal::shared::SledCpuFamily as SharedSledCpuFamily; + use sled_hardware_types::SledCpuFamily as SharedSledCpuFamily; match self { SharedSledCpuFamily::Unknown => { nexus_client::types::SledCpuFamily::Unknown diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index bc6476f122a..37c74805c3c 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -1155,12 +1155,12 @@ impl ServicePortBuilder { mod tests { use super::*; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; + use nexus_sled_agent_shared::inventory::SledCpuFamily; use nexus_sled_agent_shared::inventory::ZoneImageResolverInventory; use omicron_common::address::IpRange; use omicron_common::api::external::ByteCount; use omicron_common::api::internal::shared::AllowedSourceIps; use omicron_common::api::internal::shared::RackNetworkConfig; - use omicron_common::api::internal::shared::SledCpuFamily; use oxnet::Ipv6Net; use sled_agent_types::rack_init::BootstrapAddressDiscovery; use sled_agent_types::rack_init::RecoverySiloConfig; diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 38684da7a57..5cf06ba0c32 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -1749,12 +1749,11 @@ mod test { use nexus_reconfigurator_blippy::{Blippy, BlippyReportSortKey}; use nexus_sled_agent_shared::inventory::{ Baseboard, ConfigReconcilerInventoryStatus, Inventory, InventoryDisk, - OmicronZoneType, SledRole, ZoneImageResolverInventory, + OmicronZoneType, SledCpuFamily, SledRole, ZoneImageResolverInventory, }; use omicron_common::{ address::{Ipv6Subnet, SLED_PREFIX, get_sled_address}, api::external::{ByteCount, Generation}, - api::internal::shared::SledCpuFamily, disk::{DiskIdentity, DiskVariant}, }; use omicron_uuid_kinds::SledUuid; diff --git a/sled-agent/src/sim/config.rs b/sled-agent/src/sim/config.rs index 4f0851d0dd3..58454d2a507 100644 --- a/sled-agent/src/sim/config.rs +++ b/sled-agent/src/sim/config.rs @@ -7,11 +7,10 @@ use crate::updates::ConfigUpdates; use camino::Utf8Path; use dropshot::ConfigDropshot; -use omicron_common::api::internal::shared::SledCpuFamily; use omicron_uuid_kinds::SledUuid; use serde::Deserialize; use serde::Serialize; -pub use 
sled_hardware_types::Baseboard; +pub use sled_hardware_types::{Baseboard, SledCpuFamily}; use std::net::Ipv6Addr; use std::net::{IpAddr, SocketAddr}; diff --git a/sled-hardware/src/illumos/mod.rs b/sled-hardware/src/illumos/mod.rs index 9e319d3cd8b..3f673e0b4ca 100644 --- a/sled-hardware/src/illumos/mod.rs +++ b/sled-hardware/src/illumos/mod.rs @@ -8,9 +8,8 @@ use camino::Utf8PathBuf; use gethostname::gethostname; use illumos_devinfo::{DevInfo, DevLinkType, DevLinks, Node, Property}; use libnvme::{Nvme, controller::Controller}; -use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::disk::{DiskIdentity, DiskVariant}; -use sled_hardware_types::Baseboard; +use sled_hardware_types::{Baseboard, SledCpuFamily}; use slog::Logger; use slog::debug; use slog::error; diff --git a/sled-hardware/src/lib.rs b/sled-hardware/src/lib.rs index b475e2f28da..582c13f4053 100644 --- a/sled-hardware/src/lib.rs +++ b/sled-hardware/src/lib.rs @@ -162,11 +162,9 @@ impl MemoryReservations { /// sled-agent does not yet know about libtopo, getting topo snapshots, walking /// them, or any of that, so the parsing is performed again here. #[cfg(target_arch = "x86_64")] -pub fn detect_cpu_family( - log: &Logger, -) -> omicron_common::api::internal::shared::SledCpuFamily { +pub fn detect_cpu_family(log: &Logger) -> sled_hardware_types::SledCpuFamily { use core::arch::x86_64::__cpuid_count; - use omicron_common::api::internal::shared::SledCpuFamily; + use sled_hardware_types::SledCpuFamily; // Read leaf 0 to figure out the processor's vendor and whether leaf 1 // (which contains family, model, and stepping information) is available. diff --git a/sled-hardware/src/non_illumos/mod.rs b/sled-hardware/src/non_illumos/mod.rs index caae6fcf6a1..314ea2ed4a4 100644 --- a/sled-hardware/src/non_illumos/mod.rs +++ b/sled-hardware/src/non_illumos/mod.rs @@ -4,10 +4,9 @@ use crate::SledMode; use crate::disk::{DiskPaths, Partition, PooledDiskError, UnparsedDisk}; -use omicron_common::api::internal::shared::SledCpuFamily; use omicron_common::disk::{DiskIdentity, DiskVariant}; use omicron_uuid_kinds::ZpoolUuid; -use sled_hardware_types::Baseboard; +use sled_hardware_types::{SledCpuFamily, Baseboard}; use slog::Logger; use std::collections::HashMap; use tokio::sync::broadcast; diff --git a/sled-hardware/types/src/lib.rs b/sled-hardware/types/src/lib.rs index b34b5b1f422..ce4a29da4c0 100644 --- a/sled-hardware/types/src/lib.rs +++ b/sled-hardware/types/src/lib.rs @@ -95,3 +95,53 @@ impl std::fmt::Display for Baseboard { } } } + +/// Identifies the kind of CPU present on a sled, determined by reading CPUID. +/// +/// This is intended to broadly support the control plane answering the question +/// "can I run this instance on that sled?" given an instance with either no or +/// some CPU platform requirement. It is not enough information for more precise +/// placement questions - for example, is a CPU a high-frequency part or +/// many-core part? We don't include Genoa here, but in that CPU family there +/// are high frequency parts, many-core parts, and large-cache parts. To support +/// those questions (or satisfactorily answer #8730) we would need to collect +/// additional information and send it along. +#[derive( + Serialize, Deserialize, Copy, Clone, Debug, PartialEq, Eq, JsonSchema, +)] +#[serde(rename_all = "snake_case")] +pub enum SledCpuFamily { + /// The CPU vendor or its family number don't correspond to any of the + /// known family variants. + Unknown, + + /// AMD Milan processors (or very close). 
Could be an actual Milan in a + /// Gimlet, a close-to-Milan client Zen 3 part, or Zen 4 (for which Milan is + /// the greatest common denominator). + AmdMilan, + + /// AMD Turin processors (or very close). Could be an actual Turin in a + /// Cosmo, or a close-to-Turin client Zen 5 part. + AmdTurin, + + /// AMD Turin Dense processors. There are no "Turin Dense-like" CPUs unlike + /// other cases, so this means a bona fide Zen 5c Turin Dense part. + AmdTurinDense, +} + +impl SledCpuFamily { + fn as_str(&self) -> &'static str { + match self { + SledCpuFamily::Unknown => "unknown", + SledCpuFamily::AmdMilan => "amd_milan", + SledCpuFamily::AmdTurin => "amd_turin", + SledCpuFamily::AmdTurinDense => "amd_turin_dense", + } + } +} + +impl std::fmt::Display for SledCpuFamily { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_str()) + } +} From 5f94661d8a68c4b7a65f533af5337f3a18ed7838 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 6 Aug 2025 23:10:13 +0000 Subject: [PATCH 15/42] rustfmt AGH --- nexus/inventory/src/collector.rs | 2 +- sled-hardware/src/non_illumos/mod.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nexus/inventory/src/collector.rs b/nexus/inventory/src/collector.rs index ec02670684a..7a4ac6ca959 100644 --- a/nexus/inventory/src/collector.rs +++ b/nexus/inventory/src/collector.rs @@ -671,8 +671,8 @@ mod test { use nexus_sled_agent_shared::inventory::OmicronSledConfig; use nexus_sled_agent_shared::inventory::OmicronZoneConfig; use nexus_sled_agent_shared::inventory::OmicronZoneImageSource; - use nexus_sled_agent_shared::inventory::SledCpuFamily; use nexus_sled_agent_shared::inventory::OmicronZoneType; + use nexus_sled_agent_shared::inventory::SledCpuFamily; use nexus_types::inventory::Collection; use omicron_cockroach_metrics::CockroachClusterAdminClient; use omicron_common::api::external::Generation; diff --git a/sled-hardware/src/non_illumos/mod.rs b/sled-hardware/src/non_illumos/mod.rs index 314ea2ed4a4..fa660ad0caa 100644 --- a/sled-hardware/src/non_illumos/mod.rs +++ b/sled-hardware/src/non_illumos/mod.rs @@ -6,7 +6,7 @@ use crate::SledMode; use crate::disk::{DiskPaths, Partition, PooledDiskError, UnparsedDisk}; use omicron_common::disk::{DiskIdentity, DiskVariant}; use omicron_uuid_kinds::ZpoolUuid; -use sled_hardware_types::{SledCpuFamily, Baseboard}; +use sled_hardware_types::{Baseboard, SledCpuFamily}; use slog::Logger; use std::collections::HashMap; use tokio::sync::broadcast; From 543bdc952de883f0fec1f866119e214f00c5613c Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 6 Aug 2025 23:29:10 +0000 Subject: [PATCH 16/42] and expectorate up the reconfigurator output --- .../reconfigurator-cli/tests/output/cmds-example-stdout | 6 +++--- .../tests/output/cmds-mupdate-update-flow-stdout | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout b/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout index a9b94800388..eafab73da16 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout @@ -1101,7 +1101,7 @@ sled 2eb69596-f081-4e2d-9425-9994926e0832 (role = Gimlet, serial serial1) found at: from fake sled agent address: [fd00:1122:3344:102::1]:12345 usable hw threads: 10 - CPU family: milan + CPU family: amd_milan usable memory (GiB): 0 reservoir (GiB): 0 physical disks: @@ -1211,7 +1211,7 @@ sled 32d8d836-4d8a-4e54-8fa9-f31d79c42646 (role = 
Gimlet, serial serial2) found at: from fake sled agent address: [fd00:1122:3344:103::1]:12345 usable hw threads: 10 - CPU family: milan + CPU family: amd_milan usable memory (GiB): 0 reservoir (GiB): 0 physical disks: @@ -1321,7 +1321,7 @@ sled 89d02b1b-478c-401a-8e28-7a26f74fa41b (role = Gimlet, serial serial0) found at: from fake sled agent address: [fd00:1122:3344:101::1]:12345 usable hw threads: 10 - CPU family: milan + CPU family: amd_milan usable memory (GiB): 0 reservoir (GiB): 0 physical disks: diff --git a/dev-tools/reconfigurator-cli/tests/output/cmds-mupdate-update-flow-stdout b/dev-tools/reconfigurator-cli/tests/output/cmds-mupdate-update-flow-stdout index deeb6a9c044..ace1d720beb 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmds-mupdate-update-flow-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/cmds-mupdate-update-flow-stdout @@ -88,7 +88,7 @@ sled 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c (role = Gimlet, serial serial1) found at: from fake sled agent address: [fd00:1122:3344:102::1]:12345 usable hw threads: 10 - CPU family: milan + CPU family: amd_milan usable memory (GiB): 0 reservoir (GiB): 0 physical disks: @@ -195,7 +195,7 @@ sled 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 (role = Gimlet, serial serial0) found at: from fake sled agent address: [fd00:1122:3344:101::1]:12345 usable hw threads: 10 - CPU family: milan + CPU family: amd_milan usable memory (GiB): 0 reservoir (GiB): 0 physical disks: @@ -304,7 +304,7 @@ sled d81c6a84-79b8-4958-ae41-ea46c9b19763 (role = Gimlet, serial serial2) found at: from fake sled agent address: [fd00:1122:3344:103::1]:12345 usable hw threads: 10 - CPU family: milan + CPU family: amd_milan usable memory (GiB): 0 reservoir (GiB): 0 physical disks: From 34516b421cc3c0e92d0092b284c69380db335da3 Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Mon, 21 Apr 2025 23:12:46 +0000 Subject: [PATCH 17/42] instance minimum CPU platforms RFD 505 proposes that instances should be able to set a "minimum hardware platform" or "minimum CPU platform" that allows users to constrain an instance to run on sleds that have a specific set of CPU features available. Previously, actually-available CPU information was plumbed from sleds to Nexus. This actually adds a `min_cpu_platform` setting for instance creation and uses it to drive selection of guest CPUID leaves. As-is, this moves VMs on Gimlets away from the bhyve-default CPUID leaves (which are effectively "host CPUID information, but features that are not or cannot be virtualized are masked out"), instead using the specific CPUID information set out in RFD 505. There is no provision for Turin yet, which instead gets CPUID leaves that look like Milan. Adding a set of CPUID information to advertise for an `amd_turin` CPU platform, from here, is fairly straightforward. This does not have a mechanism to enforce specific CPU platform use or disuse, either in a silo or rack-wide. One could imagine a simple system oriented around "this silo is permitted to specify these minimum CPU platforms", but that leaves uncomfortable issues like: if silo A permits only Milan, silo B permits Milan and Turin, all Milan CPUs are already allocated, and someone is attempting to create a new Milan-based VM in silo A, should this succeed using Turin CPUs, potentially starving silo B?
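As a rough illustration of the placement rule this change encodes (a self-contained sketch using stand-in types, not the actual nexus-db-model definitions): an instance with a Milan minimum platform may be placed on Milan or Turin sleds, a Turin minimum restricts it to Turin sleds, and an instance with no minimum can land on any sled.

    // Stand-in enums for illustration only; the real types live in nexus-db-model.
    #[derive(Clone, Copy, PartialEq)]
    enum SledCpuFamily {
        Unknown,
        AmdMilan,
        AmdTurin,
        AmdTurinDense,
    }

    #[derive(Clone, Copy)]
    enum MinCpuPlatform {
        AmdMilan,
        AmdTurin,
    }

    /// Can a sled with the given CPU family host an instance with the given
    /// minimum CPU platform requirement?
    fn sled_can_host(sled: SledCpuFamily, min: Option<MinCpuPlatform>) -> bool {
        match min {
            // No requirement: any in-service sled is a candidate.
            None => true,
            // The Milan guest platform is a subset of what Turin sleds offer.
            Some(MinCpuPlatform::AmdMilan) => {
                matches!(sled, SledCpuFamily::AmdMilan | SledCpuFamily::AmdTurin)
            }
            // The Turin guest platform requires a Turin sled.
            Some(MinCpuPlatform::AmdTurin) => {
                matches!(sled, SledCpuFamily::AmdTurin)
            }
        }
    }

    fn main() {
        // A Turin-minimum instance cannot start on a Milan sled...
        assert!(!sled_can_host(SledCpuFamily::AmdMilan, Some(MinCpuPlatform::AmdTurin)));
        // ...but a Milan-minimum instance can still run on (or migrate to) a Turin sled.
        assert!(sled_can_host(SledCpuFamily::AmdTurin, Some(MinCpuPlatform::AmdMilan)));
    }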
--- common/src/api/external/mod.rs | 44 ++ dev-tools/omdb/src/bin/omdb/db.rs | 6 + end-to-end-tests/src/instance_launch.rs | 1 + nexus/db-model/src/instance.rs | 8 +- .../src/instance_minimum_cpu_platform.rs | 67 +++ nexus/db-model/src/lib.rs | 4 + nexus/db-model/src/schema_versions.rs | 3 +- nexus/db-model/src/sled.rs | 21 +- nexus/db-model/src/sled_cpu_family.rs | 19 + nexus/db-model/src/vmm.rs | 9 +- nexus/db-model/src/vmm_cpu_platform.rs | 60 +++ nexus/db-queries/src/db/datastore/instance.rs | 64 ++- .../db-queries/src/db/datastore/migration.rs | 1 + nexus/db-queries/src/db/datastore/sled.rs | 49 ++- .../virtual_provisioning_collection.rs | 1 + nexus/db-queries/src/db/datastore/vmm.rs | 4 + nexus/db-queries/src/db/datastore/vpc.rs | 1 + .../src/db/pub_test_utils/helpers.rs | 1 + .../db-queries/src/db/queries/external_ip.rs | 1 + .../src/db/queries/network_interface.rs | 1 + .../src/db/queries/sled_reservation.rs | 73 +++- .../sled_find_targets_query_with_cpu.sql | 108 +++++ nexus/db-schema/src/enums.rs | 2 + nexus/db-schema/src/schema.rs | 2 + .../background/tasks/abandoned_vmm_reaper.rs | 2 + .../tasks/instance_reincarnation.rs | 3 + nexus/src/app/instance.rs | 37 +- nexus/src/app/instance_platform.rs | 115 +++++- nexus/src/app/sagas/instance_common.rs | 4 +- nexus/src/app/sagas/instance_create.rs | 1 + nexus/src/app/sagas/instance_delete.rs | 1 + nexus/src/app/sagas/instance_migrate.rs | 2 + nexus/src/app/sagas/instance_start.rs | 45 ++- nexus/src/app/sagas/instance_update/mod.rs | 1 + nexus/src/app/sagas/snapshot_create.rs | 1 + nexus/test-utils/src/resource_helpers.rs | 1 + nexus/tests/integration_tests/endpoints.rs | 3 + nexus/tests/integration_tests/external_ips.rs | 1 + nexus/tests/integration_tests/instances.rs | 382 +++++++++++++++++- nexus/tests/integration_tests/projects.rs | 1 + nexus/tests/integration_tests/quotas.rs | 1 + nexus/tests/integration_tests/schema.rs | 1 + nexus/tests/integration_tests/snapshots.rs | 2 + .../integration_tests/subnet_allocation.rs | 1 + nexus/tests/integration_tests/utilization.rs | 1 + nexus/types/src/external_api/params.rs | 13 +- openapi/nexus-internal.json | 28 ++ openapi/nexus.json | 47 +++ .../up01.sql | 4 + .../up02.sql | 5 + .../up03.sql | 2 + .../up04.sql | 3 + .../up05.sql | 1 + schema/crdb/dbinit.sql | 32 +- 54 files changed, 1232 insertions(+), 59 deletions(-) create mode 100644 nexus/db-model/src/instance_minimum_cpu_platform.rs create mode 100644 nexus/db-model/src/vmm_cpu_platform.rs create mode 100644 nexus/db-queries/tests/output/sled_find_targets_query_with_cpu.sql create mode 100644 schema/crdb/add-instance-minimum-cpu-platform/up01.sql create mode 100644 schema/crdb/add-instance-minimum-cpu-platform/up02.sql create mode 100644 schema/crdb/add-instance-minimum-cpu-platform/up03.sql create mode 100644 schema/crdb/add-instance-minimum-cpu-platform/up04.sql create mode 100644 schema/crdb/add-instance-minimum-cpu-platform/up05.sql diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index 3ed8e980264..7867b1bee7a 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -1194,6 +1194,10 @@ pub struct Instance { #[serde(flatten)] pub auto_restart_status: InstanceAutoRestartStatus, + + /// The minimum required CPU platform for this instance. If this is `null`, + /// the instance requires no particular CPU platform. + pub min_cpu_platform: Option, } /// Status of control-plane driven automatic failure recovery for this instance. 
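(Aside: with `#[serde(rename_all = "snake_case")]` on the enum added in the next hunk, the new field's wire values are "amd_milan" and "amd_turin". A minimal standalone check of that convention, using a local stand-in enum rather than the real API type and assuming the serde and serde_json crates are available:)

    use serde::Serialize;

    #[derive(Serialize)]
    #[serde(rename_all = "snake_case")]
    enum MinCpuPlatform {
        AmdMilan,
        AmdTurin,
    }

    fn main() {
        // Unit variants serialize as plain snake_case strings on the wire.
        assert_eq!(
            serde_json::to_string(&MinCpuPlatform::AmdMilan).unwrap(),
            r#""amd_milan""#
        );
        assert_eq!(
            serde_json::to_string(&MinCpuPlatform::AmdTurin).unwrap(),
            r#""amd_turin""#
        );
    }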
@@ -1258,6 +1262,46 @@ pub enum InstanceAutoRestartPolicy { BestEffort, } +/// A minimum required CPU platform for an instance. +/// +/// When an instance specifies a minimum required CPU platform: +/// +/// - The system may expose (to the VM) new CPU features that are only present +/// on that platform (or on newer platforms of the same lineage that also +/// support those features). +/// - The instance must run on hosts that have CPUs that support all the +/// features of the supplied minimum platform. +/// +/// That is, the instance is restricted to hosts that have the specified minimum +/// host CPU type (or a more advanced, but still compatible, CPU), but in +/// exchange the CPU features exposed by the minimum platform are available for +/// the guest to use. Note that this may prevent an instance from starting (if +/// the hosts it requires are full but there is capacity on other incompatible +/// hosts). +/// +/// If an instance does not specify a minimum required CPU platform, then when +/// it starts, the control plane selects a host for the instance and then +/// supplies the guest with the "minimum" CPU platform supported by that host. +/// This maximizes the number of hosts that can run the VM if it later needs to +/// migrate to another host. +/// +/// In all cases, the CPU features presented by a given CPU platform are a +/// subset of what the corresponding hardware may actually support; features +/// which cannot be used from a virtual environment or do not have full +/// hypervisor support may be masked off. See RFD 314 for specific CPU features +/// in a CPU platform. +#[derive( + Copy, Clone, Debug, Deserialize, Serialize, JsonSchema, Eq, PartialEq, +)] +#[serde(rename_all = "snake_case")] +pub enum InstanceMinimumCpuPlatform { + /// An AMD Milan-like CPU platform. + AmdMilan, + + /// An AMD Turin-like CPU platform. 
+ AmdTurin, +} + // AFFINITY GROUPS /// Affinity policy used to describe "what to do when a request cannot be satisfied" diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 8290739a734..e2b1943c871 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -4760,6 +4760,7 @@ async fn cmd_db_instance_info( propolis_ip: _, propolis_port: _, instance_id: _, + cpu_platform: _, time_created, time_deleted, runtime: @@ -7356,6 +7357,7 @@ fn prettyprint_vmm( const INSTANCE_ID: &'static str = "instance ID"; const SLED_ID: &'static str = "sled ID"; const SLED_SERIAL: &'static str = "sled serial"; + const CPU_PLATFORM: &'static str = "CPU platform"; const ADDRESS: &'static str = "propolis address"; const STATE: &'static str = "state"; const WIDTH: usize = const_max_len(&[ @@ -7366,6 +7368,7 @@ fn prettyprint_vmm( INSTANCE_ID, SLED_ID, SLED_SERIAL, + CPU_PLATFORM, STATE, ADDRESS, ]); @@ -7379,6 +7382,7 @@ fn prettyprint_vmm( sled_id, propolis_ip, propolis_port, + cpu_platform, runtime: db::model::VmmRuntimeState { state, r#gen, time_state_updated }, } = vmm; @@ -7405,6 +7409,7 @@ fn prettyprint_vmm( if let Some(serial) = sled_serial { println!("{indent}{SLED_SERIAL:>width$}: {serial}"); } + println!("{indent}{CPU_PLATFORM:>width$}: {cpu_platform}"); } async fn cmd_db_vmm_list( @@ -7480,6 +7485,7 @@ async fn cmd_db_vmm_list( sled_id, propolis_ip: _, propolis_port: _, + cpu_platform: _, runtime: db::model::VmmRuntimeState { state, diff --git a/end-to-end-tests/src/instance_launch.rs b/end-to-end-tests/src/instance_launch.rs index 648d49e4d6f..1d9ffa5b6b9 100644 --- a/end-to-end-tests/src/instance_launch.rs +++ b/end-to-end-tests/src/instance_launch.rs @@ -79,6 +79,7 @@ async fn instance_launch() -> Result<()> { start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), + min_cpu_platform: None, }) .send() .await?; diff --git a/nexus/db-model/src/instance.rs b/nexus/db-model/src/instance.rs index 56e3e80e53e..8ccd7c6b01b 100644 --- a/nexus/db-model/src/instance.rs +++ b/nexus/db-model/src/instance.rs @@ -5,7 +5,7 @@ use super::InstanceIntendedState as IntendedState; use super::{ ByteCount, Disk, ExternalIp, Generation, InstanceAutoRestartPolicy, - InstanceCpuCount, InstanceState, Vmm, VmmState, + InstanceCpuCount, InstanceMinimumCpuPlatform, InstanceState, Vmm, VmmState, }; use crate::collection::DatastoreAttachTargetConfig; use crate::serde_time_delta::optional_time_delta; @@ -68,6 +68,9 @@ pub struct Instance { #[diesel(column_name = boot_disk_id)] pub boot_disk_id: Option, + /// The instance's minimum required CPU platform. + pub min_cpu_platform: Option, + #[diesel(embed)] pub runtime_state: InstanceRuntimeState, @@ -139,6 +142,7 @@ impl Instance { // Intentionally ignore `params.boot_disk_id` here: we can't set // `boot_disk_id` until the referenced disk is attached. boot_disk_id: None, + min_cpu_platform: params.min_cpu_platform.map(Into::into), runtime_state, intended_state, @@ -493,4 +497,6 @@ pub struct InstanceUpdate { pub ncpus: InstanceCpuCount, pub memory: ByteCount, + + pub min_cpu_platform: Option, } diff --git a/nexus/db-model/src/instance_minimum_cpu_platform.rs b/nexus/db-model/src/instance_minimum_cpu_platform.rs new file mode 100644 index 00000000000..bb0b911d9a9 --- /dev/null +++ b/nexus/db-model/src/instance_minimum_cpu_platform.rs @@ -0,0 +1,67 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::SledCpuFamily; + +use super::impl_enum_type; +use serde::{Deserialize, Serialize}; + +impl_enum_type!( + InstanceMinimumCpuPlatformEnum: + + #[derive( + Copy, + Clone, + Debug, + PartialEq, + AsExpression, + FromSqlRow, + Serialize, + Deserialize + )] + pub enum InstanceMinimumCpuPlatform; + + AmdMilan => b"amd_milan" + AmdTurin => b"amd_turin" +); + +impl InstanceMinimumCpuPlatform { + /// Returns a slice containing the set of sled CPU families that can + /// accommodate an instance with this minimum CPU platform. + pub fn compatible_sled_cpu_families(&self) -> &'static [SledCpuFamily] { + match self { + // Turin-based sleds have a superset of the features made available + // in a guest's Milan CPU platform + Self::AmdMilan => { + &[SledCpuFamily::AmdMilan, SledCpuFamily::AmdTurin] + } + Self::AmdTurin => &[SledCpuFamily::AmdTurin], + } + } +} + +impl From + for InstanceMinimumCpuPlatform +{ + fn from( + value: omicron_common::api::external::InstanceMinimumCpuPlatform, + ) -> Self { + use omicron_common::api::external::InstanceMinimumCpuPlatform as ApiPlatform; + match value { + ApiPlatform::AmdMilan => Self::AmdMilan, + ApiPlatform::AmdTurin => Self::AmdTurin, + } + } +} + +impl From + for omicron_common::api::external::InstanceMinimumCpuPlatform +{ + fn from(value: InstanceMinimumCpuPlatform) -> Self { + match value { + InstanceMinimumCpuPlatform::AmdMilan => Self::AmdMilan, + InstanceMinimumCpuPlatform::AmdTurin => Self::AmdTurin, + } + } +} diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index ba26c0c2309..a1bbbc0db2a 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -44,6 +44,7 @@ mod instance; mod instance_auto_restart_policy; mod instance_cpu_count; mod instance_intended_state; +mod instance_minimum_cpu_platform; mod instance_state; mod internet_gateway; mod inventory; @@ -123,6 +124,7 @@ mod utilization; mod virtual_provisioning_collection; mod virtual_provisioning_resource; mod vmm; +mod vmm_cpu_platform; mod vni; mod volume; mod volume_repair; @@ -181,6 +183,7 @@ pub use instance::*; pub use instance_auto_restart_policy::*; pub use instance_cpu_count::*; pub use instance_intended_state::*; +pub use instance_minimum_cpu_platform::*; pub use instance_state::*; pub use internet_gateway::*; pub use inventory::*; @@ -248,6 +251,7 @@ pub use v2p_mapping::*; pub use virtual_provisioning_collection::*; pub use virtual_provisioning_resource::*; pub use vmm::*; +pub use vmm_cpu_platform::*; pub use vmm_state::*; pub use vni::*; pub use volume::*; diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 7a42e172291..b232c896542 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -16,7 +16,7 @@ use std::{collections::BTreeMap, sync::LazyLock}; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: Version = Version::new(174, 0, 0); +pub const SCHEMA_VERSION: Version = Version::new(175, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -28,6 +28,7 @@ static KNOWN_VERSIONS: LazyLock> = LazyLock::new(|| { // | leaving the first copy as an example for the next person. 
// v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(175, "add-instance-minimum-cpu-platform"), KnownVersion::new(174, "sled-cpu-family"), KnownVersion::new(173, "inv-internal-dns"), KnownVersion::new(172, "add-zones-with-mupdate-override"), diff --git a/nexus/db-model/src/sled.rs b/nexus/db-model/src/sled.rs index 631cc92de0a..ba9baba432f 100644 --- a/nexus/db-model/src/sled.rs +++ b/nexus/db-model/src/sled.rs @@ -346,12 +346,13 @@ impl SledUpdate { #[derive(Clone, Debug)] pub struct SledReservationConstraints { must_select_from: Vec, + cpu_families: Vec, } impl SledReservationConstraints { /// Creates a constraint set with no constraints in it. pub fn none() -> Self { - Self { must_select_from: Vec::new() } + Self { must_select_from: Vec::new(), cpu_families: Vec::new() } } /// If the constraints include a set of sleds that the caller must select @@ -365,6 +366,19 @@ impl SledReservationConstraints { Some(&self.must_select_from) } } + + /// If the constraints include a list of acceptable sled CPU families, + /// returns `Some` and a slice containing the members of that set. + /// + /// If no "must select a sled with one of these CPUs" constraint exists, + /// returns None. + pub fn cpu_families(&self) -> Option<&[SledCpuFamily]> { + if self.cpu_families.is_empty() { + None + } else { + Some(&self.cpu_families) + } + } } #[derive(Debug)] @@ -387,6 +401,11 @@ impl SledReservationConstraintBuilder { self } + pub fn cpu_families(mut self, families: &[SledCpuFamily]) -> Self { + self.constraints.cpu_families.extend(families); + self + } + /// Builds a set of constraints from this builder's current state. pub fn build(self) -> SledReservationConstraints { self.constraints diff --git a/nexus/db-model/src/sled_cpu_family.rs b/nexus/db-model/src/sled_cpu_family.rs index 703728eca1d..34785459008 100644 --- a/nexus/db-model/src/sled_cpu_family.rs +++ b/nexus/db-model/src/sled_cpu_family.rs @@ -26,6 +26,25 @@ impl_enum_type!( AmdTurinDense => b"amd_turin_dense" ); +impl SledCpuFamily { + /// Yields the minimum compatible instance CPU platform that can run on this + /// sled. + /// + /// Each instance CPU platform has a set `C` of sled CPU families that can + /// host it. The "minimum compatible platform" is the instance CPU platform + /// for which (a) `self` is in `C`, and (b) `C` is of maximum cardinality. + /// That is: the minimum compatible platform is chosen so that a VMM that + /// uses it can run on sleds of this family and as many other families as + /// possible. + pub fn minimum_compatible_platform(&self) -> crate::VmmCpuPlatform { + match self { + Self::Unknown => crate::VmmCpuPlatform::SledDefault, + Self::AmdMilan => crate::VmmCpuPlatform::AmdMilan, + Self::AmdTurin => crate::VmmCpuPlatform::AmdMilan, + } + } +} + impl From for SledCpuFamily { fn from(value: nexus_sled_agent_shared::inventory::SledCpuFamily) -> Self { use nexus_sled_agent_shared::inventory::SledCpuFamily as InputFamily; diff --git a/nexus/db-model/src/vmm.rs b/nexus/db-model/src/vmm.rs index 50d8d97f162..88456cebb19 100644 --- a/nexus/db-model/src/vmm.rs +++ b/nexus/db-model/src/vmm.rs @@ -13,7 +13,7 @@ //! sled agent or that sled agent will never update (like the sled ID). 
use super::{Generation, VmmState}; -use crate::SqlU16; +use crate::{SqlU16, VmmCpuPlatform}; use chrono::{DateTime, Utc}; use nexus_db_schema::schema::vmm; use omicron_uuid_kinds::{GenericUuid, InstanceUuid, PropolisUuid, SledUuid}; @@ -55,6 +55,11 @@ pub struct Vmm { /// The socket port on which this VMM is serving the Propolis server API. pub propolis_port: SqlU16, + /// The CPU platform for this VMM. This may be chosen implicitly by the + /// control plane if this VMM's instance didn't specify a required platform + /// when it was started. + pub cpu_platform: VmmCpuPlatform, + /// Runtime state for the VMM. #[diesel(embed)] pub runtime: VmmRuntimeState, @@ -71,6 +76,7 @@ impl Vmm { sled_id: SledUuid, propolis_ip: ipnetwork::IpNetwork, propolis_port: u16, + cpu_platform: VmmCpuPlatform, ) -> Self { let now = Utc::now(); @@ -82,6 +88,7 @@ impl Vmm { sled_id: sled_id.into_untyped_uuid(), propolis_ip, propolis_port: SqlU16(propolis_port), + cpu_platform, runtime: VmmRuntimeState { state: VmmState::Creating, time_state_updated: now, diff --git a/nexus/db-model/src/vmm_cpu_platform.rs b/nexus/db-model/src/vmm_cpu_platform.rs new file mode 100644 index 00000000000..67e60863924 --- /dev/null +++ b/nexus/db-model/src/vmm_cpu_platform.rs @@ -0,0 +1,60 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::{InstanceMinimumCpuPlatform, SledCpuFamily}; + +use super::impl_enum_type; +use serde::{Deserialize, Serialize}; + +impl_enum_type!( + VmmCpuPlatformEnum: + + #[derive( + Copy, + Clone, + Debug, + PartialEq, + AsExpression, + FromSqlRow, + Serialize, + Deserialize, + strum::Display + )] + pub enum VmmCpuPlatform; + + SledDefault => b"sled_default" + AmdMilan => b"amd_milan" + AmdTurin => b"amd_turin" +); + +impl VmmCpuPlatform { + /// If this VMM has a well-known CPU platform, returns a `Some` containing + /// the set of sled CPU families that can host that the VMM. Returns `None` + /// if there is insufficient information to determine what CPU families + /// could host this VMM. + pub fn compatible_sled_cpu_families(&self) -> Option<&[SledCpuFamily]> { + match self { + // Milan-based instances can run on both Milan and Turin processors. + Self::AmdMilan => { + Some(&[SledCpuFamily::AmdMilan, SledCpuFamily::AmdTurin]) + } + Self::AmdTurin => Some(&[SledCpuFamily::AmdTurin]), + + // VMMs get the "sled default" CPU platform when an instance starts + // up on a sled that hasn't reported a well-known CPU family. Assume + // that nothing is known about the VM's compatible CPU platforms in + // this case. 
+ Self::SledDefault => None, + } + } +} + +impl From for VmmCpuPlatform { + fn from(value: InstanceMinimumCpuPlatform) -> Self { + match value { + InstanceMinimumCpuPlatform::AmdMilan => Self::AmdMilan, + InstanceMinimumCpuPlatform::AmdTurin => Self::AmdTurin, + } + } +} diff --git a/nexus/db-queries/src/db/datastore/instance.rs b/nexus/db-queries/src/db/datastore/instance.rs index c8363e55926..9e0336b6c86 100644 --- a/nexus/db-queries/src/db/datastore/instance.rs +++ b/nexus/db-queries/src/db/datastore/instance.rs @@ -22,6 +22,7 @@ use crate::db::model::InstanceAutoRestart; use crate::db::model::InstanceAutoRestartPolicy; use crate::db::model::InstanceCpuCount; use crate::db::model::InstanceIntendedState; +use crate::db::model::InstanceMinimumCpuPlatform; use crate::db::model::InstanceRuntimeState; use crate::db::model::InstanceState; use crate::db::model::InstanceUpdate; @@ -265,6 +266,7 @@ impl From for external::Instance { .parse() .expect("found invalid hostname in the database"), boot_disk_id: value.instance.boot_disk_id, + min_cpu_platform: value.instance.min_cpu_platform.map(Into::into), runtime: external::InstanceRuntimeState { run_state: value.effective_state(), time_run_state_updated, @@ -1096,6 +1098,7 @@ impl DataStore { auto_restart_policy, ncpus, memory, + min_cpu_platform, } = update.clone(); async move { // Set the auto-restart policy. @@ -1109,12 +1112,13 @@ impl DataStore { .await?; // Set vCPUs and memory size. - self.instance_set_size_on_conn( + self.instance_set_cpu_and_mem_on_conn( &conn, &err, &authz_instance, ncpus, memory, + min_cpu_platform, ) .await?; @@ -1281,51 +1285,66 @@ impl DataStore { } } - /// Set an instance's CPU count and memory size to the provided values, + /// Set an instance's CPU/memory configuration to the provided values, /// within an existing transaction. /// /// The instance must be in an updatable state for this update to succeed. /// If the instance is not updatable, return `Error::Conflict`. /// - /// To update an instance's CPU or memory sizes an instance must not be - /// incarnated by a VMM. This constraint ensures that the sizes recorded in - /// Nexus sum to the actual peak possible resource usage of running - /// instances. - /// - /// Does not allow setting sizes of running instances to ensure that if an - /// instance is running, its resource reservation matches what we record in - /// the database. - async fn instance_set_size_on_conn( + /// These parameters all currently require the instance to not be incarnated + /// by a VMM to be changed. This is to ensure that if the instance is + /// running, its real allocation and platform are aligned with the + /// instance's database record. 
+ async fn instance_set_cpu_and_mem_on_conn( &self, conn: &async_bb8_diesel::Connection, err: &OptionalError, authz_instance: &authz::Instance, ncpus: InstanceCpuCount, memory: ByteCount, + min_cpu_platform: Option, ) -> Result<(), diesel::result::Error> { use nexus_db_schema::schema::instance::dsl as instance_dsl; - let r = diesel::update(instance_dsl::instance) + let query = diesel::update(instance_dsl::instance) + .into_boxed() .filter(instance_dsl::id.eq(authz_instance.id())) .filter( instance_dsl::state .eq_any(InstanceState::NOT_INCARNATED_STATES), + ); + + let query = if min_cpu_platform.is_some() { + query.filter( + instance_dsl::ncpus + .ne(ncpus) + .or(instance_dsl::memory.ne(memory)) + .or(instance_dsl::min_cpu_platform.ne(min_cpu_platform)) + .or(instance_dsl::min_cpu_platform.is_null()), ) - .filter( + } else { + query.filter( instance_dsl::ncpus .ne(ncpus) - .or(instance_dsl::memory.ne(memory)), + .or(instance_dsl::memory.ne(memory)) + .or(instance_dsl::min_cpu_platform.is_not_null()), ) + }; + + let r = query .set(( instance_dsl::ncpus.eq(ncpus), instance_dsl::memory.eq(memory), + instance_dsl::min_cpu_platform.eq(min_cpu_platform), )) .check_if_exists::(authz_instance.id()) .execute_and_check(&conn) .await?; match r.status { UpdateStatus::NotUpdatedButExists => { - if (r.found.ncpus, r.found.memory) == (ncpus, memory) { + if (r.found.ncpus, r.found.memory, r.found.min_cpu_platform) + == (ncpus, memory, min_cpu_platform) + { // Not updated, because the update is no change.. return Ok(()); } @@ -1334,21 +1353,22 @@ impl DataStore { .contains(&r.found.runtime().nexus_state) { return Err(err.bail(Error::conflict( - "instance must be stopped to be resized", + "instance must be stopped to change CPU or memory", ))); } // There should be no other reason the update fails on an // existing instance. 
warn!( - self.log, "failed to instance_set_size_on_conn on an \ + self.log, "failed to instance_set_cpu_and_mem_on_conn on an \ instance that should have been updatable"; "instance_id" => %r.found.id(), "new ncpus" => ?ncpus, "new memory" => ?memory, + "new CPU platform" => ?min_cpu_platform, ); return Err(err.bail(Error::internal_error( - "unable to reconfigure instance size", + "unable to change instance CPU or memory", ))); } UpdateStatus::Updated => Ok(()), @@ -2171,6 +2191,7 @@ mod tests { use nexus_db_lookup::LookupPath; use nexus_db_model::InstanceState; use nexus_db_model::Project; + use nexus_db_model::VmmCpuPlatform; use nexus_db_model::VmmRuntimeState; use nexus_db_model::VmmState; use nexus_types::external_api::params; @@ -2234,6 +2255,7 @@ mod tests { external_ips: Vec::new(), disks: Vec::new(), boot_disk: None, + min_cpu_platform: None, ssh_public_keys: None, start: false, auto_restart_policy: Default::default(), @@ -2846,6 +2868,7 @@ mod tests { sled_id: Uuid::new_v4(), propolis_ip: "10.1.9.32".parse().unwrap(), propolis_port: 420.into(), + cpu_platform: VmmCpuPlatform::SledDefault, runtime: VmmRuntimeState { time_state_updated: Utc::now(), gen: Generation::new(), @@ -2908,6 +2931,7 @@ mod tests { sled_id: Uuid::new_v4(), propolis_ip: "10.1.9.42".parse().unwrap(), propolis_port: 666.into(), + cpu_platform: VmmCpuPlatform::SledDefault, runtime: VmmRuntimeState { time_state_updated: Utc::now(), gen: Generation::new(), @@ -3005,6 +3029,7 @@ mod tests { sled_id: Uuid::new_v4(), propolis_ip: "10.1.9.32".parse().unwrap(), propolis_port: 420.into(), + cpu_platform: VmmCpuPlatform::SledDefault, runtime: VmmRuntimeState { time_state_updated: Utc::now(), r#gen: Generation::new(), @@ -3045,6 +3070,7 @@ mod tests { sled_id: Uuid::new_v4(), propolis_ip: "10.1.9.42".parse().unwrap(), propolis_port: 420.into(), + cpu_platform: VmmCpuPlatform::SledDefault, runtime: VmmRuntimeState { time_state_updated: Utc::now(), r#gen: Generation::new(), @@ -3147,6 +3173,7 @@ mod tests { sled_id: Uuid::new_v4(), propolis_ip: "10.1.9.42".parse().unwrap(), propolis_port: 420.into(), + cpu_platform: VmmCpuPlatform::SledDefault, runtime: VmmRuntimeState { time_state_updated: Utc::now(), r#gen: Generation::new(), @@ -3293,6 +3320,7 @@ mod tests { sled_id, propolis_ip: "10.1.9.42".parse().unwrap(), propolis_port: 420.into(), + cpu_platform: VmmCpuPlatform::SledDefault, runtime: VmmRuntimeState { time_state_updated: Utc::now(), r#gen: Generation::new(), diff --git a/nexus/db-queries/src/db/datastore/migration.rs b/nexus/db-queries/src/db/datastore/migration.rs index eff396570cd..adbcf8523e1 100644 --- a/nexus/db-queries/src/db/datastore/migration.rs +++ b/nexus/db-queries/src/db/datastore/migration.rs @@ -235,6 +235,7 @@ mod tests { external_ips: Vec::new(), disks: Vec::new(), boot_disk: None, + min_cpu_platform: None, ssh_public_keys: None, start: false, auto_restart_policy: Default::default(), diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs index 39ce5e08c36..8a11f2294d6 100644 --- a/nexus/db-queries/src/db/datastore/sled.rs +++ b/nexus/db-queries/src/db/datastore/sled.rs @@ -480,7 +480,7 @@ impl DataStore { // Note that this is not transactional, to reduce contention. // However, that lack of transactionality means we need to validate // our constraints again when we later try to INSERT the reservation. 
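// (Illustrative sketch, not part of this change: one way a caller could
// populate the new CPU-family constraint consumed just below. The actual call
// site is elsewhere in this patch and not shown in this hunk;
// `SledReservationConstraintBuilder::new()` is assumed to be the builder's
// existing constructor.)
//
//     let mut builder = SledReservationConstraintBuilder::new();
//     if let Some(min_platform) = instance.min_cpu_platform {
//         builder = builder
//             .cpu_families(min_platform.compatible_sled_cpu_families());
//     }
//     let constraints = builder.build();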
- let possible_sleds = sled_find_targets_query(instance_id, &resources) + let possible_sleds = sled_find_targets_query(instance_id, &resources, constraints.cpu_families()) .get_results_async::<( // Sled UUID Uuid, @@ -1084,11 +1084,11 @@ pub(in crate::db::datastore) mod test { use anyhow::{Context, Result}; use itertools::Itertools; use nexus_db_lookup::LookupPath; - use nexus_db_model::Generation; - use nexus_db_model::PhysicalDisk; use nexus_db_model::PhysicalDiskKind; use nexus_db_model::PhysicalDiskPolicy; use nexus_db_model::PhysicalDiskState; + use nexus_db_model::{Generation, SledCpuFamily}; + use nexus_db_model::{InstanceMinimumCpuPlatform, PhysicalDisk}; use nexus_types::identity::Asset; use nexus_types::identity::Resource; use omicron_common::api::external; @@ -1476,6 +1476,7 @@ pub(in crate::db::datastore) mod test { groups: Vec, force_onto_sled: Option, resources: db::model::Resources, + min_cpu_platform: Option, } struct FindTargetsOutput { @@ -1492,6 +1493,7 @@ pub(in crate::db::datastore) mod test { groups: vec![], force_onto_sled: None, resources: small_resource_request(), + min_cpu_platform: None, } } @@ -1503,7 +1505,10 @@ pub(in crate::db::datastore) mod test { ) -> Vec { assert!(self.force_onto_sled.is_none()); - sled_find_targets_query(self.id, &self.resources) + let families = + self.min_cpu_platform.map(|p| p.compatible_sled_cpu_families()); + + sled_find_targets_query(self.id, &self.resources, families) .get_results_async::<( Uuid, bool, @@ -2648,6 +2653,42 @@ pub(in crate::db::datastore) mod test { logctx.cleanup_successful(); } + #[tokio::test] + async fn sled_reservation_cpu_constraints() { + let logctx = dev::test_setup_log("sled_reservation_cpu_constraints"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + let (_authz_project, _project) = + create_project(&opctx, &datastore, "project").await; + + let mut sleds = vec![]; + for family in [SledCpuFamily::AmdMilan, SledCpuFamily::AmdTurin] { + for _ in 0..2 { + let mut builder = SledUpdateBuilder::new(); + builder.rack_id(rack_id()); + builder.hardware().cpu_family(family); + let (sled, _) = + datastore.sled_upsert(builder.build()).await.unwrap(); + sleds.push(sled); + } + } + + let mut test_instance = Instance::new(); + for platform in [None, Some(InstanceMinimumCpuPlatform::AmdMilan)] { + test_instance.min_cpu_platform = platform; + let possible_sleds = test_instance.find_targets(&datastore).await; + assert_eq!(possible_sleds.len(), 4); + } + + test_instance.min_cpu_platform = + Some(InstanceMinimumCpuPlatform::AmdTurin); + let possible_sleds = test_instance.find_targets(&datastore).await; + assert_eq!(possible_sleds.len(), 2); + + db.terminate().await; + logctx.cleanup_successful(); + } + async fn lookup_physical_disk( datastore: &DataStore, id: PhysicalDiskUuid, diff --git a/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs b/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs index dfe3358986a..e97adb0777c 100644 --- a/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs +++ b/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs @@ -455,6 +455,7 @@ mod test { external_ips: Vec::new(), disks: Vec::new(), boot_disk: None, + min_cpu_platform: None, ssh_public_keys: None, start: false, auto_restart_policy: Default::default(), diff --git a/nexus/db-queries/src/db/datastore/vmm.rs b/nexus/db-queries/src/db/datastore/vmm.rs index f0d4cb9ed0e..5559d988837 100644 --- 
a/nexus/db-queries/src/db/datastore/vmm.rs +++ b/nexus/db-queries/src/db/datastore/vmm.rs @@ -446,6 +446,7 @@ mod tests { use crate::db::model::VmmRuntimeState; use crate::db::model::VmmState; use crate::db::pub_test_utils::TestDatabase; + use nexus_db_model::VmmCpuPlatform; use omicron_common::api::internal::nexus; use omicron_test_utils::dev; use omicron_uuid_kinds::InstanceUuid; @@ -470,6 +471,7 @@ mod tests { sled_id: Uuid::new_v4(), propolis_ip: "10.1.9.32".parse().unwrap(), propolis_port: 420.into(), + cpu_platform: VmmCpuPlatform::SledDefault, runtime: VmmRuntimeState { time_state_updated: Utc::now(), r#gen: Generation::new(), @@ -491,6 +493,7 @@ mod tests { sled_id: Uuid::new_v4(), propolis_ip: "10.1.9.42".parse().unwrap(), propolis_port: 420.into(), + cpu_platform: VmmCpuPlatform::SledDefault, runtime: VmmRuntimeState { time_state_updated: Utc::now(), r#gen: Generation::new(), @@ -601,6 +604,7 @@ mod tests { sled_id: Uuid::new_v4(), propolis_ip: "10.1.9.69".parse().unwrap(), propolis_port: 420.into(), + cpu_platform: VmmCpuPlatform::SledDefault, runtime: VmmRuntimeState { time_state_updated: Utc::now(), r#gen: Generation::new(), diff --git a/nexus/db-queries/src/db/datastore/vpc.rs b/nexus/db-queries/src/db/datastore/vpc.rs index 6e3a1bef151..a6ebf16a00a 100644 --- a/nexus/db-queries/src/db/datastore/vpc.rs +++ b/nexus/db-queries/src/db/datastore/vpc.rs @@ -3976,6 +3976,7 @@ mod tests { external_ips: vec![], disks: vec![], boot_disk: None, + min_cpu_platform: None, ssh_public_keys: None, start: false, auto_restart_policy: Default::default(), diff --git a/nexus/db-queries/src/db/pub_test_utils/helpers.rs b/nexus/db-queries/src/db/pub_test_utils/helpers.rs index 9369324e72a..131fb2f3722 100644 --- a/nexus/db-queries/src/db/pub_test_utils/helpers.rs +++ b/nexus/db-queries/src/db/pub_test_utils/helpers.rs @@ -236,6 +236,7 @@ pub async fn create_stopped_instance_record( external_ips: Vec::new(), disks: Vec::new(), boot_disk: None, + min_cpu_platform: None, ssh_public_keys: None, start: false, auto_restart_policy: Default::default(), diff --git a/nexus/db-queries/src/db/queries/external_ip.rs b/nexus/db-queries/src/db/queries/external_ip.rs index 5887dc620c0..83f3fb667cc 100644 --- a/nexus/db-queries/src/db/queries/external_ip.rs +++ b/nexus/db-queries/src/db/queries/external_ip.rs @@ -1003,6 +1003,7 @@ mod tests { external_ips: vec![], disks: vec![], boot_disk: None, + min_cpu_platform: None, start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), diff --git a/nexus/db-queries/src/db/queries/network_interface.rs b/nexus/db-queries/src/db/queries/network_interface.rs index 1ff2cc94ff8..97d3202409b 100644 --- a/nexus/db-queries/src/db/queries/network_interface.rs +++ b/nexus/db-queries/src/db/queries/network_interface.rs @@ -1893,6 +1893,7 @@ mod tests { external_ips: vec![], disks: vec![], boot_disk: None, + min_cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), diff --git a/nexus/db-queries/src/db/queries/sled_reservation.rs b/nexus/db-queries/src/db/queries/sled_reservation.rs index 690dc263a9d..e1ccefa3310 100644 --- a/nexus/db-queries/src/db/queries/sled_reservation.rs +++ b/nexus/db-queries/src/db/queries/sled_reservation.rs @@ -9,7 +9,9 @@ use crate::db::model::SledResourceVmm; use crate::db::raw_query_builder::QueryBuilder; use crate::db::raw_query_builder::TypedSqlQuery; use diesel::sql_types; +use nexus_db_model::SledCpuFamily; use nexus_db_schema::enums::AffinityPolicyEnum; +use 
nexus_db_schema::enums::SledCpuFamilyEnum; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::InstanceUuid; @@ -86,6 +88,7 @@ fn subquery_other_a_instances(query: &mut QueryBuilder) { pub fn sled_find_targets_query( instance_id: InstanceUuid, resources: &Resources, + sled_families: Option<&[SledCpuFamily]>, ) -> TypedSqlQuery<( sql_types::Uuid, sql_types::Bool, @@ -93,7 +96,8 @@ pub fn sled_find_targets_query( sql_types::Nullable, )> { let mut query = QueryBuilder::new(); - query.sql(" + query.sql( + " WITH sled_targets AS ( SELECT sled.id as sled_id FROM sled @@ -103,7 +107,27 @@ pub fn sled_find_targets_query( sled.time_deleted IS NULL AND sled.sled_policy = 'in_service' AND sled.sled_state = 'active' - GROUP BY sled.id + ", + ); + + // TODO(gjc): eww. the correct way to do this is to write this as + // + // "AND sled.cpu_family = ANY (" + // + // and then just have one `param` which can be bound to a + // `sql_types::Array` + if let Some(families) = sled_families { + query.sql(" AND sled.cpu_family IN ("); + for i in 0..families.len() { + if i > 0 { + query.sql(", "); + } + query.param(); + } + query.sql(")"); + } + + query.sql("GROUP BY sled.id HAVING COALESCE(SUM(CAST(sled_resource_vmm.hardware_threads AS INT8)), 0) + " ).param().sql(" <= sled.usable_hardware_threads AND @@ -215,14 +239,21 @@ pub fn sled_find_targets_query( // - Sled UUID // - Whether or not there is space on the sled for the specified instance // - What affinity/anti-affinity policies apply - query - .sql( - " + query.sql( + " SELECT sled_id, TRUE, a_policy, aa_policy FROM sleds_with_space UNION SELECT sled_id, FALSE, a_policy, aa_policy FROM sleds_without_space ", - ) + ); + + if let Some(families) = sled_families { + for f in families { + query.bind::(*f); + } + } + + query .bind::(resources.hardware_threads) .bind::(resources.rss_ram) .bind::(resources.reservoir_ram) @@ -411,7 +442,7 @@ mod test { model::ByteCount::from(external::ByteCount::from_gibibytes_u32(0)), ); - let query = sled_find_targets_query(id, &resources); + let query = sled_find_targets_query(id, &resources, None); expectorate_query_contents( &query, "tests/output/sled_find_targets_query.sql", @@ -419,6 +450,27 @@ mod test { .await; } + #[tokio::test] + async fn expectorate_sled_find_targets_query_with_cpu() { + let id = InstanceUuid::nil(); + let resources = Resources::new( + 0, + model::ByteCount::from(external::ByteCount::from_gibibytes_u32(0)), + model::ByteCount::from(external::ByteCount::from_gibibytes_u32(0)), + ); + + let query = sled_find_targets_query( + id, + &resources, + Some(&[SledCpuFamily::AmdMilan]), + ); + expectorate_query_contents( + &query, + "tests/output/sled_find_targets_query_with_cpu.sql", + ) + .await; + } + #[tokio::test] async fn explain_sled_find_targets_query() { let logctx = dev::test_setup_log("explain_sled_find_targets_query"); @@ -433,7 +485,12 @@ mod test { model::ByteCount::from(external::ByteCount::from_gibibytes_u32(0)), ); - let query = sled_find_targets_query(id, &resources); + let query = sled_find_targets_query( + id, + &resources, + Some(&[SledCpuFamily::AmdMilan]), + ); + let _ = query .explain_async(&conn) .await diff --git a/nexus/db-queries/tests/output/sled_find_targets_query_with_cpu.sql b/nexus/db-queries/tests/output/sled_find_targets_query_with_cpu.sql new file mode 100644 index 00000000000..6fc5a1b369e --- /dev/null +++ b/nexus/db-queries/tests/output/sled_find_targets_query_with_cpu.sql @@ -0,0 +1,108 @@ +WITH + sled_targets + AS ( + SELECT + sled.id AS sled_id + FROM + sled LEFT JOIN 
sled_resource_vmm ON sled_resource_vmm.sled_id = sled.id + WHERE + sled.time_deleted IS NULL + AND sled.sled_policy = 'in_service' + AND sled.sled_state = 'active' + AND sled.cpu_family IN ($1,) + GROUP BY + sled.id + HAVING + COALESCE(sum(CAST(sled_resource_vmm.hardware_threads AS INT8)), 0) + $2 + <= sled.usable_hardware_threads + AND COALESCE(sum(CAST(sled_resource_vmm.rss_ram AS INT8)), 0) + $3 + <= sled.usable_physical_ram + AND COALESCE(sum(CAST(sled_resource_vmm.reservoir_ram AS INT8)), 0) + $4 + <= sled.reservoir_size + ), + our_aa_groups + AS (SELECT group_id FROM anti_affinity_group_instance_membership WHERE instance_id = $5), + other_aa_instances + AS ( + SELECT + anti_affinity_group_instance_membership.group_id, instance_id + FROM + anti_affinity_group_instance_membership + JOIN our_aa_groups ON + anti_affinity_group_instance_membership.group_id = our_aa_groups.group_id + WHERE + instance_id != $6 + ), + other_aa_instances_by_policy + AS ( + SELECT + policy, instance_id + FROM + other_aa_instances + JOIN anti_affinity_group ON + anti_affinity_group.id = other_aa_instances.group_id + AND anti_affinity_group.failure_domain = 'sled' + WHERE + anti_affinity_group.time_deleted IS NULL + ), + aa_policy_and_sleds + AS ( + SELECT + DISTINCT policy, sled_id + FROM + other_aa_instances_by_policy + JOIN sled_resource_vmm ON + sled_resource_vmm.instance_id = other_aa_instances_by_policy.instance_id + ), + our_a_groups AS (SELECT group_id FROM affinity_group_instance_membership WHERE instance_id = $7), + other_a_instances + AS ( + SELECT + affinity_group_instance_membership.group_id, instance_id + FROM + affinity_group_instance_membership + JOIN our_a_groups ON affinity_group_instance_membership.group_id = our_a_groups.group_id + WHERE + instance_id != $8 + ), + other_a_instances_by_policy + AS ( + SELECT + policy, instance_id + FROM + other_a_instances + JOIN affinity_group ON + affinity_group.id = other_a_instances.group_id + AND affinity_group.failure_domain = 'sled' + WHERE + affinity_group.time_deleted IS NULL + ), + a_policy_and_sleds + AS ( + SELECT + DISTINCT policy, sled_id + FROM + other_a_instances_by_policy + JOIN sled_resource_vmm ON + sled_resource_vmm.instance_id = other_a_instances_by_policy.instance_id + ), + sleds_with_space + AS ( + SELECT + s.sled_id, a.policy AS a_policy, aa.policy AS aa_policy + FROM + sled_targets AS s + LEFT JOIN a_policy_and_sleds AS a ON a.sled_id = s.sled_id + LEFT JOIN aa_policy_and_sleds AS aa ON aa.sled_id = s.sled_id + ), + sleds_without_space + AS ( + SELECT + sled_id, policy AS a_policy, NULL AS aa_policy + FROM + a_policy_and_sleds + WHERE + a_policy_and_sleds.sled_id NOT IN (SELECT sled_id FROM sleds_with_space) + ) +SELECT sled_id, true, a_policy, aa_policy FROM sleds_with_space +UNION SELECT sled_id, false, a_policy, aa_policy FROM sleds_without_space diff --git a/nexus/db-schema/src/enums.rs b/nexus/db-schema/src/enums.rs index 372d42dc40a..b563e926cfb 100644 --- a/nexus/db-schema/src/enums.rs +++ b/nexus/db-schema/src/enums.rs @@ -46,6 +46,7 @@ define_enums! { IdentityProviderTypeEnum => "provider_type", IdentityTypeEnum => "identity_type", InstanceAutoRestartPolicyEnum => "instance_auto_restart", + InstanceMinimumCpuPlatformEnum => "instance_min_cpu_platform", InstanceStateEnum => "instance_state_v2", InstanceIntendedStateEnum => "instance_intended_state", InvConfigReconcilerStatusKindEnum => "inv_config_reconciler_status_kind", @@ -89,6 +90,7 @@ define_enums! 
{ UserDataExportResourceTypeEnum => "user_data_export_resource_type", UserDataExportStateEnum => "user_data_export_state", UserProvisionTypeEnum => "user_provision_type", + VmmCpuPlatformEnum => "vmm_cpu_platform", VmmStateEnum => "vmm_state", VolumeResourceUsageTypeEnum => "volume_resource_usage_type", VpcFirewallRuleActionEnum => "vpc_firewall_rule_action", diff --git a/nexus/db-schema/src/schema.rs b/nexus/db-schema/src/schema.rs index fb04983dce4..9f9b4b6d6e1 100644 --- a/nexus/db-schema/src/schema.rs +++ b/nexus/db-schema/src/schema.rs @@ -424,6 +424,7 @@ table! { auto_restart_policy -> Nullable, auto_restart_cooldown -> Nullable, boot_disk_id -> Nullable, + min_cpu_platform -> Nullable, time_state_updated -> Timestamptz, state_generation -> Int8, active_propolis_id -> Nullable, @@ -448,6 +449,7 @@ table! { sled_id -> Uuid, propolis_ip -> Inet, propolis_port -> Int4, + cpu_platform -> crate::enums::VmmCpuPlatformEnum, time_state_updated -> Timestamptz, state_generation -> Int8, state -> crate::enums::VmmStateEnum, diff --git a/nexus/src/app/background/tasks/abandoned_vmm_reaper.rs b/nexus/src/app/background/tasks/abandoned_vmm_reaper.rs index d2ed52c30da..64a1d4e861f 100644 --- a/nexus/src/app/background/tasks/abandoned_vmm_reaper.rs +++ b/nexus/src/app/background/tasks/abandoned_vmm_reaper.rs @@ -201,6 +201,7 @@ mod tests { use nexus_db_model::Resources; use nexus_db_model::SledResourceVmm; use nexus_db_model::Vmm; + use nexus_db_model::VmmCpuPlatform; use nexus_db_model::VmmRuntimeState; use nexus_db_model::VmmState; use nexus_test_utils::resource_helpers; @@ -246,6 +247,7 @@ mod tests { sled_id: Uuid::new_v4(), propolis_ip: "::1".parse().unwrap(), propolis_port: 12345.into(), + cpu_platform: VmmCpuPlatform::SledDefault, runtime: VmmRuntimeState { state: VmmState::Destroyed, time_state_updated: Utc::now(), diff --git a/nexus/src/app/background/tasks/instance_reincarnation.rs b/nexus/src/app/background/tasks/instance_reincarnation.rs index 4e9fae3c601..c95d82f1fc7 100644 --- a/nexus/src/app/background/tasks/instance_reincarnation.rs +++ b/nexus/src/app/background/tasks/instance_reincarnation.rs @@ -316,6 +316,7 @@ mod test { use nexus_db_model::InstanceRuntimeState; use nexus_db_model::InstanceState; use nexus_db_model::Vmm; + use nexus_db_model::VmmCpuPlatform; use nexus_db_model::VmmRuntimeState; use nexus_db_model::VmmState; use nexus_db_queries::authz; @@ -389,6 +390,7 @@ mod test { external_ips: Vec::new(), disks: Vec::new(), boot_disk: None, + min_cpu_platform: None, ssh_public_keys: None, start: state == InstanceState::Vmm, auto_restart_policy, @@ -437,6 +439,7 @@ mod test { sled_id: Uuid::new_v4(), propolis_ip: "10.1.9.42".parse().unwrap(), propolis_port: 420.into(), + cpu_platform: VmmCpuPlatform::SledDefault, runtime: VmmRuntimeState { time_state_updated: Utc::now(), r#gen: Generation::new(), diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index c2dbf6269da..1f951cef2e3 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -357,16 +357,24 @@ impl super::Nexus { let (.., authz_project, authz_instance) = instance_lookup.lookup_for(authz::Action::Modify).await?; - check_instance_cpu_memory_sizes(params.ncpus, params.memory)?; + let params::InstanceUpdate { + ncpus, + memory, + auto_restart_policy, + boot_disk, + min_cpu_platform, + } = params; + + check_instance_cpu_memory_sizes(*ncpus, *memory)?; - let boot_disk_id = match params.boot_disk.clone() { + let boot_disk_id = match boot_disk { Some(disk) => { let selector = 
params::DiskSelector { project: match &disk { NameOrId::Name(_) => Some(authz_project.id().into()), NameOrId::Id(_) => None, }, - disk, + disk: disk.clone(), }; let (.., authz_disk) = self .disk_lookup(opctx, selector)? @@ -378,12 +386,18 @@ impl super::Nexus { None => None, }; - let auto_restart_policy = params.auto_restart_policy.map(Into::into); - let ncpus = params.ncpus.into(); - let memory = params.memory.into(); - - let update = - InstanceUpdate { boot_disk_id, auto_restart_policy, ncpus, memory }; + let auto_restart_policy = auto_restart_policy.map(Into::into); + let ncpus = (*ncpus).into(); + let memory = (*memory).into(); + let min_cpu_platform = min_cpu_platform.map(Into::into); + + let update = InstanceUpdate { + boot_disk_id, + auto_restart_policy, + ncpus, + memory, + min_cpu_platform, + }; self.datastore() .instance_reconfigure(opctx, &authz_instance, update) .await @@ -1257,6 +1271,7 @@ impl super::Nexus { .generate_vmm_spec( &operation, db_instance, + initial_vmm, &disks, &nics, &ssh_keys, @@ -2337,7 +2352,7 @@ mod tests { use futures::{SinkExt, StreamExt}; use nexus_db_model::{ Instance as DbInstance, InstanceState as DbInstanceState, - VmmState as DbVmmState, + VmmCpuPlatform, VmmState as DbVmmState, }; use omicron_common::api::external::{ Hostname, IdentityMetadataCreateParams, InstanceCpuCount, Name, @@ -2459,6 +2474,7 @@ mod tests { external_ips: vec![], disks: vec![], boot_disk: None, + min_cpu_platform: None, ssh_public_keys: None, start: false, auto_restart_policy: Default::default(), @@ -2478,6 +2494,7 @@ mod tests { ipnetwork::IpNetwork::new(IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)), 0) .unwrap(), 0, + VmmCpuPlatform::SledDefault, ); (instance, vmm) diff --git a/nexus/src/app/instance_platform.rs b/nexus/src/app/instance_platform.rs index aed3d1b2f69..96a5e21225c 100644 --- a/nexus/src/app/instance_platform.rs +++ b/nexus/src/app/instance_platform.rs @@ -78,9 +78,10 @@ use omicron_common::api::external::Error; use omicron_common::api::internal::shared::NetworkInterface; use sled_agent_client::types::{ BlobStorageBackend, Board, BootOrderEntry, BootSettings, Chipset, - ComponentV0, CrucibleStorageBackend, I440Fx, InstanceSpecV0, NvmeDisk, - PciPath, QemuPvpanic, SerialPort, SerialPortNumber, SpecKey, VirtioDisk, - VirtioNetworkBackend, VirtioNic, VmmSpec, + ComponentV0, Cpuid, CpuidEntry, CpuidVendor, CrucibleStorageBackend, + I440Fx, InstanceSpecV0, NvmeDisk, PciPath, QemuPvpanic, SerialPort, + SerialPortNumber, SpecKey, VirtioDisk, VirtioNetworkBackend, VirtioNic, + VmmSpec, }; use uuid::Uuid; @@ -406,6 +407,7 @@ impl super::Nexus { &self, reason: &InstanceRegisterReason, instance: &db::model::Instance, + vmm: &db::model::Vmm, disks: &[db::model::Disk], nics: &[NetworkInterface], ssh_keys: &[db::model::SshKey], @@ -481,7 +483,7 @@ impl super::Nexus { let spec = InstanceSpecV0 { board: Board { chipset: Chipset::I440Fx(I440Fx { enable_pcie: false }), - cpuid: None, + cpuid: cpuid_from_vmm_cpu_platform(vmm.cpu_platform), cpus, guest_hv_interface: None, memory_mb: instance.memory.to_whole_mebibytes(), @@ -492,3 +494,108 @@ impl super::Nexus { Ok(VmmSpec(spec)) } } + +/// Yields the CPUID configuration to use for a VMM that specifies the supplied +/// minimum CPU `platform`. +// +// This is a free function (and not an `Into` impl on `VmmCpuPlatform`) to keep +// all of the gnarly CPUID details out of the DB model crate, which defines that +// type. +fn cpuid_from_vmm_cpu_platform( + platform: db::model::VmmCpuPlatform, +) -> Option { + macro_rules! 
cpuid_leaf { + ($leaf:literal, $eax:literal, $ebx:literal, $ecx:literal, $edx:literal) => { + CpuidEntry { + leaf: $leaf, + subleaf: None, + eax: $eax, + ebx: $ebx, + ecx: $ecx, + edx: $edx, + } + }; + } + + macro_rules! cpuid_subleaf { + ($leaf:literal, $sl:literal, $eax:literal, $ebx:literal, $ecx:literal, $edx:literal) => { + CpuidEntry { + leaf: $leaf, + subleaf: Some($sl), + eax: $eax, + ebx: $ebx, + ecx: $ecx, + edx: $edx, + } + }; + } + + // See [RFD 314](https://314.rfd.oxide.computer/) section 6 for all the + // gnarly details. + const MILAN_CPUID: [CpuidEntry; 32] = [ + cpuid_leaf!(0x0, 0x0000000D, 0x68747541, 0x444D4163, 0x69746E65), + cpuid_leaf!(0x1, 0x00A00F11, 0x00000800, 0xF6F83203, 0x078BFBFF), + cpuid_leaf!(0x5, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x6, 0x00000002, 0x00000000, 0x00000000, 0x00000000), + cpuid_subleaf!( + 0x7, 0x0, 0x00000000, 0x219C03A9, 0x00000000, 0x00000000 + ), + cpuid_subleaf!( + 0xB, 0x0, 0x00000001, 0x00000002, 0x00000100, 0x00000000 + ), + cpuid_subleaf!( + 0xB, 0x1, 0x00000000, 0x00000000, 0x00000201, 0x00000000 + ), + cpuid_subleaf!( + 0xD, 0x0, 0x00000007, 0x00000340, 0x00000340, 0x00000000 + ), + cpuid_subleaf!( + 0xD, 0x1, 0x00000007, 0x00000340, 0x00000000, 0x00000000 + ), + cpuid_subleaf!( + 0xD, 0x2, 0x00000100, 0x00000240, 0x00000000, 0x00000000 + ), + cpuid_leaf!(0x80000000, 0x80000021, 0x68747541, 0x444D4163, 0x69746E65), + cpuid_leaf!(0x80000001, 0x00A00F11, 0x40000000, 0x444001F0, 0x27D3FBFF), + cpuid_leaf!(0x80000002, 0x73736F72, 0x726F6365, 0x31332050, 0x43203737), + cpuid_leaf!(0x80000003, 0x20455059, 0x00414D44, 0x00000000, 0x00000000), + cpuid_leaf!(0x80000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x80000005, 0xFF40FF40, 0xFF40FF40, 0x20080140, 0x20080140), + cpuid_leaf!(0x80000006, 0x08002200, 0x68004200, 0x02006140, 0x01009140), + cpuid_leaf!(0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000100), + cpuid_leaf!(0x80000008, 0x00003030, 0x111ED205, 0x00000000, 0x00000000), + cpuid_leaf!(0x8000000A, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x80000019, 0xF040F040, 0xF040F040, 0x00000000, 0x00000000), + cpuid_leaf!(0x8000001A, 0x00000006, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x8000001B, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x8000001C, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cpuid_subleaf!( + 0x8000001D, 0x0, 0x00000121, 0x01C0003F, 0x0000003F, 0x00000000 + ), + cpuid_subleaf!( + 0x8000001D, 0x1, 0x00000122, 0x01C0003F, 0x0000003F, 0x00000000 + ), + cpuid_subleaf!( + 0x8000001D, 0x2, 0x00000143, 0x01C0003F, 0x000003FF, 0x00000002 + ), + cpuid_subleaf!( + 0x8000001D, 0x3, 0x00000163, 0x03C0003F, 0x00007FFF, 0x00000001 + ), + cpuid_subleaf!( + 0x8000001D, 0x4, 0x00000000, 0x00000000, 0x00000000, 0x00000000 + ), + cpuid_leaf!(0x8000001E, 0x00000000, 0x00000100, 0x00000000, 0x00000000), + cpuid_leaf!(0x8000001F, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x80000021, 0x0000002D, 0x00000000, 0x00000000, 0x00000000), + ]; + + let cpuid = match platform { + db::model::VmmCpuPlatform::SledDefault => return None, + db::model::VmmCpuPlatform::AmdMilan + | db::model::VmmCpuPlatform::AmdTurin => { + Cpuid { entries: MILAN_CPUID.to_vec(), vendor: CpuidVendor::Amd } + } + }; + + Some(cpuid) +} diff --git a/nexus/src/app/sagas/instance_common.rs b/nexus/src/app/sagas/instance_common.rs index b4644201e0c..4c20a5b1cb4 100644 --- a/nexus/src/app/sagas/instance_common.rs +++ 
b/nexus/src/app/sagas/instance_common.rs @@ -10,7 +10,7 @@ use crate::Nexus; use nexus_db_lookup::LookupPath; use nexus_db_model::{ ByteCount, ExternalIp, InstanceState, IpAttachState, Ipv4NatEntry, - SledReservationConstraints, SledResourceVmm, VmmState, + SledReservationConstraints, SledResourceVmm, VmmCpuPlatform, VmmState, }; use nexus_db_queries::authz; use nexus_db_queries::{authn, context::OpContext, db, db::DataStore}; @@ -94,6 +94,7 @@ pub async fn create_and_insert_vmm_record( propolis_id: PropolisUuid, sled_id: SledUuid, propolis_ip: Ipv6Addr, + cpu_platform: VmmCpuPlatform, ) -> Result { let vmm = db::model::Vmm::new( propolis_id, @@ -101,6 +102,7 @@ pub async fn create_and_insert_vmm_record( sled_id, IpAddr::V6(propolis_ip).into(), DEFAULT_PROPOLIS_PORT, + cpu_platform, ); let vmm = datastore diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index 7242379ee8a..de67f108679 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -1294,6 +1294,7 @@ pub mod test { name: DISK_NAME.parse().unwrap(), }, )), + min_cpu_platform: None, disks: Vec::new(), start: false, auto_restart_policy: Default::default(), diff --git a/nexus/src/app/sagas/instance_delete.rs b/nexus/src/app/sagas/instance_delete.rs index 2056a14784c..f3f1d9ec8a1 100644 --- a/nexus/src/app/sagas/instance_delete.rs +++ b/nexus/src/app/sagas/instance_delete.rs @@ -235,6 +235,7 @@ mod test { boot_disk: Some(params::InstanceDiskAttachment::Attach( params::InstanceDiskAttach { name: DISK_NAME.parse().unwrap() }, )), + min_cpu_platform: None, disks: Vec::new(), start: false, auto_restart_policy: Default::default(), diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index 96349960dc8..4b7a8ef84a1 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -319,6 +319,7 @@ async fn sim_create_vmm_record( propolis_id, sled_id, propolis_ip, + params.src_vmm.cpu_platform, ) .await } @@ -612,6 +613,7 @@ mod tests { external_ips: vec![], disks: vec![], boot_disk: None, + min_cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), diff --git a/nexus/src/app/sagas/instance_start.rs b/nexus/src/app/sagas/instance_start.rs index 4045f55984a..e2dd2769a90 100644 --- a/nexus/src/app/sagas/instance_start.rs +++ b/nexus/src/app/sagas/instance_start.rs @@ -162,13 +162,21 @@ async fn sis_alloc_server( let reservoir_ram = params.db_instance.memory; let propolis_id = sagactx.lookup::("propolis_id")?; + let mut constraint_builder = + db::model::SledReservationConstraintBuilder::new(); + if let Some(min_cpu_platform) = params.db_instance.min_cpu_platform.as_ref() + { + constraint_builder = constraint_builder + .cpu_families(min_cpu_platform.compatible_sled_cpu_families()); + } + let resource = super::instance_common::reserve_vmm_resources( osagactx.nexus(), InstanceUuid::from_untyped_uuid(params.db_instance.id()), propolis_id, u32::from(hardware_threads.0), reservoir_ram, - db::model::SledReservationConstraints::none(), + constraint_builder.build(), ) .await?; @@ -211,6 +219,39 @@ async fn sis_create_vmm_record( let sled_id = sagactx.lookup::("sled_id")?; let propolis_ip = sagactx.lookup::("propolis_ip")?; + // If the instance supplied a minimum CPU platform, record that as the VMM's + // required platform, irrespective of what sled was picked. 
(This allows a + // VM to land on a "better" sled than its minimum requirement and migrate + // back to a minimum-required sled later.) + // + // If the instance didn't supply a minimum CPU platform, select one for this + // VMM by looking up the chosen sled and selecting the "minimum compatible + // platform" for sleds of that lineage. This maximizes the number of sleds + // that can host the VMM if it needs to migrate in the future. Selecting the + // sled first and then deriving the platform is meant to support + // heterogeneous deployments: if a deployment contains some sleds with CPUs + // from vendor A, and some with CPUs from vendor B, then selecting the sled + // first implicitly chooses a vendor, and then the "minimum compatible" + // computation selects the most compatible platform that can run on sleds + // with CPUs from that vendor. + let cpu_platform = + if let Some(cpu_platform) = params.db_instance.min_cpu_platform { + cpu_platform.into() + } else { + let (.., sled) = osagactx + .nexus() + .sled_lookup( + &osagactx.nexus().opctx_alloc, + &sled_id.into_untyped_uuid(), + ) + .map_err(ActionError::action_failed)? + .fetch() + .await + .map_err(ActionError::action_failed)?; + + sled.cpu_family.minimum_compatible_platform() + }; + super::instance_common::create_and_insert_vmm_record( osagactx.datastore(), &opctx, @@ -218,6 +259,7 @@ async fn sis_create_vmm_record( propolis_id, sled_id, propolis_ip, + cpu_platform, ) .await } @@ -843,6 +885,7 @@ mod test { external_ips: vec![], disks: vec![], boot_disk: None, + min_cpu_platform: None, start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), diff --git a/nexus/src/app/sagas/instance_update/mod.rs b/nexus/src/app/sagas/instance_update/mod.rs index 5605ef688d8..96548793d38 100644 --- a/nexus/src/app/sagas/instance_update/mod.rs +++ b/nexus/src/app/sagas/instance_update/mod.rs @@ -1577,6 +1577,7 @@ mod test { external_ips: vec![], disks: vec![], boot_disk: None, + min_cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index c6d019becb7..01951189282 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -2170,6 +2170,7 @@ mod test { network_interfaces: params::InstanceNetworkInterfaceAttachment::None, boot_disk, + min_cpu_platform: None, disks: data_disks, external_ips: vec![], start: true, diff --git a/nexus/test-utils/src/resource_helpers.rs b/nexus/test-utils/src/resource_helpers.rs index 97d96e1fceb..5ecd031b8d1 100644 --- a/nexus/test-utils/src/resource_helpers.rs +++ b/nexus/test-utils/src/resource_helpers.rs @@ -663,6 +663,7 @@ pub async fn create_instance_with( external_ips, disks, boot_disk: None, + min_cpu_platform: None, start, auto_restart_policy, anti_affinity_groups: Vec::new(), diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index 01d03f94331..7db8d944176 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -666,6 +666,7 @@ pub static DEMO_INSTANCE_CREATE: LazyLock = }], disks: vec![], boot_disk: None, + min_cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -687,6 +688,7 @@ pub static DEMO_STOPPED_INSTANCE_CREATE: LazyLock = }], disks: vec![], boot_disk: None, + min_cpu_platform: None, start: true, auto_restart_policy: 
Default::default(), anti_affinity_groups: Vec::new(), @@ -694,6 +696,7 @@ pub static DEMO_STOPPED_INSTANCE_CREATE: LazyLock = pub static DEMO_INSTANCE_UPDATE: LazyLock = LazyLock::new(|| params::InstanceUpdate { boot_disk: None, + min_cpu_platform: None, auto_restart_policy: None, ncpus: InstanceCpuCount(1), memory: ByteCount::from_gibibytes_u32(16), diff --git a/nexus/tests/integration_tests/external_ips.rs b/nexus/tests/integration_tests/external_ips.rs index ef6917db0b9..bd2eaf30e71 100644 --- a/nexus/tests/integration_tests/external_ips.rs +++ b/nexus/tests/integration_tests/external_ips.rs @@ -1006,6 +1006,7 @@ async fn test_floating_ip_attach_fail_between_projects( }], disks: vec![], boot_disk: None, + min_cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index 38fcd06c922..66ba780f12b 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -16,6 +16,7 @@ use nexus_db_lookup::LookupPath; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; use nexus_db_queries::db::fixed_data::silo::DEFAULT_SILO; +use nexus_test_interface::NexusServer; use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; use nexus_test_utils::http_testing::RequestBuilder; @@ -38,12 +39,14 @@ use nexus_test_utils::resource_helpers::object_put; use nexus_test_utils::resource_helpers::object_put_error; use nexus_test_utils::resource_helpers::objects_list_page_authz; use nexus_test_utils::resource_helpers::test_params; +use nexus_test_utils::start_sled_agent_with_config; use nexus_test_utils::wait_for_producer; use nexus_types::external_api::params::SshKeyCreate; use nexus_types::external_api::shared::IpKind; use nexus_types::external_api::shared::IpRange; use nexus_types::external_api::shared::Ipv4Range; use nexus_types::external_api::shared::SiloIdentityMode; +use nexus_types::external_api::views::Sled; use nexus_types::external_api::views::SshKey; use nexus_types::external_api::{params, views}; use nexus_types::identity::Resource; @@ -60,6 +63,7 @@ use omicron_common::api::external::IdentityMetadataUpdateParams; use omicron_common::api::external::Instance; use omicron_common::api::external::InstanceAutoRestartPolicy; use omicron_common::api::external::InstanceCpuCount; +use omicron_common::api::external::InstanceMinimumCpuPlatform; use omicron_common::api::external::InstanceNetworkInterface; use omicron_common::api::external::InstanceState; use omicron_common::api::external::Name; @@ -120,6 +124,10 @@ fn get_instance_start_url(instance_name: &str) -> String { format!("/v1/instances/{}/start?{}", instance_name, get_project_selector()) } +fn get_instance_stop_url(instance_name: &str) -> String { + format!("/v1/instances/{}/stop?{}", instance_name, get_project_selector()) +} + fn get_disks_url() -> String { format!("/v1/disks?{}", get_project_selector()) } @@ -233,6 +241,7 @@ async fn test_create_instance_with_bad_hostname_impl( external_ips: vec![], disks: vec![], boot_disk: None, + min_cpu_platform: None, start: false, ssh_public_keys: None, auto_restart_policy: Default::default(), @@ -341,6 +350,7 @@ async fn test_instances_create_reboot_halt( external_ips: vec![], disks: vec![], boot_disk: None, + min_cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -587,7 +597,7 @@ async fn 
test_instances_create_reboot_halt( client, StatusCode::NOT_FOUND, Method::POST, - get_instance_url(format!("{}/start", instance_name).as_str()).as_str(), + get_instance_start_url(instance_name).as_str(), ) .authn_as(AuthnMode::PrivilegedUser) .execute() @@ -597,7 +607,7 @@ async fn test_instances_create_reboot_halt( client, StatusCode::NOT_FOUND, Method::POST, - get_instance_url(format!("{}/stop", instance_name).as_str()).as_str(), + get_instance_stop_url(instance_name).as_str(), ) .authn_as(AuthnMode::PrivilegedUser) .execute() @@ -1686,6 +1696,22 @@ async fn expect_instance_reboot_fail( .expect("expected instance reboot to fail"); } +async fn expect_instance_start_fail( + client: &ClientTestContext, + instance_name: &str, + status: http::StatusCode, +) { + let url = get_instance_url(format!("{instance_name}/start").as_str()); + let builder = RequestBuilder::new(client, Method::POST, &url) + .body(None as Option<&serde_json::Value>) + .expect_status(Some(status)); + NexusRequest::new(builder) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("expected instance start to fail"); +} + async fn expect_instance_stop_fail( client: &ClientTestContext, instance_name: &str, @@ -2015,6 +2041,7 @@ async fn test_instances_create_stopped_start( external_ips: vec![], disks: vec![], boot_disk: None, + min_cpu_platform: None, start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -2199,6 +2226,7 @@ async fn test_instance_using_image_from_other_project_fails( }, )], boot_disk: None, + min_cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -2265,6 +2293,7 @@ async fn test_instance_create_saga_removes_instance_database_record( external_ips: vec![], disks: vec![], boot_disk: None, + min_cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -2296,6 +2325,7 @@ async fn test_instance_create_saga_removes_instance_database_record( external_ips: vec![], disks: vec![], boot_disk: None, + min_cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -2388,6 +2418,7 @@ async fn test_instance_with_single_explicit_ip_address( external_ips: vec![], disks: vec![], boot_disk: None, + min_cpu_platform: None, start: true, auto_restart_policy: Default::default(), @@ -2507,6 +2538,7 @@ async fn test_instance_with_new_custom_network_interfaces( external_ips: vec![], disks: vec![], boot_disk: None, + min_cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -2625,6 +2657,7 @@ async fn test_instance_create_delete_network_interface( external_ips: vec![], disks: vec![], boot_disk: None, + min_cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -2872,6 +2905,7 @@ async fn test_instance_update_network_interfaces( external_ips: vec![], disks: vec![], boot_disk: None, + min_cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -3503,6 +3537,7 @@ async fn test_instance_with_multiple_nics_unwinds_completely( external_ips: vec![], disks: vec![], boot_disk: None, + min_cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -3575,6 +3610,7 @@ async fn test_attach_one_disk_to_instance(cptestctx: &ControlPlaneTestContext) { boot_disk: Some(params::InstanceDiskAttachment::Attach( 
params::InstanceDiskAttach { name: disk_name.clone() }, )), + min_cpu_platform: None, disks: Vec::new(), start: true, auto_restart_policy: Default::default(), @@ -3667,6 +3703,7 @@ async fn test_instance_create_attach_disks( }, ), ], + min_cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -3765,6 +3802,7 @@ async fn test_instance_create_attach_disks_undo( ), ], boot_disk: None, + min_cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -3849,6 +3887,7 @@ async fn test_attach_eight_disks_to_instance( ) }) .collect(), + min_cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -3937,6 +3976,7 @@ async fn test_cannot_attach_nine_disks_to_instance( ) }) .collect(), + min_cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -4039,6 +4079,7 @@ async fn test_cannot_attach_faulted_disks(cptestctx: &ControlPlaneTestContext) { ) }) .collect(), + min_cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -4130,6 +4171,7 @@ async fn test_disks_detached_when_instance_destroyed( ) }) .collect(), + min_cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -4228,6 +4270,7 @@ async fn test_disks_detached_when_instance_destroyed( ) }) .collect(), + min_cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -4312,6 +4355,7 @@ async fn test_duplicate_disk_attach_requests_ok( ), ], boot_disk: None, + min_cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -4356,6 +4400,7 @@ async fn test_duplicate_disk_attach_requests_ok( name: Name::try_from(String::from("alsodata")).unwrap(), }, )], + min_cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -4412,6 +4457,7 @@ async fn test_cannot_detach_boot_disk(cptestctx: &ControlPlaneTestContext) { name: Name::try_from(String::from("probablydata0")).unwrap(), }, )), + min_cpu_platform: None, disks: Vec::new(), start: false, auto_restart_policy: Default::default(), @@ -4475,6 +4521,7 @@ async fn test_cannot_detach_boot_disk(cptestctx: &ControlPlaneTestContext) { params::InstanceUpdate { boot_disk: None, auto_restart_policy: None, + min_cpu_platform: None, ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), }, @@ -4547,6 +4594,7 @@ async fn test_updating_running_instance_boot_disk_is_conflict( boot_disk: Some(params::InstanceDiskAttachment::Attach( params::InstanceDiskAttach { name: probablydata.clone() }, )), + min_cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -4578,6 +4626,7 @@ async fn test_updating_running_instance_boot_disk_is_conflict( params::InstanceUpdate { boot_disk: Some(alsodata.clone().into()), auto_restart_policy: None, + min_cpu_platform: None, ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), }, @@ -4596,6 +4645,7 @@ async fn test_updating_running_instance_boot_disk_is_conflict( // was created. 
boot_disk: Some(probablydata.clone().into()), auto_restart_policy: Some(InstanceAutoRestartPolicy::BestEffort), + min_cpu_platform: None, ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), }, @@ -4618,6 +4668,7 @@ async fn test_updating_missing_instance_is_not_found( params::InstanceUpdate { boot_disk: None, auto_restart_policy: None, + min_cpu_platform: None, ncpus: InstanceCpuCount::try_from(0).unwrap(), memory: ByteCount::from_gibibytes_u32(0), }, @@ -4707,6 +4758,7 @@ async fn test_size_can_be_changed(cptestctx: &ControlPlaneTestContext) { network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], boot_disk: None, + min_cpu_platform: None, disks: Vec::new(), start: true, // Start out with None @@ -4739,6 +4791,7 @@ async fn test_size_can_be_changed(cptestctx: &ControlPlaneTestContext) { params::InstanceUpdate { auto_restart_policy, boot_disk: boot_disk_nameorid.clone(), + min_cpu_platform: None, ncpus: new_ncpus, memory: new_memory, }, @@ -4746,7 +4799,7 @@ async fn test_size_can_be_changed(cptestctx: &ControlPlaneTestContext) { ) .await; - assert_eq!(err.message, "instance must be stopped to be resized"); + assert_eq!(err.message, "instance must be stopped to change CPU or memory"); instance_post(&client, instance_name, InstanceOp::Stop).await; let nexus = &cptestctx.server.server_context().nexus; @@ -4761,6 +4814,7 @@ async fn test_size_can_be_changed(cptestctx: &ControlPlaneTestContext) { params::InstanceUpdate { auto_restart_policy, boot_disk: boot_disk_nameorid.clone(), + min_cpu_platform: None, ncpus: new_ncpus, memory: new_memory, }, @@ -4776,6 +4830,7 @@ async fn test_size_can_be_changed(cptestctx: &ControlPlaneTestContext) { params::InstanceUpdate { auto_restart_policy, boot_disk: boot_disk_nameorid.clone(), + min_cpu_platform: None, ncpus: initial_ncpus, memory: new_memory, }, @@ -4790,6 +4845,7 @@ async fn test_size_can_be_changed(cptestctx: &ControlPlaneTestContext) { params::InstanceUpdate { auto_restart_policy, boot_disk: boot_disk_nameorid.clone(), + min_cpu_platform: None, ncpus: initial_ncpus, memory: initial_memory, }, @@ -4808,6 +4864,7 @@ async fn test_size_can_be_changed(cptestctx: &ControlPlaneTestContext) { params::InstanceUpdate { auto_restart_policy, boot_disk: boot_disk_nameorid.clone(), + min_cpu_platform: None, ncpus: InstanceCpuCount(MAX_VCPU_PER_INSTANCE + 1), memory: instance.memory, }, @@ -4829,6 +4886,7 @@ async fn test_size_can_be_changed(cptestctx: &ControlPlaneTestContext) { params::InstanceUpdate { auto_restart_policy, boot_disk: boot_disk_nameorid.clone(), + min_cpu_platform: None, ncpus: instance.ncpus, memory: ByteCount::from_mebibytes_u32(0), }, @@ -4844,6 +4902,7 @@ async fn test_size_can_be_changed(cptestctx: &ControlPlaneTestContext) { params::InstanceUpdate { auto_restart_policy, boot_disk: boot_disk_nameorid.clone(), + min_cpu_platform: None, ncpus: instance.ncpus, memory: ByteCount::try_from(MAX_MEMORY_BYTES_PER_INSTANCE - 1) .unwrap(), @@ -4861,6 +4920,7 @@ async fn test_size_can_be_changed(cptestctx: &ControlPlaneTestContext) { params::InstanceUpdate { auto_restart_policy, boot_disk: boot_disk_nameorid.clone(), + min_cpu_platform: None, ncpus: instance.ncpus, memory: ByteCount::from_mebibytes_u32( (max_mib + 1024).try_into().unwrap(), @@ -4883,6 +4943,7 @@ async fn test_size_can_be_changed(cptestctx: &ControlPlaneTestContext) { params::InstanceUpdate { auto_restart_policy, boot_disk: boot_disk_nameorid.clone(), + min_cpu_platform: None, ncpus: new_ncpus, memory: 
new_memory, }, @@ -4914,6 +4975,7 @@ async fn test_auto_restart_policy_can_be_changed( network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], boot_disk: None, + min_cpu_platform: None, disks: Vec::new(), start: true, // Start out with None @@ -4943,6 +5005,7 @@ async fn test_auto_restart_policy_can_be_changed( dbg!(params::InstanceUpdate { auto_restart_policy, boot_disk: None, + min_cpu_platform: None, ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), }), @@ -4964,6 +5027,78 @@ async fn test_auto_restart_policy_can_be_changed( assert_reconfigured(None).await; } +// Test reconfiguring an instance's minimum CPU platform. +#[nexus_test] +async fn test_min_cpu_platform_can_be_changed( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let instance_name = "milan-is-enough-for-anyone"; + + create_project_and_pool(&client).await; + + let instance_params = params::InstanceCreate { + identity: IdentityMetadataCreateParams { + name: instance_name.parse().unwrap(), + description: String::from("stuff"), + }, + ncpus: InstanceCpuCount::try_from(2).unwrap(), + memory: ByteCount::from_gibibytes_u32(4), + hostname: instance_name.parse().unwrap(), + user_data: vec![], + ssh_public_keys: None, + network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, + external_ips: vec![], + boot_disk: None, + // Start out with None + min_cpu_platform: None, + disks: Vec::new(), + start: false, + auto_restart_policy: None, + anti_affinity_groups: Vec::new(), + }; + + let builder = + RequestBuilder::new(client, http::Method::POST, &get_instances_url()) + .body(Some(&instance_params)) + .expect_status(Some(http::StatusCode::CREATED)); + let response = NexusRequest::new(builder) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Expected instance creation to work!"); + + let instance = response.parsed_body::().unwrap(); + + // Starts out as None. + assert_eq!(instance.min_cpu_platform, None); + + let assert_reconfigured = |min_cpu_platform| async move { + let instance = expect_instance_reconfigure_ok( + client, + &instance.identity.id, + dbg!(params::InstanceUpdate { + auto_restart_policy: None, + boot_disk: None, + min_cpu_platform, + ncpus: InstanceCpuCount::try_from(2).unwrap(), + memory: ByteCount::from_gibibytes_u32(4), + }), + ) + .await; + assert_eq!(dbg!(instance).min_cpu_platform, min_cpu_platform,); + }; + + // Reconfigure to Milan. + assert_reconfigured(Some(InstanceMinimumCpuPlatform::AmdMilan)).await; + + // Reconfigure to Turin (even though we have no Turin in the test env!) + assert_reconfigured(Some(InstanceMinimumCpuPlatform::AmdTurin)).await; + + // Reconfigure back to None. + assert_reconfigured(None).await; +} + // Create an instance with boot disk set to one of its attached disks, then set // it to the other disk. 
#[nexus_test] @@ -5012,6 +5147,7 @@ async fn test_boot_disk_can_be_changed(cptestctx: &ControlPlaneTestContext) { name: Name::try_from(String::from("probablydata1")).unwrap(), }, )], + min_cpu_platform: None, start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -5039,6 +5175,7 @@ async fn test_boot_disk_can_be_changed(cptestctx: &ControlPlaneTestContext) { params::InstanceUpdate { boot_disk: Some(disks[1].identity.id.into()), auto_restart_policy: None, + min_cpu_platform: None, ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), }, @@ -5082,6 +5219,7 @@ async fn test_boot_disk_must_be_attached(cptestctx: &ControlPlaneTestContext) { external_ips: vec![], disks: vec![], boot_disk: None, + min_cpu_platform: None, start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -5106,6 +5244,7 @@ async fn test_boot_disk_must_be_attached(cptestctx: &ControlPlaneTestContext) { params::InstanceUpdate { boot_disk: Some(disks[0].identity.id.into()), auto_restart_policy: None, + min_cpu_platform: None, ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), }, @@ -5139,6 +5278,7 @@ async fn test_boot_disk_must_be_attached(cptestctx: &ControlPlaneTestContext) { params::InstanceUpdate { boot_disk: Some(disks[0].identity.id.into()), auto_restart_policy: None, + min_cpu_platform: None, ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), }, @@ -5174,6 +5314,7 @@ async fn test_instances_memory_rejected_less_than_min_memory_size( external_ips: vec![], disks: vec![], boot_disk: None, + min_cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -5227,6 +5368,7 @@ async fn test_instances_memory_not_divisible_by_min_memory_size( external_ips: vec![], disks: vec![], boot_disk: None, + min_cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -5280,6 +5422,7 @@ async fn test_instances_memory_greater_than_max_size( external_ips: vec![], disks: vec![], boot_disk: None, + min_cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -5387,6 +5530,7 @@ async fn test_instance_create_with_anti_affinity_groups( external_ips: vec![], disks: vec![], boot_disk: None, + min_cpu_platform: None, auto_restart_policy: Default::default(), anti_affinity_groups: anti_affinity_groups_param, }; @@ -5456,6 +5600,7 @@ async fn test_instance_create_with_duplicate_anti_affinity_groups( external_ips: vec![], disks: vec![], boot_disk: None, + min_cpu_platform: None, auto_restart_policy: Default::default(), anti_affinity_groups: anti_affinity_groups_param, }; @@ -5526,6 +5671,7 @@ async fn test_instance_create_with_anti_affinity_groups_that_do_not_exist( external_ips: vec![], disks: vec![], boot_disk: None, + min_cpu_platform: None, auto_restart_policy: Default::default(), anti_affinity_groups: anti_affinity_groups_param, }; @@ -5609,6 +5755,7 @@ async fn test_instance_create_with_ssh_keys( external_ips: vec![], disks: vec![], boot_disk: None, + min_cpu_platform: None, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), }; @@ -5658,6 +5805,7 @@ async fn test_instance_create_with_ssh_keys( external_ips: vec![], disks: vec![], boot_disk: None, + min_cpu_platform: None, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), }; @@ -5706,6 +5854,7 @@ async fn 
test_instance_create_with_ssh_keys( external_ips: vec![], disks: vec![], boot_disk: None, + min_cpu_platform: None, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), }; @@ -5772,6 +5921,24 @@ async fn expect_instance_start_ok( .expect("Expected instance start to succeed with 202 Accepted"); } +async fn expect_instance_stop_ok( + client: &ClientTestContext, + instance_name: &str, +) { + let builder = RequestBuilder::new( + client, + http::Method::POST, + &get_instance_stop_url(instance_name), + ) + .expect_status(Some(http::StatusCode::ACCEPTED)); + + NexusRequest::new(builder) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Expected instance stop to succeed with 202 Accepted"); +} + async fn expect_instance_creation_ok( client: &ClientTestContext, url_instances: &str, @@ -5829,6 +5996,7 @@ async fn test_cannot_provision_instance_beyond_cpu_capacity( external_ips: vec![], disks: vec![], boot_disk: None, + min_cpu_platform: None, start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -5888,6 +6056,7 @@ async fn test_cannot_provision_instance_beyond_cpu_limit( external_ips: vec![], disks: vec![], boot_disk: None, + min_cpu_platform: None, start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -5944,6 +6113,7 @@ async fn test_cannot_provision_instance_beyond_ram_capacity( external_ips: vec![], disks: vec![], boot_disk: None, + min_cpu_platform: None, start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -5976,6 +6146,205 @@ async fn test_cannot_provision_instance_beyond_ram_capacity( expect_instance_start_ok(client, configs[2].0).await; } +#[nexus_test] +async fn test_can_start_instance_with_min_cpu_platform( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + create_project_and_pool(client).await; + + let name1 = Name::try_from(String::from("test")).unwrap(); + let instance_params = params::InstanceCreate { + identity: IdentityMetadataCreateParams { + name: name1.clone(), + description: String::from("probably serving data"), + }, + ncpus: InstanceCpuCount::try_from(1).unwrap(), + memory: ByteCount::from_gibibytes_u32(4), + hostname: "test".parse().unwrap(), + user_data: vec![], + ssh_public_keys: None, + network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, + external_ips: vec![], + disks: vec![], + boot_disk: None, + // Note that we're actually setting min_cpu_platform this time! + min_cpu_platform: Some(InstanceMinimumCpuPlatform::AmdMilan), + start: false, + auto_restart_policy: Default::default(), + anti_affinity_groups: Vec::new(), + }; + let url_instances = get_instances_url(); + + let builder = + RequestBuilder::new(client, http::Method::POST, &url_instances) + .body(Some(&instance_params)) + .expect_status(Some(http::StatusCode::CREATED)); + + let response = NexusRequest::new(builder) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Expected instance creation to succeed."); + + let instance = response.parsed_body::().unwrap(); + let instance_id = InstanceUuid::from_untyped_uuid(instance.identity.id); + // Now that the instance is created, lets try to start it. 
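(Aside, not part of the patch: the behavior this test depends on is the compatibility relation between an instance's minimum CPU platform and a sled's reported CPU family. The real mapping lives in compatible_sled_cpu_families(), which the start saga calls earlier in this series but whose body is not shown here; the sketch below uses illustrative stand-in types to capture the relation the tests exercise, namely that a minimum platform admits sleds of that generation or a newer, still-compatible one.)

    // Illustrative stand-ins only; the real types live in nexus-db-model.
    #[derive(Clone, Copy, PartialEq, Eq)]
    enum SledCpuFamily {
        AmdMilan,
        AmdTurin,
    }

    #[derive(Clone, Copy)]
    enum MinCpuPlatform {
        AmdMilan,
        AmdTurin,
    }

    /// Sled CPU families that can satisfy a given minimum platform: the named
    /// generation or anything newer in the same lineage.
    fn compatible_families(min: MinCpuPlatform) -> &'static [SledCpuFamily] {
        match min {
            MinCpuPlatform::AmdMilan => {
                &[SledCpuFamily::AmdMilan, SledCpuFamily::AmdTurin]
            }
            MinCpuPlatform::AmdTurin => &[SledCpuFamily::AmdTurin],
        }
    }

    fn can_host(sled: SledCpuFamily, min: MinCpuPlatform) -> bool {
        compatible_families(min).contains(&sled)
    }

    fn main() {
        // A Milan minimum is satisfiable by the existing simulated sled (the
        // start below succeeds) and would also be satisfied by a Turin sled.
        assert!(can_host(SledCpuFamily::AmdMilan, MinCpuPlatform::AmdMilan));
        assert!(can_host(SledCpuFamily::AmdTurin, MinCpuPlatform::AmdMilan));
        // A Turin minimum is not satisfiable until a Turin sled is added,
        // which is exactly what the rest of this test goes on to do.
        assert!(!can_host(SledCpuFamily::AmdMilan, MinCpuPlatform::AmdTurin));
    }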
+ + let nexus = &cptestctx.server.server_context().nexus; + + expect_instance_start_ok(client, instance.identity.name.as_str()).await; + instance_simulate(nexus, &instance_id).await; + instance_wait_for_state(client, instance_id, InstanceState::Running).await; + + // Great, now let's update the instance to require Turin and start it again. + // This will fail because there is no Turin in our simulated environment + // (yet!) + expect_instance_stop_ok(client, instance.identity.name.as_str()).await; + instance_simulate(nexus, &instance_id).await; + instance_wait_for_state(client, instance_id, InstanceState::Stopped).await; + + let instance = expect_instance_reconfigure_ok( + &client, + &instance.identity.id, + params::InstanceUpdate { + boot_disk: None, + auto_restart_policy: None, + min_cpu_platform: Some(InstanceMinimumCpuPlatform::AmdTurin), + ncpus: InstanceCpuCount::try_from(1).unwrap(), + memory: ByteCount::from_gibibytes_u32(4), + }, + ) + .await; + + expect_instance_start_fail_507(client, instance.identity.name.as_str()) + .await; + + // We'd like to see the instance actually start, so add a Turin sled and try again. + + // There should be one sled from `#[nexus_test]`, check that first. + let sleds_url = "/v1/system/hardware/sleds"; + assert_eq!( + objects_list_page_authz::(&client, &sleds_url).await.items.len(), + 1 + ); + + let nexus_address = + cptestctx.server.get_http_server_internal_address().await; + + let fake_turin_sled_id = SledUuid::new_v4(); + let turin_sled_agent_log = cptestctx + .logctx + .log + .new(o!( "sled_id" => fake_turin_sled_id.to_string() )); + + let config = omicron_sled_agent::sim::Config::for_testing( + fake_turin_sled_id, + omicron_sled_agent::sim::SimMode::Explicit, + Some(nexus_address), + Some(&camino::Utf8Path::new("/an/unused/update/directory")), + omicron_sled_agent::sim::ZpoolConfig::None, + nexus_client::types::SledCpuFamily::AmdTurin, + ); + + // We have to hold on to the new simulated sled-agent otherwise it will be immediately dropped + // and shut down. + let _agent = start_sled_agent_with_config( + turin_sled_agent_log, + &config, + 3, + &cptestctx.first_sled_agent().simulated_upstairs, + ) + .await + .expect("can start test sled-agent"); + + // Wait for Nexus to report that the new sled is present.. + poll::wait_for_condition( + || async { + let items = objects_list_page_authz::(&client, &sleds_url) + .await + .items; + + if items.len() == 2 { + Ok(()) + } else { + Err(CondCheckError::<()>::NotYet) + } + }, + &Duration::from_secs(5), + &Duration::from_secs(60), + ) + .await + .unwrap(); + + // Finally, start the Turin-requiring instance for real! + expect_instance_start_ok(client, instance.identity.name.as_str()).await; + + // The VMM should specifically be on our new fake Turin sled. 
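(Aside, not part of the patch: when reading this test together with cpuid_from_vmm_cpu_platform earlier in this series, note that at this point the AmdMilan and AmdTurin VMM platforms are both handed the same MILAN_CPUID template, so requiring Turin constrains placement without yet exposing Turin-only CPU features to the guest. As a standalone sanity check of that table, leaf 0's EBX/EDX/ECX words decode to the AMD vendor string:)

    fn main() {
        // Register values from the MILAN_CPUID entry for leaf 0x0 above, in
        // the macro's (eax, ebx, ecx, edx) order.
        let (eax, ebx, ecx, edx): (u32, u32, u32, u32) =
            (0x0000000D, 0x68747541, 0x444D4163, 0x69746E65);

        // CPUID leaf 0 reports the vendor string in EBX, EDX, ECX order,
        // little-endian within each register.
        let mut vendor = Vec::new();
        for reg in [ebx, edx, ecx] {
            vendor.extend_from_slice(&reg.to_le_bytes());
        }
        assert_eq!(String::from_utf8(vendor).unwrap(), "AuthenticAMD");

        // EAX is the highest standard leaf the template exposes (0xD, the
        // XSAVE leaves, matching the last standard entries in the table).
        assert_eq!(eax, 0xD);
    }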
+ let instance_sled = nexus + .active_instance_info(&instance_id, None) + .await + .unwrap() + .expect("running instance should have a sled") + .sled_id; + + assert_eq!(instance_sled, fake_turin_sled_id); +} + +#[nexus_test] +async fn test_cannot_start_instance_with_unsatisfiable_min_cpu( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + create_project_and_pool(client).await; + + let name1 = Name::try_from(String::from("test")).unwrap(); + let instance_params = params::InstanceCreate { + identity: IdentityMetadataCreateParams { + name: name1.clone(), + description: String::from("probably serving data"), + }, + ncpus: InstanceCpuCount::try_from(1).unwrap(), + memory: ByteCount::from_gibibytes_u32(4), + hostname: "test".parse().unwrap(), + user_data: vec![], + ssh_public_keys: None, + network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, + external_ips: vec![], + disks: vec![], + boot_disk: None, + // Require Turin to start the instance, but there are no Turin sleds in + // our fake environment. Creating this instance should succeed, but + // starting it won't. + min_cpu_platform: Some(InstanceMinimumCpuPlatform::AmdTurin), + start: false, + auto_restart_policy: Default::default(), + anti_affinity_groups: Vec::new(), + }; + let url_instances = get_instances_url(); + + let builder = + RequestBuilder::new(client, http::Method::POST, &url_instances) + .body(Some(&instance_params)) + .expect_status(Some(http::StatusCode::CREATED)); + + let _response = NexusRequest::new(builder) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Expected instance creation to succeed."); + + // Starting the instance, which should fail because we can't pick a sled + // that satisfies the instance's requirements. 
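(Aside, not part of the patch: the 507 below comes from the sled reservation step, not from API-layer validation. With a Turin-only constraint and no Turin sleds, the sled_targets CTE in sled_find_targets_query at the top of this section matches no rows, so no reservation can be made. A hypothetical companion expectorate case for that query shape, modeled on the AmdMilan test shown earlier and living in the same test module, might look like the sketch below; its referenced output file would need to be generated first.)

    #[tokio::test]
    async fn expectorate_sled_find_targets_query_with_turin_cpu() {
        let id = InstanceUuid::nil();
        let resources = Resources::new(
            0,
            model::ByteCount::from(external::ByteCount::from_gibibytes_u32(0)),
            model::ByteCount::from(external::ByteCount::from_gibibytes_u32(0)),
        );

        // Constrain the query to Turin-family sleds only.
        let query = sled_find_targets_query(
            id,
            &resources,
            Some(&[SledCpuFamily::AmdTurin]),
        );
        expectorate_query_contents(
            &query,
            "tests/output/sled_find_targets_query_with_turin_cpu.sql",
        )
        .await;
    }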
+ + expect_instance_start_fail( + client, + name1.as_str(), + http::StatusCode::INSUFFICIENT_STORAGE, + ) + .await; +} + #[nexus_test] async fn test_instance_serial(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; @@ -6244,6 +6613,7 @@ async fn test_instance_ephemeral_ip_from_correct_pool( ssh_public_keys: None, disks: vec![], boot_disk: None, + min_cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -6314,6 +6684,7 @@ async fn test_instance_ephemeral_ip_from_orphan_pool( ssh_public_keys: None, disks: vec![], boot_disk: None, + min_cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -6378,6 +6749,7 @@ async fn test_instance_ephemeral_ip_no_default_pool_error( ssh_public_keys: None, disks: vec![], boot_disk: None, + min_cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -6516,6 +6888,7 @@ async fn test_instance_allow_only_one_ephemeral_ip( external_ips: vec![ephemeral_create.clone(), ephemeral_create], disks: vec![], boot_disk: None, + min_cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -6650,6 +7023,7 @@ async fn test_instance_create_in_silo(cptestctx: &ControlPlaneTestContext) { }], disks: vec![], boot_disk: None, + min_cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -6697,7 +7071,7 @@ async fn test_instance_create_in_silo(cptestctx: &ControlPlaneTestContext) { RequestBuilder::new( client, Method::POST, - &format!("/v1/instances/{}/stop", instance.identity.id), + &get_instance_stop_url(instance.identity.name.as_str()), ) .body(None as Option<&serde_json::Value>) .expect_status(Some(StatusCode::ACCEPTED)), diff --git a/nexus/tests/integration_tests/projects.rs b/nexus/tests/integration_tests/projects.rs index c76a2628a9a..9e41cbd02e4 100644 --- a/nexus/tests/integration_tests/projects.rs +++ b/nexus/tests/integration_tests/projects.rs @@ -169,6 +169,7 @@ async fn test_project_deletion_with_instance( external_ips: vec![], disks: vec![], boot_disk: None, + min_cpu_platform: None, start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), diff --git a/nexus/tests/integration_tests/quotas.rs b/nexus/tests/integration_tests/quotas.rs index d665d75027f..5f1d22eea4f 100644 --- a/nexus/tests/integration_tests/quotas.rs +++ b/nexus/tests/integration_tests/quotas.rs @@ -88,6 +88,7 @@ impl ResourceAllocator { external_ips: Vec::::new(), disks: Vec::::new(), boot_disk: None, + min_cpu_platform: None, start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), diff --git a/nexus/tests/integration_tests/schema.rs b/nexus/tests/integration_tests/schema.rs index 9111e6a0949..76546ca4de5 100644 --- a/nexus/tests/integration_tests/schema.rs +++ b/nexus/tests/integration_tests/schema.rs @@ -1391,6 +1391,7 @@ fn at_current_101_0_0<'a>(ctx: &'a MigrationContext<'a>) -> BoxFuture<'a, ()> { params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], boot_disk: None, + min_cpu_platform: None, disks: Vec::new(), start: false, auto_restart_policy: Default::default(), diff --git a/nexus/tests/integration_tests/snapshots.rs b/nexus/tests/integration_tests/snapshots.rs index 6a18d13a3ca..506140451e4 100644 --- a/nexus/tests/integration_tests/snapshots.rs +++ b/nexus/tests/integration_tests/snapshots.rs @@ -145,6 
+145,7 @@ async fn test_snapshot_basic(cptestctx: &ControlPlaneTestContext) { boot_disk: Some(params::InstanceDiskAttachment::Attach( params::InstanceDiskAttach { name: base_disk_name.clone() }, )), + min_cpu_platform: None, disks: Vec::new(), external_ips: vec![], start: true, @@ -351,6 +352,7 @@ async fn test_snapshot_stopped_instance(cptestctx: &ControlPlaneTestContext) { boot_disk: Some(params::InstanceDiskAttachment::Attach( params::InstanceDiskAttach { name: base_disk_name.clone() }, )), + min_cpu_platform: None, disks: Vec::new(), external_ips: vec![], start: false, diff --git a/nexus/tests/integration_tests/subnet_allocation.rs b/nexus/tests/integration_tests/subnet_allocation.rs index 4de3bd04500..ae1ff51e98e 100644 --- a/nexus/tests/integration_tests/subnet_allocation.rs +++ b/nexus/tests/integration_tests/subnet_allocation.rs @@ -63,6 +63,7 @@ async fn create_instance_expect_failure( external_ips: vec![], disks: vec![], boot_disk: None, + min_cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), diff --git a/nexus/tests/integration_tests/utilization.rs b/nexus/tests/integration_tests/utilization.rs index 762a0eb5303..44006016d28 100644 --- a/nexus/tests/integration_tests/utilization.rs +++ b/nexus/tests/integration_tests/utilization.rs @@ -231,6 +231,7 @@ async fn create_resources_in_test_suite_silo(client: &ClientTestContext) { external_ips: vec![], disks: vec![], boot_disk: None, + min_cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index 333a8e73cbe..4f106e58ac2 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -13,8 +13,8 @@ use omicron_common::api::external::{ AddressLotKind, AffinityPolicy, AllowedSourceIps, BfdMode, BgpPeer, ByteCount, FailureDomain, Hostname, IdentityMetadataCreateParams, IdentityMetadataUpdateParams, InstanceAutoRestartPolicy, InstanceCpuCount, - LinkFec, LinkSpeed, Name, NameOrId, Nullable, PaginationOrder, - RouteDestination, RouteTarget, UserId, + InstanceMinimumCpuPlatform, LinkFec, LinkSpeed, Name, NameOrId, Nullable, + PaginationOrder, RouteDestination, RouteTarget, UserId, }; use omicron_common::disk::DiskVariant; use oxnet::{IpNet, Ipv4Net, Ipv6Net}; @@ -1263,6 +1263,11 @@ pub struct InstanceCreate { /// Anti-Affinity groups which this instance should be added. #[serde(default)] pub anti_affinity_groups: Vec, + + /// The minimum required CPU platform for this instance. If this is `null`, + /// the instance requires no particular CPU platform. + #[serde(default)] + pub min_cpu_platform: Option, } /// Parameters of an `Instance` that can be reconfigured after creation. @@ -1294,6 +1299,10 @@ pub struct InstanceUpdate { /// In that case, any configured default policy will be used if this is /// `null`. pub auto_restart_policy: Option, + + /// The minimum required CPU platform for this instance. If this is `null`, + /// the instance requires no particular CPU platform. + pub min_cpu_platform: Option, } #[inline] diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index f3954ea20a2..385df3257e0 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -4792,6 +4792,15 @@ } ] }, + "min_cpu_platform": { + "nullable": true, + "description": "The minimum required CPU platform for this instance. 
If this is `null`, the instance requires no particular CPU platform.", + "allOf": [ + { + "$ref": "#/components/schemas/InstanceMinimumCpuPlatform" + } + ] + }, "name": { "description": "unique, mutable, user-controlled identifier for each resource", "allOf": [ @@ -4891,6 +4900,25 @@ "dst_sled_id" ] }, + "InstanceMinimumCpuPlatform": { + "description": "A minimum required CPU platform for an instance.\n\nWhen an instance specifies a minimum required CPU platform:\n\n- The system may expose (to the VM) new CPU features that are only present on that platform (or on newer platforms of the same lineage that also support those features). - The instance must run on hosts that have CPUs that support all the features of the supplied minimum platform.\n\nThat is, the instance is restricted to hosts that have the specified minimum host CPU type (or a more advanced, but still compatible, CPU), but in exchange the CPU features exposed by the minimum platform are available for the guest to use. Note that this may prevent an instance from starting (if the hosts it requires are full but there is capacity on other incompatible hosts).\n\nIf an instance does not specify a minimum required CPU platform, then when it starts, the control plane selects a host for the instance and then supplies the guest with the \"minimum\" CPU platform supported by that host. This maximizes the number of hosts that can run the VM if it later needs to migrate to another host.", + "oneOf": [ + { + "description": "An AMD Zen 3-compatible CPU platform.", + "type": "string", + "enum": [ + "amd_milan" + ] + }, + { + "description": "An AMD Zen 5-compatible CPU platform.", + "type": "string", + "enum": [ + "amd_turin" + ] + } + ] + }, "InstanceState": { "description": "Running state of an Instance (primarily: booted or stopped)\n\nThis typically reflects whether it's starting, running, stopping, or stopped, but also includes states related to the Instance's lifecycle", "oneOf": [ diff --git a/openapi/nexus.json b/openapi/nexus.json index bd91bcc6534..7f95f43c761 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -19845,6 +19845,15 @@ } ] }, + "min_cpu_platform": { + "nullable": true, + "description": "The minimum required CPU platform for this instance. If this is `null`, the instance requires no particular CPU platform.", + "allOf": [ + { + "$ref": "#/components/schemas/InstanceMinimumCpuPlatform" + } + ] + }, "name": { "description": "unique, mutable, user-controlled identifier for each resource", "allOf": [ @@ -19997,6 +20006,16 @@ } ] }, + "min_cpu_platform": { + "nullable": true, + "description": "The minimum required CPU platform for this instance. If this is `null`, the instance requires no particular CPU platform.", + "default": null, + "allOf": [ + { + "$ref": "#/components/schemas/InstanceMinimumCpuPlatform" + } + ] + }, "name": { "$ref": "#/components/schemas/Name" }, @@ -20117,6 +20136,25 @@ } ] }, + "InstanceMinimumCpuPlatform": { + "description": "A minimum required CPU platform for an instance.\n\nWhen an instance specifies a minimum required CPU platform:\n\n- The system may expose (to the VM) new CPU features that are only present on that platform (or on newer platforms of the same lineage that also support those features). 
- The instance must run on hosts that have CPUs that support all the features of the supplied minimum platform.\n\nThat is, the instance is restricted to hosts that have the specified minimum host CPU type (or a more advanced, but still compatible, CPU), but in exchange the CPU features exposed by the minimum platform are available for the guest to use. Note that this may prevent an instance from starting (if the hosts it requires are full but there is capacity on other incompatible hosts).\n\nIf an instance does not specify a minimum required CPU platform, then when it starts, the control plane selects a host for the instance and then supplies the guest with the \"minimum\" CPU platform supported by that host. This maximizes the number of hosts that can run the VM if it later needs to migrate to another host.", + "oneOf": [ + { + "description": "An AMD Zen 3-compatible CPU platform.", + "type": "string", + "enum": [ + "amd_milan" + ] + }, + { + "description": "An AMD Zen 5-compatible CPU platform.", + "type": "string", + "enum": [ + "amd_turin" + ] + } + ] + }, "InstanceNetworkInterface": { "description": "An `InstanceNetworkInterface` represents a virtual network interface device attached to an instance.", "type": "object", @@ -20503,6 +20541,15 @@ } ] }, + "min_cpu_platform": { + "nullable": true, + "description": "The minimum required CPU platform for this instance. If this is `null`, the instance requires no particular CPU platform.", + "allOf": [ + { + "$ref": "#/components/schemas/InstanceMinimumCpuPlatform" + } + ] + }, "ncpus": { "description": "The number of CPUs to assign to this instance.", "allOf": [ diff --git a/schema/crdb/add-instance-minimum-cpu-platform/up01.sql b/schema/crdb/add-instance-minimum-cpu-platform/up01.sql new file mode 100644 index 00000000000..d0b5098fe8c --- /dev/null +++ b/schema/crdb/add-instance-minimum-cpu-platform/up01.sql @@ -0,0 +1,4 @@ +CREATE TYPE IF NOT EXISTS omicron.public.instance_min_cpu_platform AS ENUM ( + 'amd_milan', + 'amd_turin' +); diff --git a/schema/crdb/add-instance-minimum-cpu-platform/up02.sql b/schema/crdb/add-instance-minimum-cpu-platform/up02.sql new file mode 100644 index 00000000000..1fddfdb2af8 --- /dev/null +++ b/schema/crdb/add-instance-minimum-cpu-platform/up02.sql @@ -0,0 +1,5 @@ +CREATE TYPE IF NOT EXISTS omicron.public.vmm_cpu_platform AS ENUM ( + 'sled_default', + 'amd_milan', + 'amd_turin' +); diff --git a/schema/crdb/add-instance-minimum-cpu-platform/up03.sql b/schema/crdb/add-instance-minimum-cpu-platform/up03.sql new file mode 100644 index 00000000000..c3c39120712 --- /dev/null +++ b/schema/crdb/add-instance-minimum-cpu-platform/up03.sql @@ -0,0 +1,2 @@ +ALTER TABLE omicron.public.instance + ADD COLUMN IF NOT EXISTS min_cpu_platform omicron.public.instance_min_cpu_platform; diff --git a/schema/crdb/add-instance-minimum-cpu-platform/up04.sql b/schema/crdb/add-instance-minimum-cpu-platform/up04.sql new file mode 100644 index 00000000000..3b8c39aaf53 --- /dev/null +++ b/schema/crdb/add-instance-minimum-cpu-platform/up04.sql @@ -0,0 +1,3 @@ +ALTER TABLE omicron.public.vmm + ADD COLUMN IF NOT EXISTS cpu_platform omicron.public.vmm_cpu_platform + DEFAULT 'sled_default'; diff --git a/schema/crdb/add-instance-minimum-cpu-platform/up05.sql b/schema/crdb/add-instance-minimum-cpu-platform/up05.sql new file mode 100644 index 00000000000..4311f568747 --- /dev/null +++ b/schema/crdb/add-instance-minimum-cpu-platform/up05.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.vmm ALTER COLUMN cpu_platform DROP DEFAULT; diff --git 
a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 6eccc05b831..403d3887f5a 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -1194,9 +1194,14 @@ CREATE TYPE IF NOT EXISTS omicron.public.instance_auto_restart AS ENUM ( 'best_effort' ); +CREATE TYPE IF NOT EXISTS omicron.public.instance_min_cpu_platform AS ENUM ( + 'amd_milan', + 'amd_turin' +); + /* * Represents the *desired* state of an instance, as requested by the user. -*/ + */ CREATE TYPE IF NOT EXISTS omicron.public.instance_intended_state AS ENUM ( /* The instance should be running. */ 'running', @@ -1306,6 +1311,20 @@ CREATE TABLE IF NOT EXISTS omicron.public.instance ( */ intended_state omicron.public.instance_intended_state NOT NULL, + /* + * The minimum required CPU platform for this instance. If set, the + * instance's VMs may make use of all the CPU features supplied by their + * minimum platform, but in exchange they may only run on sleds whose + * CPUs support all of those features. + * + * If this is NULL, the control plane ignores CPU constraints when + * selecting a sled for this instance. Then, once it has selected a + * sled, it supplies a "lowest common denominator" CPU platform that + * is compatible with that sled to maximize the number of sleds the VM + * can migrate to. + */ + min_cpu_platform omicron.public.instance_min_cpu_platform, + CONSTRAINT vmm_iff_active_propolis CHECK ( ((state = 'vmm') AND (active_propolis_id IS NOT NULL)) OR ((state != 'vmm') AND (active_propolis_id IS NULL)) @@ -5044,6 +5063,12 @@ CREATE INDEX IF NOT EXISTS lookup_anti_affinity_group_instance_membership_by_ins instance_id ); +CREATE TYPE IF NOT EXISTS omicron.public.vmm_cpu_platform AS ENUM ( + 'sled_default', + 'amd_milan', + 'amd_turin' +); + -- Per-VMM state. CREATE TABLE IF NOT EXISTS omicron.public.vmm ( id UUID PRIMARY KEY, @@ -5055,7 +5080,8 @@ CREATE TABLE IF NOT EXISTS omicron.public.vmm ( sled_id UUID NOT NULL, propolis_ip INET NOT NULL, propolis_port INT4 NOT NULL CHECK (propolis_port BETWEEN 0 AND 65535) DEFAULT 12400, - state omicron.public.vmm_state NOT NULL + state omicron.public.vmm_state NOT NULL, + cpu_platform omicron.public.vmm_cpu_platform ); CREATE INDEX IF NOT EXISTS lookup_vmms_by_sled_id ON omicron.public.vmm ( @@ -6367,7 +6393,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '174.0.0', NULL) + (TRUE, NOW(), NOW(), '175.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; From 38310383f0262be873e03afed46efdc50c190c35 Mon Sep 17 00:00:00 2001 From: iximeow Date: Mon, 28 Jul 2025 18:08:25 +0000 Subject: [PATCH 18/42] walk back "minimum"ness of CPU platforms --- common/src/api/external/mod.rs | 27 ++- end-to-end-tests/src/instance_launch.rs | 2 +- nexus/db-model/src/instance.rs | 10 +- ...u_platform.rs => instance_cpu_platform.rs} | 28 ++-- nexus/db-model/src/lib.rs | 4 +- nexus/db-model/src/schema_versions.rs | 2 +- nexus/db-model/src/vmm_cpu_platform.rs | 10 +- nexus/db-queries/src/db/datastore/instance.rs | 28 ++-- .../db-queries/src/db/datastore/migration.rs | 2 +- nexus/db-queries/src/db/datastore/sled.rs | 15 +- .../virtual_provisioning_collection.rs | 2 +- nexus/db-queries/src/db/datastore/vpc.rs | 2 +- .../src/db/pub_test_utils/helpers.rs | 2 +- .../db-queries/src/db/queries/external_ip.rs | 2 +- .../src/db/queries/network_interface.rs | 2 +- nexus/db-schema/src/enums.rs | 2 +- nexus/db-schema/src/schema.rs | 2 +- .../tasks/instance_reincarnation.rs | 2 +- nexus/src/app/instance.rs | 8 +- nexus/src/app/instance_platform.rs | 2 +- 
nexus/src/app/sagas/instance_create.rs | 2 +- nexus/src/app/sagas/instance_delete.rs | 2 +- nexus/src/app/sagas/instance_migrate.rs | 2 +- nexus/src/app/sagas/instance_start.rs | 19 +-- nexus/src/app/sagas/instance_update/mod.rs | 2 +- nexus/src/app/sagas/snapshot_create.rs | 2 +- nexus/test-utils/src/resource_helpers.rs | 2 +- nexus/tests/integration_tests/endpoints.rs | 6 +- nexus/tests/integration_tests/external_ips.rs | 2 +- nexus/tests/integration_tests/instances.rs | 156 +++++++++--------- nexus/tests/integration_tests/projects.rs | 2 +- nexus/tests/integration_tests/quotas.rs | 2 +- nexus/tests/integration_tests/schema.rs | 2 +- nexus/tests/integration_tests/snapshots.rs | 4 +- .../integration_tests/subnet_allocation.rs | 2 +- nexus/tests/integration_tests/utilization.rs | 2 +- nexus/types/src/external_api/params.rs | 14 +- openapi/nexus-internal.json | 54 +++--- openapi/nexus.json | 90 +++++----- .../crdb/add-instance-cpu-platform/up01.sql | 4 + .../up02.sql | 0 .../crdb/add-instance-cpu-platform/up03.sql | 2 + .../up04.sql | 0 .../up05.sql | 0 .../up01.sql | 4 - .../up03.sql | 2 - schema/crdb/dbinit.sql | 24 ++- 47 files changed, 274 insertions(+), 283 deletions(-) rename nexus/db-model/src/{instance_minimum_cpu_platform.rs => instance_cpu_platform.rs} (61%) create mode 100644 schema/crdb/add-instance-cpu-platform/up01.sql rename schema/crdb/{add-instance-minimum-cpu-platform => add-instance-cpu-platform}/up02.sql (100%) create mode 100644 schema/crdb/add-instance-cpu-platform/up03.sql rename schema/crdb/{add-instance-minimum-cpu-platform => add-instance-cpu-platform}/up04.sql (100%) rename schema/crdb/{add-instance-minimum-cpu-platform => add-instance-cpu-platform}/up05.sql (100%) delete mode 100644 schema/crdb/add-instance-minimum-cpu-platform/up01.sql delete mode 100644 schema/crdb/add-instance-minimum-cpu-platform/up03.sql diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index 7867b1bee7a..610dd757297 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -1195,9 +1195,9 @@ pub struct Instance { #[serde(flatten)] pub auto_restart_status: InstanceAutoRestartStatus, - /// The minimum required CPU platform for this instance. If this is `null`, - /// the instance requires no particular CPU platform. - pub min_cpu_platform: Option, + /// The CPU platform for this instance. If this is `null`, the instance + /// requires no particular CPU platform. + pub cpu_platform: Option, } /// Status of control-plane driven automatic failure recovery for this instance. @@ -1262,24 +1262,23 @@ pub enum InstanceAutoRestartPolicy { BestEffort, } -/// A minimum required CPU platform for an instance. +/// A required CPU platform for an instance. /// -/// When an instance specifies a minimum required CPU platform: +/// When an instance specifies a required CPU platform: /// /// - The system may expose (to the VM) new CPU features that are only present /// on that platform (or on newer platforms of the same lineage that also /// support those features). /// - The instance must run on hosts that have CPUs that support all the -/// features of the supplied minimum platform. +/// features of the supplied platform. /// -/// That is, the instance is restricted to hosts that have the specified minimum -/// host CPU type (or a more advanced, but still compatible, CPU), but in -/// exchange the CPU features exposed by the minimum platform are available for -/// the guest to use. 
Note that this may prevent an instance from starting (if -/// the hosts it requires are full but there is capacity on other incompatible -/// hosts). +/// That is, the instance is restricted to hosts that have the CPUs which +/// support all features of the required platform, but in exchange the CPU +/// features exposed by the platform are available for the guest to use. Note +/// that this may prevent an instance from starting (if the hosts that could run +/// it are full but there is capacity on other incompatible hosts). /// -/// If an instance does not specify a minimum required CPU platform, then when +/// If an instance does not specify a required CPU platform, then when /// it starts, the control plane selects a host for the instance and then /// supplies the guest with the "minimum" CPU platform supported by that host. /// This maximizes the number of hosts that can run the VM if it later needs to @@ -1294,7 +1293,7 @@ pub enum InstanceAutoRestartPolicy { Copy, Clone, Debug, Deserialize, Serialize, JsonSchema, Eq, PartialEq, )] #[serde(rename_all = "snake_case")] -pub enum InstanceMinimumCpuPlatform { +pub enum InstanceCpuPlatform { /// An AMD Milan-like CPU platform. AmdMilan, diff --git a/end-to-end-tests/src/instance_launch.rs b/end-to-end-tests/src/instance_launch.rs index 1d9ffa5b6b9..7277adc08be 100644 --- a/end-to-end-tests/src/instance_launch.rs +++ b/end-to-end-tests/src/instance_launch.rs @@ -79,7 +79,7 @@ async fn instance_launch() -> Result<()> { start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), - min_cpu_platform: None, + cpu_platform: None, }) .send() .await?; diff --git a/nexus/db-model/src/instance.rs b/nexus/db-model/src/instance.rs index 8ccd7c6b01b..a224adfb530 100644 --- a/nexus/db-model/src/instance.rs +++ b/nexus/db-model/src/instance.rs @@ -5,7 +5,7 @@ use super::InstanceIntendedState as IntendedState; use super::{ ByteCount, Disk, ExternalIp, Generation, InstanceAutoRestartPolicy, - InstanceCpuCount, InstanceMinimumCpuPlatform, InstanceState, Vmm, VmmState, + InstanceCpuCount, InstanceCpuPlatform, InstanceState, Vmm, VmmState, }; use crate::collection::DatastoreAttachTargetConfig; use crate::serde_time_delta::optional_time_delta; @@ -68,8 +68,8 @@ pub struct Instance { #[diesel(column_name = boot_disk_id)] pub boot_disk_id: Option, - /// The instance's minimum required CPU platform. - pub min_cpu_platform: Option, + /// The instance's required CPU platform. + pub cpu_platform: Option, #[diesel(embed)] pub runtime_state: InstanceRuntimeState, @@ -142,7 +142,7 @@ impl Instance { // Intentionally ignore `params.boot_disk_id` here: we can't set // `boot_disk_id` until the referenced disk is attached. 
boot_disk_id: None, - min_cpu_platform: params.min_cpu_platform.map(Into::into), + cpu_platform: params.cpu_platform.map(Into::into), runtime_state, intended_state, @@ -498,5 +498,5 @@ pub struct InstanceUpdate { pub memory: ByteCount, - pub min_cpu_platform: Option, + pub cpu_platform: Option, } diff --git a/nexus/db-model/src/instance_minimum_cpu_platform.rs b/nexus/db-model/src/instance_cpu_platform.rs similarity index 61% rename from nexus/db-model/src/instance_minimum_cpu_platform.rs rename to nexus/db-model/src/instance_cpu_platform.rs index bb0b911d9a9..9e05ae4d329 100644 --- a/nexus/db-model/src/instance_minimum_cpu_platform.rs +++ b/nexus/db-model/src/instance_cpu_platform.rs @@ -8,7 +8,7 @@ use super::impl_enum_type; use serde::{Deserialize, Serialize}; impl_enum_type!( - InstanceMinimumCpuPlatformEnum: + InstanceCpuPlatformEnum: #[derive( Copy, @@ -20,15 +20,15 @@ impl_enum_type!( Serialize, Deserialize )] - pub enum InstanceMinimumCpuPlatform; + pub enum InstanceCpuPlatform; AmdMilan => b"amd_milan" AmdTurin => b"amd_turin" ); -impl InstanceMinimumCpuPlatform { +impl InstanceCpuPlatform { /// Returns a slice containing the set of sled CPU families that can - /// accommodate an instance with this minimum CPU platform. + /// accommodate an instance with this CPU platform. pub fn compatible_sled_cpu_families(&self) -> &'static [SledCpuFamily] { match self { // Turin-based sleds have a superset of the features made available @@ -41,13 +41,11 @@ impl InstanceMinimumCpuPlatform { } } -impl From - for InstanceMinimumCpuPlatform +impl From + for InstanceCpuPlatform { - fn from( - value: omicron_common::api::external::InstanceMinimumCpuPlatform, - ) -> Self { - use omicron_common::api::external::InstanceMinimumCpuPlatform as ApiPlatform; + fn from(value: omicron_common::api::external::InstanceCpuPlatform) -> Self { + use omicron_common::api::external::InstanceCpuPlatform as ApiPlatform; match value { ApiPlatform::AmdMilan => Self::AmdMilan, ApiPlatform::AmdTurin => Self::AmdTurin, @@ -55,13 +53,13 @@ impl From } } -impl From - for omicron_common::api::external::InstanceMinimumCpuPlatform +impl From + for omicron_common::api::external::InstanceCpuPlatform { - fn from(value: InstanceMinimumCpuPlatform) -> Self { + fn from(value: InstanceCpuPlatform) -> Self { match value { - InstanceMinimumCpuPlatform::AmdMilan => Self::AmdMilan, - InstanceMinimumCpuPlatform::AmdTurin => Self::AmdTurin, + InstanceCpuPlatform::AmdMilan => Self::AmdMilan, + InstanceCpuPlatform::AmdTurin => Self::AmdTurin, } } } diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index a1bbbc0db2a..06ffa0124f0 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -43,8 +43,8 @@ mod image; mod instance; mod instance_auto_restart_policy; mod instance_cpu_count; +mod instance_cpu_platform; mod instance_intended_state; -mod instance_minimum_cpu_platform; mod instance_state; mod internet_gateway; mod inventory; @@ -182,8 +182,8 @@ pub use image::*; pub use instance::*; pub use instance_auto_restart_policy::*; pub use instance_cpu_count::*; +pub use instance_cpu_platform::*; pub use instance_intended_state::*; -pub use instance_minimum_cpu_platform::*; pub use instance_state::*; pub use internet_gateway::*; pub use inventory::*; diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index b232c896542..ceb49e5f829 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -28,7 +28,7 @@ static KNOWN_VERSIONS: LazyLock> = 
LazyLock::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), - KnownVersion::new(175, "add-instance-minimum-cpu-platform"), + KnownVersion::new(175, "add-instance-cpu-platform"), KnownVersion::new(174, "sled-cpu-family"), KnownVersion::new(173, "inv-internal-dns"), KnownVersion::new(172, "add-zones-with-mupdate-override"), diff --git a/nexus/db-model/src/vmm_cpu_platform.rs b/nexus/db-model/src/vmm_cpu_platform.rs index 67e60863924..e177ee7d351 100644 --- a/nexus/db-model/src/vmm_cpu_platform.rs +++ b/nexus/db-model/src/vmm_cpu_platform.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use crate::{InstanceMinimumCpuPlatform, SledCpuFamily}; +use crate::{InstanceCpuPlatform, SledCpuFamily}; use super::impl_enum_type; use serde::{Deserialize, Serialize}; @@ -50,11 +50,11 @@ impl VmmCpuPlatform { } } -impl From for VmmCpuPlatform { - fn from(value: InstanceMinimumCpuPlatform) -> Self { +impl From for VmmCpuPlatform { + fn from(value: InstanceCpuPlatform) -> Self { match value { - InstanceMinimumCpuPlatform::AmdMilan => Self::AmdMilan, - InstanceMinimumCpuPlatform::AmdTurin => Self::AmdTurin, + InstanceCpuPlatform::AmdMilan => Self::AmdMilan, + InstanceCpuPlatform::AmdTurin => Self::AmdTurin, } } } diff --git a/nexus/db-queries/src/db/datastore/instance.rs b/nexus/db-queries/src/db/datastore/instance.rs index 9e0336b6c86..8b827d286ef 100644 --- a/nexus/db-queries/src/db/datastore/instance.rs +++ b/nexus/db-queries/src/db/datastore/instance.rs @@ -21,8 +21,8 @@ use crate::db::model::Instance; use crate::db::model::InstanceAutoRestart; use crate::db::model::InstanceAutoRestartPolicy; use crate::db::model::InstanceCpuCount; +use crate::db::model::InstanceCpuPlatform; use crate::db::model::InstanceIntendedState; -use crate::db::model::InstanceMinimumCpuPlatform; use crate::db::model::InstanceRuntimeState; use crate::db::model::InstanceState; use crate::db::model::InstanceUpdate; @@ -266,7 +266,7 @@ impl From for external::Instance { .parse() .expect("found invalid hostname in the database"), boot_disk_id: value.instance.boot_disk_id, - min_cpu_platform: value.instance.min_cpu_platform.map(Into::into), + cpu_platform: value.instance.cpu_platform.map(Into::into), runtime: external::InstanceRuntimeState { run_state: value.effective_state(), time_run_state_updated, @@ -1098,7 +1098,7 @@ impl DataStore { auto_restart_policy, ncpus, memory, - min_cpu_platform, + cpu_platform, } = update.clone(); async move { // Set the auto-restart policy. 
@@ -1118,7 +1118,7 @@ impl DataStore { &authz_instance, ncpus, memory, - min_cpu_platform, + cpu_platform, ) .await?; @@ -1302,7 +1302,7 @@ impl DataStore { authz_instance: &authz::Instance, ncpus: InstanceCpuCount, memory: ByteCount, - min_cpu_platform: Option, + cpu_platform: Option, ) -> Result<(), diesel::result::Error> { use nexus_db_schema::schema::instance::dsl as instance_dsl; @@ -1314,20 +1314,20 @@ impl DataStore { .eq_any(InstanceState::NOT_INCARNATED_STATES), ); - let query = if min_cpu_platform.is_some() { + let query = if cpu_platform.is_some() { query.filter( instance_dsl::ncpus .ne(ncpus) .or(instance_dsl::memory.ne(memory)) - .or(instance_dsl::min_cpu_platform.ne(min_cpu_platform)) - .or(instance_dsl::min_cpu_platform.is_null()), + .or(instance_dsl::cpu_platform.ne(cpu_platform)) + .or(instance_dsl::cpu_platform.is_null()), ) } else { query.filter( instance_dsl::ncpus .ne(ncpus) .or(instance_dsl::memory.ne(memory)) - .or(instance_dsl::min_cpu_platform.is_not_null()), + .or(instance_dsl::cpu_platform.is_not_null()), ) }; @@ -1335,15 +1335,15 @@ impl DataStore { .set(( instance_dsl::ncpus.eq(ncpus), instance_dsl::memory.eq(memory), - instance_dsl::min_cpu_platform.eq(min_cpu_platform), + instance_dsl::cpu_platform.eq(cpu_platform), )) .check_if_exists::(authz_instance.id()) .execute_and_check(&conn) .await?; match r.status { UpdateStatus::NotUpdatedButExists => { - if (r.found.ncpus, r.found.memory, r.found.min_cpu_platform) - == (ncpus, memory, min_cpu_platform) + if (r.found.ncpus, r.found.memory, r.found.cpu_platform) + == (ncpus, memory, cpu_platform) { // Not updated, because the update is no change.. return Ok(()); @@ -1365,7 +1365,7 @@ impl DataStore { "instance_id" => %r.found.id(), "new ncpus" => ?ncpus, "new memory" => ?memory, - "new CPU platform" => ?min_cpu_platform, + "new CPU platform" => ?cpu_platform, ); return Err(err.bail(Error::internal_error( "unable to change instance CPU or memory", @@ -2255,7 +2255,7 @@ mod tests { external_ips: Vec::new(), disks: Vec::new(), boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, ssh_public_keys: None, start: false, auto_restart_policy: Default::default(), diff --git a/nexus/db-queries/src/db/datastore/migration.rs b/nexus/db-queries/src/db/datastore/migration.rs index adbcf8523e1..8981ab9bf35 100644 --- a/nexus/db-queries/src/db/datastore/migration.rs +++ b/nexus/db-queries/src/db/datastore/migration.rs @@ -235,7 +235,7 @@ mod tests { external_ips: Vec::new(), disks: Vec::new(), boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, ssh_public_keys: None, start: false, auto_restart_policy: Default::default(), diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs index 8a11f2294d6..1142a8c97c3 100644 --- a/nexus/db-queries/src/db/datastore/sled.rs +++ b/nexus/db-queries/src/db/datastore/sled.rs @@ -1088,7 +1088,7 @@ pub(in crate::db::datastore) mod test { use nexus_db_model::PhysicalDiskPolicy; use nexus_db_model::PhysicalDiskState; use nexus_db_model::{Generation, SledCpuFamily}; - use nexus_db_model::{InstanceMinimumCpuPlatform, PhysicalDisk}; + use nexus_db_model::{InstanceCpuPlatform, PhysicalDisk}; use nexus_types::identity::Asset; use nexus_types::identity::Resource; use omicron_common::api::external; @@ -1476,7 +1476,7 @@ pub(in crate::db::datastore) mod test { groups: Vec, force_onto_sled: Option, resources: db::model::Resources, - min_cpu_platform: Option, + cpu_platform: Option, } struct FindTargetsOutput { @@ -1493,7 +1493,7 @@ pub(in 
crate::db::datastore) mod test { groups: vec![], force_onto_sled: None, resources: small_resource_request(), - min_cpu_platform: None, + cpu_platform: None, } } @@ -1506,7 +1506,7 @@ pub(in crate::db::datastore) mod test { assert!(self.force_onto_sled.is_none()); let families = - self.min_cpu_platform.map(|p| p.compatible_sled_cpu_families()); + self.cpu_platform.map(|p| p.compatible_sled_cpu_families()); sled_find_targets_query(self.id, &self.resources, families) .get_results_async::<( @@ -2674,14 +2674,13 @@ pub(in crate::db::datastore) mod test { } let mut test_instance = Instance::new(); - for platform in [None, Some(InstanceMinimumCpuPlatform::AmdMilan)] { - test_instance.min_cpu_platform = platform; + for platform in [None, Some(InstanceCpuPlatform::AmdMilan)] { + test_instance.cpu_platform = platform; let possible_sleds = test_instance.find_targets(&datastore).await; assert_eq!(possible_sleds.len(), 4); } - test_instance.min_cpu_platform = - Some(InstanceMinimumCpuPlatform::AmdTurin); + test_instance.cpu_platform = Some(InstanceCpuPlatform::AmdTurin); let possible_sleds = test_instance.find_targets(&datastore).await; assert_eq!(possible_sleds.len(), 2); diff --git a/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs b/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs index e97adb0777c..d1df678ecc7 100644 --- a/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs +++ b/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs @@ -455,7 +455,7 @@ mod test { external_ips: Vec::new(), disks: Vec::new(), boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, ssh_public_keys: None, start: false, auto_restart_policy: Default::default(), diff --git a/nexus/db-queries/src/db/datastore/vpc.rs b/nexus/db-queries/src/db/datastore/vpc.rs index a6ebf16a00a..980cb54989c 100644 --- a/nexus/db-queries/src/db/datastore/vpc.rs +++ b/nexus/db-queries/src/db/datastore/vpc.rs @@ -3976,7 +3976,7 @@ mod tests { external_ips: vec![], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, ssh_public_keys: None, start: false, auto_restart_policy: Default::default(), diff --git a/nexus/db-queries/src/db/pub_test_utils/helpers.rs b/nexus/db-queries/src/db/pub_test_utils/helpers.rs index 131fb2f3722..67c54639643 100644 --- a/nexus/db-queries/src/db/pub_test_utils/helpers.rs +++ b/nexus/db-queries/src/db/pub_test_utils/helpers.rs @@ -236,7 +236,7 @@ pub async fn create_stopped_instance_record( external_ips: Vec::new(), disks: Vec::new(), boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, ssh_public_keys: None, start: false, auto_restart_policy: Default::default(), diff --git a/nexus/db-queries/src/db/queries/external_ip.rs b/nexus/db-queries/src/db/queries/external_ip.rs index 83f3fb667cc..5e11378c8e5 100644 --- a/nexus/db-queries/src/db/queries/external_ip.rs +++ b/nexus/db-queries/src/db/queries/external_ip.rs @@ -1003,7 +1003,7 @@ mod tests { external_ips: vec![], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), diff --git a/nexus/db-queries/src/db/queries/network_interface.rs b/nexus/db-queries/src/db/queries/network_interface.rs index 97d3202409b..58061f7c2ca 100644 --- a/nexus/db-queries/src/db/queries/network_interface.rs +++ b/nexus/db-queries/src/db/queries/network_interface.rs @@ -1893,7 +1893,7 @@ mod tests { external_ips: vec![], disks: vec![], boot_disk: None, - 
min_cpu_platform: None, + cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), diff --git a/nexus/db-schema/src/enums.rs b/nexus/db-schema/src/enums.rs index b563e926cfb..61ac665754d 100644 --- a/nexus/db-schema/src/enums.rs +++ b/nexus/db-schema/src/enums.rs @@ -46,7 +46,7 @@ define_enums! { IdentityProviderTypeEnum => "provider_type", IdentityTypeEnum => "identity_type", InstanceAutoRestartPolicyEnum => "instance_auto_restart", - InstanceMinimumCpuPlatformEnum => "instance_min_cpu_platform", + InstanceCpuPlatformEnum => "instance_cpu_platform", InstanceStateEnum => "instance_state_v2", InstanceIntendedStateEnum => "instance_intended_state", InvConfigReconcilerStatusKindEnum => "inv_config_reconciler_status_kind", diff --git a/nexus/db-schema/src/schema.rs b/nexus/db-schema/src/schema.rs index 9f9b4b6d6e1..1db30d49d10 100644 --- a/nexus/db-schema/src/schema.rs +++ b/nexus/db-schema/src/schema.rs @@ -424,7 +424,7 @@ table! { auto_restart_policy -> Nullable, auto_restart_cooldown -> Nullable, boot_disk_id -> Nullable, - min_cpu_platform -> Nullable, + cpu_platform -> Nullable, time_state_updated -> Timestamptz, state_generation -> Int8, active_propolis_id -> Nullable, diff --git a/nexus/src/app/background/tasks/instance_reincarnation.rs b/nexus/src/app/background/tasks/instance_reincarnation.rs index c95d82f1fc7..586bdf5cbb4 100644 --- a/nexus/src/app/background/tasks/instance_reincarnation.rs +++ b/nexus/src/app/background/tasks/instance_reincarnation.rs @@ -390,7 +390,7 @@ mod test { external_ips: Vec::new(), disks: Vec::new(), boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, ssh_public_keys: None, start: state == InstanceState::Vmm, auto_restart_policy, diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 1f951cef2e3..39b50ee1529 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -362,7 +362,7 @@ impl super::Nexus { memory, auto_restart_policy, boot_disk, - min_cpu_platform, + cpu_platform, } = params; check_instance_cpu_memory_sizes(*ncpus, *memory)?; @@ -389,14 +389,14 @@ impl super::Nexus { let auto_restart_policy = auto_restart_policy.map(Into::into); let ncpus = (*ncpus).into(); let memory = (*memory).into(); - let min_cpu_platform = min_cpu_platform.map(Into::into); + let cpu_platform = cpu_platform.map(Into::into); let update = InstanceUpdate { boot_disk_id, auto_restart_policy, ncpus, memory, - min_cpu_platform, + cpu_platform, }; self.datastore() .instance_reconfigure(opctx, &authz_instance, update) @@ -2474,7 +2474,7 @@ mod tests { external_ips: vec![], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, ssh_public_keys: None, start: false, auto_restart_policy: Default::default(), diff --git a/nexus/src/app/instance_platform.rs b/nexus/src/app/instance_platform.rs index 96a5e21225c..098ca5a7b42 100644 --- a/nexus/src/app/instance_platform.rs +++ b/nexus/src/app/instance_platform.rs @@ -496,7 +496,7 @@ impl super::Nexus { } /// Yields the CPUID configuration to use for a VMM that specifies the supplied -/// minimum CPU `platform`. +/// CPU `platform`. 
// // This is a free function (and not an `Into` impl on `VmmCpuPlatform`) to keep // all of the gnarly CPUID details out of the DB model crate, which defines that diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index de67f108679..fc3b19d291a 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -1294,7 +1294,7 @@ pub mod test { name: DISK_NAME.parse().unwrap(), }, )), - min_cpu_platform: None, + cpu_platform: None, disks: Vec::new(), start: false, auto_restart_policy: Default::default(), diff --git a/nexus/src/app/sagas/instance_delete.rs b/nexus/src/app/sagas/instance_delete.rs index f3f1d9ec8a1..a5f59bd65af 100644 --- a/nexus/src/app/sagas/instance_delete.rs +++ b/nexus/src/app/sagas/instance_delete.rs @@ -235,7 +235,7 @@ mod test { boot_disk: Some(params::InstanceDiskAttachment::Attach( params::InstanceDiskAttach { name: DISK_NAME.parse().unwrap() }, )), - min_cpu_platform: None, + cpu_platform: None, disks: Vec::new(), start: false, auto_restart_policy: Default::default(), diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index 4b7a8ef84a1..6500d3efcfb 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -613,7 +613,7 @@ mod tests { external_ips: vec![], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), diff --git a/nexus/src/app/sagas/instance_start.rs b/nexus/src/app/sagas/instance_start.rs index e2dd2769a90..b8dda8e0973 100644 --- a/nexus/src/app/sagas/instance_start.rs +++ b/nexus/src/app/sagas/instance_start.rs @@ -164,10 +164,9 @@ async fn sis_alloc_server( let mut constraint_builder = db::model::SledReservationConstraintBuilder::new(); - if let Some(min_cpu_platform) = params.db_instance.min_cpu_platform.as_ref() - { + if let Some(cpu_platform) = params.db_instance.cpu_platform.as_ref() { constraint_builder = constraint_builder - .cpu_families(min_cpu_platform.compatible_sled_cpu_families()); + .cpu_families(cpu_platform.compatible_sled_cpu_families()); } let resource = super::instance_common::reserve_vmm_resources( @@ -219,13 +218,13 @@ async fn sis_create_vmm_record( let sled_id = sagactx.lookup::("sled_id")?; let propolis_ip = sagactx.lookup::("propolis_ip")?; - // If the instance supplied a minimum CPU platform, record that as the VMM's + // If the instance supplied a CPU platform, record that as the VMM's // required platform, irrespective of what sled was picked. (This allows a - // VM to land on a "better" sled than its minimum requirement and migrate - // back to a minimum-required sled later.) + // VM to land on a "better" sled than its requirement and migrate back to a + // minimum-allowed-by-requirement sled later.) // - // If the instance didn't supply a minimum CPU platform, select one for this - // VMM by looking up the chosen sled and selecting the "minimum compatible + // If the instance didn't supply a CPU platform, select one for this VMM by + // looking up the chosen sled and selecting the "minimum compatible // platform" for sleds of that lineage. This maximizes the number of sleds // that can host the VMM if it needs to migrate in the future. 
Selecting the // sled first and then deriving the platform is meant to support @@ -235,7 +234,7 @@ async fn sis_create_vmm_record( // computation selects the most compatible platform that can run on sleds // with CPUs from that vendor. let cpu_platform = - if let Some(cpu_platform) = params.db_instance.min_cpu_platform { + if let Some(cpu_platform) = params.db_instance.cpu_platform { cpu_platform.into() } else { let (.., sled) = osagactx @@ -885,7 +884,7 @@ mod test { external_ips: vec![], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), diff --git a/nexus/src/app/sagas/instance_update/mod.rs b/nexus/src/app/sagas/instance_update/mod.rs index 96548793d38..fd020803f4c 100644 --- a/nexus/src/app/sagas/instance_update/mod.rs +++ b/nexus/src/app/sagas/instance_update/mod.rs @@ -1577,7 +1577,7 @@ mod test { external_ips: vec![], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index 01951189282..4b156c3eeda 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -2170,7 +2170,7 @@ mod test { network_interfaces: params::InstanceNetworkInterfaceAttachment::None, boot_disk, - min_cpu_platform: None, + cpu_platform: None, disks: data_disks, external_ips: vec![], start: true, diff --git a/nexus/test-utils/src/resource_helpers.rs b/nexus/test-utils/src/resource_helpers.rs index 5ecd031b8d1..6f69f453dfb 100644 --- a/nexus/test-utils/src/resource_helpers.rs +++ b/nexus/test-utils/src/resource_helpers.rs @@ -663,7 +663,7 @@ pub async fn create_instance_with( external_ips, disks, boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start, auto_restart_policy, anti_affinity_groups: Vec::new(), diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index 7db8d944176..a0cd1cc14f2 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -666,7 +666,7 @@ pub static DEMO_INSTANCE_CREATE: LazyLock = }], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -688,7 +688,7 @@ pub static DEMO_STOPPED_INSTANCE_CREATE: LazyLock = }], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -696,7 +696,7 @@ pub static DEMO_STOPPED_INSTANCE_CREATE: LazyLock = pub static DEMO_INSTANCE_UPDATE: LazyLock = LazyLock::new(|| params::InstanceUpdate { boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, auto_restart_policy: None, ncpus: InstanceCpuCount(1), memory: ByteCount::from_gibibytes_u32(16), diff --git a/nexus/tests/integration_tests/external_ips.rs b/nexus/tests/integration_tests/external_ips.rs index bd2eaf30e71..2edb9f8bd76 100644 --- a/nexus/tests/integration_tests/external_ips.rs +++ b/nexus/tests/integration_tests/external_ips.rs @@ -1006,7 +1006,7 @@ async fn test_floating_ip_attach_fail_between_projects( }], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), diff --git 
a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index 66ba780f12b..c5ff7d82779 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -63,7 +63,7 @@ use omicron_common::api::external::IdentityMetadataUpdateParams; use omicron_common::api::external::Instance; use omicron_common::api::external::InstanceAutoRestartPolicy; use omicron_common::api::external::InstanceCpuCount; -use omicron_common::api::external::InstanceMinimumCpuPlatform; +use omicron_common::api::external::InstanceCpuPlatform; use omicron_common::api::external::InstanceNetworkInterface; use omicron_common::api::external::InstanceState; use omicron_common::api::external::Name; @@ -241,7 +241,7 @@ async fn test_create_instance_with_bad_hostname_impl( external_ips: vec![], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: false, ssh_public_keys: None, auto_restart_policy: Default::default(), @@ -350,7 +350,7 @@ async fn test_instances_create_reboot_halt( external_ips: vec![], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -2041,7 +2041,7 @@ async fn test_instances_create_stopped_start( external_ips: vec![], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -2226,7 +2226,7 @@ async fn test_instance_using_image_from_other_project_fails( }, )], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -2293,7 +2293,7 @@ async fn test_instance_create_saga_removes_instance_database_record( external_ips: vec![], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -2325,7 +2325,7 @@ async fn test_instance_create_saga_removes_instance_database_record( external_ips: vec![], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -2418,7 +2418,7 @@ async fn test_instance_with_single_explicit_ip_address( external_ips: vec![], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: true, auto_restart_policy: Default::default(), @@ -2538,7 +2538,7 @@ async fn test_instance_with_new_custom_network_interfaces( external_ips: vec![], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -2657,7 +2657,7 @@ async fn test_instance_create_delete_network_interface( external_ips: vec![], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -2905,7 +2905,7 @@ async fn test_instance_update_network_interfaces( external_ips: vec![], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -3537,7 +3537,7 @@ async fn test_instance_with_multiple_nics_unwinds_completely( external_ips: vec![], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: true, 
auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -3610,7 +3610,7 @@ async fn test_attach_one_disk_to_instance(cptestctx: &ControlPlaneTestContext) { boot_disk: Some(params::InstanceDiskAttachment::Attach( params::InstanceDiskAttach { name: disk_name.clone() }, )), - min_cpu_platform: None, + cpu_platform: None, disks: Vec::new(), start: true, auto_restart_policy: Default::default(), @@ -3703,7 +3703,7 @@ async fn test_instance_create_attach_disks( }, ), ], - min_cpu_platform: None, + cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -3802,7 +3802,7 @@ async fn test_instance_create_attach_disks_undo( ), ], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -3887,7 +3887,7 @@ async fn test_attach_eight_disks_to_instance( ) }) .collect(), - min_cpu_platform: None, + cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -3976,7 +3976,7 @@ async fn test_cannot_attach_nine_disks_to_instance( ) }) .collect(), - min_cpu_platform: None, + cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -4079,7 +4079,7 @@ async fn test_cannot_attach_faulted_disks(cptestctx: &ControlPlaneTestContext) { ) }) .collect(), - min_cpu_platform: None, + cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -4171,7 +4171,7 @@ async fn test_disks_detached_when_instance_destroyed( ) }) .collect(), - min_cpu_platform: None, + cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -4270,7 +4270,7 @@ async fn test_disks_detached_when_instance_destroyed( ) }) .collect(), - min_cpu_platform: None, + cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -4355,7 +4355,7 @@ async fn test_duplicate_disk_attach_requests_ok( ), ], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -4400,7 +4400,7 @@ async fn test_duplicate_disk_attach_requests_ok( name: Name::try_from(String::from("alsodata")).unwrap(), }, )], - min_cpu_platform: None, + cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -4457,7 +4457,7 @@ async fn test_cannot_detach_boot_disk(cptestctx: &ControlPlaneTestContext) { name: Name::try_from(String::from("probablydata0")).unwrap(), }, )), - min_cpu_platform: None, + cpu_platform: None, disks: Vec::new(), start: false, auto_restart_policy: Default::default(), @@ -4521,7 +4521,7 @@ async fn test_cannot_detach_boot_disk(cptestctx: &ControlPlaneTestContext) { params::InstanceUpdate { boot_disk: None, auto_restart_policy: None, - min_cpu_platform: None, + cpu_platform: None, ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), }, @@ -4594,7 +4594,7 @@ async fn test_updating_running_instance_boot_disk_is_conflict( boot_disk: Some(params::InstanceDiskAttachment::Attach( params::InstanceDiskAttach { name: probablydata.clone() }, )), - min_cpu_platform: None, + cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -4626,7 +4626,7 @@ async fn test_updating_running_instance_boot_disk_is_conflict( 
params::InstanceUpdate { boot_disk: Some(alsodata.clone().into()), auto_restart_policy: None, - min_cpu_platform: None, + cpu_platform: None, ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), }, @@ -4645,7 +4645,7 @@ async fn test_updating_running_instance_boot_disk_is_conflict( // was created. boot_disk: Some(probablydata.clone().into()), auto_restart_policy: Some(InstanceAutoRestartPolicy::BestEffort), - min_cpu_platform: None, + cpu_platform: None, ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), }, @@ -4668,7 +4668,7 @@ async fn test_updating_missing_instance_is_not_found( params::InstanceUpdate { boot_disk: None, auto_restart_policy: None, - min_cpu_platform: None, + cpu_platform: None, ncpus: InstanceCpuCount::try_from(0).unwrap(), memory: ByteCount::from_gibibytes_u32(0), }, @@ -4758,7 +4758,7 @@ async fn test_size_can_be_changed(cptestctx: &ControlPlaneTestContext) { network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, disks: Vec::new(), start: true, // Start out with None @@ -4791,7 +4791,7 @@ async fn test_size_can_be_changed(cptestctx: &ControlPlaneTestContext) { params::InstanceUpdate { auto_restart_policy, boot_disk: boot_disk_nameorid.clone(), - min_cpu_platform: None, + cpu_platform: None, ncpus: new_ncpus, memory: new_memory, }, @@ -4814,7 +4814,7 @@ async fn test_size_can_be_changed(cptestctx: &ControlPlaneTestContext) { params::InstanceUpdate { auto_restart_policy, boot_disk: boot_disk_nameorid.clone(), - min_cpu_platform: None, + cpu_platform: None, ncpus: new_ncpus, memory: new_memory, }, @@ -4830,7 +4830,7 @@ async fn test_size_can_be_changed(cptestctx: &ControlPlaneTestContext) { params::InstanceUpdate { auto_restart_policy, boot_disk: boot_disk_nameorid.clone(), - min_cpu_platform: None, + cpu_platform: None, ncpus: initial_ncpus, memory: new_memory, }, @@ -4845,7 +4845,7 @@ async fn test_size_can_be_changed(cptestctx: &ControlPlaneTestContext) { params::InstanceUpdate { auto_restart_policy, boot_disk: boot_disk_nameorid.clone(), - min_cpu_platform: None, + cpu_platform: None, ncpus: initial_ncpus, memory: initial_memory, }, @@ -4864,7 +4864,7 @@ async fn test_size_can_be_changed(cptestctx: &ControlPlaneTestContext) { params::InstanceUpdate { auto_restart_policy, boot_disk: boot_disk_nameorid.clone(), - min_cpu_platform: None, + cpu_platform: None, ncpus: InstanceCpuCount(MAX_VCPU_PER_INSTANCE + 1), memory: instance.memory, }, @@ -4886,7 +4886,7 @@ async fn test_size_can_be_changed(cptestctx: &ControlPlaneTestContext) { params::InstanceUpdate { auto_restart_policy, boot_disk: boot_disk_nameorid.clone(), - min_cpu_platform: None, + cpu_platform: None, ncpus: instance.ncpus, memory: ByteCount::from_mebibytes_u32(0), }, @@ -4902,7 +4902,7 @@ async fn test_size_can_be_changed(cptestctx: &ControlPlaneTestContext) { params::InstanceUpdate { auto_restart_policy, boot_disk: boot_disk_nameorid.clone(), - min_cpu_platform: None, + cpu_platform: None, ncpus: instance.ncpus, memory: ByteCount::try_from(MAX_MEMORY_BYTES_PER_INSTANCE - 1) .unwrap(), @@ -4920,7 +4920,7 @@ async fn test_size_can_be_changed(cptestctx: &ControlPlaneTestContext) { params::InstanceUpdate { auto_restart_policy, boot_disk: boot_disk_nameorid.clone(), - min_cpu_platform: None, + cpu_platform: None, ncpus: instance.ncpus, memory: ByteCount::from_mebibytes_u32( (max_mib + 1024).try_into().unwrap(), @@ -4943,7 +4943,7 @@ async 
fn test_size_can_be_changed(cptestctx: &ControlPlaneTestContext) { params::InstanceUpdate { auto_restart_policy, boot_disk: boot_disk_nameorid.clone(), - min_cpu_platform: None, + cpu_platform: None, ncpus: new_ncpus, memory: new_memory, }, @@ -4975,7 +4975,7 @@ async fn test_auto_restart_policy_can_be_changed( network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, disks: Vec::new(), start: true, // Start out with None @@ -5005,7 +5005,7 @@ async fn test_auto_restart_policy_can_be_changed( dbg!(params::InstanceUpdate { auto_restart_policy, boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), }), @@ -5027,11 +5027,9 @@ async fn test_auto_restart_policy_can_be_changed( assert_reconfigured(None).await; } -// Test reconfiguring an instance's minimum CPU platform. +// Test reconfiguring an instance's CPU platform. #[nexus_test] -async fn test_min_cpu_platform_can_be_changed( - cptestctx: &ControlPlaneTestContext, -) { +async fn test_cpu_platform_can_be_changed(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; let instance_name = "milan-is-enough-for-anyone"; @@ -5051,7 +5049,7 @@ async fn test_min_cpu_platform_can_be_changed( external_ips: vec![], boot_disk: None, // Start out with None - min_cpu_platform: None, + cpu_platform: None, disks: Vec::new(), start: false, auto_restart_policy: None, @@ -5071,29 +5069,29 @@ async fn test_min_cpu_platform_can_be_changed( let instance = response.parsed_body::().unwrap(); // Starts out as None. - assert_eq!(instance.min_cpu_platform, None); + assert_eq!(instance.cpu_platform, None); - let assert_reconfigured = |min_cpu_platform| async move { + let assert_reconfigured = |cpu_platform| async move { let instance = expect_instance_reconfigure_ok( client, &instance.identity.id, dbg!(params::InstanceUpdate { auto_restart_policy: None, boot_disk: None, - min_cpu_platform, + cpu_platform, ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), }), ) .await; - assert_eq!(dbg!(instance).min_cpu_platform, min_cpu_platform,); + assert_eq!(dbg!(instance).cpu_platform, cpu_platform,); }; // Reconfigure to Milan. - assert_reconfigured(Some(InstanceMinimumCpuPlatform::AmdMilan)).await; + assert_reconfigured(Some(InstanceCpuPlatform::AmdMilan)).await; // Reconfigure to Turin (even though we have no Turin in the test env!) - assert_reconfigured(Some(InstanceMinimumCpuPlatform::AmdTurin)).await; + assert_reconfigured(Some(InstanceCpuPlatform::AmdTurin)).await; // Reconfigure back to None. 
assert_reconfigured(None).await; @@ -5147,7 +5145,7 @@ async fn test_boot_disk_can_be_changed(cptestctx: &ControlPlaneTestContext) { name: Name::try_from(String::from("probablydata1")).unwrap(), }, )], - min_cpu_platform: None, + cpu_platform: None, start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -5175,7 +5173,7 @@ async fn test_boot_disk_can_be_changed(cptestctx: &ControlPlaneTestContext) { params::InstanceUpdate { boot_disk: Some(disks[1].identity.id.into()), auto_restart_policy: None, - min_cpu_platform: None, + cpu_platform: None, ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), }, @@ -5219,7 +5217,7 @@ async fn test_boot_disk_must_be_attached(cptestctx: &ControlPlaneTestContext) { external_ips: vec![], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -5244,7 +5242,7 @@ async fn test_boot_disk_must_be_attached(cptestctx: &ControlPlaneTestContext) { params::InstanceUpdate { boot_disk: Some(disks[0].identity.id.into()), auto_restart_policy: None, - min_cpu_platform: None, + cpu_platform: None, ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), }, @@ -5278,7 +5276,7 @@ async fn test_boot_disk_must_be_attached(cptestctx: &ControlPlaneTestContext) { params::InstanceUpdate { boot_disk: Some(disks[0].identity.id.into()), auto_restart_policy: None, - min_cpu_platform: None, + cpu_platform: None, ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), }, @@ -5314,7 +5312,7 @@ async fn test_instances_memory_rejected_less_than_min_memory_size( external_ips: vec![], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -5368,7 +5366,7 @@ async fn test_instances_memory_not_divisible_by_min_memory_size( external_ips: vec![], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -5422,7 +5420,7 @@ async fn test_instances_memory_greater_than_max_size( external_ips: vec![], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -5530,7 +5528,7 @@ async fn test_instance_create_with_anti_affinity_groups( external_ips: vec![], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, auto_restart_policy: Default::default(), anti_affinity_groups: anti_affinity_groups_param, }; @@ -5600,7 +5598,7 @@ async fn test_instance_create_with_duplicate_anti_affinity_groups( external_ips: vec![], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, auto_restart_policy: Default::default(), anti_affinity_groups: anti_affinity_groups_param, }; @@ -5671,7 +5669,7 @@ async fn test_instance_create_with_anti_affinity_groups_that_do_not_exist( external_ips: vec![], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, auto_restart_policy: Default::default(), anti_affinity_groups: anti_affinity_groups_param, }; @@ -5755,7 +5753,7 @@ async fn test_instance_create_with_ssh_keys( external_ips: vec![], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, auto_restart_policy: Default::default(), 
anti_affinity_groups: Vec::new(), }; @@ -5805,7 +5803,7 @@ async fn test_instance_create_with_ssh_keys( external_ips: vec![], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), }; @@ -5854,7 +5852,7 @@ async fn test_instance_create_with_ssh_keys( external_ips: vec![], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), }; @@ -5996,7 +5994,7 @@ async fn test_cannot_provision_instance_beyond_cpu_capacity( external_ips: vec![], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -6056,7 +6054,7 @@ async fn test_cannot_provision_instance_beyond_cpu_limit( external_ips: vec![], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -6113,7 +6111,7 @@ async fn test_cannot_provision_instance_beyond_ram_capacity( external_ips: vec![], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -6147,7 +6145,7 @@ async fn test_cannot_provision_instance_beyond_ram_capacity( } #[nexus_test] -async fn test_can_start_instance_with_min_cpu_platform( +async fn test_can_start_instance_with_cpu_platform( cptestctx: &ControlPlaneTestContext, ) { let client = &cptestctx.external_client; @@ -6168,8 +6166,8 @@ async fn test_can_start_instance_with_min_cpu_platform( external_ips: vec![], disks: vec![], boot_disk: None, - // Note that we're actually setting min_cpu_platform this time! - min_cpu_platform: Some(InstanceMinimumCpuPlatform::AmdMilan), + // Note that we're actually setting cpu_platform this time! + cpu_platform: Some(InstanceCpuPlatform::AmdMilan), start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -6210,7 +6208,7 @@ async fn test_can_start_instance_with_min_cpu_platform( params::InstanceUpdate { boot_disk: None, auto_restart_policy: None, - min_cpu_platform: Some(InstanceMinimumCpuPlatform::AmdTurin), + cpu_platform: Some(InstanceCpuPlatform::AmdTurin), ncpus: InstanceCpuCount::try_from(1).unwrap(), memory: ByteCount::from_gibibytes_u32(4), }, @@ -6292,7 +6290,7 @@ async fn test_can_start_instance_with_min_cpu_platform( } #[nexus_test] -async fn test_cannot_start_instance_with_unsatisfiable_min_cpu( +async fn test_cannot_start_instance_with_unsatisfiable_cpu_platform( cptestctx: &ControlPlaneTestContext, ) { let client = &cptestctx.external_client; @@ -6316,7 +6314,7 @@ async fn test_cannot_start_instance_with_unsatisfiable_min_cpu( // Require Turin to start the instance, but there are no Turin sleds in // our fake environment. Creating this instance should succeed, but // starting it won't. 
- min_cpu_platform: Some(InstanceMinimumCpuPlatform::AmdTurin), + cpu_platform: Some(InstanceCpuPlatform::AmdTurin), start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -6613,7 +6611,7 @@ async fn test_instance_ephemeral_ip_from_correct_pool( ssh_public_keys: None, disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -6684,7 +6682,7 @@ async fn test_instance_ephemeral_ip_from_orphan_pool( ssh_public_keys: None, disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -6749,7 +6747,7 @@ async fn test_instance_ephemeral_ip_no_default_pool_error( ssh_public_keys: None, disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -6888,7 +6886,7 @@ async fn test_instance_allow_only_one_ephemeral_ip( external_ips: vec![ephemeral_create.clone(), ephemeral_create], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), @@ -7023,7 +7021,7 @@ async fn test_instance_create_in_silo(cptestctx: &ControlPlaneTestContext) { }], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), diff --git a/nexus/tests/integration_tests/projects.rs b/nexus/tests/integration_tests/projects.rs index 9e41cbd02e4..cc5e34032e0 100644 --- a/nexus/tests/integration_tests/projects.rs +++ b/nexus/tests/integration_tests/projects.rs @@ -169,7 +169,7 @@ async fn test_project_deletion_with_instance( external_ips: vec![], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), diff --git a/nexus/tests/integration_tests/quotas.rs b/nexus/tests/integration_tests/quotas.rs index 5f1d22eea4f..9cab95bd5f7 100644 --- a/nexus/tests/integration_tests/quotas.rs +++ b/nexus/tests/integration_tests/quotas.rs @@ -88,7 +88,7 @@ impl ResourceAllocator { external_ips: Vec::::new(), disks: Vec::::new(), boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: false, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), diff --git a/nexus/tests/integration_tests/schema.rs b/nexus/tests/integration_tests/schema.rs index 76546ca4de5..63ca5b1dcb4 100644 --- a/nexus/tests/integration_tests/schema.rs +++ b/nexus/tests/integration_tests/schema.rs @@ -1391,7 +1391,7 @@ fn at_current_101_0_0<'a>(ctx: &'a MigrationContext<'a>) -> BoxFuture<'a, ()> { params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, disks: Vec::new(), start: false, auto_restart_policy: Default::default(), diff --git a/nexus/tests/integration_tests/snapshots.rs b/nexus/tests/integration_tests/snapshots.rs index 506140451e4..83405813063 100644 --- a/nexus/tests/integration_tests/snapshots.rs +++ b/nexus/tests/integration_tests/snapshots.rs @@ -145,7 +145,7 @@ async fn test_snapshot_basic(cptestctx: &ControlPlaneTestContext) { boot_disk: Some(params::InstanceDiskAttachment::Attach( params::InstanceDiskAttach { name: base_disk_name.clone() }, )), - 
min_cpu_platform: None, + cpu_platform: None, disks: Vec::new(), external_ips: vec![], start: true, @@ -352,7 +352,7 @@ async fn test_snapshot_stopped_instance(cptestctx: &ControlPlaneTestContext) { boot_disk: Some(params::InstanceDiskAttachment::Attach( params::InstanceDiskAttach { name: base_disk_name.clone() }, )), - min_cpu_platform: None, + cpu_platform: None, disks: Vec::new(), external_ips: vec![], start: false, diff --git a/nexus/tests/integration_tests/subnet_allocation.rs b/nexus/tests/integration_tests/subnet_allocation.rs index ae1ff51e98e..6cf937279a6 100644 --- a/nexus/tests/integration_tests/subnet_allocation.rs +++ b/nexus/tests/integration_tests/subnet_allocation.rs @@ -63,7 +63,7 @@ async fn create_instance_expect_failure( external_ips: vec![], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), diff --git a/nexus/tests/integration_tests/utilization.rs b/nexus/tests/integration_tests/utilization.rs index 44006016d28..f5e4958502d 100644 --- a/nexus/tests/integration_tests/utilization.rs +++ b/nexus/tests/integration_tests/utilization.rs @@ -231,7 +231,7 @@ async fn create_resources_in_test_suite_silo(client: &ClientTestContext) { external_ips: vec![], disks: vec![], boot_disk: None, - min_cpu_platform: None, + cpu_platform: None, start: true, auto_restart_policy: Default::default(), anti_affinity_groups: Vec::new(), diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index 4f106e58ac2..9b328a2beaf 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -13,7 +13,7 @@ use omicron_common::api::external::{ AddressLotKind, AffinityPolicy, AllowedSourceIps, BfdMode, BgpPeer, ByteCount, FailureDomain, Hostname, IdentityMetadataCreateParams, IdentityMetadataUpdateParams, InstanceAutoRestartPolicy, InstanceCpuCount, - InstanceMinimumCpuPlatform, LinkFec, LinkSpeed, Name, NameOrId, Nullable, + InstanceCpuPlatform, LinkFec, LinkSpeed, Name, NameOrId, Nullable, PaginationOrder, RouteDestination, RouteTarget, UserId, }; use omicron_common::disk::DiskVariant; @@ -1264,10 +1264,10 @@ pub struct InstanceCreate { #[serde(default)] pub anti_affinity_groups: Vec, - /// The minimum required CPU platform for this instance. If this is `null`, - /// the instance requires no particular CPU platform. + /// The CPU platform to be used for this instance. If this is `null`, the + /// instance requires no particular CPU platform. #[serde(default)] - pub min_cpu_platform: Option, + pub cpu_platform: Option, } /// Parameters of an `Instance` that can be reconfigured after creation. @@ -1300,9 +1300,9 @@ pub struct InstanceUpdate { /// `null`. pub auto_restart_policy: Option, - /// The minimum required CPU platform for this instance. If this is `null`, - /// the instance requires no particular CPU platform. - pub min_cpu_platform: Option, + /// The CPU platform to be used for this instance. If this is `null`, the + /// instance requires no particular CPU platform. + pub cpu_platform: Option, } #[inline] diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 385df3257e0..7bcb7944a22 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -4771,6 +4771,15 @@ "type": "string", "format": "uuid" }, + "cpu_platform": { + "nullable": true, + "description": "The CPU platform for this instance. 
If this is `null`, the instance requires no particular CPU platform.", + "allOf": [ + { + "$ref": "#/components/schemas/InstanceCpuPlatform" + } + ] + }, "description": { "description": "human-readable free-form text about a resource", "type": "string" @@ -4792,15 +4801,6 @@ } ] }, - "min_cpu_platform": { - "nullable": true, - "description": "The minimum required CPU platform for this instance. If this is `null`, the instance requires no particular CPU platform.", - "allOf": [ - { - "$ref": "#/components/schemas/InstanceMinimumCpuPlatform" - } - ] - }, "name": { "description": "unique, mutable, user-controlled identifier for each resource", "allOf": [ @@ -4886,32 +4886,18 @@ "format": "uint16", "minimum": 0 }, - "InstanceMigrateRequest": { - "description": "Parameters used when migrating an instance.", - "type": "object", - "properties": { - "dst_sled_id": { - "description": "The ID of the sled to which to migrate the target instance.", - "type": "string", - "format": "uuid" - } - }, - "required": [ - "dst_sled_id" - ] - }, - "InstanceMinimumCpuPlatform": { - "description": "A minimum required CPU platform for an instance.\n\nWhen an instance specifies a minimum required CPU platform:\n\n- The system may expose (to the VM) new CPU features that are only present on that platform (or on newer platforms of the same lineage that also support those features). - The instance must run on hosts that have CPUs that support all the features of the supplied minimum platform.\n\nThat is, the instance is restricted to hosts that have the specified minimum host CPU type (or a more advanced, but still compatible, CPU), but in exchange the CPU features exposed by the minimum platform are available for the guest to use. Note that this may prevent an instance from starting (if the hosts it requires are full but there is capacity on other incompatible hosts).\n\nIf an instance does not specify a minimum required CPU platform, then when it starts, the control plane selects a host for the instance and then supplies the guest with the \"minimum\" CPU platform supported by that host. This maximizes the number of hosts that can run the VM if it later needs to migrate to another host.", + "InstanceCpuPlatform": { + "description": "A required CPU platform for an instance.\n\nWhen an instance specifies a required CPU platform:\n\n- The system may expose (to the VM) new CPU features that are only present on that platform (or on newer platforms of the same lineage that also support those features). - The instance must run on hosts that have CPUs that support all the features of the supplied platform.\n\nThat is, the instance is restricted to hosts that have the CPUs which support all features of the required platform, but in exchange the CPU features exposed by the platform are available for the guest to use. Note that this may prevent an instance from starting (if the hosts that could run it are full but there is capacity on other incompatible hosts).\n\nIf an instance does not specify a required CPU platform, then when it starts, the control plane selects a host for the instance and then supplies the guest with the \"minimum\" CPU platform supported by that host. This maximizes the number of hosts that can run the VM if it later needs to migrate to another host.\n\nIn all cases, the CPU features presented by a given CPU platform are a subset of what the corresponding hardware may actually support; features which cannot be used from a virtual environment or do not have full hypervisor support may be masked off. 
See RFD 314 for specific CPU features in a CPU platform.", "oneOf": [ { - "description": "An AMD Zen 3-compatible CPU platform.", + "description": "An AMD Milan-like CPU platform.", "type": "string", "enum": [ "amd_milan" ] }, { - "description": "An AMD Zen 5-compatible CPU platform.", + "description": "An AMD Turin-like CPU platform.", "type": "string", "enum": [ "amd_turin" @@ -4919,6 +4905,20 @@ } ] }, + "InstanceMigrateRequest": { + "description": "Parameters used when migrating an instance.", + "type": "object", + "properties": { + "dst_sled_id": { + "description": "The ID of the sled to which to migrate the target instance.", + "type": "string", + "format": "uuid" + } + }, + "required": [ + "dst_sled_id" + ] + }, "InstanceState": { "description": "Running state of an Instance (primarily: booted or stopped)\n\nThis typically reflects whether it's starting, running, stopping, or stopped, but also includes states related to the Instance's lifecycle", "oneOf": [ diff --git a/openapi/nexus.json b/openapi/nexus.json index 7f95f43c761..61712620291 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -19824,6 +19824,15 @@ "type": "string", "format": "uuid" }, + "cpu_platform": { + "nullable": true, + "description": "The CPU platform for this instance. If this is `null`, the instance requires no particular CPU platform.", + "allOf": [ + { + "$ref": "#/components/schemas/InstanceCpuPlatform" + } + ] + }, "description": { "description": "human-readable free-form text about a resource", "type": "string" @@ -19845,15 +19854,6 @@ } ] }, - "min_cpu_platform": { - "nullable": true, - "description": "The minimum required CPU platform for this instance. If this is `null`, the instance requires no particular CPU platform.", - "allOf": [ - { - "$ref": "#/components/schemas/InstanceMinimumCpuPlatform" - } - ] - }, "name": { "description": "unique, mutable, user-controlled identifier for each resource", "allOf": [ @@ -19939,6 +19939,25 @@ "format": "uint16", "minimum": 0 }, + "InstanceCpuPlatform": { + "description": "A required CPU platform for an instance.\n\nWhen an instance specifies a required CPU platform:\n\n- The system may expose (to the VM) new CPU features that are only present on that platform (or on newer platforms of the same lineage that also support those features). - The instance must run on hosts that have CPUs that support all the features of the supplied platform.\n\nThat is, the instance is restricted to hosts that have the CPUs which support all features of the required platform, but in exchange the CPU features exposed by the platform are available for the guest to use. Note that this may prevent an instance from starting (if the hosts that could run it are full but there is capacity on other incompatible hosts).\n\nIf an instance does not specify a required CPU platform, then when it starts, the control plane selects a host for the instance and then supplies the guest with the \"minimum\" CPU platform supported by that host. This maximizes the number of hosts that can run the VM if it later needs to migrate to another host.\n\nIn all cases, the CPU features presented by a given CPU platform are a subset of what the corresponding hardware may actually support; features which cannot be used from a virtual environment or do not have full hypervisor support may be masked off. 
See RFD 314 for specific CPU features in a CPU platform.", + "oneOf": [ + { + "description": "An AMD Milan-like CPU platform.", + "type": "string", + "enum": [ + "amd_milan" + ] + }, + { + "description": "An AMD Turin-like CPU platform.", + "type": "string", + "enum": [ + "amd_turin" + ] + } + ] + }, "InstanceCreate": { "description": "Create-time parameters for an `Instance`", "type": "object", @@ -19971,6 +19990,16 @@ } ] }, + "cpu_platform": { + "nullable": true, + "description": "The CPU platform to be used for this instance. If this is `null`, the instance requires no particular CPU platform.", + "default": null, + "allOf": [ + { + "$ref": "#/components/schemas/InstanceCpuPlatform" + } + ] + }, "description": { "type": "string" }, @@ -20006,16 +20035,6 @@ } ] }, - "min_cpu_platform": { - "nullable": true, - "description": "The minimum required CPU platform for this instance. If this is `null`, the instance requires no particular CPU platform.", - "default": null, - "allOf": [ - { - "$ref": "#/components/schemas/InstanceMinimumCpuPlatform" - } - ] - }, "name": { "$ref": "#/components/schemas/Name" }, @@ -20136,25 +20155,6 @@ } ] }, - "InstanceMinimumCpuPlatform": { - "description": "A minimum required CPU platform for an instance.\n\nWhen an instance specifies a minimum required CPU platform:\n\n- The system may expose (to the VM) new CPU features that are only present on that platform (or on newer platforms of the same lineage that also support those features). - The instance must run on hosts that have CPUs that support all the features of the supplied minimum platform.\n\nThat is, the instance is restricted to hosts that have the specified minimum host CPU type (or a more advanced, but still compatible, CPU), but in exchange the CPU features exposed by the minimum platform are available for the guest to use. Note that this may prevent an instance from starting (if the hosts it requires are full but there is capacity on other incompatible hosts).\n\nIf an instance does not specify a minimum required CPU platform, then when it starts, the control plane selects a host for the instance and then supplies the guest with the \"minimum\" CPU platform supported by that host. This maximizes the number of hosts that can run the VM if it later needs to migrate to another host.", - "oneOf": [ - { - "description": "An AMD Zen 3-compatible CPU platform.", - "type": "string", - "enum": [ - "amd_milan" - ] - }, - { - "description": "An AMD Zen 5-compatible CPU platform.", - "type": "string", - "enum": [ - "amd_turin" - ] - } - ] - }, "InstanceNetworkInterface": { "description": "An `InstanceNetworkInterface` represents a virtual network interface device attached to an instance.", "type": "object", @@ -20533,20 +20533,20 @@ } ] }, - "memory": { - "description": "The amount of memory to assign to this instance.", + "cpu_platform": { + "nullable": true, + "description": "The CPU platform to be used for this instance. If this is `null`, the instance requires no particular CPU platform.", "allOf": [ { - "$ref": "#/components/schemas/ByteCount" + "$ref": "#/components/schemas/InstanceCpuPlatform" } ] }, - "min_cpu_platform": { - "nullable": true, - "description": "The minimum required CPU platform for this instance. 
If this is `null`, the instance requires no particular CPU platform.", + "memory": { + "description": "The amount of memory to assign to this instance.", "allOf": [ { - "$ref": "#/components/schemas/InstanceMinimumCpuPlatform" + "$ref": "#/components/schemas/ByteCount" } ] }, diff --git a/schema/crdb/add-instance-cpu-platform/up01.sql b/schema/crdb/add-instance-cpu-platform/up01.sql new file mode 100644 index 00000000000..1946d024445 --- /dev/null +++ b/schema/crdb/add-instance-cpu-platform/up01.sql @@ -0,0 +1,4 @@ +CREATE TYPE IF NOT EXISTS omicron.public.instance_cpu_platform AS ENUM ( + 'amd_milan', + 'amd_turin' +); diff --git a/schema/crdb/add-instance-minimum-cpu-platform/up02.sql b/schema/crdb/add-instance-cpu-platform/up02.sql similarity index 100% rename from schema/crdb/add-instance-minimum-cpu-platform/up02.sql rename to schema/crdb/add-instance-cpu-platform/up02.sql diff --git a/schema/crdb/add-instance-cpu-platform/up03.sql b/schema/crdb/add-instance-cpu-platform/up03.sql new file mode 100644 index 00000000000..d53638265f2 --- /dev/null +++ b/schema/crdb/add-instance-cpu-platform/up03.sql @@ -0,0 +1,2 @@ +ALTER TABLE omicron.public.instance + ADD COLUMN IF NOT EXISTS cpu_platform omicron.public.instance_cpu_platform; diff --git a/schema/crdb/add-instance-minimum-cpu-platform/up04.sql b/schema/crdb/add-instance-cpu-platform/up04.sql similarity index 100% rename from schema/crdb/add-instance-minimum-cpu-platform/up04.sql rename to schema/crdb/add-instance-cpu-platform/up04.sql diff --git a/schema/crdb/add-instance-minimum-cpu-platform/up05.sql b/schema/crdb/add-instance-cpu-platform/up05.sql similarity index 100% rename from schema/crdb/add-instance-minimum-cpu-platform/up05.sql rename to schema/crdb/add-instance-cpu-platform/up05.sql diff --git a/schema/crdb/add-instance-minimum-cpu-platform/up01.sql b/schema/crdb/add-instance-minimum-cpu-platform/up01.sql deleted file mode 100644 index d0b5098fe8c..00000000000 --- a/schema/crdb/add-instance-minimum-cpu-platform/up01.sql +++ /dev/null @@ -1,4 +0,0 @@ -CREATE TYPE IF NOT EXISTS omicron.public.instance_min_cpu_platform AS ENUM ( - 'amd_milan', - 'amd_turin' -); diff --git a/schema/crdb/add-instance-minimum-cpu-platform/up03.sql b/schema/crdb/add-instance-minimum-cpu-platform/up03.sql deleted file mode 100644 index c3c39120712..00000000000 --- a/schema/crdb/add-instance-minimum-cpu-platform/up03.sql +++ /dev/null @@ -1,2 +0,0 @@ -ALTER TABLE omicron.public.instance - ADD COLUMN IF NOT EXISTS min_cpu_platform omicron.public.instance_min_cpu_platform; diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 403d3887f5a..6e471fc27f8 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -189,8 +189,8 @@ CREATE TYPE IF NOT EXISTS omicron.public.sled_state AS ENUM ( -- The model of CPU installed in a particular sled, discovered by sled-agent -- and reported to Nexus. This determines what VMs can run on a sled: instances --- that require a specific minimum CPU platform can only run on sleds whose --- CPUs support all the features of that platform. +-- that require a specific CPU platform can only run on sleds whose CPUs support +-- all the features of that platform. CREATE TYPE IF NOT EXISTS omicron.public.sled_cpu_family AS ENUM ( -- Sled-agent didn't recognize the sled's CPU. 
'unknown', @@ -1194,7 +1194,7 @@ CREATE TYPE IF NOT EXISTS omicron.public.instance_auto_restart AS ENUM ( 'best_effort' ); -CREATE TYPE IF NOT EXISTS omicron.public.instance_min_cpu_platform AS ENUM ( +CREATE TYPE IF NOT EXISTS omicron.public.instance_cpu_platform AS ENUM ( 'amd_milan', 'amd_turin' ); @@ -1312,18 +1312,16 @@ CREATE TABLE IF NOT EXISTS omicron.public.instance ( intended_state omicron.public.instance_intended_state NOT NULL, /* - * The minimum required CPU platform for this instance. If set, the - * instance's VMs may make use of all the CPU features supplied by their - * minimum platform, but in exchange they may only run on sleds whose - * CPUs support all of those features. + * The required CPU platform for this instance. If set, the instance's VMs + * may see additional features present in that platform, but in exchange + * they may only run on sleds whose CPUs support all of those features. * - * If this is NULL, the control plane ignores CPU constraints when - * selecting a sled for this instance. Then, once it has selected a - * sled, it supplies a "lowest common denominator" CPU platform that - * is compatible with that sled to maximize the number of sleds the VM - * can migrate to. + * If this is NULL, the control plane ignores CPU constraints when selecting + * a sled for this instance. Then, once it has selected a sled, it supplies + * a "lowest common denominator" CPU platform that is compatible with that + * sled to maximize the number of sleds the VM can migrate to. */ - min_cpu_platform omicron.public.instance_min_cpu_platform, + cpu_platform omicron.public.instance_cpu_platform, CONSTRAINT vmm_iff_active_propolis CHECK ( ((state = 'vmm') AND (active_propolis_id IS NOT NULL)) OR From 33956f974e6d4fec7c3d18674b6b2eaa4d910cba Mon Sep 17 00:00:00 2001 From: iximeow Date: Thu, 31 Jul 2025 19:38:06 +0000 Subject: [PATCH 19/42] i want propolis logs too please thank you --- .github/buildomat/jobs/deploy.sh | 4 ++-- common/src/api/external/mod.rs | 6 ++++++ nexus/db-model/src/sled_cpu_family.rs | 1 + nexus/db-model/src/vmm_cpu_platform.rs | 6 ++++-- 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/.github/buildomat/jobs/deploy.sh b/.github/buildomat/jobs/deploy.sh index 58d6618e8c0..2841dc26811 100755 --- a/.github/buildomat/jobs/deploy.sh +++ b/.github/buildomat/jobs/deploy.sh @@ -9,11 +9,11 @@ #: "%/pool/ext/*/crypt/zone/oxz_*/root/var/svc/log/oxide-*.log*", #: "%/pool/ext/*/crypt/zone/oxz_*/root/var/svc/log/system-illumos-*.log*", #: "%/pool/ext/*/crypt/zone/oxz_ntp_*/root/var/log/chrony/*.log*", -#: "!/pool/ext/*/crypt/zone/oxz_propolis-server_*/root/var/svc/log/*.log*", +#: "%/pool/ext/*/crypt/zone/oxz_propolis-server_*/root/var/svc/log/*.log*", #: "%/pool/ext/*/crypt/debug/global/oxide-sled-agent:default.log.*", #: "%/pool/ext/*/crypt/debug/oxz_*/oxide-*.log.*", #: "%/pool/ext/*/crypt/debug/oxz_*/system-illumos-*.log.*", -#: "!/pool/ext/*/crypt/debug/oxz_propolis-server_*/*.log.*", +#: "%/pool/ext/*/crypt/debug/oxz_propolis-server_*/*.log.*", #: "/tmp/kstat/*.kstat" #: ] #: skip_clone = true diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index 610dd757297..e85365ca8a8 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -1298,6 +1298,12 @@ pub enum InstanceCpuPlatform { AmdMilan, /// An AMD Turin-like CPU platform. + // Note that there is only Turin, not Turin Dense - feature-wise they are + // collapsed together as the guest-visible platform is the same.
+ // If the two must be distinguished for instance placement, we'll want to + // track whatever the motivating constraint is more explicitly. CPU + // families, and especially the vendor code names, don't necessarily promise + // details about specific processor packaging choices. AmdTurin, } diff --git a/nexus/db-model/src/sled_cpu_family.rs b/nexus/db-model/src/sled_cpu_family.rs index 34785459008..ab6aafee22d 100644 --- a/nexus/db-model/src/sled_cpu_family.rs +++ b/nexus/db-model/src/sled_cpu_family.rs @@ -41,6 +41,7 @@ impl SledCpuFamily { Self::Unknown => crate::VmmCpuPlatform::SledDefault, Self::AmdMilan => crate::VmmCpuPlatform::AmdMilan, Self::AmdTurin => crate::VmmCpuPlatform::AmdMilan, + Self::AmdTurinDense => crate::VmmCpuPlatform::AmdMilan, } } } diff --git a/nexus/db-model/src/vmm_cpu_platform.rs b/nexus/db-model/src/vmm_cpu_platform.rs index e177ee7d351..d71f0284269 100644 --- a/nexus/db-model/src/vmm_cpu_platform.rs +++ b/nexus/db-model/src/vmm_cpu_platform.rs @@ -36,10 +36,12 @@ impl VmmCpuPlatform { pub fn compatible_sled_cpu_families(&self) -> Option<&[SledCpuFamily]> { match self { // Milan-based instances can run on both Milan and Turin processors. + // Turin and Turin Dense are equally viable from a features + // perspective. Self::AmdMilan => { - Some(&[SledCpuFamily::AmdMilan, SledCpuFamily::AmdTurin]) + Some(&[SledCpuFamily::AmdMilan, SledCpuFamily::AmdTurin, SledCpuFamily::AmdTurinDense]) } - Self::AmdTurin => Some(&[SledCpuFamily::AmdTurin]), + Self::AmdTurin => Some(&[SledCpuFamily::AmdTurin, SledCpuFamily::AmdTurinDense]), // VMMs get the "sled default" CPU platform when an instance starts // up on a sled that hasn't reported a well-known CPU family. Assume From 9b6b6ecf3091f80f0dcde9b0ee7f20e868768cb6 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 6 Aug 2025 20:20:20 +0000 Subject: [PATCH 20/42] one more pass at aligning RFD 314, what we currently expose, and the platform as defined in Nexus --- nexus/src/app/instance_platform.rs | 34 ++++++++++++++++++------------ 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/nexus/src/app/instance_platform.rs b/nexus/src/app/instance_platform.rs index 098ca5a7b42..fd24ec57fe6 100644 --- a/nexus/src/app/instance_platform.rs +++ b/nexus/src/app/instance_platform.rs @@ -536,9 +536,9 @@ fn cpuid_from_vmm_cpu_platform( cpuid_leaf!(0x0, 0x0000000D, 0x68747541, 0x444D4163, 0x69746E65), cpuid_leaf!(0x1, 0x00A00F11, 0x00000800, 0xF6F83203, 0x078BFBFF), cpuid_leaf!(0x5, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - cpuid_leaf!(0x6, 0x00000002, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x6, 0x00000004, 0x00000000, 0x00000000, 0x00000000), cpuid_subleaf!( - 0x7, 0x0, 0x00000000, 0x219C03A9, 0x00000000, 0x00000000 + 0x7, 0x0, 0x00000000, 0x219803A9, 0x00000600, 0x00000000 ), cpuid_subleaf!( 0xB, 0x0, 0x00000001, 0x00000002, 0x00000100, 0x00000000 @@ -546,6 +546,9 @@ fn cpuid_from_vmm_cpu_platform( cpuid_subleaf!( 0xB, 0x1, 0x00000000, 0x00000000, 0x00000201, 0x00000000 ), + cpuid_subleaf!( + 0xB, 0x2, 0x00000000, 0x00000000, 0x00000002, 0x00000000 + ), cpuid_subleaf!( 0xD, 0x0, 0x00000007, 0x00000340, 0x00000340, 0x00000000 ), @@ -556,16 +559,24 @@ fn cpuid_from_vmm_cpu_platform( 0xD, 0x2, 0x00000100, 0x00000240, 0x00000000, 0x00000000 ), cpuid_leaf!(0x80000000, 0x80000021, 0x68747541, 0x444D4163, 0x69746E65), - cpuid_leaf!(0x80000001, 0x00A00F11, 0x40000000, 0x444001F0, 0x27D3FBFF), - cpuid_leaf!(0x80000002, 0x73736F72, 0x726F6365, 0x31332050, 0x43203737), - cpuid_leaf!(0x80000003, 0x20455059, 
0x00414D44, 0x00000000, 0x00000000), - cpuid_leaf!(0x80000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + // ecx bit 23 should be flipped true at some point, but is currently + // hidden and will continue to be for the moment. + // ecx bit 3 should be masked, but is is not and advertises support for + // unsupported extensions to LAPIC space. + // + // RFD 314 talks about these bits more, but we currently allow them to + // be wrong as they have been wrong before and we'll get to them + // individually later. + cpuid_leaf!(0x80000001, 0x00A00F11, 0x40000000, 0x444001F1, 0x27D3FBFF), + cpuid_leaf!(0x80000002, 0x20444D41, 0x43595045, 0x31373720, 0x36205033), + cpuid_leaf!(0x80000003, 0x6F432D34, 0x50206572, 0x65636F72, 0x726F7373), + cpuid_leaf!(0x80000004, 0x20202020, 0x20202020, 0x20202020, 0x00202020), cpuid_leaf!(0x80000005, 0xFF40FF40, 0xFF40FF40, 0x20080140, 0x20080140), - cpuid_leaf!(0x80000006, 0x08002200, 0x68004200, 0x02006140, 0x01009140), + cpuid_leaf!(0x80000006, 0x48002200, 0x68004200, 0x02006140, 0x08009140), cpuid_leaf!(0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000100), - cpuid_leaf!(0x80000008, 0x00003030, 0x111ED205, 0x00000000, 0x00000000), + cpuid_leaf!(0x80000008, 0x00003030, 0x00000205, 0x00000000, 0x00000000), cpuid_leaf!(0x8000000A, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - cpuid_leaf!(0x80000019, 0xF040F040, 0xF040F040, 0x00000000, 0x00000000), + cpuid_leaf!(0x80000019, 0xF040F040, 0xF0400000, 0x00000000, 0x00000000), cpuid_leaf!(0x8000001A, 0x00000006, 0x00000000, 0x00000000, 0x00000000), cpuid_leaf!(0x8000001B, 0x00000000, 0x00000000, 0x00000000, 0x00000000), cpuid_leaf!(0x8000001C, 0x00000000, 0x00000000, 0x00000000, 0x00000000), @@ -581,12 +592,9 @@ fn cpuid_from_vmm_cpu_platform( cpuid_subleaf!( 0x8000001D, 0x3, 0x00000163, 0x03C0003F, 0x00007FFF, 0x00000001 ), - cpuid_subleaf!( - 0x8000001D, 0x4, 0x00000000, 0x00000000, 0x00000000, 0x00000000 - ), cpuid_leaf!(0x8000001E, 0x00000000, 0x00000100, 0x00000000, 0x00000000), cpuid_leaf!(0x8000001F, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - cpuid_leaf!(0x80000021, 0x0000002D, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x80000021, 0x00000045, 0x00000000, 0x00000000, 0x00000000), ]; let cpuid = match platform { From 5cf7b9c2a6a1e649860526f81204e279d04bec27 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 6 Aug 2025 20:21:21 +0000 Subject: [PATCH 21/42] and map the CPU platform "Turin" to all Turin sled CPU types --- nexus/db-model/src/vmm_cpu_platform.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/nexus/db-model/src/vmm_cpu_platform.rs b/nexus/db-model/src/vmm_cpu_platform.rs index d71f0284269..2fffcf3c80d 100644 --- a/nexus/db-model/src/vmm_cpu_platform.rs +++ b/nexus/db-model/src/vmm_cpu_platform.rs @@ -38,10 +38,14 @@ impl VmmCpuPlatform { // Milan-based instances can run on both Milan and Turin processors. // Turin and Turin Dense are equally viable from a features // perspective. - Self::AmdMilan => { - Some(&[SledCpuFamily::AmdMilan, SledCpuFamily::AmdTurin, SledCpuFamily::AmdTurinDense]) + Self::AmdMilan => Some(&[ + SledCpuFamily::AmdMilan, + SledCpuFamily::AmdTurin, + SledCpuFamily::AmdTurinDense, + ]), + Self::AmdTurin => { + Some(&[SledCpuFamily::AmdTurin, SledCpuFamily::AmdTurinDense]) } - Self::AmdTurin => Some(&[SledCpuFamily::AmdTurin, SledCpuFamily::AmdTurinDense]), // VMMs get the "sled default" CPU platform when an instance starts // up on a sled that hasn't reported a well-known CPU family. 
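To illustrate how the family-compatibility mapping above is meant to be consumed, here is a minimal sketch of filtering candidate sleds by reported CPU family. This is not the actual Nexus placement code: the helper, its argument types, and the treatment of `None` as "no constraint" are assumptions for illustration only.

// Hypothetical helper: keep only the sleds whose reported CPU family is
// acceptable for the requested VMM CPU platform.
fn sleds_for_platform(
    candidates: Vec<(uuid::Uuid, SledCpuFamily)>,
    platform: VmmCpuPlatform,
) -> Vec<uuid::Uuid> {
    match platform.compatible_sled_cpu_families() {
        // Assumed: no family list means the platform places no constraint.
        None => candidates.into_iter().map(|(id, _)| id).collect(),
        Some(families) => candidates
            .into_iter()
            .filter(|(_, family)| families.contains(family))
            .map(|(id, _)| id)
            .collect(),
    }
}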
Assume From a877f390401ca15b680bc97801814c2093f296f7 Mon Sep 17 00:00:00 2001 From: iximeow Date: Thu, 7 Aug 2025 18:58:59 +0000 Subject: [PATCH 22/42] one use of SledCpuFamily i missed in the rebase --- nexus/tests/integration_tests/instances.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index c5ff7d82779..97b5eea0d6a 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -6242,7 +6242,7 @@ async fn test_can_start_instance_with_cpu_platform( Some(nexus_address), Some(&camino::Utf8Path::new("/an/unused/update/directory")), omicron_sled_agent::sim::ZpoolConfig::None, - nexus_client::types::SledCpuFamily::AmdTurin, + nexus_sled_agent_shared::inventory::SledCpuFamily::AmdTurin, ); // We have to hold on to the new simulated sled-agent otherwise it will be immediately dropped From 1d34ab2522c8f55c079e57f139c3d3520bb54dd1 Mon Sep 17 00:00:00 2001 From: iximeow Date: Thu, 7 Aug 2025 20:48:05 +0000 Subject: [PATCH 23/42] revert the buildomat log collection changes --- .github/buildomat/jobs/deploy.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/buildomat/jobs/deploy.sh b/.github/buildomat/jobs/deploy.sh index 2841dc26811..58d6618e8c0 100755 --- a/.github/buildomat/jobs/deploy.sh +++ b/.github/buildomat/jobs/deploy.sh @@ -9,11 +9,11 @@ #: "%/pool/ext/*/crypt/zone/oxz_*/root/var/svc/log/oxide-*.log*", #: "%/pool/ext/*/crypt/zone/oxz_*/root/var/svc/log/system-illumos-*.log*", #: "%/pool/ext/*/crypt/zone/oxz_ntp_*/root/var/log/chrony/*.log*", -#: "%/pool/ext/*/crypt/zone/oxz_propolis-server_*/root/var/svc/log/*.log*", +#: "!/pool/ext/*/crypt/zone/oxz_propolis-server_*/root/var/svc/log/*.log*", #: "%/pool/ext/*/crypt/debug/global/oxide-sled-agent:default.log.*", #: "%/pool/ext/*/crypt/debug/oxz_*/oxide-*.log.*", #: "%/pool/ext/*/crypt/debug/oxz_*/system-illumos-*.log.*", -#: "%/pool/ext/*/crypt/debug/oxz_propolis-server_*/*.log.*", +#: "!/pool/ext/*/crypt/debug/oxz_propolis-server_*/*.log.*", #: "/tmp/kstat/*.kstat" #: ] #: skip_clone = true From 4275594e500b6868bf990778b78a823c73865f6e Mon Sep 17 00:00:00 2001 From: iximeow Date: Thu, 28 Aug 2025 23:30:45 +0000 Subject: [PATCH 24/42] more(!) RFD 341 errors * leaf 1 should *not* have x2APIC (yet) * leaf 7 EDX bit 4 indicates FSRM, should be set --- nexus/src/app/instance_platform.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nexus/src/app/instance_platform.rs b/nexus/src/app/instance_platform.rs index fd24ec57fe6..64776f8c35b 100644 --- a/nexus/src/app/instance_platform.rs +++ b/nexus/src/app/instance_platform.rs @@ -534,11 +534,11 @@ fn cpuid_from_vmm_cpu_platform( // gnarly details. 
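As an aid to reading the packed-hex tables that follow: CPUID leaf 0 carries the vendor string as ASCII packed into EBX, EDX, and ECX. A small illustrative sketch (not part of the patch) of recovering it from a `CpuidEntry` as defined here:

// For leaf 0 in the table below, ebx/edx/ecx are 0x68747541, 0x69746E65,
// 0x444D4163; read in EBX, EDX, ECX order as little-endian bytes they spell
// "AuthenticAMD".
fn vendor_string(entry: &CpuidEntry) -> String {
    let mut bytes = Vec::with_capacity(12);
    bytes.extend_from_slice(&entry.ebx.to_le_bytes());
    bytes.extend_from_slice(&entry.edx.to_le_bytes());
    bytes.extend_from_slice(&entry.ecx.to_le_bytes());
    String::from_utf8_lossy(&bytes).into_owned()
}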
const MILAN_CPUID: [CpuidEntry; 32] = [ cpuid_leaf!(0x0, 0x0000000D, 0x68747541, 0x444D4163, 0x69746E65), - cpuid_leaf!(0x1, 0x00A00F11, 0x00000800, 0xF6F83203, 0x078BFBFF), + cpuid_leaf!(0x1, 0x00A00F11, 0x00000800, 0xF6D83203, 0x078BFBFF), cpuid_leaf!(0x5, 0x00000000, 0x00000000, 0x00000000, 0x00000000), cpuid_leaf!(0x6, 0x00000004, 0x00000000, 0x00000000, 0x00000000), cpuid_subleaf!( - 0x7, 0x0, 0x00000000, 0x219803A9, 0x00000600, 0x00000000 + 0x7, 0x0, 0x00000000, 0x219803A9, 0x00000600, 0x00000010 ), cpuid_subleaf!( 0xB, 0x0, 0x00000001, 0x00000002, 0x00000100, 0x00000000 From c686acf0c34258c8c60d1352626689d5e3e423d4 Mon Sep 17 00:00:00 2001 From: iximeow Date: Fri, 29 Aug 2025 00:48:39 +0000 Subject: [PATCH 25/42] move all the cpuid stuff, but it something is not great with the new dependency --- Cargo.lock | 9 + Cargo.toml | 1 + nexus/Cargo.toml | 1 + .../src/app/instance_platform/cpu_platform.rs | 585 ++++++++++++++++++ .../mod.rs} | 100 +-- 5 files changed, 601 insertions(+), 95 deletions(-) create mode 100644 nexus/src/app/instance_platform/cpu_platform.rs rename nexus/src/app/{instance_platform.rs => instance_platform/mod.rs} (82%) diff --git a/Cargo.lock b/Cargo.lock index ff5e1ac1815..45e3e335d12 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7884,6 +7884,7 @@ dependencies = [ "qorb", "rand 0.8.5", "range-requests", + "raw-cpuid", "rcgen", "ref-cast", "regex", @@ -10759,6 +10760,14 @@ dependencies = [ "unicode-width 0.2.0", ] +[[package]] +name = "raw-cpuid" +version = "11.5.0" +source = "git+https://github.com/oxidecomputer/rust-cpuid?rev=be8fc446862b548e0e6558c56439a46ba459c1c7#be8fc446862b548e0e6558c56439a46ba459c1c7" +dependencies = [ + "bitflags 2.9.1", +] + [[package]] name = "rayon" version = "1.10.0" diff --git a/Cargo.toml b/Cargo.toml index f09163a5aa2..c8edddab50b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -647,6 +647,7 @@ rand_distr = "0.4.3" rand_seeder = "0.3.0" range-requests = { path = "range-requests" } ratatui = "0.29.0" +raw-cpuid = { git = "https://github.com/oxidecomputer/rust-cpuid", rev = "be8fc446862b548e0e6558c56439a46ba459c1c7" } rayon = "1.10" rcgen = "0.12.1" reconfigurator-cli = { path = "dev-tools/reconfigurator-cli" } diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index ffe813f80e6..8073d9b318b 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -164,6 +164,7 @@ oximeter-collector.workspace = true pem.workspace = true petgraph.workspace = true pretty_assertions.workspace = true +raw-cpuid.workspace = true rcgen.workspace = true regex.workspace = true similar-asserts.workspace = true diff --git a/nexus/src/app/instance_platform/cpu_platform.rs b/nexus/src/app/instance_platform/cpu_platform.rs new file mode 100644 index 00000000000..97d57ebfc33 --- /dev/null +++ b/nexus/src/app/instance_platform/cpu_platform.rs @@ -0,0 +1,585 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use sled_agent_client::types::CpuidEntry; +use raw_cpuid::{ + ApmInfo, CpuIdDump, CpuIdReader, CpuIdResult, CpuIdWriter, ExtendedFeatureIdentification2, + ExtendedFeatures, ExtendedProcessorFeatureIdentifiers, ExtendedState, ExtendedStateInfo, + ExtendedTopologyLevel, FeatureInfo, L1CacheTlbInfo, L2And3CacheTlbInfo, + PerformanceOptimizationInfo, ProcessorCapacityAndFeatureInfo, ProcessorTopologyInfo, + ThermalPowerInfo, Tlb1gbPageInfo, Vendor, VendorInfo, +}; + +macro_rules! 
cpuid_leaf { + ($leaf:literal, $eax:literal, $ebx:literal, $ecx:literal, $edx:literal) => { + CpuidEntry { + leaf: $leaf, + subleaf: None, + eax: $eax, + ebx: $ebx, + ecx: $ecx, + edx: $edx, + } + }; +} + +macro_rules! cpuid_subleaf { + ($leaf:literal, $sl:literal, $eax:literal, $ebx:literal, $ecx:literal, $edx:literal) => { + CpuidEntry { + leaf: $leaf, + subleaf: Some($sl), + eax: $eax, + ebx: $ebx, + ecx: $ecx, + edx: $edx, + } + }; +} + +const MILAN_CPUID: [CpuidEntry; 32] = [ + cpuid_leaf!(0x0, 0x0000000D, 0x68747541, 0x444D4163, 0x69746E65), + cpuid_leaf!(0x1, 0x00A00F11, 0x00000800, 0xF6D83203, 0x078BFBFF), + cpuid_leaf!(0x5, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x6, 0x00000004, 0x00000000, 0x00000000, 0x00000000), + cpuid_subleaf!(0x7, 0x0, 0x00000000, 0x219803A9, 0x00000600, 0x00000010), + cpuid_subleaf!(0xB, 0x0, 0x00000001, 0x00000002, 0x00000100, 0x00000000), + cpuid_subleaf!(0xB, 0x1, 0x00000000, 0x00000000, 0x00000201, 0x00000000), + cpuid_subleaf!(0xB, 0x2, 0x00000000, 0x00000000, 0x00000002, 0x00000000), + cpuid_subleaf!(0xD, 0x0, 0x00000007, 0x00000340, 0x00000340, 0x00000000), + cpuid_subleaf!(0xD, 0x1, 0x00000007, 0x00000340, 0x00000000, 0x00000000), + cpuid_subleaf!(0xD, 0x2, 0x00000100, 0x00000240, 0x00000000, 0x00000000), + cpuid_leaf!(0x80000000, 0x80000021, 0x68747541, 0x444D4163, 0x69746E65), + // ecx bit 23 should be flipped true at some point, but is currently + // hidden and will continue to be for the moment. + // ecx bit 3 should be masked, but is is not and advertises support for + // unsupported extensions to LAPIC space. + // + // RFD 314 talks about these bits more, but we currently allow them to + // be wrong as they have been wrong before and we'll get to them + // individually later. + cpuid_leaf!(0x80000001, 0x00A00F11, 0x40000000, 0x444001F1, 0x27D3FBFF), + cpuid_leaf!(0x80000002, 0x20444D41, 0x43595045, 0x31373720, 0x36205033), + cpuid_leaf!(0x80000003, 0x6F432D34, 0x50206572, 0x65636F72, 0x726F7373), + cpuid_leaf!(0x80000004, 0x20202020, 0x20202020, 0x20202020, 0x00202020), + cpuid_leaf!(0x80000005, 0xFF40FF40, 0xFF40FF40, 0x20080140, 0x20080140), + cpuid_leaf!(0x80000006, 0x48002200, 0x68004200, 0x02006140, 0x08009140), + cpuid_leaf!(0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000100), + cpuid_leaf!(0x80000008, 0x00003030, 0x00000205, 0x00000000, 0x00000000), + cpuid_leaf!(0x8000000A, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x80000019, 0xF040F040, 0xF0400000, 0x00000000, 0x00000000), + cpuid_leaf!(0x8000001A, 0x00000006, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x8000001B, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x8000001C, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cpuid_subleaf!(0x8000001D, 0x0, 0x00000121, 0x01C0003F, 0x0000003F, 0x00000000), + cpuid_subleaf!(0x8000001D, 0x1, 0x00000122, 0x01C0003F, 0x0000003F, 0x00000000), + cpuid_subleaf!(0x8000001D, 0x2, 0x00000143, 0x01C0003F, 0x000003FF, 0x00000002), + cpuid_subleaf!(0x8000001D, 0x3, 0x00000163, 0x03C0003F, 0x00007FFF, 0x00000001), + cpuid_leaf!(0x8000001E, 0x00000000, 0x00000100, 0x00000000, 0x00000000), + cpuid_leaf!(0x8000001F, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x80000021, 0x00000045, 0x00000000, 0x00000000, 0x00000000), +]; + +/// The Platonic ideal Milan. This is what we would "like" to define as "The Milan vCPU platform" +/// absent any other constraints. 
This is a slightly slimmer version of the Milan platform defined +/// in RFD 314, with justifications there. +/// +/// Notably, this avoids describing individual processor SKUs' characteristics, where possible. +/// This CPUID configuration as-is is untested; guests may not boot, this may be too reductive, +/// etc. +fn milan_ideal() -> CpuIdDump { + let mut bits = CpuIdDump::new(); + let mut cpuid = raw_cpuid::CpuId::with_cpuid_reader(bits); + let mut leaf = VendorInfo::amd(); + cpuid.set_vendor_info(Some(leaf)); + cpuid.set_extended_function_info(Some(leaf)); + + let mut leaf = FeatureInfo::new(Vendor::Amd); + + // Set up EAX: Family 19h model 1h. + leaf.set_extended_family_id(0x00); + leaf.set_extended_family_id(0xA); + leaf.set_base_family_id(0x0F); + leaf.set_base_model_id(0x01); + leaf.set_stepping_id(0x01); + + // Set up EBX + leaf.set_brand_index(0); + leaf.set_cflush_cache_line_size(8); // TODO: BREAKING + leaf.set_initial_local_apic_id(0); // Populated dynamically in a real system. + leaf.set_max_logical_processor_ids(0); // Populated dynamically in a real system. + + // Set up ECX + leaf.set_sse3(true); + leaf.set_pclmulqdq(true); + leaf.set_ds_area(false); + leaf.set_monitor_mwait(false); + + leaf.set_cpl(false); + leaf.set_vmx(false); + leaf.set_smx(false); + leaf.set_eist(false); + + leaf.set_tm2(false); + leaf.set_ssse3(true); + leaf.set_cnxtid(false); + // bit 11 is reserved + + leaf.set_fma(true); + leaf.set_cmpxchg16b(true); + // bit 14 is reserved + leaf.set_pdcm(false); + + //bit 16 is reserved + leaf.set_pcid(false); + leaf.set_dca(false); + leaf.set_sse41(true); + + leaf.set_sse42(true); + leaf.set_x2apic(false); + leaf.set_movbe(true); + leaf.set_popcnt(true); + + leaf.set_tsc_deadline(false); + leaf.set_aesni(true); + leaf.set_xsave(true); + leaf.set_oxsave(false); // managed dynamically in practice + + leaf.set_avx(true); + leaf.set_f16c(true); + leaf.set_rdrand(true); + leaf.set_hypervisor(true); // This CPUID leaf will be presented to hypervisor guests + + // Set up EDX + leaf.set_fpu(true); + leaf.set_vme(true); + leaf.set_de(true); + leaf.set_pse(true); + + leaf.set_tsc(true); + leaf.set_msr(true); + leaf.set_pae(true); + leaf.set_mce(true); + + leaf.set_cmpxchg8b(true); + leaf.set_apic(true); + // bit 10 is reserved + leaf.set_sysenter_sysexit(true); + + leaf.set_mtrr(true); + leaf.set_pge(true); + leaf.set_mca(true); + leaf.set_cmov(true); + + leaf.set_pat(true); + leaf.set_pse36(true); + // bit 18 is reserved + leaf.set_clflush(true); + + // bit 20 is reserved + // bit 21 is reserved + // bit 22 is reserved + leaf.set_mmx(true); + + leaf.set_fxsave_fxstor(true); + leaf.set_sse(true); + leaf.set_sse2(true); + // bit 27 is reserved + + leaf.set_htt(false); // managed dynamically in practice + // bits 29-31 are not used here. + + cpuid.set_feature_info(Some(leaf)); + + // Leaf 2, 3, 4: all skipped on AMD + + // Leaf 5: Monitor and MWait. All zero here. + cpuid.set_monitor_mwait_info(None); + + // Leaf 6: Power management and some feature bits. Power management is all zeroed. 
+ let mut leaf = ThermalPowerInfo::empty(); + leaf.set_arat(true); + leaf.set_hw_coord_feedback(false); + + cpuid.set_thermal_power_info(Some(leaf)); + + // Leaf 7: Extended features + let mut leaf = ExtendedFeatures::new(); + leaf.set_fsgsbase(true); + leaf.set_tsc_adjust_msr(false); + leaf.set_sgx(false); + leaf.set_bmi1(true); + + leaf.set_hle(false); + leaf.set_avx2(true); + leaf.set_fdp(false); + leaf.set_smep(true); + + leaf.set_bmi2(true); + leaf.set_rep_movsb_stosb(true); // Also known as "ERMS". + leaf.set_invpcid(false); + // Bit 11 is reserved on AMD + + // PQM (bit 12) is clear here. TODO: no nice helper to set false yet. + // Bit 13 is reserved on AMD + // Bit 14 is reserved on AMD + // Bit 15 is reserved on AMD + + leaf.set_avx512f(false); + leaf.set_avx512dq(false); + leaf.set_rdseed(true); + leaf.set_adx(true); + + leaf.set_smap(true); + leaf.set_avx512_ifma(false); + // Bit 22 is reserved on AMD + leaf.set_clflushopt(true); + + leaf.set_clwb(true); + // Bit 25 is reserved on AMD + // Bit 26 is reserved on AMD + // Bit 27 is reserved on AMD + + leaf.set_avx512cd(false); + leaf.set_sha(true); + leaf.set_avx512bw(false); + leaf.set_avx512vl(false); + + // Set up leaf 7 ECX + + // Bit 0 is reserved on AMD + leaf.set_avx512vbmi(false); + leaf.set_umip(false); + leaf.set_pku(false); + + leaf.set_ospke(false); + // Bit 5 is reserved on AMD + leaf.set_avx512vbmi2(false); + leaf.set_cet_ss(false); + + leaf.set_gfni(false); // TODO: Not on Milan? Really?? + leaf.set_vaes(true); + leaf.set_vpclmulqdq(true); + leaf.set_avx512vnni(false); + + leaf.set_avx512bitalg(false); + // Bit 13 is reserved on AMD + leaf.set_avx512vpopcntdq(false); + // Bit 15 is reserved on AMD + + // Bits 16 through 31 are either reserved or zero on Milan. + + // Set up leaf 7 EDX + leaf.set_fsrm(true); + cpuid.set_extended_feature_info(Some(leaf)); + + // Hide extended topology info (leaf Bh) + cpuid.set_extended_topology_info(None); + + // TODO: kind of gross to have to pass an empty `CpuIdDump` here... + let mut state = ExtendedStateInfo::empty(CpuIdDump::new()); + state.set_xcr0_supports_legacy_x87(true); + state.set_xcr0_supports_sse_128(true); + state.set_xcr0_supports_avx_256(true); + state.set_xsave_area_size_enabled_features(0x340); // Populated dynamically in a real system. + state.set_xsave_area_size_supported_features(0x340); + + state.set_xsaveopt(true); + state.set_xsavec(true); + state.set_xgetbv(true); + state.set_xsave_size(0x340); + + let mut leaves = state.into_leaves().to_vec(); + let mut ymm_state = ExtendedState::empty(); + ymm_state.set_size(0x100); + ymm_state.set_offset(0x240); + leaves.push(Some(ymm_state.into_leaf())); + + cpuid.set_extended_state_info(Some(&leaves[..])); + + let mut leaf = ExtendedProcessorFeatureIdentifiers::empty(Vendor::Amd); + // This is the same as the leaf 1 EAX configured earlier. + leaf.set_extended_signature(0x00A00F11); + + // Set up EBX + leaf.set_pkg_type(0x4); + + // Set up ECX + leaf.set_lahf_sahf(true); + leaf.set_cmp_legacy(false); + leaf.set_svm(false); + leaf.set_ext_apic_space(false); + + leaf.set_alt_mov_cr8(true); + leaf.set_lzcnt(true); + leaf.set_sse4a(true); + leaf.set_misaligned_sse_mode(true); + + leaf.set_prefetchw(true); + leaf.set_osvw(false); // May be set in hardware, hopefully can hide hardware errata from guests + leaf.set_ibs(false); + leaf.set_xop(false); + + leaf.set_skinit(false); + leaf.set_wdt(false); + // Bit 15 is reserved here. 
+ leaf.set_lwp(false); + + leaf.set_fma4(false); // Not on Milan + + // Bits 17-19 are reserved + + // Bit 20 is reserved + // Bit 21 is reserved, formerly TBM + leaf.set_topology_extensions(true); + leaf.set_perf_cntr_extensions(true); + + leaf.set_nb_perf_cntr_extensions(false); + // Bit 25 is reserved + leaf.set_data_access_bkpt_extension(true); + leaf.set_perf_tsc(false); + + leaf.set_perf_cntr_llc_extensions(false); + leaf.set_monitorx_mwaitx(false); + leaf.set_addr_mask_extension(true); + // Bit 31 is reserved + + // Set up EDX + leaf.set_syscall_sysret(true); + leaf.set_execute_disable(true); + leaf.set_mmx_extensions(true); + leaf.set_fast_fxsave_fxstor(true); + leaf.set_1gib_pages(true); + leaf.set_rdtscp(true); + leaf.set_64bit_mode(true); + + cpuid.set_extended_processor_and_feature_identifiers(Some(leaf)); + + // Leaves 8000_0002 through 8000_0005 + cpuid.set_processor_brand_string(Some(b"AMD EPYC 7713P 64-Core Processor")); + + // Hide L1 cache+TLB info (leaf 8000_0005h) + cpuid.set_l1_cache_and_tlb_info(None); + + // Hide L2 and L3 cache+TLB info (leaf 8000_0006h) + cpuid.set_l2_l3_cache_and_tlb_info(None); + + // Set up advanced power management info (leaf 8000_0007h) + let mut leaf = ApmInfo::empty(); + leaf.set_invariant_tsc(true); + cpuid.set_advanced_power_mgmt_info(Some(leaf)); + + // Set up processor capacity info (leaf 8000_0008h) + let mut leaf = ProcessorCapacityAndFeatureInfo::empty(); + + // Set up leaf 8000_0008 EAX + leaf.set_physical_address_bits(0x30); // TODO: BREAKING + leaf.set_linear_address_bits(0x30); // TODO: BREAKING + leaf.set_guest_physical_address_bits(0); // TODO: BREAKING + + // Set up leaf 8000_0008 EBX + leaf.set_cl_zero(true); + leaf.set_restore_fp_error_ptrs(true); + leaf.set_wbnoinvd(true); + + leaf.set_num_phys_threads(1); // Populated dynamically in a real system. + leaf.set_apic_id_size(0); + leaf.set_perf_tsc_size(0); + + leaf.set_invlpgb_max_pages(0); // TODO: BREAKING + leaf.set_max_rdpru_id(0); // TODO: BREAKING + + cpuid.set_processor_capacity_feature_info(Some(leaf)); + + // Leaf 8000_000Ah is zeroed out for guests. + cpuid.set_svm_info(None); + + // Hide TLB information for 1GiB pages (leaf 8000_0019h) + cpuid.set_tlb_1gb_page_info(None); + + // Set up processor optimization info (leaf 8000_001Ah) + let mut leaf = PerformanceOptimizationInfo::empty(); + leaf.set_movu(true); // TODO: BREAKING + leaf.set_fp256(true); // TODO: BREAKINGISH? + cpuid.set_performance_optimization_info(Some(leaf)); + + // Leaf 8000_001B + // TODO: no support for leaf 8000_001B, but zero is what we wanted. + // Leaf 8000_001C + // TODO: no support for leaf 8000_001C, but zero is what we wanted. + + // Hide extended cache topology as well (Leaf 8000_001D) + cpuid.set_extended_cache_parameters(None); + + let mut leaf = ProcessorTopologyInfo::empty(); + leaf.set_threads_per_core(2); + cpuid.set_processor_topology_info(Some(leaf)); + + cpuid.set_memory_encryption_info(None); + + let mut leaf = ExtendedFeatureIdentification2::empty(); + leaf.set_no_nested_data_bp(true); // TODO: BREAKING + leaf.set_lfence_always_serializing(true); // TODO: BREAKING + leaf.set_null_select_clears_base(true); // TODO: BREAKING + cpuid.set_extended_feature_identification_2(Some(leaf)); + + cpuid.into_source() +} + +pub fn milan_rfd314() -> CpuIdDump { + // This is the Milan we'd "want" to expose, absent any other constraints.
+ let mut baseline = milan_ideal(); + + let mut cpuid = raw_cpuid::CpuId::with_cpuid_reader(baseline); + + let mut leaf = cpuid.get_extended_feature_info(); + + // RFD 314 describes the circumstances around RDSEED, but it is not currently available. + leaf.set_rdseed(false); + + cpuid.set_extended_feature_info(Some(leaf)); + + let mut leaf = cpuid.get_extended_processor_and_feature_identifiers(); + // RFD 314 describes these leaf 7 wrinkles. + // + // Extended APIC space support was originally provided to guests because the host supports it + // and it was passed through. The extended space is not supported in Bhyve, but we leave it set + // here to not change it from under guests. + // + // Bhyve now supports all six performance counters, so we could set the perf counter extension + // bit here, but again it is left as-is to not change CPUID from under a guest. + leaf.set_ext_apic_space(true); + leaf.set_perf_cntr_extensions(false); + + cpuid.set_extended_processor_and_feature_identifiers(Some(leaf)); + + // Set up extended topology info (leaf Bh) + let mut levels = Vec::new(); + + let mut topo_level1 = ExtendedTopologyLevel::empty(); + // EAX + // These perhaps should be dynamic based on SMT or no? + topo_level1.set_shift_right_for_next_apic_id(1); + // EBX + topo_level1.set_processors(2); + // ECX + topo_level1.set_level_number(0); + topo_level1.set_level_type(1); // If there's no SMT, there should be no SMT right..? + + levels.push(topo_level1); + + let mut topo_level2 = ExtendedTopologyLevel::empty(); + // ECX + topo_level2.set_level_number(1); + topo_level2.set_level_type(2); + + levels.push(topo_level2); + + let mut topo_level3 = ExtendedTopologyLevel::empty(); + // ECX + topo_level3.set_level_number(2); + topo_level3.set_level_type(0); // This level is invalid. + + levels.push(topo_level3); + cpuid.set_extended_topology_info(Some(levels.as_slice())); + + // VMs on Milan currently get brand string and cache topology information from the host + // processor, so replicate it to minimize changes for now. 
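For orientation before the brand-string and cache leaves below: the brand string handed to guests lives in leaves 8000_0002h through 8000_0004h as 48 bytes of packed ASCII (EAX, EBX, ECX, EDX per leaf, little-endian). An illustrative decoder, not part of the patch, using the `CpuidEntry` shape defined above:

// Applied to the 8000_0002h..8000_0004h values in the MILAN_CPUID table, this
// yields "AMD EPYC 7713P 64-Core Processor" (space/NUL padded to 48 bytes).
fn brand_string(leaves: &[CpuidEntry]) -> String {
    let mut bytes = Vec::with_capacity(48);
    for entry in leaves {
        for reg in [entry.eax, entry.ebx, entry.ecx, entry.edx] {
            bytes.extend_from_slice(&reg.to_le_bytes());
        }
    }
    String::from_utf8_lossy(&bytes)
        .trim_matches(|c| c == '\0' || c == ' ')
        .to_string()
}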
+ + // Leaves 8000_0002 through 8000_0005 + cpuid.set_processor_brand_string(Some(b"AMD EPYC 7713P 64-Core Processor")); + + // Set up L1 cache+TLB info (leaf 8000_0005h) + let mut leaf = L1CacheTlbInfo::empty(); + + leaf.set_itlb_2m_4m_size(0x40); + leaf.set_itlb_2m_4m_associativity(0xff); + leaf.set_dtlb_2m_4m_size(0x40); + leaf.set_dtlb_2m_4m_associativity(0xff); + + leaf.set_itlb_4k_size(0x40); + leaf.set_itlb_4k_associativity(0xff); + leaf.set_dtlb_4k_size(0x40); + leaf.set_dtlb_4k_associativity(0xff); + + leaf.set_dcache_line_size(0x40); + leaf.set_dcache_lines_per_tag(0x01); + leaf.set_dcache_associativity(0x08); + leaf.set_dcache_size(0x20); + + leaf.set_icache_line_size(0x40); + leaf.set_icache_lines_per_tag(0x01); + leaf.set_icache_associativity(0x08); + leaf.set_icache_size(0x20); + + cpuid.set_l1_cache_and_tlb_info(Some(leaf)); + + // Set up L2 and L3 cache+TLB info (leaf 8000_0006h) + let mut leaf = L2And3CacheTlbInfo::empty(); + + // Set up leaf 8000_0006h EAX + leaf.set_itlb_2m_4m_size(0x200); + leaf.set_itlb_2m_4m_associativity(0x2); + leaf.set_dtlb_2m_4m_size(0x800); + leaf.set_dtlb_2m_4m_associativity(0x4); + + // Set up leaf 8000_0006h EBX + leaf.set_itlb_4k_size(0x200); + leaf.set_itlb_4k_associativity(0x4); + leaf.set_dtlb_4k_size(0x800); + leaf.set_dtlb_4k_associativity(0x6); + + // Set up leaf 8000_0006h ECX + leaf.set_l2cache_line_size(0x40); + leaf.set_l2cache_lines_per_tag(0x1); + leaf.set_l2cache_associativity(0x6); + leaf.set_l2cache_size(0x0200); + + // Set up leaf 8000_0006h EDX + leaf.set_l3cache_line_size(0x40); + leaf.set_l3cache_lines_per_tag(0x1); + leaf.set_l3cache_associativity(0x9); + leaf.set_l3cache_size(0x0200); + + cpuid.set_l2_l3_cache_and_tlb_info(Some(leaf)); + + // Set up TLB information for 1GiB pages (leaf 8000_0019h) + let mut leaf = Tlb1gbPageInfo::empty(); + leaf.set_dtlb_l1_1gb_associativity(0xF); + leaf.set_dtlb_l1_1gb_size(0x40); + leaf.set_itlb_l1_1gb_associativity(0xF); + leaf.set_itlb_l1_1gb_size(0x40); + leaf.set_dtlb_l2_1gb_associativity(0xF); + leaf.set_dtlb_l2_1gb_size(0x40); + leaf.set_itlb_l2_1gb_associativity(0); + leaf.set_itlb_l2_1gb_size(0); + cpuid.set_tlb_1gb_page_info(Some(leaf)); + + // Set up extended cache hierarchy info (leaf 8000_001Dh) + let mut levels = Vec::new(); + levels.push(CpuIdResult { + eax: 0x00000121, + ebx: 0x01C0003F, + ecx: 0x0000003F, + edx: 0x00000000, + }); + levels.push(CpuIdResult { + eax: 0x00000122, + ebx: 0x01C0003F, + ecx: 0x0000003F, + edx: 0x00000000, + }); + levels.push(CpuIdResult { + eax: 0x00000143, + ebx: 0x01C0003F, + ecx: 0x000003FF, + edx: 0x00000002, + }); + levels.push(CpuIdResult { + eax: 0x00000163, + ebx: 0x03C0003F, + ecx: 0x00007FFF, + edx: 0x00000001, + }); + cpuid.set_extended_cache_parameters(Some(levels.as_slice())); +} diff --git a/nexus/src/app/instance_platform.rs b/nexus/src/app/instance_platform/mod.rs similarity index 82% rename from nexus/src/app/instance_platform.rs rename to nexus/src/app/instance_platform/mod.rs index 64776f8c35b..b4c4362234a 100644 --- a/nexus/src/app/instance_platform.rs +++ b/nexus/src/app/instance_platform/mod.rs @@ -67,6 +67,9 @@ //! backends, this is easily done by using component IDs as backend names, as //! described above. +// CPU platforms are broken out only because they're wordy. 
+mod cpu_platform; + use std::collections::{BTreeMap, HashMap}; use crate::app::instance::InstanceRegisterReason; @@ -78,7 +81,7 @@ use omicron_common::api::external::Error; use omicron_common::api::internal::shared::NetworkInterface; use sled_agent_client::types::{ BlobStorageBackend, Board, BootOrderEntry, BootSettings, Chipset, - ComponentV0, Cpuid, CpuidEntry, CpuidVendor, CrucibleStorageBackend, + ComponentV0, Cpuid, CpuidVendor, CrucibleStorageBackend, I440Fx, InstanceSpecV0, NvmeDisk, PciPath, QemuPvpanic, SerialPort, SerialPortNumber, SpecKey, VirtioDisk, VirtioNetworkBackend, VirtioNic, VmmSpec, @@ -504,104 +507,11 @@ impl super::Nexus { fn cpuid_from_vmm_cpu_platform( platform: db::model::VmmCpuPlatform, ) -> Option { - macro_rules! cpuid_leaf { - ($leaf:literal, $eax:literal, $ebx:literal, $ecx:literal, $edx:literal) => { - CpuidEntry { - leaf: $leaf, - subleaf: None, - eax: $eax, - ebx: $ebx, - ecx: $ecx, - edx: $edx, - } - }; - } - - macro_rules! cpuid_subleaf { - ($leaf:literal, $sl:literal, $eax:literal, $ebx:literal, $ecx:literal, $edx:literal) => { - CpuidEntry { - leaf: $leaf, - subleaf: Some($sl), - eax: $eax, - ebx: $ebx, - ecx: $ecx, - edx: $edx, - } - }; - } - - // See [RFD 314](https://314.rfd.oxide.computer/) section 6 for all the - // gnarly details. - const MILAN_CPUID: [CpuidEntry; 32] = [ - cpuid_leaf!(0x0, 0x0000000D, 0x68747541, 0x444D4163, 0x69746E65), - cpuid_leaf!(0x1, 0x00A00F11, 0x00000800, 0xF6D83203, 0x078BFBFF), - cpuid_leaf!(0x5, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - cpuid_leaf!(0x6, 0x00000004, 0x00000000, 0x00000000, 0x00000000), - cpuid_subleaf!( - 0x7, 0x0, 0x00000000, 0x219803A9, 0x00000600, 0x00000010 - ), - cpuid_subleaf!( - 0xB, 0x0, 0x00000001, 0x00000002, 0x00000100, 0x00000000 - ), - cpuid_subleaf!( - 0xB, 0x1, 0x00000000, 0x00000000, 0x00000201, 0x00000000 - ), - cpuid_subleaf!( - 0xB, 0x2, 0x00000000, 0x00000000, 0x00000002, 0x00000000 - ), - cpuid_subleaf!( - 0xD, 0x0, 0x00000007, 0x00000340, 0x00000340, 0x00000000 - ), - cpuid_subleaf!( - 0xD, 0x1, 0x00000007, 0x00000340, 0x00000000, 0x00000000 - ), - cpuid_subleaf!( - 0xD, 0x2, 0x00000100, 0x00000240, 0x00000000, 0x00000000 - ), - cpuid_leaf!(0x80000000, 0x80000021, 0x68747541, 0x444D4163, 0x69746E65), - // ecx bit 23 should be flipped true at some point, but is currently - // hidden and will continue to be for the moment. - // ecx bit 3 should be masked, but is is not and advertises support for - // unsupported extensions to LAPIC space. - // - // RFD 314 talks about these bits more, but we currently allow them to - // be wrong as they have been wrong before and we'll get to them - // individually later. 
- cpuid_leaf!(0x80000001, 0x00A00F11, 0x40000000, 0x444001F1, 0x27D3FBFF), - cpuid_leaf!(0x80000002, 0x20444D41, 0x43595045, 0x31373720, 0x36205033), - cpuid_leaf!(0x80000003, 0x6F432D34, 0x50206572, 0x65636F72, 0x726F7373), - cpuid_leaf!(0x80000004, 0x20202020, 0x20202020, 0x20202020, 0x00202020), - cpuid_leaf!(0x80000005, 0xFF40FF40, 0xFF40FF40, 0x20080140, 0x20080140), - cpuid_leaf!(0x80000006, 0x48002200, 0x68004200, 0x02006140, 0x08009140), - cpuid_leaf!(0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000100), - cpuid_leaf!(0x80000008, 0x00003030, 0x00000205, 0x00000000, 0x00000000), - cpuid_leaf!(0x8000000A, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - cpuid_leaf!(0x80000019, 0xF040F040, 0xF0400000, 0x00000000, 0x00000000), - cpuid_leaf!(0x8000001A, 0x00000006, 0x00000000, 0x00000000, 0x00000000), - cpuid_leaf!(0x8000001B, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - cpuid_leaf!(0x8000001C, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - cpuid_subleaf!( - 0x8000001D, 0x0, 0x00000121, 0x01C0003F, 0x0000003F, 0x00000000 - ), - cpuid_subleaf!( - 0x8000001D, 0x1, 0x00000122, 0x01C0003F, 0x0000003F, 0x00000000 - ), - cpuid_subleaf!( - 0x8000001D, 0x2, 0x00000143, 0x01C0003F, 0x000003FF, 0x00000002 - ), - cpuid_subleaf!( - 0x8000001D, 0x3, 0x00000163, 0x03C0003F, 0x00007FFF, 0x00000001 - ), - cpuid_leaf!(0x8000001E, 0x00000000, 0x00000100, 0x00000000, 0x00000000), - cpuid_leaf!(0x8000001F, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - cpuid_leaf!(0x80000021, 0x00000045, 0x00000000, 0x00000000, 0x00000000), - ]; - let cpuid = match platform { db::model::VmmCpuPlatform::SledDefault => return None, db::model::VmmCpuPlatform::AmdMilan | db::model::VmmCpuPlatform::AmdTurin => { - Cpuid { entries: MILAN_CPUID.to_vec(), vendor: CpuidVendor::Amd } + Cpuid { entries: cpu_platform::milan_rfd314(), vendor: CpuidVendor::Amd } } }; From 9348caf3e4d5c460a3a1478ddeb2de932bdf9796 Mon Sep 17 00:00:00 2001 From: iximeow Date: Fri, 29 Aug 2025 01:40:12 +0000 Subject: [PATCH 26/42] update raw-cpuid, genericize ideal Milan, map CpuIdDump to Propolis --- Cargo.lock | 2 +- Cargo.toml | 2 +- nexus/Cargo.toml | 2 +- .../src/app/instance_platform/cpu_platform.rs | 52 +++++++++++++++---- 4 files changed, 44 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 45e3e335d12..5164596a540 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10763,7 +10763,7 @@ dependencies = [ [[package]] name = "raw-cpuid" version = "11.5.0" -source = "git+https://github.com/oxidecomputer/rust-cpuid?rev=be8fc446862b548e0e6558c56439a46ba459c1c7#be8fc446862b548e0e6558c56439a46ba459c1c7" +source = "git+https://github.com/oxidecomputer/rust-cpuid.git?rev=eb65cd77789428ea817288bd86d7f7a1e19f97a4#eb65cd77789428ea817288bd86d7f7a1e19f97a4" dependencies = [ "bitflags 2.9.1", ] diff --git a/Cargo.toml b/Cargo.toml index c8edddab50b..b9d42e48f6d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -647,7 +647,7 @@ rand_distr = "0.4.3" rand_seeder = "0.3.0" range-requests = { path = "range-requests" } ratatui = "0.29.0" -raw-cpuid = { git = "https://github.com/oxidecomputer/rust-cpuid", rev = "be8fc446862b548e0e6558c56439a46ba459c1c7" } +raw-cpuid = { git = "https://github.com/oxidecomputer/rust-cpuid.git", rev = "eb65cd77789428ea817288bd86d7f7a1e19f97a4" } rayon = "1.10" rcgen = "0.12.1" reconfigurator-cli = { path = "dev-tools/reconfigurator-cli" } diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index 8073d9b318b..373c7ddee01 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -132,6 +132,7 @@ 
oxide-tokio-rt.workspace = true oximeter.workspace = true oximeter-instruments = { workspace = true, features = ["http-instruments"] } oximeter-producer.workspace = true +raw-cpuid = { workspace = true, features = ["std"] } rustls = { workspace = true } rustls-pemfile = { workspace = true } update-common.workspace = true @@ -164,7 +165,6 @@ oximeter-collector.workspace = true pem.workspace = true petgraph.workspace = true pretty_assertions.workspace = true -raw-cpuid.workspace = true rcgen.workspace = true regex.workspace = true similar-asserts.workspace = true diff --git a/nexus/src/app/instance_platform/cpu_platform.rs b/nexus/src/app/instance_platform/cpu_platform.rs index 97d57ebfc33..c327d130083 100644 --- a/nexus/src/app/instance_platform/cpu_platform.rs +++ b/nexus/src/app/instance_platform/cpu_platform.rs @@ -4,7 +4,7 @@ use sled_agent_client::types::CpuidEntry; use raw_cpuid::{ - ApmInfo, CpuIdDump, CpuIdReader, CpuIdResult, CpuIdWriter, ExtendedFeatureIdentification2, + ApmInfo, CpuIdDump, CpuIdResult, ExtendedFeatureIdentification2, ExtendedFeatures, ExtendedProcessorFeatureIdentifiers, ExtendedState, ExtendedStateInfo, ExtendedTopologyLevel, FeatureInfo, L1CacheTlbInfo, L2And3CacheTlbInfo, PerformanceOptimizationInfo, ProcessorCapacityAndFeatureInfo, ProcessorTopologyInfo, @@ -88,9 +88,8 @@ const MILAN_CPUID: [CpuidEntry; 32] = [ /// This CPUID configuration as-is is untested; guests may not boot, this may be too reductive, /// etc. fn milan_ideal() -> CpuIdDump { - let mut bits = CpuIdDump::new(); - let mut cpuid = raw_cpuid::CpuId::with_cpuid_reader(bits); - let mut leaf = VendorInfo::amd(); + let mut cpuid = raw_cpuid::CpuId::with_cpuid_reader(CpuIdDump::new()); + let leaf = VendorInfo::amd(); cpuid.set_vendor_info(Some(leaf)); cpuid.set_extended_function_info(Some(leaf)); @@ -186,8 +185,9 @@ fn milan_ideal() -> CpuIdDump { leaf.set_sse2(true); // bit 27 is reserved - leaf.set_htt(false); // managed dynamically in practice - // bits 29-31 are not used here. + // managed dynamically in practice + leaf.set_htt(false); + // bits 29-31 are not used here. cpuid.set_feature_info(Some(leaf)); @@ -356,7 +356,7 @@ fn milan_ideal() -> CpuIdDump { cpuid.set_extended_processor_and_feature_identifiers(Some(leaf)); // Leaves 8000_0002 through 8000_0005 - cpuid.set_processor_brand_string(Some(b"AMD EPYC 7713P 64-Core Processor")); + cpuid.set_processor_brand_string(Some(b"AMD EPYC 7003-like Processor")); // Hide L1 cache+TLB info (leaf 8000_0005h) cpuid.set_l1_cache_and_tlb_info(None); @@ -426,20 +426,20 @@ fn milan_ideal() -> CpuIdDump { cpuid.into_source() } -pub fn milan_rfd314() -> CpuIdDump { +pub fn milan_rfd314() -> Vec { // This is the Milan we'd "want" to expose, absent any other constraints. - let mut baseline = milan_ideal(); + let baseline = milan_ideal(); let mut cpuid = raw_cpuid::CpuId::with_cpuid_reader(baseline); - let mut leaf = cpuid.get_extended_feature_info(); + let mut leaf = cpuid.get_extended_feature_info().expect("baseline Milan defines leaf 1"); // RFD 314 describes the circumstances around RDSEED, but it is not currently available. leaf.set_rdseed(false); cpuid.set_extended_feature_info(Some(leaf)); - let mut leaf = cpuid.get_extended_processor_and_feature_identifiers(); + let mut leaf = cpuid.get_extended_processor_and_feature_identifiers().expect("baseline Milan defines leaf 7"); // RFD 314 describes these leaf 7 wrinkles. 
// // Extended APIC space support was originally provided to guests because the host supports it @@ -582,4 +582,34 @@ pub fn milan_rfd314() -> CpuIdDump { edx: 0x00000001, }); cpuid.set_extended_cache_parameters(Some(levels.as_slice())); + + dump_to_cpuid_entries(cpuid.into_source()) +} + +fn dump_to_cpuid_entries(dump: CpuIdDump) -> Vec { + let mut entries = Vec::new(); + + for (leaf, subleaf, regs) in dump.into_iter() { + entries.push(CpuidEntry { + leaf: leaf, + subleaf: subleaf, + eax: regs.eax, + ebx: regs.ebx, + ecx: regs.ecx, + edx: regs.edx, + }); + } + + // Entry order does not actually matter. Sort here because it's fast (~30-35 leaves) and + // looking at the vec in logs or on the wire *so* much nicer. + entries.sort_by(|left, right| { + let by_leaf = left.leaf.cmp(&right.leaf); + if by_leaf == std::cmp::Ordering::Equal { + left.subleaf.cmp(&right.subleaf) + } else { + by_leaf + } + }); + + entries } From af4e2eeac8a61bf41e65abbf880092aaf289d114 Mon Sep 17 00:00:00 2001 From: iximeow Date: Fri, 29 Aug 2025 01:40:33 +0000 Subject: [PATCH 27/42] rustfmt --- .../src/app/instance_platform/cpu_platform.rs | 35 +++++++++++++------ nexus/src/app/instance_platform/mod.rs | 11 +++--- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/nexus/src/app/instance_platform/cpu_platform.rs b/nexus/src/app/instance_platform/cpu_platform.rs index c327d130083..fe29c98df47 100644 --- a/nexus/src/app/instance_platform/cpu_platform.rs +++ b/nexus/src/app/instance_platform/cpu_platform.rs @@ -2,14 +2,15 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use sled_agent_client::types::CpuidEntry; use raw_cpuid::{ ApmInfo, CpuIdDump, CpuIdResult, ExtendedFeatureIdentification2, - ExtendedFeatures, ExtendedProcessorFeatureIdentifiers, ExtendedState, ExtendedStateInfo, - ExtendedTopologyLevel, FeatureInfo, L1CacheTlbInfo, L2And3CacheTlbInfo, - PerformanceOptimizationInfo, ProcessorCapacityAndFeatureInfo, ProcessorTopologyInfo, - ThermalPowerInfo, Tlb1gbPageInfo, Vendor, VendorInfo, + ExtendedFeatures, ExtendedProcessorFeatureIdentifiers, ExtendedState, + ExtendedStateInfo, ExtendedTopologyLevel, FeatureInfo, L1CacheTlbInfo, + L2And3CacheTlbInfo, PerformanceOptimizationInfo, + ProcessorCapacityAndFeatureInfo, ProcessorTopologyInfo, ThermalPowerInfo, + Tlb1gbPageInfo, Vendor, VendorInfo, }; +use sled_agent_client::types::CpuidEntry; macro_rules! 
cpuid_leaf { ($leaf:literal, $eax:literal, $ebx:literal, $ecx:literal, $edx:literal) => { @@ -71,10 +72,18 @@ const MILAN_CPUID: [CpuidEntry; 32] = [ cpuid_leaf!(0x8000001A, 0x00000006, 0x00000000, 0x00000000, 0x00000000), cpuid_leaf!(0x8000001B, 0x00000000, 0x00000000, 0x00000000, 0x00000000), cpuid_leaf!(0x8000001C, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - cpuid_subleaf!(0x8000001D, 0x0, 0x00000121, 0x01C0003F, 0x0000003F, 0x00000000), - cpuid_subleaf!(0x8000001D, 0x1, 0x00000122, 0x01C0003F, 0x0000003F, 0x00000000), - cpuid_subleaf!(0x8000001D, 0x2, 0x00000143, 0x01C0003F, 0x000003FF, 0x00000002), - cpuid_subleaf!(0x8000001D, 0x3, 0x00000163, 0x03C0003F, 0x00007FFF, 0x00000001), + cpuid_subleaf!( + 0x8000001D, 0x0, 0x00000121, 0x01C0003F, 0x0000003F, 0x00000000 + ), + cpuid_subleaf!( + 0x8000001D, 0x1, 0x00000122, 0x01C0003F, 0x0000003F, 0x00000000 + ), + cpuid_subleaf!( + 0x8000001D, 0x2, 0x00000143, 0x01C0003F, 0x000003FF, 0x00000002 + ), + cpuid_subleaf!( + 0x8000001D, 0x3, 0x00000163, 0x03C0003F, 0x00007FFF, 0x00000001 + ), cpuid_leaf!(0x8000001E, 0x00000000, 0x00000100, 0x00000000, 0x00000000), cpuid_leaf!(0x8000001F, 0x00000000, 0x00000000, 0x00000000, 0x00000000), cpuid_leaf!(0x80000021, 0x00000045, 0x00000000, 0x00000000, 0x00000000), @@ -432,14 +441,18 @@ pub fn milan_rfd314() -> Vec { let mut cpuid = raw_cpuid::CpuId::with_cpuid_reader(baseline); - let mut leaf = cpuid.get_extended_feature_info().expect("baseline Milan defines leaf 1"); + let mut leaf = cpuid + .get_extended_feature_info() + .expect("baseline Milan defines leaf 1"); // RFD 314 describes the circumstances around RDSEED, but it is not currently available. leaf.set_rdseed(false); cpuid.set_extended_feature_info(Some(leaf)); - let mut leaf = cpuid.get_extended_processor_and_feature_identifiers().expect("baseline Milan defines leaf 7"); + let mut leaf = cpuid + .get_extended_processor_and_feature_identifiers() + .expect("baseline Milan defines leaf 7"); // RFD 314 describes these leaf 7 wrinkles. 
// // Extended APIC space support was originally provided to guests because the host supports it diff --git a/nexus/src/app/instance_platform/mod.rs b/nexus/src/app/instance_platform/mod.rs index b4c4362234a..2b83ebeb8b1 100644 --- a/nexus/src/app/instance_platform/mod.rs +++ b/nexus/src/app/instance_platform/mod.rs @@ -81,8 +81,8 @@ use omicron_common::api::external::Error; use omicron_common::api::internal::shared::NetworkInterface; use sled_agent_client::types::{ BlobStorageBackend, Board, BootOrderEntry, BootSettings, Chipset, - ComponentV0, Cpuid, CpuidVendor, CrucibleStorageBackend, - I440Fx, InstanceSpecV0, NvmeDisk, PciPath, QemuPvpanic, SerialPort, + ComponentV0, Cpuid, CpuidVendor, CrucibleStorageBackend, I440Fx, + InstanceSpecV0, NvmeDisk, PciPath, QemuPvpanic, SerialPort, SerialPortNumber, SpecKey, VirtioDisk, VirtioNetworkBackend, VirtioNic, VmmSpec, }; @@ -510,9 +510,10 @@ fn cpuid_from_vmm_cpu_platform( let cpuid = match platform { db::model::VmmCpuPlatform::SledDefault => return None, db::model::VmmCpuPlatform::AmdMilan - | db::model::VmmCpuPlatform::AmdTurin => { - Cpuid { entries: cpu_platform::milan_rfd314(), vendor: CpuidVendor::Amd } - } + | db::model::VmmCpuPlatform::AmdTurin => Cpuid { + entries: cpu_platform::milan_rfd314(), + vendor: CpuidVendor::Amd, + }, }; Some(cpuid) From b05bae450bd97ffd14bef883e0a7c2da73c2deff Mon Sep 17 00:00:00 2001 From: iximeow Date: Fri, 29 Aug 2025 01:50:53 +0000 Subject: [PATCH 28/42] move MILAN_CPUID to an equivalence test --- .../src/app/instance_platform/cpu_platform.rs | 180 ++++++++++-------- 1 file changed, 103 insertions(+), 77 deletions(-) diff --git a/nexus/src/app/instance_platform/cpu_platform.rs b/nexus/src/app/instance_platform/cpu_platform.rs index fe29c98df47..763f479e2a5 100644 --- a/nexus/src/app/instance_platform/cpu_platform.rs +++ b/nexus/src/app/instance_platform/cpu_platform.rs @@ -12,83 +12,6 @@ use raw_cpuid::{ }; use sled_agent_client::types::CpuidEntry; -macro_rules! cpuid_leaf { - ($leaf:literal, $eax:literal, $ebx:literal, $ecx:literal, $edx:literal) => { - CpuidEntry { - leaf: $leaf, - subleaf: None, - eax: $eax, - ebx: $ebx, - ecx: $ecx, - edx: $edx, - } - }; -} - -macro_rules! cpuid_subleaf { - ($leaf:literal, $sl:literal, $eax:literal, $ebx:literal, $ecx:literal, $edx:literal) => { - CpuidEntry { - leaf: $leaf, - subleaf: Some($sl), - eax: $eax, - ebx: $ebx, - ecx: $ecx, - edx: $edx, - } - }; -} - -const MILAN_CPUID: [CpuidEntry; 32] = [ - cpuid_leaf!(0x0, 0x0000000D, 0x68747541, 0x444D4163, 0x69746E65), - cpuid_leaf!(0x1, 0x00A00F11, 0x00000800, 0xF6D83203, 0x078BFBFF), - cpuid_leaf!(0x5, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - cpuid_leaf!(0x6, 0x00000004, 0x00000000, 0x00000000, 0x00000000), - cpuid_subleaf!(0x7, 0x0, 0x00000000, 0x219803A9, 0x00000600, 0x00000010), - cpuid_subleaf!(0xB, 0x0, 0x00000001, 0x00000002, 0x00000100, 0x00000000), - cpuid_subleaf!(0xB, 0x1, 0x00000000, 0x00000000, 0x00000201, 0x00000000), - cpuid_subleaf!(0xB, 0x2, 0x00000000, 0x00000000, 0x00000002, 0x00000000), - cpuid_subleaf!(0xD, 0x0, 0x00000007, 0x00000340, 0x00000340, 0x00000000), - cpuid_subleaf!(0xD, 0x1, 0x00000007, 0x00000340, 0x00000000, 0x00000000), - cpuid_subleaf!(0xD, 0x2, 0x00000100, 0x00000240, 0x00000000, 0x00000000), - cpuid_leaf!(0x80000000, 0x80000021, 0x68747541, 0x444D4163, 0x69746E65), - // ecx bit 23 should be flipped true at some point, but is currently - // hidden and will continue to be for the moment. 
- // ecx bit 3 should be masked, but is is not and advertises support for - // unsupported extensions to LAPIC space. - // - // RFD 314 talks about these bits more, but we currently allow them to - // be wrong as they have been wrong before and we'll get to them - // individually later. - cpuid_leaf!(0x80000001, 0x00A00F11, 0x40000000, 0x444001F1, 0x27D3FBFF), - cpuid_leaf!(0x80000002, 0x20444D41, 0x43595045, 0x31373720, 0x36205033), - cpuid_leaf!(0x80000003, 0x6F432D34, 0x50206572, 0x65636F72, 0x726F7373), - cpuid_leaf!(0x80000004, 0x20202020, 0x20202020, 0x20202020, 0x00202020), - cpuid_leaf!(0x80000005, 0xFF40FF40, 0xFF40FF40, 0x20080140, 0x20080140), - cpuid_leaf!(0x80000006, 0x48002200, 0x68004200, 0x02006140, 0x08009140), - cpuid_leaf!(0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000100), - cpuid_leaf!(0x80000008, 0x00003030, 0x00000205, 0x00000000, 0x00000000), - cpuid_leaf!(0x8000000A, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - cpuid_leaf!(0x80000019, 0xF040F040, 0xF0400000, 0x00000000, 0x00000000), - cpuid_leaf!(0x8000001A, 0x00000006, 0x00000000, 0x00000000, 0x00000000), - cpuid_leaf!(0x8000001B, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - cpuid_leaf!(0x8000001C, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - cpuid_subleaf!( - 0x8000001D, 0x0, 0x00000121, 0x01C0003F, 0x0000003F, 0x00000000 - ), - cpuid_subleaf!( - 0x8000001D, 0x1, 0x00000122, 0x01C0003F, 0x0000003F, 0x00000000 - ), - cpuid_subleaf!( - 0x8000001D, 0x2, 0x00000143, 0x01C0003F, 0x000003FF, 0x00000002 - ), - cpuid_subleaf!( - 0x8000001D, 0x3, 0x00000163, 0x03C0003F, 0x00007FFF, 0x00000001 - ), - cpuid_leaf!(0x8000001E, 0x00000000, 0x00000100, 0x00000000, 0x00000000), - cpuid_leaf!(0x8000001F, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - cpuid_leaf!(0x80000021, 0x00000045, 0x00000000, 0x00000000, 0x00000000), -]; - /// The Platonic ideal Milan. This is what we would "like" to define as "The Milan vCPU platform" /// absent any other constraints. This is a slightly slimmer version of the Milan platform defined /// in RFD 314, with justifications there. @@ -626,3 +549,106 @@ fn dump_to_cpuid_entries(dump: CpuIdDump) -> Vec { entries } + +#[test] +fn milan_rfd314_is_as_described() { + macro_rules! cpuid_leaf { + ($leaf:literal, $eax:literal, $ebx:literal, $ecx:literal, $edx:literal) => { + CpuidEntry { + leaf: $leaf, + subleaf: None, + eax: $eax, + ebx: $ebx, + ecx: $ecx, + edx: $edx, + } + }; + } + + macro_rules! cpuid_subleaf { + ($leaf:literal, $sl:literal, $eax:literal, $ebx:literal, $ecx:literal, $edx:literal) => { + CpuidEntry { + leaf: $leaf, + subleaf: Some($sl), + eax: $eax, + ebx: $ebx, + ecx: $ecx, + edx: $edx, + } + }; + } + + // This CPUID leaf blob is a collection of the leaves described in RFD 314. + // RFD 314 is the source of truth for what bits are set here and why. + // `milan_rfd314()` constructs what ought to be an *identical* set of bits, + // but in a manner more amenable to machine validation that pairs of CPU + // platforms are (or are not!) compatible, be they virtual (guest) CPUs or, + // later, physical (host) CPUs. + // + // This is present only to validate initial CPU platforms work and as a link + // between 314 and the present day. Actual guest CPU platforms may differ as + // we enable additional guest functionality in the future; this is not a + // source of truth for actual guest platforms. 
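// Editorial aside, not part of the original patch: the raw hex below can be
// spot-checked by hand. Leaf 0's EBX/EDX/ECX hold the ASCII bytes of
// "AuthenticAMD" in little-endian register order, and leaf 1's EAX encodes the
// family/model/stepping that RFD 314 names (base family is 0xF, so the
// extended family is added):
//
//     let eax: u32 = 0x00A00F11;
//     let family = ((eax >> 8) & 0xF) + ((eax >> 20) & 0xFF); // 0xF + 0xA = 0x19 (Milan)
//     let model = (((eax >> 16) & 0xF) << 4) | ((eax >> 4) & 0xF); // 0x01
//     let stepping = eax & 0xF; // 0x1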
+ const MILAN_CPUID: [CpuidEntry; 32] = [ + cpuid_leaf!(0x0, 0x0000000D, 0x68747541, 0x444D4163, 0x69746E65), + cpuid_leaf!(0x1, 0x00A00F11, 0x00000800, 0xF6D83203, 0x078BFBFF), + cpuid_leaf!(0x5, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x6, 0x00000004, 0x00000000, 0x00000000, 0x00000000), + cpuid_subleaf!( + 0x7, 0x0, 0x00000000, 0x219803A9, 0x00000600, 0x00000010 + ), + cpuid_subleaf!( + 0xB, 0x0, 0x00000001, 0x00000002, 0x00000100, 0x00000000 + ), + cpuid_subleaf!( + 0xB, 0x1, 0x00000000, 0x00000000, 0x00000201, 0x00000000 + ), + cpuid_subleaf!( + 0xB, 0x2, 0x00000000, 0x00000000, 0x00000002, 0x00000000 + ), + cpuid_subleaf!( + 0xD, 0x0, 0x00000007, 0x00000340, 0x00000340, 0x00000000 + ), + cpuid_subleaf!( + 0xD, 0x1, 0x00000007, 0x00000340, 0x00000000, 0x00000000 + ), + cpuid_subleaf!( + 0xD, 0x2, 0x00000100, 0x00000240, 0x00000000, 0x00000000 + ), + cpuid_leaf!(0x80000000, 0x80000021, 0x68747541, 0x444D4163, 0x69746E65), + cpuid_leaf!(0x80000001, 0x00A00F11, 0x40000000, 0x444001F1, 0x27D3FBFF), + cpuid_leaf!(0x80000002, 0x20444D41, 0x43595045, 0x31373720, 0x36205033), + cpuid_leaf!(0x80000003, 0x6F432D34, 0x50206572, 0x65636F72, 0x726F7373), + cpuid_leaf!(0x80000004, 0x20202020, 0x20202020, 0x20202020, 0x00202020), + cpuid_leaf!(0x80000005, 0xFF40FF40, 0xFF40FF40, 0x20080140, 0x20080140), + cpuid_leaf!(0x80000006, 0x48002200, 0x68004200, 0x02006140, 0x08009140), + cpuid_leaf!(0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000100), + cpuid_leaf!(0x80000008, 0x00003030, 0x00000205, 0x00000000, 0x00000000), + cpuid_leaf!(0x8000000A, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x80000019, 0xF040F040, 0xF0400000, 0x00000000, 0x00000000), + cpuid_leaf!(0x8000001A, 0x00000006, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x8000001B, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x8000001C, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cpuid_subleaf!( + 0x8000001D, 0x0, 0x00000121, 0x01C0003F, 0x0000003F, 0x00000000 + ), + cpuid_subleaf!( + 0x8000001D, 0x1, 0x00000122, 0x01C0003F, 0x0000003F, 0x00000000 + ), + cpuid_subleaf!( + 0x8000001D, 0x2, 0x00000143, 0x01C0003F, 0x000003FF, 0x00000002 + ), + cpuid_subleaf!( + 0x8000001D, 0x3, 0x00000163, 0x03C0003F, 0x00007FFF, 0x00000001 + ), + cpuid_leaf!(0x8000001E, 0x00000000, 0x00000100, 0x00000000, 0x00000000), + cpuid_leaf!(0x8000001F, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x80000021, 0x00000045, 0x00000000, 0x00000000, 0x00000000), + ]; + + let computed = milan_rfd314(); + + // `milan_rfd314` sorts by leaf/subleaf, so everything *should* be in the + // same order.. just a question if it's all the same: + assert_eq!(MILAN_CPUID, computed); +} From 5f8aeddfc1f4375be9c79180c8f3d076ab18bee5 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sat, 30 Aug 2025 03:04:41 +0000 Subject: [PATCH 29/42] outline more sensitive CPUID bits into a compatibility helper also rustfmt, resolve warnings, and fill in explicit zero leaves so that the RFD 314-defined CPUID table matches what the builder produces now. yay!! 
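A rough sketch of the intended call pattern for the new compatibility helper
(illustrative only: the placement logic that would call it does not exist yet,
and every name here other than `functionally_same` and `CpuIdDump` is made up):

    // Hypothetical caller: decide whether a guest CPUID profile built against
    // `wanted` can run unchanged on a host whose vCPU platform is `offered`.
    fn guest_profile_fits(offered: CpuIdDump, wanted: CpuIdDump) -> bool {
        // ISA/feature compatibility is assumed to have been established
        // separately; `functionally_same` only compares architectural details
        // such as CLFLUSH line size and address widths.
        functionally_same(offered, wanted)
    }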
--- Cargo.lock | 2 +- Cargo.toml | 2 +- .../src/app/instance_platform/cpu_platform.rs | 388 ++++++++++++++---- 3 files changed, 304 insertions(+), 88 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5164596a540..23a6be27784 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10763,7 +10763,7 @@ dependencies = [ [[package]] name = "raw-cpuid" version = "11.5.0" -source = "git+https://github.com/oxidecomputer/rust-cpuid.git?rev=eb65cd77789428ea817288bd86d7f7a1e19f97a4#eb65cd77789428ea817288bd86d7f7a1e19f97a4" +source = "git+https://github.com/oxidecomputer/rust-cpuid.git?rev=0a8dbd2311263f6a59ea58089e33c8331436ff3a#0a8dbd2311263f6a59ea58089e33c8331436ff3a" dependencies = [ "bitflags 2.9.1", ] diff --git a/Cargo.toml b/Cargo.toml index b9d42e48f6d..31aa8954cbf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -647,7 +647,7 @@ rand_distr = "0.4.3" rand_seeder = "0.3.0" range-requests = { path = "range-requests" } ratatui = "0.29.0" -raw-cpuid = { git = "https://github.com/oxidecomputer/rust-cpuid.git", rev = "eb65cd77789428ea817288bd86d7f7a1e19f97a4" } +raw-cpuid = { git = "https://github.com/oxidecomputer/rust-cpuid.git", rev = "0a8dbd2311263f6a59ea58089e33c8331436ff3a" } rayon = "1.10" rcgen = "0.12.1" reconfigurator-cli = { path = "dev-tools/reconfigurator-cli" } diff --git a/nexus/src/app/instance_platform/cpu_platform.rs b/nexus/src/app/instance_platform/cpu_platform.rs index 763f479e2a5..43586a9cedb 100644 --- a/nexus/src/app/instance_platform/cpu_platform.rs +++ b/nexus/src/app/instance_platform/cpu_platform.rs @@ -3,32 +3,179 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use raw_cpuid::{ - ApmInfo, CpuIdDump, CpuIdResult, ExtendedFeatureIdentification2, - ExtendedFeatures, ExtendedProcessorFeatureIdentifiers, ExtendedState, - ExtendedStateInfo, ExtendedTopologyLevel, FeatureInfo, L1CacheTlbInfo, - L2And3CacheTlbInfo, PerformanceOptimizationInfo, - ProcessorCapacityAndFeatureInfo, ProcessorTopologyInfo, ThermalPowerInfo, - Tlb1gbPageInfo, Vendor, VendorInfo, + ApmInfo, CpuIdDump, CpuIdResult, CpuIdWriter, + ExtendedFeatureIdentification2, ExtendedFeatures, + ExtendedProcessorFeatureIdentifiers, ExtendedState, ExtendedStateInfo, + ExtendedTopologyLevel, FeatureInfo, L1CacheTlbInfo, L2And3CacheTlbInfo, + MonitorMwaitInfo, PerformanceOptimizationInfo, + ProcessorCapacityAndFeatureInfo, ProcessorTopologyInfo, SvmFeatures, + ThermalPowerInfo, Tlb1gbPageInfo, Vendor, VendorInfo, }; use sled_agent_client::types::CpuidEntry; -/// The Platonic ideal Milan. This is what we would "like" to define as "The Milan vCPU platform" -/// absent any other constraints. This is a slightly slimmer version of the Milan platform defined -/// in RFD 314, with justifications there. +/// Check if `target` describes a processor that agrees with `base` on +/// architectural behaviors defined in CPUID leaves. /// -/// Notably, this avoids describing individual processor SKUs' characteristics, where possible. -/// This CPUID configuration as-is is untested; guests may not boot, this may be too reductive, -/// etc. +/// Arguably this should live in a crate outside Omicron which is used by both +/// Omicron and Propolis. Perhaps the Oxide fork of `rust-cpuid`. It's here to +/// sketch the logic and expected to move later. +/// +/// NOTE: This does *not currently check ISA extensions or other feature +/// compatibility*. It assumes that the CPUID profiles are already known to be +/// feature-compatibile ahead of time. 
Instead, this is to check details like +/// "`clflush` operates on the same number of words". +#[allow(dead_code)] +pub fn functionally_same(base: CpuIdDump, target: CpuIdDump) -> bool { + let base = raw_cpuid::CpuId::with_cpuid_reader(base); + let target = raw_cpuid::CpuId::with_cpuid_reader(target); + + match (base.get_feature_info(), target.get_feature_info()) { + (Some(base_info), Some(target_info)) => { + let base_clflush_size = base_info.cflush_cache_line_size(); + let target_clflush_size = target_info.cflush_cache_line_size(); + if base_clflush_size != target_clflush_size { + return false; + } + } + (Some(_), None) | (None, Some(_)) | (None, None) => { + // TODO: Might be able to tolerate these cases in practice, but + // realistically we should never be here. + return false; + } + } + match ( + base.get_processor_capacity_feature_info(), + target.get_processor_capacity_feature_info(), + ) { + (Some(base_info), Some(target_info)) => { + if base_info.physical_address_bits() + < target_info.physical_address_bits() + { + return false; + } + + if base_info.linear_address_bits() + < target_info.linear_address_bits() + { + return false; + } + + // TODO: this probably could be a `<` relationship like above, but + // I'm not so familiar here and am being stricter because of it. + if base_info.guest_physical_address_bits() + != target_info.guest_physical_address_bits() + { + return false; + } + + if base_info.invlpgb_max_pages() < target_info.invlpgb_max_pages() { + return false; + } + + // TODO: really having a max RDPRU ID of anything more than 0 is + // sketchy... + if base_info.max_rdpru_id() < target_info.max_rdpru_id() { + return false; + } + } + _ => { + // TODO: Probably can tolerate `target` not having this leaf, but we + // won't be here in practice. + return false; + } + } + + // Disagreements in this leaf likely only result in suboptimal performance, + // rather than architectural misunderstanding. A permissive comparison would + // overlook differences here. + match ( + base.get_performance_optimization_info(), + target.get_performance_optimization_info(), + ) { + (Some(base_info), Some(target_info)) => { + if base_info.has_movu() != target_info.has_movu() { + return false; + } + + // TODO: this could be more precise; if the base has fp256 and the + // target has fp128, that's probably fine. Likewise, if the base has + // fp512 and the target has a smaller width, claiming the FPU + // datapath is narrower than it really is, is probably fine. + if base_info.has_fp256() != target_info.has_fp256() { + return false; + } + } + _ => { + // Specific cases here may be acceptable, but for expediency (and + // because we don't intend to support vCPUs whose profiles would not + // have this leaf), just bail here. + return false; + } + } + + // Bits checked here describe architectural behavior. If they differ, the + // base CPU will behave differently than the target wants to see. + // + // It may be okay in some cases to allow these to differ, but take a + // conservative approach until we need otherwise. 
+ match ( + base.get_extended_feature_identification_2(), + target.get_extended_feature_identification_2(), + ) { + (Some(base_info), Some(target_info)) => { + if base_info.has_no_nested_data_bp() + != target_info.has_no_nested_data_bp() + { + return false; + } + + if base_info.has_lfence_always_serializing() + != target_info.has_lfence_always_serializing() + { + return false; + } + + if base_info.has_null_select_clears_base() + != target_info.has_null_select_clears_base() + { + return false; + } + } + _ => { + return false; + } + } + + // TODO: really not sure if we should include things like cache + // hierarchy/core topology information here. Misrepresenting the actual + // system can result in cache-sized buffers being sized incorrectly (or at + // least suboptimally), but as long as cache sizes grow rather than shrink + // it may only be "performance is not as good as it could be" rather than a + // more deleterious outcome. + + true +} + +/// The Platonic ideal Milan. This is what we would "like" to define as "The +/// Milan vCPU platform" absent any other constraints. This is a slightly +/// slimmer version of the Milan platform defined in RFD 314, with +/// justifications there. +/// +/// Notably, this avoids describing individual processor SKUs' characteristics, +/// where possible. This CPUID configuration as-is is untested; guests may not +/// boot, this may be too reductive, etc. fn milan_ideal() -> CpuIdDump { let mut cpuid = raw_cpuid::CpuId::with_cpuid_reader(CpuIdDump::new()); let leaf = VendorInfo::amd(); - cpuid.set_vendor_info(Some(leaf)); - cpuid.set_extended_function_info(Some(leaf)); + cpuid.set_vendor_info(Some(leaf)).expect("can set leaf 0"); + cpuid + .set_extended_function_info(Some(leaf)) + .expect("can set leaf 8000_0000h"); let mut leaf = FeatureInfo::new(Vendor::Amd); // Set up EAX: Family 19h model 1h. - leaf.set_extended_family_id(0x00); + leaf.set_extended_family_id(0x00); // why is this like this one of these two lines should go away leaf.set_extended_family_id(0xA); leaf.set_base_family_id(0x0F); leaf.set_base_model_id(0x01); @@ -36,9 +183,10 @@ fn milan_ideal() -> CpuIdDump { // Set up EBX leaf.set_brand_index(0); - leaf.set_cflush_cache_line_size(8); // TODO: BREAKING - leaf.set_initial_local_apic_id(0); // Populated dynamically in a real system. - leaf.set_max_logical_processor_ids(0); // Populated dynamically in a real system. + leaf.set_cflush_cache_line_size(8); // 8 quadwords (64 bytes) + // This and max logical processor ID are populated dynamically. + leaf.set_initial_local_apic_id(0); + leaf.set_max_logical_processor_ids(0); // Set up ECX leaf.set_sse3(true); @@ -74,12 +222,13 @@ fn milan_ideal() -> CpuIdDump { leaf.set_tsc_deadline(false); leaf.set_aesni(true); leaf.set_xsave(true); - leaf.set_oxsave(false); // managed dynamically in practice + leaf.set_oxsave(false); // Managed dynamically in practice leaf.set_avx(true); leaf.set_f16c(true); leaf.set_rdrand(true); - leaf.set_hypervisor(true); // This CPUID leaf will be presented to hypervisor guests + // This CPUID profile will be presented to hypervisor guests + leaf.set_hypervisor(true); // Set up EDX leaf.set_fpu(true); @@ -121,19 +270,23 @@ fn milan_ideal() -> CpuIdDump { leaf.set_htt(false); // bits 29-31 are not used here. - cpuid.set_feature_info(Some(leaf)); + cpuid.set_feature_info(Some(leaf)).expect("can set leaf 1"); // Leaf 2, 3, 4: all skipped on AMD // Leaf 5: Monitor and MWait. All zero here. 
- cpuid.set_monitor_mwait_info(None); + cpuid + .set_monitor_mwait_info(Some(MonitorMwaitInfo::empty())) + .expect("can set leaf 5"); - // Leaf 6: Power management and some feature bits. Power management is all zeroed. + // Leaf 6: Power management and some feature bits. + // + // Power management is all zeroed. let mut leaf = ThermalPowerInfo::empty(); leaf.set_arat(true); leaf.set_hw_coord_feedback(false); - cpuid.set_thermal_power_info(Some(leaf)); + cpuid.set_thermal_power_info(Some(leaf)).expect("can set leaf 6"); // Leaf 7: Extended features let mut leaf = ExtendedFeatures::new(); @@ -189,7 +342,7 @@ fn milan_ideal() -> CpuIdDump { leaf.set_avx512vbmi2(false); leaf.set_cet_ss(false); - leaf.set_gfni(false); // TODO: Not on Milan? Really?? + leaf.set_gfni(false); // Not in Milan leaf.set_vaes(true); leaf.set_vpclmulqdq(true); leaf.set_avx512vnni(false); @@ -203,17 +356,18 @@ fn milan_ideal() -> CpuIdDump { // Set up leaf 7 EDX leaf.set_fsrm(true); - cpuid.set_extended_feature_info(Some(leaf)); + cpuid.set_extended_feature_info(Some(leaf)).expect("can set leaf 7"); // Hide extended topology info (leaf Bh) - cpuid.set_extended_topology_info(None); + cpuid.set_extended_topology_info(None).expect("can set leaf 8"); // TODO: kind of gross to have to pass an empty `CpuIdDump` here... let mut state = ExtendedStateInfo::empty(CpuIdDump::new()); state.set_xcr0_supports_legacy_x87(true); state.set_xcr0_supports_sse_128(true); state.set_xcr0_supports_avx_256(true); - state.set_xsave_area_size_enabled_features(0x340); // Populated dynamically in a real system. + // Managed dynamically in practice. + state.set_xsave_area_size_enabled_features(0x340); state.set_xsave_area_size_supported_features(0x340); state.set_xsaveopt(true); @@ -227,7 +381,7 @@ fn milan_ideal() -> CpuIdDump { ymm_state.set_offset(0x240); leaves.push(Some(ymm_state.into_leaf())); - cpuid.set_extended_state_info(Some(&leaves[..])); + cpuid.set_extended_state_info(Some(&leaves[..])).expect("can set leaf Dh"); let mut leaf = ExtendedProcessorFeatureIdentifiers::empty(Vendor::Amd); // This is the same as the leaf 1 EAX configured earlier. 
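// Editorial aside, not part of the original patch: the 0x240 offset and 0x340
// save-area size used for leaf Dh a few lines up follow directly from the
// XSAVE layout when only x87/SSE/AVX state is enabled:
//
//     const LEGACY_AREA: u32 = 512; // x87 + SSE state
//     const XSAVE_HEADER: u32 = 64;
//     const YMM_HI128: u32 = 256; // upper halves of the 16 YMM registers
//     const AVX_OFFSET: u32 = LEGACY_AREA + XSAVE_HEADER; // 0x240
//     const AREA_SIZE: u32 = AVX_OFFSET + YMM_HI128; // 0x340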
@@ -285,77 +439,107 @@ fn milan_ideal() -> CpuIdDump { leaf.set_rdtscp(true); leaf.set_64bit_mode(true); - cpuid.set_extended_processor_and_feature_identifiers(Some(leaf)); + cpuid + .set_extended_processor_and_feature_identifiers(Some(leaf)) + .expect("can set leaf 8000_0001h"); // Leaves 8000_0002 through 8000_0005 - cpuid.set_processor_brand_string(Some(b"AMD EPYC 7003-like Processor")); + cpuid + .set_processor_brand_string(Some(b"AMD EPYC 7003-like Processor")) + .expect("can set vCPU brand string"); // Hide L1 cache+TLB info (leaf 8000_0005h) - cpuid.set_l1_cache_and_tlb_info(None); + cpuid.set_l1_cache_and_tlb_info(None).expect("can set leaf 8000_0005h"); // Hide L2 and L3 cache+TLB info (leaf 8000_0006h) - cpuid.set_l2_l3_cache_and_tlb_info(None); + cpuid.set_l2_l3_cache_and_tlb_info(None).expect("can set leaf 8000_0006h"); // Set up advanced power management info (leaf 8000_0007h) let mut leaf = ApmInfo::empty(); leaf.set_invariant_tsc(true); - cpuid.set_advanced_power_mgmt_info(Some(leaf)); + cpuid + .set_advanced_power_mgmt_info(Some(leaf)) + .expect("can set leaf 8000_0007h"); // Set up processor capacity info (leaf 8000_0008h) let mut leaf = ProcessorCapacityAndFeatureInfo::empty(); // Set up leaf 8000_0008 EAX - leaf.set_physical_address_bits(0x30); // TODO: BREAKING - leaf.set_linear_address_bits(0x30); // TODO: BREAKING - leaf.set_guest_physical_address_bits(0); // TODO: BREAKING + leaf.set_physical_address_bits(0x30); + leaf.set_linear_address_bits(0x30); + leaf.set_guest_physical_address_bits(0); // St up leaf 8000_0008 EBX leaf.set_cl_zero(true); leaf.set_restore_fp_error_ptrs(true); leaf.set_wbnoinvd(true); - leaf.set_num_phys_threads(1); // Populated dynamically in a real system. + // Populated dynamically in practice. + leaf.set_num_phys_threads(1); leaf.set_apic_id_size(0); leaf.set_perf_tsc_size(0); - leaf.set_invlpgb_max_pages(0); // TODO: BREAKING - leaf.set_max_rdpru_id(0); // TODO: BREAKING + leaf.set_invlpgb_max_pages(0); + leaf.set_max_rdpru_id(0); - cpuid.set_processor_capacity_feature_info(Some(leaf)); + cpuid + .set_processor_capacity_feature_info(Some(leaf)) + .expect("can set leaf 8000_0008h"); // Leaf 8000_000Ah is zeroed out for guests. - cpuid.set_svm_info(None); + cpuid + .set_svm_info(Some(SvmFeatures::empty())) + .expect("can set leaf 8000_000Ah"); // Hide TLB information for 1GiB pages (leaf 8000_0019h) - cpuid.set_tlb_1gb_page_info(None); + cpuid.set_tlb_1gb_page_info(None).expect("can set leaf 8000_0019h"); // Set up processor optimization info (leaf 8000_001Ah) let mut leaf = PerformanceOptimizationInfo::empty(); leaf.set_movu(true); // TODO: BREAKING leaf.set_fp256(true); // TODO: BREAKINGISH? - cpuid.set_performance_optimization_info(Some(leaf)); + cpuid + .set_performance_optimization_info(Some(leaf)) + .expect("can set leaf 8000_001Ah"); - // Leaf 8000_001B - // TODO: no support for leaf 8000_001B, but zero is what we wanted. - // Leaf 8000_001C - // TODO: no support for leaf 8000_001C, but zero is what we wanted. + // Leaf 8000_001B and 8000_001C are handled after all other leaves. 
// Hide extended cache topology as well (Leaf 8000_001D) - cpuid.set_extended_cache_parameters(None); + cpuid.set_extended_cache_parameters(None).expect("can set leaf 8000_001Dh"); let mut leaf = ProcessorTopologyInfo::empty(); leaf.set_threads_per_core(2); - cpuid.set_processor_topology_info(Some(leaf)); + cpuid + .set_processor_topology_info(Some(leaf)) + .expect("can set leaf 8000_001Eh"); - cpuid.set_memory_encryption_info(None); + cpuid.set_memory_encryption_info(None).expect("can set leaf 8000_001Fh"); let mut leaf = ExtendedFeatureIdentification2::empty(); - leaf.set_no_nested_data_bp(true); // TODO: BREAKING - leaf.set_lfence_always_serializing(true); // TODO: BREAKING - leaf.set_null_select_clears_base(true); // TODO: BREAKING - cpuid.set_extended_feature_identification_2(Some(leaf)); - - cpuid.into_source() + leaf.set_no_nested_data_bp(true); + leaf.set_lfence_always_serializing(true); + leaf.set_null_select_clears_base(true); + cpuid + .set_extended_feature_identification_2(Some(leaf)) + .expect("can set leaf 8000_0021h"); + + let mut dump = cpuid.into_source(); + + // There are a few leaves that are not yet defined in `raw-cpuid` but we + // commit to being zero. In practice, *omitted* leaves with an explicit + // CPUID specification will be zero, but setting them to zero here avoids + // all doubt. + + // First, instruction-based sampling (IBS) is hidden from guests for now + // (note `set_ibs(false)` above) + dump.set_leaf(0x8000_001B, Some(CpuIdResult::empty())); + // Lightweight profiling (LWP) is not supported by Milan, and not advertised + // to guests. (note `set_lwp(false)` above) + dump.set_leaf(0x8000_001C, Some(CpuIdResult::empty())); + // SEV is not supported in guests (note `set_sev(false)` above) + dump.set_leaf(0x8000_001F, Some(CpuIdResult::empty())); + + dump } pub fn milan_rfd314() -> Vec { @@ -368,39 +552,25 @@ pub fn milan_rfd314() -> Vec { .get_extended_feature_info() .expect("baseline Milan defines leaf 1"); - // RFD 314 describes the circumstances around RDSEED, but it is not currently available. + // RFD 314 describes the circumstances around RDSEED, but it is not + // currently available. leaf.set_rdseed(false); - cpuid.set_extended_feature_info(Some(leaf)); - - let mut leaf = cpuid - .get_extended_processor_and_feature_identifiers() - .expect("baseline Milan defines leaf 7"); - // RFD 314 describes these leaf 7 wrinkles. - // - // Extended APIC space support was originally provided to guests because the host supports it - // and it was passed through. The extended space is not supported in Bhyve, but we leave it set - // here to not change it from under guests. - // - // Bhyve now supports all six performance counters, so we could set the perf counter extension - // bit here, but again it is left as-is to not change CPUID from under a guest. - leaf.set_ext_apic_space(true); - leaf.set_perf_cntr_extensions(false); - - cpuid.set_extended_processor_and_feature_identifiers(Some(leaf)); + cpuid.set_extended_feature_info(Some(leaf)).expect("can set leaf 7h"); // Set up extended topology info (leaf Bh) let mut levels = Vec::new(); let mut topo_level1 = ExtendedTopologyLevel::empty(); // EAX - // These perhaps should be dynamic based on SMT or no? topo_level1.set_shift_right_for_next_apic_id(1); // EBX topo_level1.set_processors(2); // ECX topo_level1.set_level_number(0); - topo_level1.set_level_type(1); // If there's no SMT, there should be no SMT right..? + // This level describes SMT. If there's no SMT enabled (single-core VM?) 
+ // then this level should not be present, probably? + topo_level1.set_level_type(1); levels.push(topo_level1); @@ -414,16 +584,46 @@ pub fn milan_rfd314() -> Vec { let mut topo_level3 = ExtendedTopologyLevel::empty(); // ECX topo_level3.set_level_number(2); - topo_level3.set_level_type(0); // This level is invalid. + // Level type 0 indicates this level is invalid. This level is included only + // to be explicit about where the topology ends. + topo_level3.set_level_type(0); levels.push(topo_level3); - cpuid.set_extended_topology_info(Some(levels.as_slice())); + cpuid + .set_extended_topology_info(Some(levels.as_slice())) + .expect("can set leaf 8000_0021h"); + + let mut leaf = cpuid + .get_extended_processor_and_feature_identifiers() + .expect("baseline Milan defines leaf 8000_0001"); + // RFD 314 describes these leaf 8000_0001 wrinkles. + // + // Extended APIC space support was originally provided to guests because the + // host supports it and it was passed through. The extended space is not + // supported in Bhyve, but we leave it set here to not change it from under + // guests. + // + // Bhyve now supports all six performance counters, so we could set the perf + // counter extension bit here, but again it is left as-is to not change + // CPUID from under a guest. + // + // RDTSCP requires some Bhyve and Propolis work to support, so it is masked + // off for now. + leaf.set_ext_apic_space(false); // TODO: I thought this was set. is it not? check PR + leaf.set_perf_cntr_extensions(false); + leaf.set_rdtscp(false); + + cpuid + .set_extended_processor_and_feature_identifiers(Some(leaf)) + .expect("can set leaf 8000_0001h"); // VMs on Milan currently get brand string and cache topology information from the host // processor, so replicate it to minimize changes for now. 
// Leaves 8000_0002 through 8000_0005 - cpuid.set_processor_brand_string(Some(b"AMD EPYC 7713P 64-Core Processor")); + cpuid + .set_processor_brand_string(Some(b"AMD EPYC 7713P 64-Core Processor")) + .expect("can set vCPU brand string"); // Set up L1 cache+TLB info (leaf 8000_0005h) let mut leaf = L1CacheTlbInfo::empty(); @@ -448,7 +648,9 @@ pub fn milan_rfd314() -> Vec { leaf.set_icache_associativity(0x08); leaf.set_icache_size(0x20); - cpuid.set_l1_cache_and_tlb_info(Some(leaf)); + cpuid + .set_l1_cache_and_tlb_info(Some(leaf)) + .expect("can set leaf 8000_0005h"); // Set up L2 and L3 cache+TLB info (leaf 8000_0006h) let mut leaf = L2And3CacheTlbInfo::empty(); @@ -477,7 +679,9 @@ pub fn milan_rfd314() -> Vec { leaf.set_l3cache_associativity(0x9); leaf.set_l3cache_size(0x0200); - cpuid.set_l2_l3_cache_and_tlb_info(Some(leaf)); + cpuid + .set_l2_l3_cache_and_tlb_info(Some(leaf)) + .expect("can set leaf 8000_0006h"); // Set up TLB information for 1GiB pages (leaf 8000_0019h) let mut leaf = Tlb1gbPageInfo::empty(); @@ -489,7 +693,7 @@ pub fn milan_rfd314() -> Vec { leaf.set_dtlb_l2_1gb_size(0x40); leaf.set_itlb_l2_1gb_associativity(0); leaf.set_itlb_l2_1gb_size(0); - cpuid.set_tlb_1gb_page_info(Some(leaf)); + cpuid.set_tlb_1gb_page_info(Some(leaf)).expect("can set leaf 8000_0019h"); // Set up extended cache hierarchy info (leaf 8000_001Dh) let mut levels = Vec::new(); @@ -517,7 +721,9 @@ pub fn milan_rfd314() -> Vec { ecx: 0x00007FFF, edx: 0x00000001, }); - cpuid.set_extended_cache_parameters(Some(levels.as_slice())); + cpuid + .set_extended_cache_parameters(Some(levels.as_slice())) + .expect("can set leaf 8000_001Dh"); dump_to_cpuid_entries(cpuid.into_source()) } @@ -589,7 +795,7 @@ fn milan_rfd314_is_as_described() { // between 314 and the present day. Actual guest CPU platforms may differ as // we enable additional guest functionality in the future; this is not a // source of truth for actual guest platforms. - const MILAN_CPUID: [CpuidEntry; 32] = [ + const MILAN_CPUID: [CpuidEntry; 33] = [ cpuid_leaf!(0x0, 0x0000000D, 0x68747541, 0x444D4163, 0x69746E65), cpuid_leaf!(0x1, 0x00A00F11, 0x00000800, 0xF6D83203, 0x078BFBFF), cpuid_leaf!(0x5, 0x00000000, 0x00000000, 0x00000000, 0x00000000), @@ -597,6 +803,9 @@ fn milan_rfd314_is_as_described() { cpuid_subleaf!( 0x7, 0x0, 0x00000000, 0x219803A9, 0x00000600, 0x00000010 ), + cpuid_subleaf!( + 0x7, 0x1, 0x00000000, 0x00000000, 0x00000000, 0x00000000 + ), cpuid_subleaf!( 0xB, 0x0, 0x00000001, 0x00000002, 0x00000100, 0x00000000 ), @@ -650,5 +859,12 @@ fn milan_rfd314_is_as_described() { // `milan_rfd314` sorts by leaf/subleaf, so everything *should* be in the // same order.. just a question if it's all the same: - assert_eq!(MILAN_CPUID, computed); + for (l, r) in MILAN_CPUID.iter().zip(computed.as_slice().iter()) { + eprintln!("comparing {:#08x}.{:?}", l.leaf, l.subleaf); + assert_eq!( + l, r, + "leaf {:#08x} (subleaf? 
{:?}) did not match", + l.leaf, l.subleaf + ); + } } From 9e92b0135cd22b3f80fce77a35b948bea27bc486 Mon Sep 17 00:00:00 2001 From: iximeow Date: Sat, 30 Aug 2025 03:07:58 +0000 Subject: [PATCH 30/42] dead code --- nexus/src/app/instance_platform/cpu_platform.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/nexus/src/app/instance_platform/cpu_platform.rs b/nexus/src/app/instance_platform/cpu_platform.rs index 43586a9cedb..873f4234b26 100644 --- a/nexus/src/app/instance_platform/cpu_platform.rs +++ b/nexus/src/app/instance_platform/cpu_platform.rs @@ -175,7 +175,6 @@ fn milan_ideal() -> CpuIdDump { let mut leaf = FeatureInfo::new(Vendor::Amd); // Set up EAX: Family 19h model 1h. - leaf.set_extended_family_id(0x00); // why is this like this one of these two lines should go away leaf.set_extended_family_id(0xA); leaf.set_base_family_id(0x0F); leaf.set_base_model_id(0x01); From d4cbf2fa3e4d1eaac6c6f1a37ac002e0fdc00c96 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 3 Sep 2025 00:48:15 +0000 Subject: [PATCH 31/42] another RFD 314/PR transcription error: extended APIC space short history here: this bit is set in hardware, the extended space is not supported by bhyve, but the bit was passed through. hiding this bit "should" be fine, but out of caution we're not doing that in the first statically-defined CPU platform. this bit was not set in the MILAN_CPUID blob, though, even though the RFD was updated. --- nexus/src/app/instance_platform/cpu_platform.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nexus/src/app/instance_platform/cpu_platform.rs b/nexus/src/app/instance_platform/cpu_platform.rs index 873f4234b26..dc6a21de66c 100644 --- a/nexus/src/app/instance_platform/cpu_platform.rs +++ b/nexus/src/app/instance_platform/cpu_platform.rs @@ -608,7 +608,7 @@ pub fn milan_rfd314() -> Vec { // // RDTSCP requires some Bhyve and Propolis work to support, so it is masked // off for now. - leaf.set_ext_apic_space(false); // TODO: I thought this was set. is it not? check PR + leaf.set_ext_apic_space(true); leaf.set_perf_cntr_extensions(false); leaf.set_rdtscp(false); @@ -824,7 +824,7 @@ fn milan_rfd314_is_as_described() { 0xD, 0x2, 0x00000100, 0x00000240, 0x00000000, 0x00000000 ), cpuid_leaf!(0x80000000, 0x80000021, 0x68747541, 0x444D4163, 0x69746E65), - cpuid_leaf!(0x80000001, 0x00A00F11, 0x40000000, 0x444001F1, 0x27D3FBFF), + cpuid_leaf!(0x80000001, 0x00A00F11, 0x40000000, 0x444001F9, 0x27D3FBFF), cpuid_leaf!(0x80000002, 0x20444D41, 0x43595045, 0x31373720, 0x36205033), cpuid_leaf!(0x80000003, 0x6F432D34, 0x50206572, 0x65636F72, 0x726F7373), cpuid_leaf!(0x80000004, 0x20202020, 0x20202020, 0x20202020, 0x00202020), From bfee77723fc4e2cdcd65d6d21525fc36493660bb Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 3 Sep 2025 03:49:48 +0000 Subject: [PATCH 32/42] adjust leaf 8000_001D expectations with RFD 314 adjustments --- .../src/app/instance_platform/cpu_platform.rs | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/nexus/src/app/instance_platform/cpu_platform.rs b/nexus/src/app/instance_platform/cpu_platform.rs index dc6a21de66c..089f0cb1f71 100644 --- a/nexus/src/app/instance_platform/cpu_platform.rs +++ b/nexus/src/app/instance_platform/cpu_platform.rs @@ -507,6 +507,8 @@ fn milan_ideal() -> CpuIdDump { cpuid.set_extended_cache_parameters(None).expect("can set leaf 8000_001Dh"); let mut leaf = ProcessorTopologyInfo::empty(); + // This is managed dynamically, where a single vCPU instance will see this + // set to 1 instead. 
leaf.set_threads_per_core(2); cpuid .set_processor_topology_info(Some(leaf)) @@ -695,30 +697,28 @@ pub fn milan_rfd314() -> Vec { cpuid.set_tlb_1gb_page_info(Some(leaf)).expect("can set leaf 8000_0019h"); // Set up extended cache hierarchy info (leaf 8000_001Dh) + // + // This is the fabricated cache topology from Bhyve. We could be more + // precise, for dubious benefit. This is discussed in more detail in RFD + // 314. let mut levels = Vec::new(); levels.push(CpuIdResult { eax: 0x00000121, - ebx: 0x01C0003F, - ecx: 0x0000003F, - edx: 0x00000000, - }); - levels.push(CpuIdResult { - eax: 0x00000122, - ebx: 0x01C0003F, - ecx: 0x0000003F, + ebx: 0x0000003F, + ecx: 0x00000000, edx: 0x00000000, }); levels.push(CpuIdResult { eax: 0x00000143, - ebx: 0x01C0003F, - ecx: 0x000003FF, - edx: 0x00000002, + ebx: 0x0000003F, + ecx: 0x00000000, + edx: 0x00000000, }); levels.push(CpuIdResult { eax: 0x00000163, - ebx: 0x03C0003F, - ecx: 0x00007FFF, - edx: 0x00000001, + ebx: 0x0000003F, + ecx: 0x00000000, + edx: 0x00000000, }); cpuid .set_extended_cache_parameters(Some(levels.as_slice())) From fb12fc944d3d2c1ac2b8976bb6c6338afc32f923 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 3 Sep 2025 04:09:29 +0000 Subject: [PATCH 33/42] test demonstrating constructed CPUID tables are as exepected these tests are: * the `milan_rfd314` function computes something that matches RFD 314 * the RFD 314 definition differs in specific and justified ways from pre-314 guest CPUID profiles. --- .../src/app/instance_platform/cpu_platform.rs | 382 +++++++++++++++--- nexus/src/app/instance_platform/mod.rs | 2 +- 2 files changed, 334 insertions(+), 50 deletions(-) diff --git a/nexus/src/app/instance_platform/cpu_platform.rs b/nexus/src/app/instance_platform/cpu_platform.rs index 089f0cb1f71..3fa047e7fb9 100644 --- a/nexus/src/app/instance_platform/cpu_platform.rs +++ b/nexus/src/app/instance_platform/cpu_platform.rs @@ -3,7 +3,7 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use raw_cpuid::{ - ApmInfo, CpuIdDump, CpuIdResult, CpuIdWriter, + ApmInfo, CpuId, CpuIdDump, CpuIdReader, CpuIdResult, CpuIdWriter, ExtendedFeatureIdentification2, ExtendedFeatures, ExtendedProcessorFeatureIdentifiers, ExtendedState, ExtendedStateInfo, ExtendedTopologyLevel, FeatureInfo, L1CacheTlbInfo, L2And3CacheTlbInfo, @@ -26,8 +26,8 @@ use sled_agent_client::types::CpuidEntry; /// "`clflush` operates on the same number of words". #[allow(dead_code)] pub fn functionally_same(base: CpuIdDump, target: CpuIdDump) -> bool { - let base = raw_cpuid::CpuId::with_cpuid_reader(base); - let target = raw_cpuid::CpuId::with_cpuid_reader(target); + let base = CpuId::with_cpuid_reader(base); + let target = CpuId::with_cpuid_reader(target); match (base.get_feature_info(), target.get_feature_info()) { (Some(base_info), Some(target_info)) => { @@ -165,7 +165,7 @@ pub fn functionally_same(base: CpuIdDump, target: CpuIdDump) -> bool { /// where possible. This CPUID configuration as-is is untested; guests may not /// boot, this may be too reductive, etc. fn milan_ideal() -> CpuIdDump { - let mut cpuid = raw_cpuid::CpuId::with_cpuid_reader(CpuIdDump::new()); + let mut cpuid = CpuId::with_cpuid_reader(CpuIdDump::new()); let leaf = VendorInfo::amd(); cpuid.set_vendor_info(Some(leaf)).expect("can set leaf 0"); cpuid @@ -543,11 +543,11 @@ fn milan_ideal() -> CpuIdDump { dump } -pub fn milan_rfd314() -> Vec { +pub fn milan_rfd314() -> CpuIdDump { // This is the Milan we'd "want" to expose, absent any other constraints. 
let baseline = milan_ideal(); - let mut cpuid = raw_cpuid::CpuId::with_cpuid_reader(baseline); + let mut cpuid = CpuId::with_cpuid_reader(baseline); let mut leaf = cpuid .get_extended_feature_info() @@ -724,10 +724,10 @@ pub fn milan_rfd314() -> Vec { .set_extended_cache_parameters(Some(levels.as_slice())) .expect("can set leaf 8000_001Dh"); - dump_to_cpuid_entries(cpuid.into_source()) + cpuid.into_source() } -fn dump_to_cpuid_entries(dump: CpuIdDump) -> Vec { +pub fn dump_to_cpuid_entries(dump: CpuIdDump) -> Vec { let mut entries = Vec::new(); for (leaf, subleaf, regs) in dump.into_iter() { @@ -755,34 +755,34 @@ fn dump_to_cpuid_entries(dump: CpuIdDump) -> Vec { entries } -#[test] -fn milan_rfd314_is_as_described() { - macro_rules! cpuid_leaf { - ($leaf:literal, $eax:literal, $ebx:literal, $ecx:literal, $edx:literal) => { - CpuidEntry { - leaf: $leaf, - subleaf: None, - eax: $eax, - ebx: $ebx, - ecx: $ecx, - edx: $edx, - } - }; - } +macro_rules! cpuid_leaf { + ($leaf:literal, $eax:literal, $ebx:literal, $ecx:literal, $edx:literal) => { + CpuidEntry { + leaf: $leaf, + subleaf: None, + eax: $eax, + ebx: $ebx, + ecx: $ecx, + edx: $edx, + } + }; +} - macro_rules! cpuid_subleaf { - ($leaf:literal, $sl:literal, $eax:literal, $ebx:literal, $ecx:literal, $edx:literal) => { - CpuidEntry { - leaf: $leaf, - subleaf: Some($sl), - eax: $eax, - ebx: $ebx, - ecx: $ecx, - edx: $edx, - } - }; - } +macro_rules! cpuid_subleaf { + ($leaf:literal, $sl:literal, $eax:literal, $ebx:literal, $ecx:literal, $edx:literal) => { + CpuidEntry { + leaf: $leaf, + subleaf: Some($sl), + eax: $eax, + ebx: $ebx, + ecx: $ecx, + edx: $edx, + } + }; +} +#[test] +fn milan_rfd314_is_as_described() { // This CPUID leaf blob is a collection of the leaves described in RFD 314. // RFD 314 is the source of truth for what bits are set here and why. // `milan_rfd314()` constructs what ought to be an *identical* set of bits, @@ -794,7 +794,7 @@ fn milan_rfd314_is_as_described() { // between 314 and the present day. Actual guest CPU platforms may differ as // we enable additional guest functionality in the future; this is not a // source of truth for actual guest platforms. 
- const MILAN_CPUID: [CpuidEntry; 33] = [ + const MILAN_CPUID: [CpuidEntry; 32] = [ cpuid_leaf!(0x0, 0x0000000D, 0x68747541, 0x444D4163, 0x69746E65), cpuid_leaf!(0x1, 0x00A00F11, 0x00000800, 0xF6D83203, 0x078BFBFF), cpuid_leaf!(0x5, 0x00000000, 0x00000000, 0x00000000, 0x00000000), @@ -837,24 +837,15 @@ fn milan_rfd314_is_as_described() { cpuid_leaf!(0x8000001A, 0x00000006, 0x00000000, 0x00000000, 0x00000000), cpuid_leaf!(0x8000001B, 0x00000000, 0x00000000, 0x00000000, 0x00000000), cpuid_leaf!(0x8000001C, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - cpuid_subleaf!( - 0x8000001D, 0x0, 0x00000121, 0x01C0003F, 0x0000003F, 0x00000000 - ), - cpuid_subleaf!( - 0x8000001D, 0x1, 0x00000122, 0x01C0003F, 0x0000003F, 0x00000000 - ), - cpuid_subleaf!( - 0x8000001D, 0x2, 0x00000143, 0x01C0003F, 0x000003FF, 0x00000002 - ), - cpuid_subleaf!( - 0x8000001D, 0x3, 0x00000163, 0x03C0003F, 0x00007FFF, 0x00000001 - ), + cpuid_subleaf!(0x8000001D, 0x00, 0x00000121, 0x0000003F, 0x00000000, 0x00000000), + cpuid_subleaf!(0x8000001D, 0x01, 0x00000143, 0x0000003F, 0x00000000, 0x00000000), + cpuid_subleaf!(0x8000001D, 0x02, 0x00000163, 0x0000003F, 0x00000000, 0x00000000), cpuid_leaf!(0x8000001E, 0x00000000, 0x00000100, 0x00000000, 0x00000000), cpuid_leaf!(0x8000001F, 0x00000000, 0x00000000, 0x00000000, 0x00000000), cpuid_leaf!(0x80000021, 0x00000045, 0x00000000, 0x00000000, 0x00000000), ]; - let computed = milan_rfd314(); + let computed = dump_to_cpuid_entries(milan_rfd314()); // `milan_rfd314` sorts by leaf/subleaf, so everything *should* be in the // same order.. just a question if it's all the same: @@ -862,7 +853,300 @@ fn milan_rfd314_is_as_described() { eprintln!("comparing {:#08x}.{:?}", l.leaf, l.subleaf); assert_eq!( l, r, - "leaf {:#08x} (subleaf? {:?}) did not match", + "leaf 0x{:08x} (subleaf? {:?}) did not match", + l.leaf, l.subleaf + ); + } +} + +#[test] +fn milan_current_vs_rfd314_is_understood() { + // This CPUID leaf blob is what a guest booted on a Gimlet as of around + // August 2025 would have gotten as its passed-through CPUID leaves. + // + // This is present only to validate initial CPU platforms work and in + // particular that the initial specified-up-front CPU platform does not + // differ in unexpected ways from what guests had been getting to that + // point. + const MILAN_BEFORE_RFD314: [CpuidEntry; 30] = [ + cpuid_leaf!(0x0, 0x00000010, 0x68747541, 0x444D4163, 0x69746E65), + cpuid_leaf!(0x1, 0x00A00F11, 0x01020800, 0xFEDA3203, 0x178BFBFF), + cpuid_leaf!(0x5, 0x00000040, 0x00000040, 0x00000003, 0x00000011), + cpuid_leaf!(0x6, 0x00000004, 0x00000000, 0x00000000, 0x00000000), + cpuid_subleaf!(0x7, 0x0, 0x00000000, 0x201003A9, 0x00000600, 0x00000000), + cpuid_subleaf!(0x7, 0x1, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + // leaf B is missing, and leaf D is the synthetic topology Bhyve invents. + cpuid_subleaf!(0xD, 0x0, 0x00000007, 0x00000340, 0x00000340, 0x00000000), + cpuid_subleaf!(0xD, 0x1, 0x00000001, 0x00000340, 0x00000000, 0x00000000), + cpuid_subleaf!(0xD, 0x2, 0x00000100, 0x00000240, 0x00000000, 0x00000000), + // Include the all-zero leaf 10h explicitly so that the maximum standard + // leaf matches below. 
+ cpuid_leaf!(0x10, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x80000000, 0x80000023, 0x68747541, 0x444D4163, 0x69746E65), + cpuid_leaf!(0x80000001, 0x00A00F11, 0x40000000, 0x444031FB, 0x25D3FBFF), + cpuid_leaf!(0x80000002, 0x20444D41, 0x43595045, 0x31373720, 0x36205033), + cpuid_leaf!(0x80000003, 0x6F432D34, 0x50206572, 0x65636F72, 0x726F7373), + cpuid_leaf!(0x80000004, 0x20202020, 0x20202020, 0x20202020, 0x00202020), + cpuid_leaf!(0x80000005, 0xFF40FF40, 0xFF40FF40, 0x20080140, 0x20080140), + cpuid_leaf!(0x80000006, 0x48002200, 0x68004200, 0x02006140, 0x08009140), + cpuid_leaf!(0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000100), + cpuid_leaf!(0x80000008, 0x00003030, 0x00000007, 0x00000000, 0x00010007), + cpuid_leaf!(0x8000000A, 0x00000001, 0x00008000, 0x00000000, 0x119BBCFF), + cpuid_leaf!(0x80000019, 0xF040F040, 0xF0400000, 0x00000000, 0x00000000), + cpuid_leaf!(0x8000001A, 0x00000006, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x8000001B, 0x000003FF, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x8000001C, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cpuid_subleaf!(0x8000001D, 0x00, 0x00004121, 0x0000003F, 0x00000000, 0x00000000), + cpuid_subleaf!(0x8000001D, 0x01, 0x00004143, 0x0000003F, 0x00000000, 0x00000000), + cpuid_subleaf!(0x8000001D, 0x02, 0x00004163, 0x0000003F, 0x00000000, 0x00000000), + cpuid_leaf!(0x8000001E, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x8000001F, 0x0101FD3F, 0x00004173, 0x000001FD, 0x00000001), + cpuid_leaf!(0x80000021, 0x0000204D, 0x00000000, 0x00000000, 0x00000000), + ]; + + let mut cpuid = CpuId::with_cpuid_reader(milan_rfd314()); + + let mut feature_info = cpuid.get_feature_info().expect("can get leaf 1h"); + // The representative CPUID dump happened to come from processor 1 on a + // two-processor VM. + feature_info.set_max_logical_processor_ids(2); + feature_info.set_initial_local_apic_id(1); + // TODO: Guests were told PCID was supported, but 314 says that it is not + // supported..? + feature_info.set_pcid(true); + // The snapshot comes from a VM that enabled XSAVE. + feature_info.set_oxsave(true); + // The snapshot comes from a VM where HTT was dynamically managed to "true". + feature_info.set_htt(true); + cpuid.set_feature_info(Some(feature_info)).expect("can set leaf 1h"); + + let mut monitor_mwait = cpuid.get_monitor_mwait_info().expect("can get leaf 5h"); + // The monitor/mwait leaf was passed through non-zeroed even though + // monitor/mwait support is hidden. + monitor_mwait.set_smallest_monitor_line(0x40); + monitor_mwait.set_largest_monitor_line(0x40); + monitor_mwait.set_extensions_supported(1); + monitor_mwait.set_interrupts_as_break_event(1); + // These are "reserved" according to the AMD APM, but in practice look quite + // similar to their Intel meaning... + monitor_mwait.set_supported_c0_states(1); + monitor_mwait.set_supported_c1_states(1); + cpuid.set_monitor_mwait_info(Some(monitor_mwait)).expect("can set leaf 5h"); + + let mut ext_features = cpuid.get_extended_feature_info().expect("can get leaf 7h"); + // Byhve didn't/doesn't pass ADX through from the host + ext_features.set_adx(false); + // ... or CLFLUSHOPT? + ext_features.set_clflushopt(false); + // ... or CLWB? 
+ ext_features.set_clwb(false); + + // or FSRM + ext_features.set_fsrm(false); + + cpuid.set_extended_feature_info(Some(ext_features)).expect("can set leaf 7h"); + + let mut ext_processor_features = cpuid.get_extended_processor_and_feature_identifiers().expect("can get leaf 8000_0001h"); + // This is dynamically managed, true in the sampled VM. + ext_processor_features.set_cmp_legacy(true); + // Neither of these features are actually available to guests, but byhve had + // been passing the CPUID bits through + ext_processor_features.set_skinit(true); + ext_processor_features.set_wdt(true); + // TODO: Fast FXSAVE was not passed through? + ext_processor_features.set_fast_fxsave_fxstor(false); + cpuid.set_extended_processor_and_feature_identifiers(Some(ext_processor_features)).expect("can set leaf 8000_0001h"); + + let mut leaf = cpuid + .get_processor_capacity_feature_info() + .expect("can get leaf 8000_0008h"); + + // Support for the instructions retired MSR was passed through by bhyve even + // though the MSR itself is not available to guests. + leaf.set_inst_ret_cntr_msr(true); + + // TODO: Support for `wbnoinvd` was hidden from guests by byhve? + leaf.set_wbnoinvd(false); + + // INVLPGB and RDPRU max were passed through even those instructions are not + // supported. + leaf.set_invlpgb_max_pages(7); + leaf.set_max_rdpru_id(1); + + cpuid + .set_processor_capacity_feature_info(Some(leaf)) + .expect("can set leaf 8000_0008h"); + + // Set up L1 cache+TLB info (leaf 8000_0005h) + let mut leaf = L1CacheTlbInfo::empty(); + + leaf.set_itlb_2m_4m_size(0x40); + leaf.set_itlb_2m_4m_associativity(0xff); + leaf.set_dtlb_2m_4m_size(0x40); + leaf.set_dtlb_2m_4m_associativity(0xff); + + leaf.set_itlb_4k_size(0x40); + leaf.set_itlb_4k_associativity(0xff); + leaf.set_dtlb_4k_size(0x40); + leaf.set_dtlb_4k_associativity(0xff); + + leaf.set_dcache_line_size(0x40); + leaf.set_dcache_lines_per_tag(0x01); + leaf.set_dcache_associativity(0x08); + leaf.set_dcache_size(0x20); + + leaf.set_icache_line_size(0x40); + leaf.set_icache_lines_per_tag(0x01); + leaf.set_icache_associativity(0x08); + leaf.set_icache_size(0x20); + + cpuid + .set_l1_cache_and_tlb_info(Some(leaf)) + .expect("can set leaf 8000_0005h"); + + // Set up L2 and L3 cache+TLB info (leaf 8000_0006h) + let mut leaf = L2And3CacheTlbInfo::empty(); + + // Set up leaf 8000_0006h EAX + leaf.set_itlb_2m_4m_size(0x200); + leaf.set_itlb_2m_4m_associativity(0x2); + leaf.set_dtlb_2m_4m_size(0x800); + leaf.set_dtlb_2m_4m_associativity(0x4); + + // Set up leaf 8000_0006h EBX + leaf.set_itlb_4k_size(0x200); + leaf.set_itlb_4k_associativity(0x4); + leaf.set_dtlb_4k_size(0x800); + leaf.set_dtlb_4k_associativity(0x6); + + // Set up leaf 8000_0006h ECX + leaf.set_l2cache_line_size(0x40); + leaf.set_l2cache_lines_per_tag(0x1); + leaf.set_l2cache_associativity(0x6); + leaf.set_l2cache_size(0x0200); + + // Set up leaf 8000_0006h EDX + leaf.set_l3cache_line_size(0x40); + leaf.set_l3cache_lines_per_tag(0x1); + leaf.set_l3cache_associativity(0x9); + leaf.set_l3cache_size(0x0200); + + cpuid + .set_l2_l3_cache_and_tlb_info(Some(leaf)) + .expect("can set leaf 8000_0006h"); + + // Set up TLB information for 1GiB pages (leaf 8000_0019h) + let mut leaf = Tlb1gbPageInfo::empty(); + leaf.set_dtlb_l1_1gb_associativity(0xF); + leaf.set_dtlb_l1_1gb_size(0x40); + leaf.set_itlb_l1_1gb_associativity(0xF); + leaf.set_itlb_l1_1gb_size(0x40); + leaf.set_dtlb_l2_1gb_associativity(0xF); + leaf.set_dtlb_l2_1gb_size(0x40); + leaf.set_itlb_l2_1gb_associativity(0); + leaf.set_itlb_l2_1gb_size(0); 
+ cpuid.set_tlb_1gb_page_info(Some(leaf)).expect("can set leaf 8000_0019h"); + + let mut processor_topo = cpuid.get_processor_topology_info().expect("can get leaf 8000_001Eh"); + // By virtue of having a single vCPU, the representative VM has one thread + // per core rather than two. + processor_topo.set_threads_per_core(1); + cpuid.set_processor_topology_info(Some(processor_topo)).expect("can set leaf 8000_001Eh"); + + let mut ext_features_2 = cpuid.get_extended_feature_identification_2().expect("can get leaf 8000_0021h"); + // Bhyve passed through the feature bit for this MSR, though the MSR itself + // is not allowed. + ext_features_2.set_prefetch_ctl_msr(true); + // Bhyve passed through the feature bit for SMM page config lock, though + // guests cannot actually control it. + ext_features_2.set_smm_pg_cfg_lock(true); + cpuid.set_extended_feature_identification_2(Some(ext_features_2)).expect("can set leaf 8000_0021h"); + + // Now touch up the RFD314 Milan definition in the specific ways we know it + // differs from what guests got at the time. + + // Some non-feature tweaks: + let mut dump = cpuid.into_source(); + + // Leaf B is not passed through from the host on AMD systems: + // https://www.illumos.org/issues/17529 + dump.set_leaf(0xB, None); + + // Leaf D (extended state information) doesn't have a nice read/write API in + // `rust-cpuid`, so adjust expectations more manually.. + // + // Guests had `xsavec` and `xgetbv w/ ecx=1` hidden before. + let mut ext_state = dump.cpuid2(0xD, 1); + ext_state.eax &= !0x0000_0006; + dump.set_subleaf(0xD, 1, Some(ext_state)); + + // SVM features were not zeroed, but the SVM bit itself was not passed + // through. + let mut svm = CpuIdResult::empty(); + svm.eax = 0x0000_0001; + svm.ebx = 0x0000_8000; + svm.ecx = 0x0000_0000; + svm.edx = 0x119B_BCFF; + dump.set_leaf(0x8000_000A, Some(svm)); + + // IBS capabilities were not zeroed, but the IBS MSRs are not + // guest-accessible. + let mut ibs = CpuIdResult::empty(); + ibs.eax = 0x0000_03FF; + ibs.ebx = 0x0000_0000; + ibs.ecx = 0x0000_0000; + ibs.edx = 0x0000_0000; + dump.set_leaf(0x8000_001B, Some(ibs)); + + // The "cores sharing cache" bits under leaf 8000_001D are somewhat dynamic. + // For L1 and L2 caches, these are the number of threads per core, and for + // L3 this is threads in the virtual processor. The representative VM had + // two cores, which is presented as an SMT pair, so all levels read as 2. + // + // This is stored as one minus the actual value at each level, so one core + // is a bit pattern of all zeroes. The "cores sharing cache" field starts at + // bit 14. So we want to store the bit pattern `0...1` at that offset. There + // isn't a nice way to patch this into an existing cache topology in + // `raw_cpuid`, so we have to get a bit gross with it.. + for level in 0..3 { + let mut leaf = dump.cpuid2(0x8000_001D, level); + // Mask out all the bits for "cores sharing cache" + leaf.eax &= !0x03ffc000; + leaf.eax |= 1 << 14; + dump.set_subleaf(0x8000_001D, level, Some(leaf)); + } + + // Memory encryption features were not zeroed, but the feature itself is not + // supported. + let mut sme = CpuIdResult::empty(); + sme.eax = 0x0101_FD3F; + sme.ebx = 0x0000_4173; + sme.ecx = 0x0000_01FD; + sme.edx = 0x0000_0001; + dump.set_leaf(0x8000_001F, Some(sme)); + + // Milan has standard leaves up to 0x10, but Bhyve zeroes out the last few. + // Nothing reduces the max standard leaf, so guests saw a different value + // than the `0x0000000D` that RFD 314 describes. 
To get here with + // `raw_cpuid`, add a zeroed out leaf "0x10" to drag the max standard leaf + // that high. + dump.set_leaf(0x10, Some(CpuIdResult::empty())); + + // Similar to above, extended leaves go to 0x8000_0021, but hardware goes up + // to 0x8000_0023 and when zeroing the last few leaves the max valid leaf + // did not get moved back down. Add a zeroed out leaf "0x8000_0023" to drag + // the max extended leaf as high as before. + dump.set_leaf(0x8000_0023, Some(CpuIdResult::empty())); + + let computed = dump_to_cpuid_entries(dump); + + // `milan_rfd314` sorts by leaf/subleaf, so everything *should* be in the + // same order.. just a question if it's all the same: + for (l, r) in MILAN_BEFORE_RFD314.iter().zip(computed.as_slice().iter()) { + eprintln!("comparing {:#08x}.{:?}", l.leaf, l.subleaf); + assert_eq!( + l, r, + "leaf 0x{:08x} (subleaf? {:?}) did not match", l.leaf, l.subleaf ); } diff --git a/nexus/src/app/instance_platform/mod.rs b/nexus/src/app/instance_platform/mod.rs index 2b83ebeb8b1..4b0628eb824 100644 --- a/nexus/src/app/instance_platform/mod.rs +++ b/nexus/src/app/instance_platform/mod.rs @@ -511,7 +511,7 @@ fn cpuid_from_vmm_cpu_platform( db::model::VmmCpuPlatform::SledDefault => return None, db::model::VmmCpuPlatform::AmdMilan | db::model::VmmCpuPlatform::AmdTurin => Cpuid { - entries: cpu_platform::milan_rfd314(), + entries: cpu_platform::dump_to_cpuid_entries(cpu_platform::milan_rfd314()), vendor: CpuidVendor::Amd, }, }; From d59a8428ac79136bddd4f541cd56da6c9b1959e8 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 3 Sep 2025 04:11:21 +0000 Subject: [PATCH 34/42] rustfmt --- .../src/app/instance_platform/cpu_platform.rs | 90 +++++++++++++------ nexus/src/app/instance_platform/mod.rs | 4 +- 2 files changed, 68 insertions(+), 26 deletions(-) diff --git a/nexus/src/app/instance_platform/cpu_platform.rs b/nexus/src/app/instance_platform/cpu_platform.rs index 3fa047e7fb9..94e216c83dd 100644 --- a/nexus/src/app/instance_platform/cpu_platform.rs +++ b/nexus/src/app/instance_platform/cpu_platform.rs @@ -401,7 +401,8 @@ fn milan_ideal() -> CpuIdDump { leaf.set_misaligned_sse_mode(true); leaf.set_prefetchw(true); - leaf.set_osvw(false); // May be set in hardware, hopefully can hide hardware errata from guests + // May be set in hardware, hopefully can hide hardware errata from guests + leaf.set_osvw(false); leaf.set_ibs(false); leaf.set_xop(false); @@ -618,8 +619,8 @@ pub fn milan_rfd314() -> CpuIdDump { .set_extended_processor_and_feature_identifiers(Some(leaf)) .expect("can set leaf 8000_0001h"); - // VMs on Milan currently get brand string and cache topology information from the host - // processor, so replicate it to minimize changes for now. + // VMs on Milan currently get brand string and cache topology information + // from the host processor, so replicate it to minimize changes for now. // Leaves 8000_0002 through 8000_0005 cpuid @@ -741,8 +742,8 @@ pub fn dump_to_cpuid_entries(dump: CpuIdDump) -> Vec { }); } - // Entry order does not actually matter. Sort here because it's fast (~30-35 leaves) and - // looking at the vec in logs or on the wire *so* much nicer. + // Entry order does not actually matter. Sort here because it's fast (~30-35 + // leaves) and looking at the vec in logs or on the wire *so* much nicer. 
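+    // (Ordering is by leaf first, then subleaf.)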
entries.sort_by(|left, right| { let by_leaf = left.leaf.cmp(&right.leaf); if by_leaf == std::cmp::Ordering::Equal { @@ -837,9 +838,15 @@ fn milan_rfd314_is_as_described() { cpuid_leaf!(0x8000001A, 0x00000006, 0x00000000, 0x00000000, 0x00000000), cpuid_leaf!(0x8000001B, 0x00000000, 0x00000000, 0x00000000, 0x00000000), cpuid_leaf!(0x8000001C, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - cpuid_subleaf!(0x8000001D, 0x00, 0x00000121, 0x0000003F, 0x00000000, 0x00000000), - cpuid_subleaf!(0x8000001D, 0x01, 0x00000143, 0x0000003F, 0x00000000, 0x00000000), - cpuid_subleaf!(0x8000001D, 0x02, 0x00000163, 0x0000003F, 0x00000000, 0x00000000), + cpuid_subleaf!( + 0x8000001D, 0x00, 0x00000121, 0x0000003F, 0x00000000, 0x00000000 + ), + cpuid_subleaf!( + 0x8000001D, 0x01, 0x00000143, 0x0000003F, 0x00000000, 0x00000000 + ), + cpuid_subleaf!( + 0x8000001D, 0x02, 0x00000163, 0x0000003F, 0x00000000, 0x00000000 + ), cpuid_leaf!(0x8000001E, 0x00000000, 0x00000100, 0x00000000, 0x00000000), cpuid_leaf!(0x8000001F, 0x00000000, 0x00000000, 0x00000000, 0x00000000), cpuid_leaf!(0x80000021, 0x00000045, 0x00000000, 0x00000000, 0x00000000), @@ -873,12 +880,22 @@ fn milan_current_vs_rfd314_is_understood() { cpuid_leaf!(0x1, 0x00A00F11, 0x01020800, 0xFEDA3203, 0x178BFBFF), cpuid_leaf!(0x5, 0x00000040, 0x00000040, 0x00000003, 0x00000011), cpuid_leaf!(0x6, 0x00000004, 0x00000000, 0x00000000, 0x00000000), - cpuid_subleaf!(0x7, 0x0, 0x00000000, 0x201003A9, 0x00000600, 0x00000000), - cpuid_subleaf!(0x7, 0x1, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cpuid_subleaf!( + 0x7, 0x0, 0x00000000, 0x201003A9, 0x00000600, 0x00000000 + ), + cpuid_subleaf!( + 0x7, 0x1, 0x00000000, 0x00000000, 0x00000000, 0x00000000 + ), // leaf B is missing, and leaf D is the synthetic topology Bhyve invents. - cpuid_subleaf!(0xD, 0x0, 0x00000007, 0x00000340, 0x00000340, 0x00000000), - cpuid_subleaf!(0xD, 0x1, 0x00000001, 0x00000340, 0x00000000, 0x00000000), - cpuid_subleaf!(0xD, 0x2, 0x00000100, 0x00000240, 0x00000000, 0x00000000), + cpuid_subleaf!( + 0xD, 0x0, 0x00000007, 0x00000340, 0x00000340, 0x00000000 + ), + cpuid_subleaf!( + 0xD, 0x1, 0x00000001, 0x00000340, 0x00000000, 0x00000000 + ), + cpuid_subleaf!( + 0xD, 0x2, 0x00000100, 0x00000240, 0x00000000, 0x00000000 + ), // Include the all-zero leaf 10h explicitly so that the maximum standard // leaf matches below. 
cpuid_leaf!(0x10, 0x00000000, 0x00000000, 0x00000000, 0x00000000), @@ -896,9 +913,15 @@ fn milan_current_vs_rfd314_is_understood() { cpuid_leaf!(0x8000001A, 0x00000006, 0x00000000, 0x00000000, 0x00000000), cpuid_leaf!(0x8000001B, 0x000003FF, 0x00000000, 0x00000000, 0x00000000), cpuid_leaf!(0x8000001C, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - cpuid_subleaf!(0x8000001D, 0x00, 0x00004121, 0x0000003F, 0x00000000, 0x00000000), - cpuid_subleaf!(0x8000001D, 0x01, 0x00004143, 0x0000003F, 0x00000000, 0x00000000), - cpuid_subleaf!(0x8000001D, 0x02, 0x00004163, 0x0000003F, 0x00000000, 0x00000000), + cpuid_subleaf!( + 0x8000001D, 0x00, 0x00004121, 0x0000003F, 0x00000000, 0x00000000 + ), + cpuid_subleaf!( + 0x8000001D, 0x01, 0x00004143, 0x0000003F, 0x00000000, 0x00000000 + ), + cpuid_subleaf!( + 0x8000001D, 0x02, 0x00004163, 0x0000003F, 0x00000000, 0x00000000 + ), cpuid_leaf!(0x8000001E, 0x00000000, 0x00000000, 0x00000000, 0x00000000), cpuid_leaf!(0x8000001F, 0x0101FD3F, 0x00004173, 0x000001FD, 0x00000001), cpuid_leaf!(0x80000021, 0x0000204D, 0x00000000, 0x00000000, 0x00000000), @@ -920,7 +943,8 @@ fn milan_current_vs_rfd314_is_understood() { feature_info.set_htt(true); cpuid.set_feature_info(Some(feature_info)).expect("can set leaf 1h"); - let mut monitor_mwait = cpuid.get_monitor_mwait_info().expect("can get leaf 5h"); + let mut monitor_mwait = + cpuid.get_monitor_mwait_info().expect("can get leaf 5h"); // The monitor/mwait leaf was passed through non-zeroed even though // monitor/mwait support is hidden. monitor_mwait.set_smallest_monitor_line(0x40); @@ -933,7 +957,8 @@ fn milan_current_vs_rfd314_is_understood() { monitor_mwait.set_supported_c1_states(1); cpuid.set_monitor_mwait_info(Some(monitor_mwait)).expect("can set leaf 5h"); - let mut ext_features = cpuid.get_extended_feature_info().expect("can get leaf 7h"); + let mut ext_features = + cpuid.get_extended_feature_info().expect("can get leaf 7h"); // Byhve didn't/doesn't pass ADX through from the host ext_features.set_adx(false); // ... or CLFLUSHOPT? @@ -944,9 +969,13 @@ fn milan_current_vs_rfd314_is_understood() { // or FSRM ext_features.set_fsrm(false); - cpuid.set_extended_feature_info(Some(ext_features)).expect("can set leaf 7h"); + cpuid + .set_extended_feature_info(Some(ext_features)) + .expect("can set leaf 7h"); - let mut ext_processor_features = cpuid.get_extended_processor_and_feature_identifiers().expect("can get leaf 8000_0001h"); + let mut ext_processor_features = cpuid + .get_extended_processor_and_feature_identifiers() + .expect("can get leaf 8000_0001h"); // This is dynamically managed, true in the sampled VM. ext_processor_features.set_cmp_legacy(true); // Neither of these features are actually available to guests, but byhve had @@ -955,7 +984,11 @@ fn milan_current_vs_rfd314_is_understood() { ext_processor_features.set_wdt(true); // TODO: Fast FXSAVE was not passed through? 
ext_processor_features.set_fast_fxsave_fxstor(false); - cpuid.set_extended_processor_and_feature_identifiers(Some(ext_processor_features)).expect("can set leaf 8000_0001h"); + cpuid + .set_extended_processor_and_feature_identifiers(Some( + ext_processor_features, + )) + .expect("can set leaf 8000_0001h"); let mut leaf = cpuid .get_processor_capacity_feature_info() @@ -1047,20 +1080,27 @@ fn milan_current_vs_rfd314_is_understood() { leaf.set_itlb_l2_1gb_size(0); cpuid.set_tlb_1gb_page_info(Some(leaf)).expect("can set leaf 8000_0019h"); - let mut processor_topo = cpuid.get_processor_topology_info().expect("can get leaf 8000_001Eh"); + let mut processor_topo = + cpuid.get_processor_topology_info().expect("can get leaf 8000_001Eh"); // By virtue of having a single vCPU, the representative VM has one thread // per core rather than two. processor_topo.set_threads_per_core(1); - cpuid.set_processor_topology_info(Some(processor_topo)).expect("can set leaf 8000_001Eh"); + cpuid + .set_processor_topology_info(Some(processor_topo)) + .expect("can set leaf 8000_001Eh"); - let mut ext_features_2 = cpuid.get_extended_feature_identification_2().expect("can get leaf 8000_0021h"); + let mut ext_features_2 = cpuid + .get_extended_feature_identification_2() + .expect("can get leaf 8000_0021h"); // Bhyve passed through the feature bit for this MSR, though the MSR itself // is not allowed. ext_features_2.set_prefetch_ctl_msr(true); // Bhyve passed through the feature bit for SMM page config lock, though // guests cannot actually control it. ext_features_2.set_smm_pg_cfg_lock(true); - cpuid.set_extended_feature_identification_2(Some(ext_features_2)).expect("can set leaf 8000_0021h"); + cpuid + .set_extended_feature_identification_2(Some(ext_features_2)) + .expect("can set leaf 8000_0021h"); // Now touch up the RFD314 Milan definition in the specific ways we know it // differs from what guests got at the time. diff --git a/nexus/src/app/instance_platform/mod.rs b/nexus/src/app/instance_platform/mod.rs index 4b0628eb824..2760e0e5588 100644 --- a/nexus/src/app/instance_platform/mod.rs +++ b/nexus/src/app/instance_platform/mod.rs @@ -511,7 +511,9 @@ fn cpuid_from_vmm_cpu_platform( db::model::VmmCpuPlatform::SledDefault => return None, db::model::VmmCpuPlatform::AmdMilan | db::model::VmmCpuPlatform::AmdTurin => Cpuid { - entries: cpu_platform::dump_to_cpuid_entries(cpu_platform::milan_rfd314()), + entries: cpu_platform::dump_to_cpuid_entries( + cpu_platform::milan_rfd314(), + ), vendor: CpuidVendor::Amd, }, }; From d9acd90424d0d36285c23de77d0a05958345a2ac Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 3 Sep 2025 04:25:46 +0000 Subject: [PATCH 35/42] describe the CPU profile tests a bit better, formatting --- .../src/app/instance_platform/cpu_platform.rs | 851 ++++++++++-------- 1 file changed, 478 insertions(+), 373 deletions(-) diff --git a/nexus/src/app/instance_platform/cpu_platform.rs b/nexus/src/app/instance_platform/cpu_platform.rs index 94e216c83dd..04ab1da965a 100644 --- a/nexus/src/app/instance_platform/cpu_platform.rs +++ b/nexus/src/app/instance_platform/cpu_platform.rs @@ -756,34 +756,45 @@ pub fn dump_to_cpuid_entries(dump: CpuIdDump) -> Vec { entries } -macro_rules! 
cpuid_leaf { - ($leaf:literal, $eax:literal, $ebx:literal, $ecx:literal, $edx:literal) => { - CpuidEntry { - leaf: $leaf, - subleaf: None, - eax: $eax, - ebx: $ebx, - ecx: $ecx, - edx: $edx, - } - }; -} +/// Tests for CPU platforms are morally similar to snapshot tests, in that the +/// platform should not (without very good reason) change after its initial +/// committed definition. Or at least after its definition becomes available in +/// the public API of a released Nexus. +/// +/// The RFD 314 definition of Milan includes description and rationalization of +/// most bits communicated by CPUID as of its writing, and is somewhat more +/// verbose than we expect future CPU profile definitions to be. It may or may +/// not prove useful to include snapshots of future CPU platforms here and +/// validate that the bits they were initially defined as do not change even as +/// the functions to compute them in `raw-cpuid` might. +#[cfg(test)] +mod test { + macro_rules! cpuid_leaf { + ($leaf:literal, $eax:literal, $ebx:literal, $ecx:literal, $edx:literal) => { + CpuidEntry { + leaf: $leaf, + subleaf: None, + eax: $eax, + ebx: $ebx, + ecx: $ecx, + edx: $edx, + } + }; + } -macro_rules! cpuid_subleaf { - ($leaf:literal, $sl:literal, $eax:literal, $ebx:literal, $ecx:literal, $edx:literal) => { - CpuidEntry { - leaf: $leaf, - subleaf: Some($sl), - eax: $eax, - ebx: $ebx, - ecx: $ecx, - edx: $edx, - } - }; -} + macro_rules! cpuid_subleaf { + ($leaf:literal, $sl:literal, $eax:literal, $ebx:literal, $ecx:literal, $edx:literal) => { + CpuidEntry { + leaf: $leaf, + subleaf: Some($sl), + eax: $eax, + ebx: $ebx, + ecx: $ecx, + edx: $edx, + } + }; + } -#[test] -fn milan_rfd314_is_as_described() { // This CPUID leaf blob is a collection of the leaves described in RFD 314. // RFD 314 is the source of truth for what bits are set here and why. 
// `milan_rfd314()` constructs what ought to be an *identical* set of bits, @@ -824,370 +835,464 @@ fn milan_rfd314_is_as_described() { cpuid_subleaf!( 0xD, 0x2, 0x00000100, 0x00000240, 0x00000000, 0x00000000 ), - cpuid_leaf!(0x80000000, 0x80000021, 0x68747541, 0x444D4163, 0x69746E65), - cpuid_leaf!(0x80000001, 0x00A00F11, 0x40000000, 0x444001F9, 0x27D3FBFF), - cpuid_leaf!(0x80000002, 0x20444D41, 0x43595045, 0x31373720, 0x36205033), - cpuid_leaf!(0x80000003, 0x6F432D34, 0x50206572, 0x65636F72, 0x726F7373), - cpuid_leaf!(0x80000004, 0x20202020, 0x20202020, 0x20202020, 0x00202020), - cpuid_leaf!(0x80000005, 0xFF40FF40, 0xFF40FF40, 0x20080140, 0x20080140), - cpuid_leaf!(0x80000006, 0x48002200, 0x68004200, 0x02006140, 0x08009140), - cpuid_leaf!(0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000100), - cpuid_leaf!(0x80000008, 0x00003030, 0x00000205, 0x00000000, 0x00000000), - cpuid_leaf!(0x8000000A, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - cpuid_leaf!(0x80000019, 0xF040F040, 0xF0400000, 0x00000000, 0x00000000), - cpuid_leaf!(0x8000001A, 0x00000006, 0x00000000, 0x00000000, 0x00000000), - cpuid_leaf!(0x8000001B, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - cpuid_leaf!(0x8000001C, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - cpuid_subleaf!( - 0x8000001D, 0x00, 0x00000121, 0x0000003F, 0x00000000, 0x00000000 + cpuid_leaf!( + 0x80000000, 0x80000021, 0x68747541, 0x444D4163, 0x69746E65 ), - cpuid_subleaf!( - 0x8000001D, 0x01, 0x00000143, 0x0000003F, 0x00000000, 0x00000000 + cpuid_leaf!( + 0x80000001, 0x00A00F11, 0x40000000, 0x444001F9, 0x27D3FBFF ), - cpuid_subleaf!( - 0x8000001D, 0x02, 0x00000163, 0x0000003F, 0x00000000, 0x00000000 + cpuid_leaf!( + 0x80000002, 0x20444D41, 0x43595045, 0x31373720, 0x36205033 ), - cpuid_leaf!(0x8000001E, 0x00000000, 0x00000100, 0x00000000, 0x00000000), - cpuid_leaf!(0x8000001F, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - cpuid_leaf!(0x80000021, 0x00000045, 0x00000000, 0x00000000, 0x00000000), - ]; - - let computed = dump_to_cpuid_entries(milan_rfd314()); - - // `milan_rfd314` sorts by leaf/subleaf, so everything *should* be in the - // same order.. just a question if it's all the same: - for (l, r) in MILAN_CPUID.iter().zip(computed.as_slice().iter()) { - eprintln!("comparing {:#08x}.{:?}", l.leaf, l.subleaf); - assert_eq!( - l, r, - "leaf 0x{:08x} (subleaf? {:?}) did not match", - l.leaf, l.subleaf - ); - } -} - -#[test] -fn milan_current_vs_rfd314_is_understood() { - // This CPUID leaf blob is what a guest booted on a Gimlet as of around - // August 2025 would have gotten as its passed-through CPUID leaves. - // - // This is present only to validate initial CPU platforms work and in - // particular that the initial specified-up-front CPU platform does not - // differ in unexpected ways from what guests had been getting to that - // point. - const MILAN_BEFORE_RFD314: [CpuidEntry; 30] = [ - cpuid_leaf!(0x0, 0x00000010, 0x68747541, 0x444D4163, 0x69746E65), - cpuid_leaf!(0x1, 0x00A00F11, 0x01020800, 0xFEDA3203, 0x178BFBFF), - cpuid_leaf!(0x5, 0x00000040, 0x00000040, 0x00000003, 0x00000011), - cpuid_leaf!(0x6, 0x00000004, 0x00000000, 0x00000000, 0x00000000), - cpuid_subleaf!( - 0x7, 0x0, 0x00000000, 0x201003A9, 0x00000600, 0x00000000 + cpuid_leaf!( + 0x80000003, 0x6F432D34, 0x50206572, 0x65636F72, 0x726F7373 ), - cpuid_subleaf!( - 0x7, 0x1, 0x00000000, 0x00000000, 0x00000000, 0x00000000 + cpuid_leaf!( + 0x80000004, 0x20202020, 0x20202020, 0x20202020, 0x00202020 ), - // leaf B is missing, and leaf D is the synthetic topology Bhyve invents. 
- cpuid_subleaf!( - 0xD, 0x0, 0x00000007, 0x00000340, 0x00000340, 0x00000000 + cpuid_leaf!( + 0x80000005, 0xFF40FF40, 0xFF40FF40, 0x20080140, 0x20080140 ), - cpuid_subleaf!( - 0xD, 0x1, 0x00000001, 0x00000340, 0x00000000, 0x00000000 + cpuid_leaf!( + 0x80000006, 0x48002200, 0x68004200, 0x02006140, 0x08009140 ), - cpuid_subleaf!( - 0xD, 0x2, 0x00000100, 0x00000240, 0x00000000, 0x00000000 + cpuid_leaf!( + 0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000100 + ), + cpuid_leaf!( + 0x80000008, 0x00003030, 0x00000205, 0x00000000, 0x00000000 + ), + cpuid_leaf!( + 0x8000000A, 0x00000000, 0x00000000, 0x00000000, 0x00000000 + ), + cpuid_leaf!( + 0x80000019, 0xF040F040, 0xF0400000, 0x00000000, 0x00000000 + ), + cpuid_leaf!( + 0x8000001A, 0x00000006, 0x00000000, 0x00000000, 0x00000000 + ), + cpuid_leaf!( + 0x8000001B, 0x00000000, 0x00000000, 0x00000000, 0x00000000 + ), + cpuid_leaf!( + 0x8000001C, 0x00000000, 0x00000000, 0x00000000, 0x00000000 ), - // Include the all-zero leaf 10h explicitly so that the maximum standard - // leaf matches below. - cpuid_leaf!(0x10, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - cpuid_leaf!(0x80000000, 0x80000023, 0x68747541, 0x444D4163, 0x69746E65), - cpuid_leaf!(0x80000001, 0x00A00F11, 0x40000000, 0x444031FB, 0x25D3FBFF), - cpuid_leaf!(0x80000002, 0x20444D41, 0x43595045, 0x31373720, 0x36205033), - cpuid_leaf!(0x80000003, 0x6F432D34, 0x50206572, 0x65636F72, 0x726F7373), - cpuid_leaf!(0x80000004, 0x20202020, 0x20202020, 0x20202020, 0x00202020), - cpuid_leaf!(0x80000005, 0xFF40FF40, 0xFF40FF40, 0x20080140, 0x20080140), - cpuid_leaf!(0x80000006, 0x48002200, 0x68004200, 0x02006140, 0x08009140), - cpuid_leaf!(0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000100), - cpuid_leaf!(0x80000008, 0x00003030, 0x00000007, 0x00000000, 0x00010007), - cpuid_leaf!(0x8000000A, 0x00000001, 0x00008000, 0x00000000, 0x119BBCFF), - cpuid_leaf!(0x80000019, 0xF040F040, 0xF0400000, 0x00000000, 0x00000000), - cpuid_leaf!(0x8000001A, 0x00000006, 0x00000000, 0x00000000, 0x00000000), - cpuid_leaf!(0x8000001B, 0x000003FF, 0x00000000, 0x00000000, 0x00000000), - cpuid_leaf!(0x8000001C, 0x00000000, 0x00000000, 0x00000000, 0x00000000), cpuid_subleaf!( - 0x8000001D, 0x00, 0x00004121, 0x0000003F, 0x00000000, 0x00000000 + 0x8000001D, 0x00, 0x00000121, 0x0000003F, 0x00000000, + 0x00000000 ), cpuid_subleaf!( - 0x8000001D, 0x01, 0x00004143, 0x0000003F, 0x00000000, 0x00000000 + 0x8000001D, 0x01, 0x00000143, 0x0000003F, 0x00000000, + 0x00000000 ), cpuid_subleaf!( - 0x8000001D, 0x02, 0x00004163, 0x0000003F, 0x00000000, 0x00000000 + 0x8000001D, 0x02, 0x00000163, 0x0000003F, 0x00000000, + 0x00000000 + ), + cpuid_leaf!( + 0x8000001E, 0x00000000, 0x00000100, 0x00000000, 0x00000000 + ), + cpuid_leaf!( + 0x8000001F, 0x00000000, 0x00000000, 0x00000000, 0x00000000 + ), + cpuid_leaf!( + 0x80000021, 0x00000045, 0x00000000, 0x00000000, 0x00000000 ), - cpuid_leaf!(0x8000001E, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - cpuid_leaf!(0x8000001F, 0x0101FD3F, 0x00004173, 0x000001FD, 0x00000001), - cpuid_leaf!(0x80000021, 0x0000204D, 0x00000000, 0x00000000, 0x00000000), ]; - let mut cpuid = CpuId::with_cpuid_reader(milan_rfd314()); - - let mut feature_info = cpuid.get_feature_info().expect("can get leaf 1h"); - // The representative CPUID dump happened to come from processor 1 on a - // two-processor VM. - feature_info.set_max_logical_processor_ids(2); - feature_info.set_initial_local_apic_id(1); - // TODO: Guests were told PCID was supported, but 314 says that it is not - // supported..? 
- feature_info.set_pcid(true); - // The snapshot comes from a VM that enabled XSAVE. - feature_info.set_oxsave(true); - // The snapshot comes from a VM where HTT was dynamically managed to "true". - feature_info.set_htt(true); - cpuid.set_feature_info(Some(feature_info)).expect("can set leaf 1h"); - - let mut monitor_mwait = - cpuid.get_monitor_mwait_info().expect("can get leaf 5h"); - // The monitor/mwait leaf was passed through non-zeroed even though - // monitor/mwait support is hidden. - monitor_mwait.set_smallest_monitor_line(0x40); - monitor_mwait.set_largest_monitor_line(0x40); - monitor_mwait.set_extensions_supported(1); - monitor_mwait.set_interrupts_as_break_event(1); - // These are "reserved" according to the AMD APM, but in practice look quite - // similar to their Intel meaning... - monitor_mwait.set_supported_c0_states(1); - monitor_mwait.set_supported_c1_states(1); - cpuid.set_monitor_mwait_info(Some(monitor_mwait)).expect("can set leaf 5h"); - - let mut ext_features = - cpuid.get_extended_feature_info().expect("can get leaf 7h"); - // Byhve didn't/doesn't pass ADX through from the host - ext_features.set_adx(false); - // ... or CLFLUSHOPT? - ext_features.set_clflushopt(false); - // ... or CLWB? - ext_features.set_clwb(false); - - // or FSRM - ext_features.set_fsrm(false); - - cpuid - .set_extended_feature_info(Some(ext_features)) - .expect("can set leaf 7h"); - - let mut ext_processor_features = cpuid - .get_extended_processor_and_feature_identifiers() - .expect("can get leaf 8000_0001h"); - // This is dynamically managed, true in the sampled VM. - ext_processor_features.set_cmp_legacy(true); - // Neither of these features are actually available to guests, but byhve had - // been passing the CPUID bits through - ext_processor_features.set_skinit(true); - ext_processor_features.set_wdt(true); - // TODO: Fast FXSAVE was not passed through? - ext_processor_features.set_fast_fxsave_fxstor(false); - cpuid - .set_extended_processor_and_feature_identifiers(Some( - ext_processor_features, - )) - .expect("can set leaf 8000_0001h"); - - let mut leaf = cpuid - .get_processor_capacity_feature_info() - .expect("can get leaf 8000_0008h"); - - // Support for the instructions retired MSR was passed through by bhyve even - // though the MSR itself is not available to guests. - leaf.set_inst_ret_cntr_msr(true); - - // TODO: Support for `wbnoinvd` was hidden from guests by byhve? - leaf.set_wbnoinvd(false); - - // INVLPGB and RDPRU max were passed through even those instructions are not - // supported. 
- leaf.set_invlpgb_max_pages(7); - leaf.set_max_rdpru_id(1); - - cpuid - .set_processor_capacity_feature_info(Some(leaf)) - .expect("can set leaf 8000_0008h"); - - // Set up L1 cache+TLB info (leaf 8000_0005h) - let mut leaf = L1CacheTlbInfo::empty(); - - leaf.set_itlb_2m_4m_size(0x40); - leaf.set_itlb_2m_4m_associativity(0xff); - leaf.set_dtlb_2m_4m_size(0x40); - leaf.set_dtlb_2m_4m_associativity(0xff); - - leaf.set_itlb_4k_size(0x40); - leaf.set_itlb_4k_associativity(0xff); - leaf.set_dtlb_4k_size(0x40); - leaf.set_dtlb_4k_associativity(0xff); - - leaf.set_dcache_line_size(0x40); - leaf.set_dcache_lines_per_tag(0x01); - leaf.set_dcache_associativity(0x08); - leaf.set_dcache_size(0x20); - - leaf.set_icache_line_size(0x40); - leaf.set_icache_lines_per_tag(0x01); - leaf.set_icache_associativity(0x08); - leaf.set_icache_size(0x20); - - cpuid - .set_l1_cache_and_tlb_info(Some(leaf)) - .expect("can set leaf 8000_0005h"); - - // Set up L2 and L3 cache+TLB info (leaf 8000_0006h) - let mut leaf = L2And3CacheTlbInfo::empty(); - - // Set up leaf 8000_0006h EAX - leaf.set_itlb_2m_4m_size(0x200); - leaf.set_itlb_2m_4m_associativity(0x2); - leaf.set_dtlb_2m_4m_size(0x800); - leaf.set_dtlb_2m_4m_associativity(0x4); - - // Set up leaf 8000_0006h EBX - leaf.set_itlb_4k_size(0x200); - leaf.set_itlb_4k_associativity(0x4); - leaf.set_dtlb_4k_size(0x800); - leaf.set_dtlb_4k_associativity(0x6); - - // Set up leaf 8000_0006h ECX - leaf.set_l2cache_line_size(0x40); - leaf.set_l2cache_lines_per_tag(0x1); - leaf.set_l2cache_associativity(0x6); - leaf.set_l2cache_size(0x0200); - - // Set up leaf 8000_0006h EDX - leaf.set_l3cache_line_size(0x40); - leaf.set_l3cache_lines_per_tag(0x1); - leaf.set_l3cache_associativity(0x9); - leaf.set_l3cache_size(0x0200); - - cpuid - .set_l2_l3_cache_and_tlb_info(Some(leaf)) - .expect("can set leaf 8000_0006h"); - - // Set up TLB information for 1GiB pages (leaf 8000_0019h) - let mut leaf = Tlb1gbPageInfo::empty(); - leaf.set_dtlb_l1_1gb_associativity(0xF); - leaf.set_dtlb_l1_1gb_size(0x40); - leaf.set_itlb_l1_1gb_associativity(0xF); - leaf.set_itlb_l1_1gb_size(0x40); - leaf.set_dtlb_l2_1gb_associativity(0xF); - leaf.set_dtlb_l2_1gb_size(0x40); - leaf.set_itlb_l2_1gb_associativity(0); - leaf.set_itlb_l2_1gb_size(0); - cpuid.set_tlb_1gb_page_info(Some(leaf)).expect("can set leaf 8000_0019h"); - - let mut processor_topo = - cpuid.get_processor_topology_info().expect("can get leaf 8000_001Eh"); - // By virtue of having a single vCPU, the representative VM has one thread - // per core rather than two. - processor_topo.set_threads_per_core(1); - cpuid - .set_processor_topology_info(Some(processor_topo)) - .expect("can set leaf 8000_001Eh"); - - let mut ext_features_2 = cpuid - .get_extended_feature_identification_2() - .expect("can get leaf 8000_0021h"); - // Bhyve passed through the feature bit for this MSR, though the MSR itself - // is not allowed. - ext_features_2.set_prefetch_ctl_msr(true); - // Bhyve passed through the feature bit for SMM page config lock, though - // guests cannot actually control it. - ext_features_2.set_smm_pg_cfg_lock(true); - cpuid - .set_extended_feature_identification_2(Some(ext_features_2)) - .expect("can set leaf 8000_0021h"); - - // Now touch up the RFD314 Milan definition in the specific ways we know it - // differs from what guests got at the time. 
- - // Some non-feature tweaks: - let mut dump = cpuid.into_source(); - - // Leaf B is not passed through from the host on AMD systems: - // https://www.illumos.org/issues/17529 - dump.set_leaf(0xB, None); - - // Leaf D (extended state information) doesn't have a nice read/write API in - // `rust-cpuid`, so adjust expectations more manually.. - // - // Guests had `xsavec` and `xgetbv w/ ecx=1` hidden before. - let mut ext_state = dump.cpuid2(0xD, 1); - ext_state.eax &= !0x0000_0006; - dump.set_subleaf(0xD, 1, Some(ext_state)); - - // SVM features were not zeroed, but the SVM bit itself was not passed - // through. - let mut svm = CpuIdResult::empty(); - svm.eax = 0x0000_0001; - svm.ebx = 0x0000_8000; - svm.ecx = 0x0000_0000; - svm.edx = 0x119B_BCFF; - dump.set_leaf(0x8000_000A, Some(svm)); - - // IBS capabilities were not zeroed, but the IBS MSRs are not - // guest-accessible. - let mut ibs = CpuIdResult::empty(); - ibs.eax = 0x0000_03FF; - ibs.ebx = 0x0000_0000; - ibs.ecx = 0x0000_0000; - ibs.edx = 0x0000_0000; - dump.set_leaf(0x8000_001B, Some(ibs)); - - // The "cores sharing cache" bits under leaf 8000_001D are somewhat dynamic. - // For L1 and L2 caches, these are the number of threads per core, and for - // L3 this is threads in the virtual processor. The representative VM had - // two cores, which is presented as an SMT pair, so all levels read as 2. - // - // This is stored as one minus the actual value at each level, so one core - // is a bit pattern of all zeroes. The "cores sharing cache" field starts at - // bit 14. So we want to store the bit pattern `0...1` at that offset. There - // isn't a nice way to patch this into an existing cache topology in - // `raw_cpuid`, so we have to get a bit gross with it.. - for level in 0..3 { - let mut leaf = dump.cpuid2(0x8000_001D, level); - // Mask out all the bits for "cores sharing cache" - leaf.eax &= !0x03ffc000; - leaf.eax |= 1 << 14; - dump.set_subleaf(0x8000_001D, level, Some(leaf)); + // Test that the initial RFD 314 definition matches what we compute as the + // CPUID profile with that configuration in `milan_rfd314()`. + #[test] + fn milan_rfd314_is_as_described() { + let computed = dump_to_cpuid_entries(milan_rfd314()); + + // `milan_rfd314` sorts by leaf/subleaf, so everything *should* be in + // the same order.. just a question if it's all the same: + for (l, r) in MILAN_CPUID.iter().zip(computed.as_slice().iter()) { + eprintln!("comparing {:#08x}.{:?}", l.leaf, l.subleaf); + assert_eq!( + l, r, + "leaf 0x{:08x} (subleaf? {:?}) did not match", + l.leaf, l.subleaf + ); + } } - // Memory encryption features were not zeroed, but the feature itself is not - // supported. - let mut sme = CpuIdResult::empty(); - sme.eax = 0x0101_FD3F; - sme.ebx = 0x0000_4173; - sme.ecx = 0x0000_01FD; - sme.edx = 0x0000_0001; - dump.set_leaf(0x8000_001F, Some(sme)); - - // Milan has standard leaves up to 0x10, but Bhyve zeroes out the last few. - // Nothing reduces the max standard leaf, so guests saw a different value - // than the `0x0000000D` that RFD 314 describes. To get here with - // `raw_cpuid`, add a zeroed out leaf "0x10" to drag the max standard leaf - // that high. - dump.set_leaf(0x10, Some(CpuIdResult::empty())); - - // Similar to above, extended leaves go to 0x8000_0021, but hardware goes up - // to 0x8000_0023 and when zeroing the last few leaves the max valid leaf - // did not get moved back down. Add a zeroed out leaf "0x8000_0023" to drag - // the max extended leaf as high as before. 
- dump.set_leaf(0x8000_0023, Some(CpuIdResult::empty())); - - let computed = dump_to_cpuid_entries(dump); - - // `milan_rfd314` sorts by leaf/subleaf, so everything *should* be in the - // same order.. just a question if it's all the same: - for (l, r) in MILAN_BEFORE_RFD314.iter().zip(computed.as_slice().iter()) { - eprintln!("comparing {:#08x}.{:?}", l.leaf, l.subleaf); - assert_eq!( - l, r, - "leaf 0x{:08x} (subleaf? {:?}) did not match", - l.leaf, l.subleaf - ); + // This CPUID leaf blob is what a guest booted on a Gimlet as of around + // August 2025 would have gotten as its passed-through CPUID leaves. + // + // This is present only to validate initial CPU platforms work and in + // particular that the initial specified-up-front CPU platform does not + // differ in unexpected ways from what guests had been getting to that + // point. + const MILAN_BEFORE_RFD314: [CpuidEntry; 30] = [ + cpuid_leaf!(0x0, 0x00000010, 0x68747541, 0x444D4163, 0x69746E65), + cpuid_leaf!(0x1, 0x00A00F11, 0x01020800, 0xFEDA3203, 0x178BFBFF), + cpuid_leaf!(0x5, 0x00000040, 0x00000040, 0x00000003, 0x00000011), + cpuid_leaf!(0x6, 0x00000004, 0x00000000, 0x00000000, 0x00000000), + cpuid_subleaf!( + 0x7, 0x0, 0x00000000, 0x201003A9, 0x00000600, 0x00000000 + ), + cpuid_subleaf!( + 0x7, 0x1, 0x00000000, 0x00000000, 0x00000000, 0x00000000 + ), + // leaf B is missing, and leaf D is the synthetic topology Bhyve invents. + cpuid_subleaf!( + 0xD, 0x0, 0x00000007, 0x00000340, 0x00000340, 0x00000000 + ), + cpuid_subleaf!( + 0xD, 0x1, 0x00000001, 0x00000340, 0x00000000, 0x00000000 + ), + cpuid_subleaf!( + 0xD, 0x2, 0x00000100, 0x00000240, 0x00000000, 0x00000000 + ), + // Include the all-zero leaf 10h explicitly so that the maximum standard + // leaf matches below. + cpuid_leaf!(0x10, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!( + 0x80000000, 0x80000023, 0x68747541, 0x444D4163, 0x69746E65 + ), + cpuid_leaf!( + 0x80000001, 0x00A00F11, 0x40000000, 0x444031FB, 0x25D3FBFF + ), + cpuid_leaf!( + 0x80000002, 0x20444D41, 0x43595045, 0x31373720, 0x36205033 + ), + cpuid_leaf!( + 0x80000003, 0x6F432D34, 0x50206572, 0x65636F72, 0x726F7373 + ), + cpuid_leaf!( + 0x80000004, 0x20202020, 0x20202020, 0x20202020, 0x00202020 + ), + cpuid_leaf!( + 0x80000005, 0xFF40FF40, 0xFF40FF40, 0x20080140, 0x20080140 + ), + cpuid_leaf!( + 0x80000006, 0x48002200, 0x68004200, 0x02006140, 0x08009140 + ), + cpuid_leaf!( + 0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000100 + ), + cpuid_leaf!( + 0x80000008, 0x00003030, 0x00000007, 0x00000000, 0x00010007 + ), + cpuid_leaf!( + 0x8000000A, 0x00000001, 0x00008000, 0x00000000, 0x119BBCFF + ), + cpuid_leaf!( + 0x80000019, 0xF040F040, 0xF0400000, 0x00000000, 0x00000000 + ), + cpuid_leaf!( + 0x8000001A, 0x00000006, 0x00000000, 0x00000000, 0x00000000 + ), + cpuid_leaf!( + 0x8000001B, 0x000003FF, 0x00000000, 0x00000000, 0x00000000 + ), + cpuid_leaf!( + 0x8000001C, 0x00000000, 0x00000000, 0x00000000, 0x00000000 + ), + cpuid_subleaf!( + 0x8000001D, 0x00, 0x00004121, 0x0000003F, 0x00000000, + 0x00000000 + ), + cpuid_subleaf!( + 0x8000001D, 0x01, 0x00004143, 0x0000003F, 0x00000000, + 0x00000000 + ), + cpuid_subleaf!( + 0x8000001D, 0x02, 0x00004163, 0x0000003F, 0x00000000, + 0x00000000 + ), + cpuid_leaf!( + 0x8000001E, 0x00000000, 0x00000000, 0x00000000, 0x00000000 + ), + cpuid_leaf!( + 0x8000001F, 0x0101FD3F, 0x00004173, 0x000001FD, 0x00000001 + ), + cpuid_leaf!( + 0x80000021, 0x0000204D, 0x00000000, 0x00000000, 0x00000000 + ), + ]; + + // The difference between guests on Milan with Byhve default 
CPUID masking + // and the profile defined by RFD 314 is *relatively* small, and mostly in + // the direction of "Bhyve permitted things we wouldn't necessarily have + // wanted to". This test enumerates the differences by adjusting the RFD 314 + // definition to match a snapshot of a pre-Milan-v1 guest, and why. + #[test] + fn milan_current_vs_rfd314_is_understood() { + let mut cpuid = CpuId::with_cpuid_reader(milan_rfd314()); + + let mut feature_info = + cpuid.get_feature_info().expect("can get leaf 1h"); + // The representative CPUID dump happened to come from processor 1 on a + // two-processor VM. + feature_info.set_max_logical_processor_ids(2); + feature_info.set_initial_local_apic_id(1); + // TODO: Guests were told PCID was supported, but 314 says that it is + // not supported..? + feature_info.set_pcid(true); + // The snapshot comes from a VM that enabled XSAVE. + feature_info.set_oxsave(true); + // The snapshot comes from a VM where HTT was dynamically managed to + // "true". + feature_info.set_htt(true); + cpuid.set_feature_info(Some(feature_info)).expect("can set leaf 1h"); + + let mut monitor_mwait = + cpuid.get_monitor_mwait_info().expect("can get leaf 5h"); + // The monitor/mwait leaf was passed through non-zeroed even though + // monitor/mwait support is hidden. + monitor_mwait.set_smallest_monitor_line(0x40); + monitor_mwait.set_largest_monitor_line(0x40); + monitor_mwait.set_extensions_supported(1); + monitor_mwait.set_interrupts_as_break_event(1); + // These are "reserved" according to the AMD APM, but in practice look + // quite similar to their Intel meaning... + monitor_mwait.set_supported_c0_states(1); + monitor_mwait.set_supported_c1_states(1); + cpuid + .set_monitor_mwait_info(Some(monitor_mwait)) + .expect("can set leaf 5h"); + + let mut ext_features = + cpuid.get_extended_feature_info().expect("can get leaf 7h"); + // Byhve didn't/doesn't pass ADX through from the host + ext_features.set_adx(false); + // ... or CLFLUSHOPT + ext_features.set_clflushopt(false); + // ... or CLWB. + ext_features.set_clwb(false); + + // Likewise with FSRM. + ext_features.set_fsrm(false); + + cpuid + .set_extended_feature_info(Some(ext_features)) + .expect("can set leaf 7h"); + + let mut ext_processor_features = cpuid + .get_extended_processor_and_feature_identifiers() + .expect("can get leaf 8000_0001h"); + // This is dynamically managed, true in the sampled VM. + ext_processor_features.set_cmp_legacy(true); + // Neither of these features are actually available to guests, but byhve + // had been passing the CPUID bits through + ext_processor_features.set_skinit(true); + ext_processor_features.set_wdt(true); + // TODO: Fast FXSAVE was not passed through? + ext_processor_features.set_fast_fxsave_fxstor(false); + cpuid + .set_extended_processor_and_feature_identifiers(Some( + ext_processor_features, + )) + .expect("can set leaf 8000_0001h"); + + let mut leaf = cpuid + .get_processor_capacity_feature_info() + .expect("can get leaf 8000_0008h"); + + // Support for the instructions retired MSR was passed through by bhyve + // even though the MSR itself is not available to guests. + leaf.set_inst_ret_cntr_msr(true); + + // TODO: Support for `wbnoinvd` was hidden from guests by byhve? + leaf.set_wbnoinvd(false); + + // INVLPGB and RDPRU max were passed through even those instructions are not + // supported. 
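+        // (An INVLPGB limit of 7 pages and an RDPRU index limit of 1 are
+        // simply the values the sampled guest reported.)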
+ leaf.set_invlpgb_max_pages(7); + leaf.set_max_rdpru_id(1); + + cpuid + .set_processor_capacity_feature_info(Some(leaf)) + .expect("can set leaf 8000_0008h"); + + // Set up L1 cache+TLB info (leaf 8000_0005h) + let mut leaf = L1CacheTlbInfo::empty(); + + leaf.set_itlb_2m_4m_size(0x40); + leaf.set_itlb_2m_4m_associativity(0xff); + leaf.set_dtlb_2m_4m_size(0x40); + leaf.set_dtlb_2m_4m_associativity(0xff); + + leaf.set_itlb_4k_size(0x40); + leaf.set_itlb_4k_associativity(0xff); + leaf.set_dtlb_4k_size(0x40); + leaf.set_dtlb_4k_associativity(0xff); + + leaf.set_dcache_line_size(0x40); + leaf.set_dcache_lines_per_tag(0x01); + leaf.set_dcache_associativity(0x08); + leaf.set_dcache_size(0x20); + + leaf.set_icache_line_size(0x40); + leaf.set_icache_lines_per_tag(0x01); + leaf.set_icache_associativity(0x08); + leaf.set_icache_size(0x20); + + cpuid + .set_l1_cache_and_tlb_info(Some(leaf)) + .expect("can set leaf 8000_0005h"); + + // Set up L2 and L3 cache+TLB info (leaf 8000_0006h) + let mut leaf = L2And3CacheTlbInfo::empty(); + + // Set up leaf 8000_0006h EAX + leaf.set_itlb_2m_4m_size(0x200); + leaf.set_itlb_2m_4m_associativity(0x2); + leaf.set_dtlb_2m_4m_size(0x800); + leaf.set_dtlb_2m_4m_associativity(0x4); + + // Set up leaf 8000_0006h EBX + leaf.set_itlb_4k_size(0x200); + leaf.set_itlb_4k_associativity(0x4); + leaf.set_dtlb_4k_size(0x800); + leaf.set_dtlb_4k_associativity(0x6); + + // Set up leaf 8000_0006h ECX + leaf.set_l2cache_line_size(0x40); + leaf.set_l2cache_lines_per_tag(0x1); + leaf.set_l2cache_associativity(0x6); + leaf.set_l2cache_size(0x0200); + + // Set up leaf 8000_0006h EDX + leaf.set_l3cache_line_size(0x40); + leaf.set_l3cache_lines_per_tag(0x1); + leaf.set_l3cache_associativity(0x9); + leaf.set_l3cache_size(0x0200); + + cpuid + .set_l2_l3_cache_and_tlb_info(Some(leaf)) + .expect("can set leaf 8000_0006h"); + + // Set up TLB information for 1GiB pages (leaf 8000_0019h) + let mut leaf = Tlb1gbPageInfo::empty(); + leaf.set_dtlb_l1_1gb_associativity(0xF); + leaf.set_dtlb_l1_1gb_size(0x40); + leaf.set_itlb_l1_1gb_associativity(0xF); + leaf.set_itlb_l1_1gb_size(0x40); + leaf.set_dtlb_l2_1gb_associativity(0xF); + leaf.set_dtlb_l2_1gb_size(0x40); + leaf.set_itlb_l2_1gb_associativity(0); + leaf.set_itlb_l2_1gb_size(0); + cpuid + .set_tlb_1gb_page_info(Some(leaf)) + .expect("can set leaf 8000_0019h"); + + let mut processor_topo = cpuid + .get_processor_topology_info() + .expect("can get leaf 8000_001Eh"); + // The synthetic topology describes caches that make for a system + // topology like "two sockets with single-core processors each" - caches + // are allegedly not shared across cores (even L3!). + processor_topo.set_threads_per_core(1); + cpuid + .set_processor_topology_info(Some(processor_topo)) + .expect("can set leaf 8000_001Eh"); + + let mut ext_features_2 = cpuid + .get_extended_feature_identification_2() + .expect("can get leaf 8000_0021h"); + // Bhyve passed through the feature bit for this MSR, though the MSR itself + // is not allowed. + ext_features_2.set_prefetch_ctl_msr(true); + // Bhyve passed through the feature bit for SMM page config lock, though + // guests cannot actually control it. + ext_features_2.set_smm_pg_cfg_lock(true); + cpuid + .set_extended_feature_identification_2(Some(ext_features_2)) + .expect("can set leaf 8000_0021h"); + + // Some leaves are unwieldy or impossible to tweak in place with nice + // `raw-cpuid` helpers, so we'll just fix up CPUID leaves directly in + // those cases. 
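+        // (`into_source()` below hands back the `CpuIdDump` this `CpuId` view
+        // was constructed over, so whole leaves and subleaves can be rewritten
+        // in place.)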
+ + // Some non-feature tweaks: + let mut dump = cpuid.into_source(); + + // Leaf B is not passed through from the host on AMD systems: + // https://www.illumos.org/issues/17529 + dump.set_leaf(0xB, None); + + // Leaf D (extended state information) doesn't have a nice read/write API in + // `rust-cpuid`, so adjust expectations more manually.. + // + // Guests had `xsavec` and `xgetbv w/ ecx=1` hidden before. + let mut ext_state = dump.cpuid2(0xD, 1); + ext_state.eax &= !0x0000_0006; + dump.set_subleaf(0xD, 1, Some(ext_state)); + + // SVM features were not zeroed, but the SVM bit itself was not passed + // through. + let mut svm = CpuIdResult::empty(); + svm.eax = 0x0000_0001; + svm.ebx = 0x0000_8000; + svm.ecx = 0x0000_0000; + svm.edx = 0x119B_BCFF; + dump.set_leaf(0x8000_000A, Some(svm)); + + // IBS capabilities were not zeroed, but the IBS MSRs are not + // guest-accessible. + let mut ibs = CpuIdResult::empty(); + ibs.eax = 0x0000_03FF; + ibs.ebx = 0x0000_0000; + ibs.ecx = 0x0000_0000; + ibs.edx = 0x0000_0000; + dump.set_leaf(0x8000_001B, Some(ibs)); + + // The "cores sharing cache" bits under leaf 8000_001D are somewhat dynamic. + // For L1 and L2 caches, these are the number of threads per core, and for + // L3 this is threads in the virtual processor. The representative VM had + // two cores, which is presented as an SMT pair, so all levels read as 2. + // + // This is stored as one minus the actual value at each level, so one core + // is a bit pattern of all zeroes. The "cores sharing cache" field starts at + // bit 14. So we want to store the bit pattern `0...1` at that offset. There + // isn't a nice way to patch this into an existing cache topology in + // `raw_cpuid`, so we have to get a bit gross with it.. + for level in 0..3 { + let mut leaf = dump.cpuid2(0x8000_001D, level); + // Mask out all the bits for "cores sharing cache" + leaf.eax &= !0x03ffc000; + leaf.eax |= 1 << 14; + dump.set_subleaf(0x8000_001D, level, Some(leaf)); + } + + // Memory encryption features were not zeroed, but the feature itself is not + // supported. + let mut sme = CpuIdResult::empty(); + sme.eax = 0x0101_FD3F; + sme.ebx = 0x0000_4173; + sme.ecx = 0x0000_01FD; + sme.edx = 0x0000_0001; + dump.set_leaf(0x8000_001F, Some(sme)); + + // Milan has standard leaves up to 0x10, but Bhyve zeroes out the last few. + // Nothing reduces the max standard leaf, so guests saw a different value + // than the `0x0000000D` that RFD 314 describes. To get here with + // `raw_cpuid`, add a zeroed out leaf "0x10" to drag the max standard leaf + // that high. + dump.set_leaf(0x10, Some(CpuIdResult::empty())); + + // Similar to above, extended leaves go to 0x8000_0021, but hardware goes up + // to 0x8000_0023 and when zeroing the last few leaves the max valid leaf + // did not get moved back down. Add a zeroed out leaf "0x8000_0023" to drag + // the max extended leaf as high as before. + dump.set_leaf(0x8000_0023, Some(CpuIdResult::empty())); + + let computed = dump_to_cpuid_entries(dump); + + // `milan_rfd314` sorts by leaf/subleaf, so everything *should* be in the + // same order.. just a question if it's all the same: + for (l, r) in MILAN_BEFORE_RFD314.iter().zip(computed.as_slice().iter()) + { + eprintln!("comparing {:#08x}.{:?}", l.leaf, l.subleaf); + assert_eq!( + l, r, + "leaf 0x{:08x} (subleaf? 
{:?}) did not match", + l.leaf, l.subleaf + ); + } } } From 63a5326d0d5e28f1b88f0548f8744ef1a086fea7 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 3 Sep 2025 04:25:55 +0000 Subject: [PATCH 36/42] more fmt --- .../src/app/instance_platform/cpu_platform.rs | 230 ++++++------------ 1 file changed, 78 insertions(+), 152 deletions(-) diff --git a/nexus/src/app/instance_platform/cpu_platform.rs b/nexus/src/app/instance_platform/cpu_platform.rs index 04ab1da965a..72adac37958 100644 --- a/nexus/src/app/instance_platform/cpu_platform.rs +++ b/nexus/src/app/instance_platform/cpu_platform.rs @@ -835,69 +835,32 @@ mod test { cpuid_subleaf!( 0xD, 0x2, 0x00000100, 0x00000240, 0x00000000, 0x00000000 ), - cpuid_leaf!( - 0x80000000, 0x80000021, 0x68747541, 0x444D4163, 0x69746E65 - ), - cpuid_leaf!( - 0x80000001, 0x00A00F11, 0x40000000, 0x444001F9, 0x27D3FBFF - ), - cpuid_leaf!( - 0x80000002, 0x20444D41, 0x43595045, 0x31373720, 0x36205033 - ), - cpuid_leaf!( - 0x80000003, 0x6F432D34, 0x50206572, 0x65636F72, 0x726F7373 - ), - cpuid_leaf!( - 0x80000004, 0x20202020, 0x20202020, 0x20202020, 0x00202020 - ), - cpuid_leaf!( - 0x80000005, 0xFF40FF40, 0xFF40FF40, 0x20080140, 0x20080140 - ), - cpuid_leaf!( - 0x80000006, 0x48002200, 0x68004200, 0x02006140, 0x08009140 - ), - cpuid_leaf!( - 0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000100 - ), - cpuid_leaf!( - 0x80000008, 0x00003030, 0x00000205, 0x00000000, 0x00000000 - ), - cpuid_leaf!( - 0x8000000A, 0x00000000, 0x00000000, 0x00000000, 0x00000000 - ), - cpuid_leaf!( - 0x80000019, 0xF040F040, 0xF0400000, 0x00000000, 0x00000000 - ), - cpuid_leaf!( - 0x8000001A, 0x00000006, 0x00000000, 0x00000000, 0x00000000 - ), - cpuid_leaf!( - 0x8000001B, 0x00000000, 0x00000000, 0x00000000, 0x00000000 - ), - cpuid_leaf!( - 0x8000001C, 0x00000000, 0x00000000, 0x00000000, 0x00000000 - ), + cpuid_leaf!(0x80000000, 0x80000021, 0x68747541, 0x444D4163, 0x69746E65), + cpuid_leaf!(0x80000001, 0x00A00F11, 0x40000000, 0x444001F9, 0x27D3FBFF), + cpuid_leaf!(0x80000002, 0x20444D41, 0x43595045, 0x31373720, 0x36205033), + cpuid_leaf!(0x80000003, 0x6F432D34, 0x50206572, 0x65636F72, 0x726F7373), + cpuid_leaf!(0x80000004, 0x20202020, 0x20202020, 0x20202020, 0x00202020), + cpuid_leaf!(0x80000005, 0xFF40FF40, 0xFF40FF40, 0x20080140, 0x20080140), + cpuid_leaf!(0x80000006, 0x48002200, 0x68004200, 0x02006140, 0x08009140), + cpuid_leaf!(0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000100), + cpuid_leaf!(0x80000008, 0x00003030, 0x00000205, 0x00000000, 0x00000000), + cpuid_leaf!(0x8000000A, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x80000019, 0xF040F040, 0xF0400000, 0x00000000, 0x00000000), + cpuid_leaf!(0x8000001A, 0x00000006, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x8000001B, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x8000001C, 0x00000000, 0x00000000, 0x00000000, 0x00000000), cpuid_subleaf!( - 0x8000001D, 0x00, 0x00000121, 0x0000003F, 0x00000000, - 0x00000000 + 0x8000001D, 0x00, 0x00000121, 0x0000003F, 0x00000000, 0x00000000 ), cpuid_subleaf!( - 0x8000001D, 0x01, 0x00000143, 0x0000003F, 0x00000000, - 0x00000000 + 0x8000001D, 0x01, 0x00000143, 0x0000003F, 0x00000000, 0x00000000 ), cpuid_subleaf!( - 0x8000001D, 0x02, 0x00000163, 0x0000003F, 0x00000000, - 0x00000000 - ), - cpuid_leaf!( - 0x8000001E, 0x00000000, 0x00000100, 0x00000000, 0x00000000 - ), - cpuid_leaf!( - 0x8000001F, 0x00000000, 0x00000000, 0x00000000, 0x00000000 - ), - cpuid_leaf!( - 0x80000021, 0x00000045, 0x00000000, 0x00000000, 0x00000000 + 0x8000001D, 0x02, 0x00000163, 
0x0000003F, 0x00000000, 0x00000000 ), + cpuid_leaf!(0x8000001E, 0x00000000, 0x00000100, 0x00000000, 0x00000000), + cpuid_leaf!(0x8000001F, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x80000021, 0x00000045, 0x00000000, 0x00000000, 0x00000000), ]; // Test that the initial RFD 314 definition matches what we compute as the @@ -918,101 +881,64 @@ mod test { } } - // This CPUID leaf blob is what a guest booted on a Gimlet as of around - // August 2025 would have gotten as its passed-through CPUID leaves. - // - // This is present only to validate initial CPU platforms work and in - // particular that the initial specified-up-front CPU platform does not - // differ in unexpected ways from what guests had been getting to that - // point. - const MILAN_BEFORE_RFD314: [CpuidEntry; 30] = [ - cpuid_leaf!(0x0, 0x00000010, 0x68747541, 0x444D4163, 0x69746E65), - cpuid_leaf!(0x1, 0x00A00F11, 0x01020800, 0xFEDA3203, 0x178BFBFF), - cpuid_leaf!(0x5, 0x00000040, 0x00000040, 0x00000003, 0x00000011), - cpuid_leaf!(0x6, 0x00000004, 0x00000000, 0x00000000, 0x00000000), - cpuid_subleaf!( - 0x7, 0x0, 0x00000000, 0x201003A9, 0x00000600, 0x00000000 - ), - cpuid_subleaf!( - 0x7, 0x1, 0x00000000, 0x00000000, 0x00000000, 0x00000000 - ), - // leaf B is missing, and leaf D is the synthetic topology Bhyve invents. - cpuid_subleaf!( - 0xD, 0x0, 0x00000007, 0x00000340, 0x00000340, 0x00000000 - ), - cpuid_subleaf!( - 0xD, 0x1, 0x00000001, 0x00000340, 0x00000000, 0x00000000 - ), - cpuid_subleaf!( - 0xD, 0x2, 0x00000100, 0x00000240, 0x00000000, 0x00000000 - ), - // Include the all-zero leaf 10h explicitly so that the maximum standard - // leaf matches below. - cpuid_leaf!(0x10, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - cpuid_leaf!( - 0x80000000, 0x80000023, 0x68747541, 0x444D4163, 0x69746E65 - ), - cpuid_leaf!( - 0x80000001, 0x00A00F11, 0x40000000, 0x444031FB, 0x25D3FBFF - ), - cpuid_leaf!( - 0x80000002, 0x20444D41, 0x43595045, 0x31373720, 0x36205033 - ), - cpuid_leaf!( - 0x80000003, 0x6F432D34, 0x50206572, 0x65636F72, 0x726F7373 - ), - cpuid_leaf!( - 0x80000004, 0x20202020, 0x20202020, 0x20202020, 0x00202020 - ), - cpuid_leaf!( - 0x80000005, 0xFF40FF40, 0xFF40FF40, 0x20080140, 0x20080140 - ), - cpuid_leaf!( - 0x80000006, 0x48002200, 0x68004200, 0x02006140, 0x08009140 - ), - cpuid_leaf!( - 0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000100 - ), - cpuid_leaf!( - 0x80000008, 0x00003030, 0x00000007, 0x00000000, 0x00010007 - ), - cpuid_leaf!( - 0x8000000A, 0x00000001, 0x00008000, 0x00000000, 0x119BBCFF - ), - cpuid_leaf!( - 0x80000019, 0xF040F040, 0xF0400000, 0x00000000, 0x00000000 - ), - cpuid_leaf!( - 0x8000001A, 0x00000006, 0x00000000, 0x00000000, 0x00000000 - ), - cpuid_leaf!( - 0x8000001B, 0x000003FF, 0x00000000, 0x00000000, 0x00000000 - ), - cpuid_leaf!( - 0x8000001C, 0x00000000, 0x00000000, 0x00000000, 0x00000000 - ), - cpuid_subleaf!( - 0x8000001D, 0x00, 0x00004121, 0x0000003F, 0x00000000, - 0x00000000 - ), - cpuid_subleaf!( - 0x8000001D, 0x01, 0x00004143, 0x0000003F, 0x00000000, - 0x00000000 - ), - cpuid_subleaf!( - 0x8000001D, 0x02, 0x00004163, 0x0000003F, 0x00000000, - 0x00000000 - ), - cpuid_leaf!( - 0x8000001E, 0x00000000, 0x00000000, 0x00000000, 0x00000000 - ), - cpuid_leaf!( - 0x8000001F, 0x0101FD3F, 0x00004173, 0x000001FD, 0x00000001 - ), - cpuid_leaf!( - 0x80000021, 0x0000204D, 0x00000000, 0x00000000, 0x00000000 - ), - ]; + // This CPUID leaf blob is what a guest booted on a Gimlet as of around + // August 2025 would have gotten as its passed-through CPUID leaves. 
+ // + // This is present only to validate initial CPU platforms work and in + // particular that the initial specified-up-front CPU platform does not + // differ in unexpected ways from what guests had been getting to that + // point. + const MILAN_BEFORE_RFD314: [CpuidEntry; 30] = [ + cpuid_leaf!(0x0, 0x00000010, 0x68747541, 0x444D4163, 0x69746E65), + cpuid_leaf!(0x1, 0x00A00F11, 0x01020800, 0xFEDA3203, 0x178BFBFF), + cpuid_leaf!(0x5, 0x00000040, 0x00000040, 0x00000003, 0x00000011), + cpuid_leaf!(0x6, 0x00000004, 0x00000000, 0x00000000, 0x00000000), + cpuid_subleaf!( + 0x7, 0x0, 0x00000000, 0x201003A9, 0x00000600, 0x00000000 + ), + cpuid_subleaf!( + 0x7, 0x1, 0x00000000, 0x00000000, 0x00000000, 0x00000000 + ), + // leaf B is missing, and leaf D is the synthetic topology Bhyve invents. + cpuid_subleaf!( + 0xD, 0x0, 0x00000007, 0x00000340, 0x00000340, 0x00000000 + ), + cpuid_subleaf!( + 0xD, 0x1, 0x00000001, 0x00000340, 0x00000000, 0x00000000 + ), + cpuid_subleaf!( + 0xD, 0x2, 0x00000100, 0x00000240, 0x00000000, 0x00000000 + ), + // Include the all-zero leaf 10h explicitly so that the maximum standard + // leaf matches below. + cpuid_leaf!(0x10, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x80000000, 0x80000023, 0x68747541, 0x444D4163, 0x69746E65), + cpuid_leaf!(0x80000001, 0x00A00F11, 0x40000000, 0x444031FB, 0x25D3FBFF), + cpuid_leaf!(0x80000002, 0x20444D41, 0x43595045, 0x31373720, 0x36205033), + cpuid_leaf!(0x80000003, 0x6F432D34, 0x50206572, 0x65636F72, 0x726F7373), + cpuid_leaf!(0x80000004, 0x20202020, 0x20202020, 0x20202020, 0x00202020), + cpuid_leaf!(0x80000005, 0xFF40FF40, 0xFF40FF40, 0x20080140, 0x20080140), + cpuid_leaf!(0x80000006, 0x48002200, 0x68004200, 0x02006140, 0x08009140), + cpuid_leaf!(0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000100), + cpuid_leaf!(0x80000008, 0x00003030, 0x00000007, 0x00000000, 0x00010007), + cpuid_leaf!(0x8000000A, 0x00000001, 0x00008000, 0x00000000, 0x119BBCFF), + cpuid_leaf!(0x80000019, 0xF040F040, 0xF0400000, 0x00000000, 0x00000000), + cpuid_leaf!(0x8000001A, 0x00000006, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x8000001B, 0x000003FF, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x8000001C, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cpuid_subleaf!( + 0x8000001D, 0x00, 0x00004121, 0x0000003F, 0x00000000, 0x00000000 + ), + cpuid_subleaf!( + 0x8000001D, 0x01, 0x00004143, 0x0000003F, 0x00000000, 0x00000000 + ), + cpuid_subleaf!( + 0x8000001D, 0x02, 0x00004163, 0x0000003F, 0x00000000, 0x00000000 + ), + cpuid_leaf!(0x8000001E, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cpuid_leaf!(0x8000001F, 0x0101FD3F, 0x00004173, 0x000001FD, 0x00000001), + cpuid_leaf!(0x80000021, 0x0000204D, 0x00000000, 0x00000000, 0x00000000), + ]; // The difference between guests on Milan with Byhve default CPUID masking // and the profile defined by RFD 314 is *relatively* small, and mostly in From e5ba59433ff2ff0df9aed233c8ac4b4600cf5a26 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 3 Sep 2025 17:27:34 +0000 Subject: [PATCH 37/42] turbo nit --- sled-hardware/types/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sled-hardware/types/src/lib.rs b/sled-hardware/types/src/lib.rs index ce4a29da4c0..63588ef94b2 100644 --- a/sled-hardware/types/src/lib.rs +++ b/sled-hardware/types/src/lib.rs @@ -142,6 +142,6 @@ impl SledCpuFamily { impl std::fmt::Display for SledCpuFamily { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.as_str()) + 
f.write_str(self.as_str()) } } From 401d58b917a12bc26036fdfa6688dd35206468b3 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 3 Sep 2025 17:54:33 +0000 Subject: [PATCH 38/42] forgot to import everything the tests need --- nexus/src/app/instance_platform/cpu_platform.rs | 11 ++++++++++- nexus/types/src/external_api/params.rs | 4 ++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/nexus/src/app/instance_platform/cpu_platform.rs b/nexus/src/app/instance_platform/cpu_platform.rs index 72adac37958..5cce59948b7 100644 --- a/nexus/src/app/instance_platform/cpu_platform.rs +++ b/nexus/src/app/instance_platform/cpu_platform.rs @@ -3,7 +3,7 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use raw_cpuid::{ - ApmInfo, CpuId, CpuIdDump, CpuIdReader, CpuIdResult, CpuIdWriter, + ApmInfo, CpuId, CpuIdDump, CpuIdResult, CpuIdWriter, ExtendedFeatureIdentification2, ExtendedFeatures, ExtendedProcessorFeatureIdentifiers, ExtendedState, ExtendedStateInfo, ExtendedTopologyLevel, FeatureInfo, L1CacheTlbInfo, L2And3CacheTlbInfo, @@ -769,6 +769,15 @@ pub fn dump_to_cpuid_entries(dump: CpuIdDump) -> Vec { /// the functions to compute them in `raw-cpuid` might. #[cfg(test)] mod test { + use crate::app::instance_platform::cpu_platform::{ + dump_to_cpuid_entries, milan_rfd314, + }; + use raw_cpuid::{ + CpuId, CpuIdReader, CpuIdResult, CpuIdWriter, L1CacheTlbInfo, + L2And3CacheTlbInfo, Tlb1gbPageInfo, + }; + use sled_agent_client::types::CpuidEntry; + macro_rules! cpuid_leaf { ($leaf:literal, $eax:literal, $ebx:literal, $ecx:literal, $edx:literal) => { CpuidEntry { diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index ecf69e5d86d..19bf45e2247 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -13,8 +13,8 @@ use omicron_common::api::external::{ AddressLotKind, AffinityPolicy, AllowedSourceIps, BfdMode, BgpPeer, ByteCount, FailureDomain, Hostname, IdentityMetadataCreateParams, IdentityMetadataUpdateParams, InstanceAutoRestartPolicy, InstanceCpuCount, - InstanceCpuPlatform, IpVersion, LinkFec, LinkSpeed, Name, NameOrId, Nullable, PaginationOrder, - RouteDestination, RouteTarget, UserId, + InstanceCpuPlatform, IpVersion, LinkFec, LinkSpeed, Name, NameOrId, + Nullable, PaginationOrder, RouteDestination, RouteTarget, UserId, }; use omicron_common::disk::DiskVariant; use omicron_uuid_kinds::SiloGroupUuid; From 5364358a972d0c5389e3df4f982b92f3f96d4c07 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 3 Sep 2025 17:55:50 +0000 Subject: [PATCH 39/42] clippy --- .../src/app/instance_platform/cpu_platform.rs | 43 ++++++++++--------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/nexus/src/app/instance_platform/cpu_platform.rs b/nexus/src/app/instance_platform/cpu_platform.rs index 5cce59948b7..11d9bed0b85 100644 --- a/nexus/src/app/instance_platform/cpu_platform.rs +++ b/nexus/src/app/instance_platform/cpu_platform.rs @@ -702,25 +702,26 @@ pub fn milan_rfd314() -> CpuIdDump { // This is the fabricated cache topology from Bhyve. We could be more // precise, for dubious benefit. This is discussed in more detail in RFD // 314. 
- let mut levels = Vec::new(); - levels.push(CpuIdResult { - eax: 0x00000121, - ebx: 0x0000003F, - ecx: 0x00000000, - edx: 0x00000000, - }); - levels.push(CpuIdResult { - eax: 0x00000143, - ebx: 0x0000003F, - ecx: 0x00000000, - edx: 0x00000000, - }); - levels.push(CpuIdResult { - eax: 0x00000163, - ebx: 0x0000003F, - ecx: 0x00000000, - edx: 0x00000000, - }); + let mut levels = vec![ + CpuIdResult { + eax: 0x00000121, + ebx: 0x0000003F, + ecx: 0x00000000, + edx: 0x00000000, + }, + CpuIdResult { + eax: 0x00000143, + ebx: 0x0000003F, + ecx: 0x00000000, + edx: 0x00000000, + }, + CpuIdResult { + eax: 0x00000163, + ebx: 0x0000003F, + ecx: 0x00000000, + edx: 0x00000000, + }, + ]; cpuid .set_extended_cache_parameters(Some(levels.as_slice())) .expect("can set leaf 8000_001Dh"); @@ -733,8 +734,8 @@ pub fn dump_to_cpuid_entries(dump: CpuIdDump) -> Vec { for (leaf, subleaf, regs) in dump.into_iter() { entries.push(CpuidEntry { - leaf: leaf, - subleaf: subleaf, + leaf, + subleaf, eax: regs.eax, ebx: regs.ebx, ecx: regs.ecx, From d17b8aaa92d6d6a8dbca882c5b9acb26c643eaaa Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 3 Sep 2025 21:39:44 +0000 Subject: [PATCH 40/42] hide leaf B for now (in coordination with rfd314 updates) --- .../src/app/instance_platform/cpu_platform.rs | 56 ++----------------- 1 file changed, 6 insertions(+), 50 deletions(-) diff --git a/nexus/src/app/instance_platform/cpu_platform.rs b/nexus/src/app/instance_platform/cpu_platform.rs index 11d9bed0b85..a4126a87a69 100644 --- a/nexus/src/app/instance_platform/cpu_platform.rs +++ b/nexus/src/app/instance_platform/cpu_platform.rs @@ -6,10 +6,10 @@ use raw_cpuid::{ ApmInfo, CpuId, CpuIdDump, CpuIdResult, CpuIdWriter, ExtendedFeatureIdentification2, ExtendedFeatures, ExtendedProcessorFeatureIdentifiers, ExtendedState, ExtendedStateInfo, - ExtendedTopologyLevel, FeatureInfo, L1CacheTlbInfo, L2And3CacheTlbInfo, - MonitorMwaitInfo, PerformanceOptimizationInfo, - ProcessorCapacityAndFeatureInfo, ProcessorTopologyInfo, SvmFeatures, - ThermalPowerInfo, Tlb1gbPageInfo, Vendor, VendorInfo, + FeatureInfo, L1CacheTlbInfo, L2And3CacheTlbInfo, MonitorMwaitInfo, + PerformanceOptimizationInfo, ProcessorCapacityAndFeatureInfo, + ProcessorTopologyInfo, SvmFeatures, ThermalPowerInfo, Tlb1gbPageInfo, + Vendor, VendorInfo, }; use sled_agent_client::types::CpuidEntry; @@ -560,41 +560,6 @@ pub fn milan_rfd314() -> CpuIdDump { cpuid.set_extended_feature_info(Some(leaf)).expect("can set leaf 7h"); - // Set up extended topology info (leaf Bh) - let mut levels = Vec::new(); - - let mut topo_level1 = ExtendedTopologyLevel::empty(); - // EAX - topo_level1.set_shift_right_for_next_apic_id(1); - // EBX - topo_level1.set_processors(2); - // ECX - topo_level1.set_level_number(0); - // This level describes SMT. If there's no SMT enabled (single-core VM?) - // then this level should not be present, probably? - topo_level1.set_level_type(1); - - levels.push(topo_level1); - - let mut topo_level2 = ExtendedTopologyLevel::empty(); - // ECX - topo_level2.set_level_number(1); - topo_level2.set_level_type(2); - - levels.push(topo_level2); - - let mut topo_level3 = ExtendedTopologyLevel::empty(); - // ECX - topo_level3.set_level_number(2); - // Level type 0 indicates this level is invalid. This level is included only - // to be explicit about where the topology ends. 
- topo_level3.set_level_type(0); - - levels.push(topo_level3); - cpuid - .set_extended_topology_info(Some(levels.as_slice())) - .expect("can set leaf 8000_0021h"); - let mut leaf = cpuid .get_extended_processor_and_feature_identifiers() .expect("baseline Milan defines leaf 8000_0001"); @@ -702,7 +667,7 @@ pub fn milan_rfd314() -> CpuIdDump { // This is the fabricated cache topology from Bhyve. We could be more // precise, for dubious benefit. This is discussed in more detail in RFD // 314. - let mut levels = vec![ + let levels = vec![ CpuIdResult { eax: 0x00000121, ebx: 0x0000003F, @@ -816,7 +781,7 @@ mod test { // between 314 and the present day. Actual guest CPU platforms may differ as // we enable additional guest functionality in the future; this is not a // source of truth for actual guest platforms. - const MILAN_CPUID: [CpuidEntry; 32] = [ + const MILAN_CPUID: [CpuidEntry; 29] = [ cpuid_leaf!(0x0, 0x0000000D, 0x68747541, 0x444D4163, 0x69746E65), cpuid_leaf!(0x1, 0x00A00F11, 0x00000800, 0xF6D83203, 0x078BFBFF), cpuid_leaf!(0x5, 0x00000000, 0x00000000, 0x00000000, 0x00000000), @@ -827,15 +792,6 @@ mod test { cpuid_subleaf!( 0x7, 0x1, 0x00000000, 0x00000000, 0x00000000, 0x00000000 ), - cpuid_subleaf!( - 0xB, 0x0, 0x00000001, 0x00000002, 0x00000100, 0x00000000 - ), - cpuid_subleaf!( - 0xB, 0x1, 0x00000000, 0x00000000, 0x00000201, 0x00000000 - ), - cpuid_subleaf!( - 0xB, 0x2, 0x00000000, 0x00000000, 0x00000002, 0x00000000 - ), cpuid_subleaf!( 0xD, 0x0, 0x00000007, 0x00000340, 0x00000340, 0x00000000 ), From 10e67c18cadf6ad29bc5929d86b7c17b93e00757 Mon Sep 17 00:00:00 2001 From: iximeow Date: Wed, 3 Sep 2025 21:42:57 +0000 Subject: [PATCH 41/42] pretty sure this forbids irresponsible migrations.. --- .../src/app/instance_platform/cpu_platform.rs | 9 ++-- nexus/src/app/sagas/instance_migrate.rs | 47 +++++++++++++++++++ nexus/types/src/external_api/params.rs | 4 +- 3 files changed, 56 insertions(+), 4 deletions(-) diff --git a/nexus/src/app/instance_platform/cpu_platform.rs b/nexus/src/app/instance_platform/cpu_platform.rs index a4126a87a69..bd496449b27 100644 --- a/nexus/src/app/instance_platform/cpu_platform.rs +++ b/nexus/src/app/instance_platform/cpu_platform.rs @@ -149,9 +149,12 @@ pub fn functionally_same(base: CpuIdDump, target: CpuIdDump) -> bool { // TODO: really not sure if we should include things like cache // hierarchy/core topology information here. Misrepresenting the actual // system can result in cache-sized buffers being sized incorrectly (or at - // least suboptimally), but as long as cache sizes grow rather than shrink - // it may only be "performance is not as good as it could be" rather than a - // more deleterious outcome. + // least suboptimally). If the processor caches do not match the reported + // topology, software that tries to use the topology to flush caches by + // flushing each set individually would not behave correctly; if caches are + // larger than reported, such a manual flushing procedure would not actually + // flush the whole cache. Anyone doing this *really* should use WBINVD (or + // INVD) anyway. 
true } diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index 6500d3efcfb..3bbddfc22ee 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -11,6 +11,7 @@ use crate::app::sagas::{ declare_saga_actions, instance_common::allocate_vmm_ipv6, }; use nexus_db_lookup::LookupPath; +use nexus_db_model::VmmCpuPlatform; use nexus_db_queries::db::identity::Resource; use nexus_db_queries::{authn, authz, db}; use nexus_types::internal_api::params::InstanceMigrateRequest; @@ -179,10 +180,56 @@ async fn sim_reserve_sled_resources( let params = sagactx.saga_params::()?; let propolis_id = sagactx.lookup::("dst_propolis_id")?; + let Some(compatible_sled_families) = + params.src_vmm.cpu_platform.compatible_sled_cpu_families() + else { + // If we're here there are no compatible sled families for the VMM's CPU + // platform. That means the VMM's CPU platform is "SledDefault" and we + // just don't know what sleds might be compatible with the CPU platform + // the VM has in practice. A VMM in this state implies two things: + // * The instance does not specify a CPU platform. + // * The instance was placed on a sled with an unknown CPU type. + // + // If the instance specified a CPU platform, it would have been placed + // on a sled with a known compatible CPU platform. So it must not have + // one. + // + // Since the instance was placed on a sled with an unknown CPU type, we + // have no idea if the migration target is actually safe to move it to. + // For all we know we could be asked to move VM from an AMD CPU to an + // ARM CPU. + // + // For now, refuse the migration in this circumstance. This probably + // impacts test environments in particular (sorry James!) and one might + // imagine a config option to allow such potentially-problematic + // migrations. + if params.src_vmm.cpu_platform != VmmCpuPlatform::SledDefault { + // Well, the claim about the VMM CPU platform was wrong, but the + // consequences still hold. This implies a broken case in + // `compatible_sled_cpu_families`. Log about the bug, but we still + // can't migrate this instance. + warn!(osagactx.log(), + "VMM has no compatible sled CPU families \ + but has a real CPU platform?!"; + "instance_id" => %params.instance.id(), + "src_propolis_id" => %params.src_vmm.id, + "src_vmm_cpu_platform" => %params.src_vmm.cpu_platform); + } + return Err(ActionError::action_failed(Error::invalid_request( + "cannot migrate an instance with a VMM CPU platform of SledDefault", + ))); + }; + // Add a constraint that requires the allocator to reserve on the // migration's destination sled instead of a random sled. + // + // The destination sled ID is arbitrary (from the internal API), so it's + // possible that we were told to migrate to a sled that is incompatible with + // the VMM's CPU platform. Constrain by that so we'll fail to pick the + // destination sled if it's truly incompatible. 
let constraints = db::model::SledReservationConstraintBuilder::new() .must_select_from(&[params.migrate_params.dst_sled_id]) + .cpu_families(compatible_sled_families) .build(); let resource = super::instance_common::reserve_vmm_resources( diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index 19bf45e2247..6cbd3372e68 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -1283,7 +1283,9 @@ pub struct InstanceCreate { pub anti_affinity_groups: Vec, /// The CPU platform to be used for this instance. If this is `null`, the - /// instance requires no particular CPU platform. + /// instance requires no particular CPU platform; when it is started the + /// instance will have the most general CPU platform supported by the sled + /// it is initially placed on. #[serde(default)] pub cpu_platform: Option, } From 75ed627d02ff362b789f540b7e295bfb875cb081 Mon Sep 17 00:00:00 2001 From: iximeow Date: Thu, 4 Sep 2025 00:09:13 +0000 Subject: [PATCH 42/42] extremely confused about ANY() but de-eww success! For a reason I can't figure out, when this was ``` AND sled.cpu_family IN (...) ``` binding a parameter made that query end up like ``` AND sled.cpu_family IN ($1,) ``` which then expects a single `sled_cpu_family` enum for later binding. Binding an array then yielded a type error like ``` DatabaseError(Unknown, "invalid cast: sled_cpu_family[] -> sled_cpu_family") ``` ... but writing it like `AND sled.cpu_family = ANY (...)` does not get the extra comma and instead renders like `ANY ($1)` which happily takes a `sled_cpu_family[]`. --- .../src/db/queries/sled_reservation.rs | 23 ++++--------------- .../sled_find_targets_query_with_cpu.sql | 2 +- 2 files changed, 6 insertions(+), 19 deletions(-) diff --git a/nexus/db-queries/src/db/queries/sled_reservation.rs b/nexus/db-queries/src/db/queries/sled_reservation.rs index e1ccefa3310..a5fee2ce9c1 100644 --- a/nexus/db-queries/src/db/queries/sled_reservation.rs +++ b/nexus/db-queries/src/db/queries/sled_reservation.rs @@ -110,21 +110,10 @@ pub fn sled_find_targets_query( ", ); - // TODO(gjc): eww. the correct way to do this is to write this as - // - // "AND sled.cpu_family = ANY (" - // - // and then just have one `param` which can be bound to a - // `sql_types::Array` - if let Some(families) = sled_families { - query.sql(" AND sled.cpu_family IN ("); - for i in 0..families.len() { - if i > 0 { - query.sql(", "); - } - query.param(); - } - query.sql(")"); + if sled_families.is_some() { + query.sql(" AND sled.cpu_family = ANY ("); + query.param(); + query.sql(") "); } query.sql("GROUP BY sled.id @@ -248,9 +237,7 @@ pub fn sled_find_targets_query( ); if let Some(families) = sled_families { - for f in families { - query.bind::(*f); - } + query.bind::, _>(families.to_vec()); } query diff --git a/nexus/db-queries/tests/output/sled_find_targets_query_with_cpu.sql b/nexus/db-queries/tests/output/sled_find_targets_query_with_cpu.sql index 6fc5a1b369e..3dcc75d1198 100644 --- a/nexus/db-queries/tests/output/sled_find_targets_query_with_cpu.sql +++ b/nexus/db-queries/tests/output/sled_find_targets_query_with_cpu.sql @@ -9,7 +9,7 @@ WITH sled.time_deleted IS NULL AND sled.sled_policy = 'in_service' AND sled.sled_state = 'active' - AND sled.cpu_family IN ($1,) + AND sled.cpu_family = ANY ($1) GROUP BY sled.id HAVING