Skip to content
Open
Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
4130d30
make sleds report their CPU families to Nexus
gjcolombo Apr 15, 2025
d316a2e
differentiate Turin and Turin Dense for the control plane
iximeow Jul 30, 2025
1367289
unwind CPU families from the public sled API
iximeow Jul 30, 2025
b5eaf68
review notes
iximeow Jul 30, 2025
114f383
fix links ugh
iximeow Jul 30, 2025
4c40d47
migration still needs to know about turin dense
iximeow Jul 30, 2025
5ec45d3
sled-agent needs to expose cpu_family for inventory collections too
iximeow Aug 2, 2025
e9cbbdd
it compiles (might work now?)
iximeow Aug 2, 2025
bf7ccae
migrations need to be... right ...
iximeow Aug 2, 2025
0a79d5e
and that's the missing update of cpu_family.
iximeow Aug 2, 2025
ea59a26
non-illumos has to build too ofc
iximeow Aug 2, 2025
10cd335
fix expectorated output and, oh, docs are in the openapi spec
iximeow Aug 3, 2025
99a37f0
cleanup
iximeow Aug 6, 2025
6846a4a
move SledCpuFamily to a more fitting place
iximeow Aug 6, 2025
5f94661
rustfmt AGH
iximeow Aug 6, 2025
543bdc9
and expectorate up the reconfigurator output
iximeow Aug 6, 2025
34516b4
instance minimum CPU platforms
gjcolombo Apr 21, 2025
3831038
walk back "minimum"ness of CPU platforms
iximeow Jul 28, 2025
33956f9
i want propolis logs too please thank you
iximeow Jul 31, 2025
9b6b6ec
one more pass at aligning RFD 314, what we currently expose, and the …
iximeow Aug 6, 2025
5cf7b9c
and map the CPU platform "Turin" to all Turin sled CPU types
iximeow Aug 6, 2025
a877f39
one use of SledCpuFamily i missed in the rebase
iximeow Aug 7, 2025
1d34ab2
revert the buildomat log collection changes
iximeow Aug 7, 2025
4275594
more(!) RFD 341 errors
iximeow Aug 28, 2025
c686acf
move all the cpuid stuff, but it something is not great with the new …
iximeow Aug 29, 2025
9348caf
update raw-cpuid, genericize ideal Milan, map CpuIdDump to Propolis
iximeow Aug 29, 2025
af4e2ee
rustfmt
iximeow Aug 29, 2025
b05bae4
move MILAN_CPUID to an equivalence test
iximeow Aug 29, 2025
5f8aedd
outline more sensitive CPUID bits into a compatibility helper
iximeow Aug 30, 2025
9e92b01
dead code
iximeow Aug 30, 2025
d4cbf2f
another RFD 314/PR transcription error: extended APIC space
iximeow Sep 3, 2025
bfee777
adjust leaf 8000_001D expectations with RFD 314 adjustments
iximeow Sep 3, 2025
fb12fc9
test demonstrating constructed CPUID tables are as exepected
iximeow Sep 3, 2025
d59a842
rustfmt
iximeow Sep 3, 2025
d9acd90
describe the CPU profile tests a bit better, formatting
iximeow Sep 3, 2025
63a5326
more fmt
iximeow Sep 3, 2025
e5ba594
turbo nit
iximeow Sep 3, 2025
b2fd208
Merge remote-tracking branch 'github/main' into minimum-cpu-platforms
iximeow Sep 3, 2025
401d58b
forgot to import everything the tests need
iximeow Sep 3, 2025
5364358
clippy
iximeow Sep 3, 2025
d17b8aa
hide leaf B for now (in coordination with rfd314 updates)
iximeow Sep 3, 2025
10e67c1
pretty sure this forbids irresponsible migrations..
iximeow Sep 3, 2025
75ed627
extremely confused about ANY() but de-eww success!
iximeow Sep 4, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,7 @@ rand_distr = "0.4.3"
rand_seeder = "0.3.0"
range-requests = { path = "range-requests" }
ratatui = "0.29.0"
raw-cpuid = { git = "https://github.com/oxidecomputer/rust-cpuid.git", rev = "0a8dbd2311263f6a59ea58089e33c8331436ff3a" }
rayon = "1.10"
rcgen = "0.12.1"
reconfigurator-cli = { path = "dev-tools/reconfigurator-cli" }
Expand Down
49 changes: 49 additions & 0 deletions common/src/api/external/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1194,6 +1194,10 @@ pub struct Instance {

#[serde(flatten)]
pub auto_restart_status: InstanceAutoRestartStatus,

/// The CPU platform for this instance. If this is `null`, the instance
/// requires no particular CPU platform.
pub cpu_platform: Option<InstanceCpuPlatform>,
}

/// Status of control-plane driven automatic failure recovery for this instance.
Expand Down Expand Up @@ -1258,6 +1262,51 @@ pub enum InstanceAutoRestartPolicy {
BestEffort,
}

/// A required CPU platform for an instance.
///
/// When an instance specifies a required CPU platform:
///
/// - The system may expose (to the VM) new CPU features that are only present
/// on that platform (or on newer platforms of the same lineage that also
/// support those features).
/// - The instance must run on hosts that have CPUs that support all the
/// features of the supplied platform.
///
/// That is, the instance is restricted to hosts that have the CPUs which
/// support all features of the required platform, but in exchange the CPU
/// features exposed by the platform are available for the guest to use. Note
/// that this may prevent an instance from starting (if the hosts that could run
/// it are full but there is capacity on other incompatible hosts).
///
/// If an instance does not specify a required CPU platform, then when
/// it starts, the control plane selects a host for the instance and then
/// supplies the guest with the "minimum" CPU platform supported by that host.
/// This maximizes the number of hosts that can run the VM if it later needs to
/// migrate to another host.
///
/// In all cases, the CPU features presented by a given CPU platform are a
/// subset of what the corresponding hardware may actually support; features
/// which cannot be used from a virtual environment or do not have full
/// hypervisor support may be masked off. See RFD 314 for specific CPU features
/// in a CPU platform.
Comment on lines +1292 to +1293
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is RFD 314 public, and do we intend to make it public? If we're referencing it in a comment that will eventually make it into the public API docs, perhaps we should?

#[derive(
Copy, Clone, Debug, Deserialize, Serialize, JsonSchema, Eq, PartialEq,
)]
#[serde(rename_all = "snake_case")]
pub enum InstanceCpuPlatform {
/// An AMD Milan-like CPU platform.
AmdMilan,

/// An AMD Turin-like CPU platform.
// Note that there is only Turin, not Turin Dense - feature-wise they are
// collapsed together as the guest-visible platform is the same.
// If the two must be distinguished for instance placement, we'll want to
// track whatever the motivating constraint is more explicitly. CPU
// families, and especially the vendor code names, don't necessarily promise
// details about specific processor packaging choices.
AmdTurin,
}

// AFFINITY GROUPS

/// Affinity policy used to describe "what to do when a request cannot be satisfied"
Expand Down
6 changes: 6 additions & 0 deletions dev-tools/omdb/src/bin/omdb/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4760,6 +4760,7 @@ async fn cmd_db_instance_info(
propolis_ip: _,
propolis_port: _,
instance_id: _,
cpu_platform: _,
time_created,
time_deleted,
runtime:
Expand Down Expand Up @@ -7356,6 +7357,7 @@ fn prettyprint_vmm(
const INSTANCE_ID: &'static str = "instance ID";
const SLED_ID: &'static str = "sled ID";
const SLED_SERIAL: &'static str = "sled serial";
const CPU_PLATFORM: &'static str = "CPU platform";
const ADDRESS: &'static str = "propolis address";
const STATE: &'static str = "state";
const WIDTH: usize = const_max_len(&[
Expand All @@ -7366,6 +7368,7 @@ fn prettyprint_vmm(
INSTANCE_ID,
SLED_ID,
SLED_SERIAL,
CPU_PLATFORM,
STATE,
ADDRESS,
]);
Expand All @@ -7379,6 +7382,7 @@ fn prettyprint_vmm(
sled_id,
propolis_ip,
propolis_port,
cpu_platform,
runtime: db::model::VmmRuntimeState { state, r#gen, time_state_updated },
} = vmm;

Expand All @@ -7405,6 +7409,7 @@ fn prettyprint_vmm(
if let Some(serial) = sled_serial {
println!("{indent}{SLED_SERIAL:>width$}: {serial}");
}
println!("{indent}{CPU_PLATFORM:>width$}: {cpu_platform}");
}

async fn cmd_db_vmm_list(
Expand Down Expand Up @@ -7480,6 +7485,7 @@ async fn cmd_db_vmm_list(
sled_id,
propolis_ip: _,
propolis_port: _,
cpu_platform: _,
runtime:
db::model::VmmRuntimeState {
state,
Expand Down
3 changes: 3 additions & 0 deletions dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout
Original file line number Diff line number Diff line change
Expand Up @@ -1101,6 +1101,7 @@ sled 2eb69596-f081-4e2d-9425-9994926e0832 (role = Gimlet, serial serial1)
found at: <REDACTED_TIMESTAMP> from fake sled agent
address: [fd00:1122:3344:102::1]:12345
usable hw threads: 10
CPU family: amd_milan
usable memory (GiB): 0
reservoir (GiB): 0
physical disks:
Expand Down Expand Up @@ -1210,6 +1211,7 @@ sled 32d8d836-4d8a-4e54-8fa9-f31d79c42646 (role = Gimlet, serial serial2)
found at: <REDACTED_TIMESTAMP> from fake sled agent
address: [fd00:1122:3344:103::1]:12345
usable hw threads: 10
CPU family: amd_milan
usable memory (GiB): 0
reservoir (GiB): 0
physical disks:
Expand Down Expand Up @@ -1319,6 +1321,7 @@ sled 89d02b1b-478c-401a-8e28-7a26f74fa41b (role = Gimlet, serial serial0)
found at: <REDACTED_TIMESTAMP> from fake sled agent
address: [fd00:1122:3344:101::1]:12345
usable hw threads: 10
CPU family: amd_milan
usable memory (GiB): 0
reservoir (GiB): 0
physical disks:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ sled 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c (role = Gimlet, serial serial1)
found at: <REDACTED_TIMESTAMP> from fake sled agent
address: [fd00:1122:3344:102::1]:12345
usable hw threads: 10
CPU family: amd_milan
usable memory (GiB): 0
reservoir (GiB): 0
physical disks:
Expand Down Expand Up @@ -194,6 +195,7 @@ sled 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 (role = Gimlet, serial serial0)
found at: <REDACTED_TIMESTAMP> from fake sled agent
address: [fd00:1122:3344:101::1]:12345
usable hw threads: 10
CPU family: amd_milan
usable memory (GiB): 0
reservoir (GiB): 0
physical disks:
Expand Down Expand Up @@ -302,6 +304,7 @@ sled d81c6a84-79b8-4958-ae41-ea46c9b19763 (role = Gimlet, serial serial2)
found at: <REDACTED_TIMESTAMP> from fake sled agent
address: [fd00:1122:3344:103::1]:12345
usable hw threads: 10
CPU family: amd_milan
usable memory (GiB): 0
reservoir (GiB): 0
physical disks:
Expand Down
1 change: 1 addition & 0 deletions end-to-end-tests/src/instance_launch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ async fn instance_launch() -> Result<()> {
start: true,
auto_restart_policy: Default::default(),
anti_affinity_groups: Vec::new(),
cpu_platform: None,
})
.send()
.await?;
Expand Down
5 changes: 3 additions & 2 deletions nexus-sled-agent-shared/src/inventory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ use omicron_uuid_kinds::{SledUuid, ZpoolUuid};
use schemars::schema::{Schema, SchemaObject};
use schemars::{JsonSchema, SchemaGenerator};
use serde::{Deserialize, Serialize};
// Export this type for convenience -- this way, dependents don't have to
// Export these types for convenience -- this way, dependents don't have to
// depend on sled-hardware-types.
pub use sled_hardware_types::Baseboard;
pub use sled_hardware_types::{Baseboard, SledCpuFamily};
use strum::EnumIter;
use tufaceous_artifact::{ArtifactHash, KnownArtifactKind};

Expand Down Expand Up @@ -121,6 +121,7 @@ pub struct Inventory {
pub baseboard: Baseboard,
pub usable_hardware_threads: u32,
pub usable_physical_ram: ByteCount,
pub cpu_family: SledCpuFamily,
pub reservoir_size: ByteCount,
pub disks: Vec<InventoryDisk>,
pub zpools: Vec<InventoryZpool>,
Expand Down
1 change: 1 addition & 0 deletions nexus/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ oxide-tokio-rt.workspace = true
oximeter.workspace = true
oximeter-instruments = { workspace = true, features = ["http-instruments"] }
oximeter-producer.workspace = true
raw-cpuid = { workspace = true, features = ["std"] }
rustls = { workspace = true }
rustls-pemfile = { workspace = true }
update-common.workspace = true
Expand Down
8 changes: 7 additions & 1 deletion nexus/db-model/src/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
use super::InstanceIntendedState as IntendedState;
use super::{
ByteCount, Disk, ExternalIp, Generation, InstanceAutoRestartPolicy,
InstanceCpuCount, InstanceState, Vmm, VmmState,
InstanceCpuCount, InstanceCpuPlatform, InstanceState, Vmm, VmmState,
};
use crate::collection::DatastoreAttachTargetConfig;
use crate::serde_time_delta::optional_time_delta;
Expand Down Expand Up @@ -68,6 +68,9 @@ pub struct Instance {
#[diesel(column_name = boot_disk_id)]
pub boot_disk_id: Option<Uuid>,

/// The instance's required CPU platform.
pub cpu_platform: Option<InstanceCpuPlatform>,

#[diesel(embed)]
pub runtime_state: InstanceRuntimeState,

Expand Down Expand Up @@ -139,6 +142,7 @@ impl Instance {
// Intentionally ignore `params.boot_disk_id` here: we can't set
// `boot_disk_id` until the referenced disk is attached.
boot_disk_id: None,
cpu_platform: params.cpu_platform.map(Into::into),

runtime_state,
intended_state,
Expand Down Expand Up @@ -493,4 +497,6 @@ pub struct InstanceUpdate {
pub ncpus: InstanceCpuCount,

pub memory: ByteCount,

pub cpu_platform: Option<InstanceCpuPlatform>,
}
65 changes: 65 additions & 0 deletions nexus/db-model/src/instance_cpu_platform.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

use crate::SledCpuFamily;

use super::impl_enum_type;
use serde::{Deserialize, Serialize};

// Database-model counterpart of the external API's `InstanceCpuPlatform`;
// the two are converted into one another by the `From` impls in this file.
//
// NOTE(review): `impl_enum_type!` is defined elsewhere. Presumably it
// generates the `InstanceCpuPlatform` enum together with its SQL enum type
// (`InstanceCpuPlatformEnum`), using each byte string as the label stored in
// the database -- confirm against the macro definition and the schema.
impl_enum_type!(
InstanceCpuPlatformEnum:

#[derive(
Copy,
Clone,
Debug,
PartialEq,
AsExpression,
FromSqlRow,
Serialize,
Deserialize
)]
pub enum InstanceCpuPlatform;

// One entry per variant of the external API enum, spelled in snake_case.
AmdMilan => b"amd_milan"
AmdTurin => b"amd_turin"
);

impl InstanceCpuPlatform {
/// Returns a slice containing the set of sled CPU families that can
/// accommodate an instance with this CPU platform.
pub fn compatible_sled_cpu_families(&self) -> &'static [SledCpuFamily] {
match self {
// Turin-based sleds have a superset of the features made available
// in a guest's Milan CPU platform
Self::AmdMilan => {
&[SledCpuFamily::AmdMilan, SledCpuFamily::AmdTurin]
}
Self::AmdTurin => &[SledCpuFamily::AmdTurin],
}
}
}

impl From<omicron_common::api::external::InstanceCpuPlatform>
for InstanceCpuPlatform
{
fn from(value: omicron_common::api::external::InstanceCpuPlatform) -> Self {
use omicron_common::api::external::InstanceCpuPlatform as ApiPlatform;
match value {
ApiPlatform::AmdMilan => Self::AmdMilan,
ApiPlatform::AmdTurin => Self::AmdTurin,
}
}
}

/// Converts the database-model representation of a CPU platform back into
/// the external API representation, variant for variant.
impl From<InstanceCpuPlatform>
    for omicron_common::api::external::InstanceCpuPlatform
{
    fn from(value: InstanceCpuPlatform) -> Self {
        // Name both sides explicitly so the direction of the conversion is
        // obvious at a glance.
        type Db = InstanceCpuPlatform;
        type External = omicron_common::api::external::InstanceCpuPlatform;

        match value {
            Db::AmdMilan => External::AmdMilan,
            Db::AmdTurin => External::AmdTurin,
        }
    }
}
3 changes: 3 additions & 0 deletions nexus/db-model/src/inventory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use crate::ArtifactHash;
use crate::Generation;
use crate::PhysicalDiskKind;
use crate::omicron_zone_config::{self, OmicronZoneNic};
use crate::sled_cpu_family::SledCpuFamily;
use crate::typed_uuid::DbTypedUuid;
use crate::{
ByteCount, MacAddr, Name, ServiceKind, SqlU8, SqlU16, SqlU32,
Expand Down Expand Up @@ -887,6 +888,7 @@ pub struct InvSledAgent {
pub sled_role: SledRole,
pub usable_hardware_threads: SqlU32,
pub usable_physical_ram: ByteCount,
pub cpu_family: SledCpuFamily,
pub reservoir_size: ByteCount,
// Soft foreign key to an `InvOmicronSledConfig`
pub ledgered_sled_config: Option<DbTypedUuid<OmicronSledConfigKind>>,
Expand Down Expand Up @@ -1300,6 +1302,7 @@ impl InvSledAgent {
usable_physical_ram: ByteCount::from(
sled_agent.usable_physical_ram,
),
cpu_family: sled_agent.cpu_family.into(),
reservoir_size: ByteCount::from(sled_agent.reservoir_size),
ledgered_sled_config: ledgered_sled_config.map(From::from),
reconciler_status,
Expand Down
Loading