Skip to content

Commit 4130d30

Browse files
gjcolombo authored and iximeow committed
make sleds report their CPU families to Nexus
RFD 505 proposes that instances should be able to set a "minimum hardware platform" or "minimum CPU platform" that allows users to constrain an instance to run on sleds that have a specific set of CPU features available. This allows a user to opt a VM into advanced hardware features (e.g. AVX-512 support) by constraining it to run only on sleds that support those features. For this to work, Nexus needs to understand what CPUs are present in which sleds. Have sled-agent query CPUID to get CPU vendor and family information and report this to Nexus as part of the sled hardware manifest.
1 parent bd22075 commit 4130d30

File tree

35 files changed

+495
-17
lines changed

35 files changed

+495
-17
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

nexus/db-model/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -103,6 +103,7 @@ mod silo_group;
103103
mod silo_user;
104104
mod silo_user_password_hash;
105105
mod sled;
106+
mod sled_cpu_family;
106107
mod sled_instance;
107108
mod sled_policy;
108109
mod sled_resource_vmm;
@@ -223,6 +224,7 @@ pub use silo_group::*;
223224
pub use silo_user::*;
224225
pub use silo_user_password_hash::*;
225226
pub use sled::*;
227+
pub use sled_cpu_family::*;
226228
pub use sled_instance::*;
227229
pub use sled_policy::to_db_sled_policy; // Do not expose DbSledPolicy
228230
pub use sled_resource_vmm::*;

nexus/db-model/src/schema_versions.rs

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ use std::{collections::BTreeMap, sync::LazyLock};
1616
///
1717
/// This must be updated when you change the database schema. Refer to
1818
/// schema/crdb/README.adoc in the root of this repository for details.
19-
pub const SCHEMA_VERSION: Version = Version::new(173, 0, 0);
19+
pub const SCHEMA_VERSION: Version = Version::new(174, 0, 0);
2020

2121
/// List of all past database schema versions, in *reverse* order
2222
///
@@ -28,6 +28,7 @@ static KNOWN_VERSIONS: LazyLock<Vec<KnownVersion>> = LazyLock::new(|| {
2828
// | leaving the first copy as an example for the next person.
2929
// v
3030
// KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"),
31+
KnownVersion::new(174, "sled-cpu-family"),
3132
KnownVersion::new(173, "inv-internal-dns"),
3233
KnownVersion::new(172, "add-zones-with-mupdate-override"),
3334
KnownVersion::new(171, "inv-clear-mupdate-override"),

nexus/db-model/src/sled.rs

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@ use super::{ByteCount, Generation, SledState, SqlU16, SqlU32};
66
use crate::collection::DatastoreCollectionConfig;
77
use crate::ipv6;
88
use crate::sled::shared::Baseboard;
9+
use crate::sled_cpu_family::SledCpuFamily;
910
use crate::sled_policy::DbSledPolicy;
1011
use chrono::{DateTime, Utc};
1112
use db_macros::Asset;
@@ -40,6 +41,8 @@ pub struct SledSystemHardware {
4041

4142
// current VMM reservoir size
4243
pub reservoir_size: ByteCount,
44+
45+
pub cpu_family: SledCpuFamily,
4346
}
4447

4548
/// Database representation of a Sled.
@@ -84,6 +87,9 @@ pub struct Sled {
8487

8588
// ServiceAddress (Repo Depot API). Uses `ip`.
8689
pub repo_depot_port: SqlU16,
90+
91+
/// The family of this sled's CPU.
92+
pub cpu_family: SledCpuFamily,
8793
}
8894

8995
impl Sled {
@@ -141,6 +147,7 @@ impl From<Sled> for views::Sled {
141147
state: sled.state.into(),
142148
usable_hardware_threads: sled.usable_hardware_threads.0,
143149
usable_physical_ram: *sled.usable_physical_ram,
150+
cpu_family: sled.cpu_family.into(),
144151
}
145152
}
146153
}
@@ -185,6 +192,7 @@ impl From<Sled> for params::SledAgentInfo {
185192
usable_physical_ram: sled.usable_physical_ram.into(),
186193
reservoir_size: sled.reservoir_size.into(),
187194
generation: sled.sled_agent_gen.into(),
195+
cpu_family: sled.cpu_family.into(),
188196
decommissioned,
189197
}
190198
}
@@ -229,6 +237,8 @@ pub struct SledUpdate {
229237
// ServiceAddress (Repo Depot API). Uses `ip`.
230238
pub repo_depot_port: SqlU16,
231239

240+
pub cpu_family: SledCpuFamily,
241+
232242
// Generation number - owned and incremented by sled-agent.
233243
pub sled_agent_gen: Generation,
234244
}
@@ -258,6 +268,7 @@ impl SledUpdate {
258268
ip: addr.ip().into(),
259269
port: addr.port().into(),
260270
repo_depot_port: repo_depot_port.into(),
271+
cpu_family: hardware.cpu_family,
261272
sled_agent_gen,
262273
}
263274
}
@@ -296,6 +307,7 @@ impl SledUpdate {
296307
repo_depot_port: self.repo_depot_port,
297308
last_used_address,
298309
sled_agent_gen: self.sled_agent_gen,
310+
cpu_family: self.cpu_family,
299311
}
300312
}
301313

nexus/db-model/src/sled_cpu_family.rs

Lines changed: 57 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,57 @@
1+
// This Source Code Form is subject to the terms of the Mozilla Public
2+
// License, v. 2.0. If a copy of the MPL was not distributed with this
3+
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4+
5+
use super::impl_enum_type;
6+
use serde::{Deserialize, Serialize};
7+
8+
impl_enum_type!(
9+
SledCpuFamilyEnum:
10+
11+
#[derive(
12+
Copy,
13+
Clone,
14+
Debug,
15+
PartialEq,
16+
AsExpression,
17+
FromSqlRow,
18+
Serialize,
19+
Deserialize
20+
)]
21+
pub enum SledCpuFamily;
22+
23+
Unknown => b"unknown"
24+
AmdMilan => b"amd_milan"
25+
AmdTurin => b"amd_turin"
26+
);
27+
28+
impl From<nexus_types::internal_api::params::SledCpuFamily> for SledCpuFamily {
29+
fn from(value: nexus_types::internal_api::params::SledCpuFamily) -> Self {
30+
use nexus_types::internal_api::params::SledCpuFamily as InputFamily;
31+
match value {
32+
InputFamily::Unknown => Self::Unknown,
33+
InputFamily::AmdMilan => Self::AmdMilan,
34+
InputFamily::AmdTurin => Self::AmdTurin,
35+
}
36+
}
37+
}
38+
39+
impl From<SledCpuFamily> for nexus_types::internal_api::params::SledCpuFamily {
40+
fn from(value: SledCpuFamily) -> Self {
41+
match value {
42+
SledCpuFamily::Unknown => Self::Unknown,
43+
SledCpuFamily::AmdMilan => Self::AmdMilan,
44+
SledCpuFamily::AmdTurin => Self::AmdTurin,
45+
}
46+
}
47+
}
48+
49+
impl From<SledCpuFamily> for nexus_types::external_api::views::SledCpuFamily {
50+
fn from(value: SledCpuFamily) -> Self {
51+
match value {
52+
SledCpuFamily::Unknown => Self::Unknown,
53+
SledCpuFamily::AmdMilan => Self::AmdMilan,
54+
SledCpuFamily::AmdTurin => Self::AmdTurin,
55+
}
56+
}
57+
}

nexus/db-queries/src/db/datastore/crucible_dataset.rs

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -294,6 +294,7 @@ mod test {
294294
use crate::db::pub_test_utils::TestDatabase;
295295
use nexus_db_model::Generation;
296296
use nexus_db_model::SledBaseboard;
297+
use nexus_db_model::SledCpuFamily;
297298
use nexus_db_model::SledSystemHardware;
298299
use nexus_db_model::SledUpdate;
299300
use omicron_common::api::external::ByteCount;
@@ -323,6 +324,7 @@ mod test {
323324
usable_hardware_threads: 128,
324325
usable_physical_ram: (64 << 30).try_into().unwrap(),
325326
reservoir_size: (16 << 30).try_into().unwrap(),
327+
cpu_family: SledCpuFamily::AmdMilan,
326328
},
327329
Uuid::new_v4(),
328330
Generation::new(),

nexus/db-queries/src/db/datastore/support_bundle.rs

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -515,6 +515,7 @@ mod test {
515515
use crate::db::pub_test_utils::TestDatabase;
516516
use nexus_db_model::Generation;
517517
use nexus_db_model::SledBaseboard;
518+
use nexus_db_model::SledCpuFamily;
518519
use nexus_db_model::SledSystemHardware;
519520
use nexus_db_model::SledUpdate;
520521
use nexus_db_model::Zpool;
@@ -617,6 +618,7 @@ mod test {
617618
usable_hardware_threads: 128,
618619
usable_physical_ram: (64 << 30).try_into().unwrap(),
619620
reservoir_size: (16 << 30).try_into().unwrap(),
621+
cpu_family: SledCpuFamily::AmdMilan,
620622
},
621623
rack_id,
622624
Generation::new(),

nexus/db-queries/src/db/pub_test_utils/helpers.rs

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@ use nexus_db_model::ProjectImage;
2525
use nexus_db_model::ProjectImageIdentity;
2626
use nexus_db_model::Resources;
2727
use nexus_db_model::SledBaseboard;
28+
use nexus_db_model::SledCpuFamily;
2829
use nexus_db_model::SledSystemHardware;
2930
use nexus_db_model::SledUpdate;
3031
use nexus_db_model::Snapshot;
@@ -77,6 +78,7 @@ pub struct SledSystemHardwareBuilder {
7778
usable_hardware_threads: u32,
7879
usable_physical_ram: i64,
7980
reservoir_size: i64,
81+
cpu_family: SledCpuFamily,
8082
}
8183

8284
impl Default for SledSystemHardwareBuilder {
@@ -86,6 +88,7 @@ impl Default for SledSystemHardwareBuilder {
8688
usable_hardware_threads: 4,
8789
usable_physical_ram: 1 << 40,
8890
reservoir_size: 1 << 39,
91+
cpu_family: SledCpuFamily::AmdMilan,
8992
}
9093
}
9194
}
@@ -121,12 +124,18 @@ impl SledSystemHardwareBuilder {
121124
self
122125
}
123126

127+
pub fn cpu_family(&mut self, family: SledCpuFamily) -> &mut Self {
128+
self.cpu_family = family;
129+
self
130+
}
131+
124132
pub fn build(&self) -> SledSystemHardware {
125133
SledSystemHardware {
126134
is_scrimlet: self.is_scrimlet,
127135
usable_hardware_threads: self.usable_hardware_threads,
128136
usable_physical_ram: self.usable_physical_ram.try_into().unwrap(),
129137
reservoir_size: self.reservoir_size.try_into().unwrap(),
138+
cpu_family: self.cpu_family,
130139
}
131140
}
132141
}

nexus/db-schema/src/enums.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -72,6 +72,7 @@ define_enums! {
7272
RouterRouteKindEnum => "router_route_kind",
7373
SagaStateEnum => "saga_state",
7474
ServiceKindEnum => "service_kind",
75+
SledCpuFamilyEnum => "sled_cpu_family",
7576
SledPolicyEnum => "sled_policy",
7677
SledRoleEnum => "sled_role",
7778
SledStateEnum => "sled_state",

nexus/db-schema/src/schema.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -971,6 +971,7 @@ table! {
971971
sled_state -> crate::enums::SledStateEnum,
972972
sled_agent_gen -> Int8,
973973
repo_depot_port -> Int4,
974+
cpu_family -> crate::enums::SledCpuFamilyEnum,
974975
}
975976
}
976977

0 commit comments

Comments
 (0)