Skip to content

Commit 811ee9a

Browse files
authored
[inventory] Add active host phase 1 slot (#8793)
This was an oversight: we're already collecting the host phase 1 hashes, but we also need to know which slot is active (for host OS updates).
1 parent 107abb1 commit 811ee9a

File tree

14 files changed

+395
-37
lines changed

14 files changed

+395
-37
lines changed

dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1030,8 +1030,9 @@ Sled serial0
10301030
part number: model0
10311031
power: A2
10321032
revision: 0
1033-
MGS slot: Sled 0 (cubby 0)
1033+
MGS slot: Sled 0
10341034
found at: <REDACTED_TIMESTAMP> from fake MGS 1
1035+
host phase 1 active slot: A
10351036
host phase 1 hashes:
10361037
SLOT HASH
10371038
A 0101010101010101010101010101010101010101010101010101010101010101
@@ -1053,8 +1054,9 @@ Sled serial1
10531054
part number: model1
10541055
power: A2
10551056
revision: 0
1056-
MGS slot: Sled 1 (cubby 1)
1057+
MGS slot: Sled 1
10571058
found at: <REDACTED_TIMESTAMP> from fake MGS 1
1059+
host phase 1 active slot: A
10581060
host phase 1 hashes:
10591061
SLOT HASH
10601062
A 0101010101010101010101010101010101010101010101010101010101010101
@@ -1076,8 +1078,9 @@ Sled serial2
10761078
part number: model2
10771079
power: A2
10781080
revision: 0
1079-
MGS slot: Sled 2 (cubby 2)
1081+
MGS slot: Sled 2
10801082
found at: <REDACTED_TIMESTAMP> from fake MGS 1
1083+
host phase 1 active slot: A
10811084
host phase 1 hashes:
10821085
SLOT HASH
10831086
A 0101010101010101010101010101010101010101010101010101010101010101

nexus/db-model/src/inventory.rs

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ use nexus_db_schema::schema::inv_zone_manifest_zone;
3030
use nexus_db_schema::schema::{
3131
hw_baseboard_id, inv_caboose, inv_clickhouse_keeper_membership,
3232
inv_cockroachdb_status, inv_collection, inv_collection_error, inv_dataset,
33-
inv_host_phase_1_flash_hash, inv_internal_dns,
34-
inv_last_reconciliation_dataset_result,
33+
inv_host_phase_1_active_slot, inv_host_phase_1_flash_hash,
34+
inv_internal_dns, inv_last_reconciliation_dataset_result,
3535
inv_last_reconciliation_disk_result,
3636
inv_last_reconciliation_orphaned_dataset,
3737
inv_last_reconciliation_zone_result, inv_mupdate_override_non_boot,
@@ -62,6 +62,7 @@ use nexus_sled_agent_shared::inventory::{
6262
ConfigReconcilerInventoryResult, OmicronSledConfig, OmicronZoneConfig,
6363
OmicronZoneDataset, OmicronZoneImageSource, OmicronZoneType,
6464
};
65+
use nexus_types::inventory::HostPhase1ActiveSlot;
6566
use nexus_types::inventory::{
6667
BaseboardId, Caboose, CockroachStatus, Collection,
6768
InternalDnsGenerationStatus, NvmeFirmware, PowerState, RotPage, RotSlot,
@@ -786,6 +787,28 @@ impl From<InvRootOfTrust> for nexus_types::inventory::RotState {
786787
}
787788
}
788789

790+
/// See [`nexus_types::inventory::HostPhase1ActiveSlot`].
791+
#[derive(Queryable, Clone, Debug, Selectable)]
792+
#[diesel(table_name = inv_host_phase_1_active_slot)]
793+
pub struct InvHostPhase1ActiveSlot {
794+
pub inv_collection_id: Uuid,
795+
pub hw_baseboard_id: Uuid,
796+
pub time_collected: DateTime<Utc>,
797+
pub source: String,
798+
799+
pub slot: HwM2Slot,
800+
}
801+
802+
impl From<InvHostPhase1ActiveSlot> for HostPhase1ActiveSlot {
803+
fn from(value: InvHostPhase1ActiveSlot) -> Self {
804+
Self {
805+
time_collected: value.time_collected,
806+
source: value.source,
807+
slot: value.slot.into(),
808+
}
809+
}
810+
}
811+
789812
/// See [`nexus_types::inventory::HostPhase1FlashHash`].
790813
#[derive(Queryable, Clone, Debug, Selectable)]
791814
#[diesel(table_name = inv_host_phase_1_flash_hash)]

nexus/db-model/src/schema_versions.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ use std::{collections::BTreeMap, sync::LazyLock};
1616
///
1717
/// This must be updated when you change the database schema. Refer to
1818
/// schema/crdb/README.adoc in the root of this repository for details.
19-
pub const SCHEMA_VERSION: Version = Version::new(174, 0, 0);
19+
pub const SCHEMA_VERSION: Version = Version::new(175, 0, 0);
2020

2121
/// List of all past database schema versions, in *reverse* order
2222
///
@@ -28,6 +28,7 @@ static KNOWN_VERSIONS: LazyLock<Vec<KnownVersion>> = LazyLock::new(|| {
2828
// | leaving the first copy as an example for the next person.
2929
// v
3030
// KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"),
31+
KnownVersion::new(175, "inv-host-phase-1-active-slot"),
3132
KnownVersion::new(174, "add-tuf-rot-by-sign"),
3233
KnownVersion::new(173, "inv-internal-dns"),
3334
KnownVersion::new(172, "add-zones-with-mupdate-override"),

nexus/db-queries/src/db/datastore/inventory.rs

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ use nexus_db_model::InvCollectionError;
3939
use nexus_db_model::InvConfigReconcilerStatus;
4040
use nexus_db_model::InvConfigReconcilerStatusKind;
4141
use nexus_db_model::InvDataset;
42+
use nexus_db_model::InvHostPhase1ActiveSlot;
4243
use nexus_db_model::InvHostPhase1FlashHash;
4344
use nexus_db_model::InvInternalDns;
4445
use nexus_db_model::InvLastReconciliationDatasetResult;
@@ -690,6 +691,64 @@ impl DataStore {
690691
}
691692
}
692693

694+
// Insert rows for the host phase 1 active slots that we found.
695+
// Like service processors, we do this using INSERT INTO ... SELECT.
696+
{
697+
use nexus_db_schema::schema::hw_baseboard_id::dsl as baseboard_dsl;
698+
use nexus_db_schema::schema::inv_host_phase_1_active_slot::dsl as phase1_dsl;
699+
700+
for (baseboard_id, phase1) in
701+
&collection.host_phase_1_active_slots
702+
{
703+
let selection = nexus_db_schema::schema::hw_baseboard_id::table
704+
.select((
705+
db_collection_id
706+
.into_sql::<diesel::sql_types::Uuid>(),
707+
baseboard_dsl::id,
708+
phase1.time_collected
709+
.into_sql::<diesel::sql_types::Timestamptz>(),
710+
phase1.source
711+
.clone()
712+
.into_sql::<diesel::sql_types::Text>(),
713+
HwM2Slot::from(phase1.slot)
714+
.into_sql::<HwM2SlotEnum>(),
715+
))
716+
.filter(
717+
baseboard_dsl::part_number
718+
.eq(baseboard_id.part_number.clone()),
719+
)
720+
.filter(
721+
baseboard_dsl::serial_number
722+
.eq(baseboard_id.serial_number.clone()),
723+
);
724+
725+
let _ = diesel::insert_into(
726+
nexus_db_schema::schema::inv_host_phase_1_active_slot::table,
727+
)
728+
.values(selection)
729+
.into_columns((
730+
phase1_dsl::inv_collection_id,
731+
phase1_dsl::hw_baseboard_id,
732+
phase1_dsl::time_collected,
733+
phase1_dsl::source,
734+
phase1_dsl::slot,
735+
))
736+
.execute_async(&conn)
737+
.await?;
738+
739+
// See the comment in the above block (where we use
740+
// `inv_service_processor::all_columns()`). The same
741+
// applies here.
742+
let (
743+
_inv_collection_id,
744+
_hw_baseboard_id,
745+
_time_collected,
746+
_source,
747+
_slot,
748+
) = phase1_dsl::inv_host_phase_1_active_slot::all_columns();
749+
}
750+
}
751+
693752
// Insert rows for the host phase 1 flash hashes that we found.
694753
// Like service processors, we do this using INSERT INTO ... SELECT.
695754
{
@@ -1798,6 +1857,7 @@ impl DataStore {
17981857
struct NumRowsDeleted {
17991858
ncollections: usize,
18001859
nsps: usize,
1860+
nhost_phase1_active_slots: usize,
18011861
nhost_phase1_flash_hashes: usize,
18021862
nrots: usize,
18031863
ncabooses: usize,
@@ -1831,6 +1891,7 @@ impl DataStore {
18311891
let NumRowsDeleted {
18321892
ncollections,
18331893
nsps,
1894+
nhost_phase1_active_slots,
18341895
nhost_phase1_flash_hashes,
18351896
nrots,
18361897
ncabooses,
@@ -1883,6 +1944,16 @@ impl DataStore {
18831944
.await?
18841945
};
18851946

1947+
// Remove rows for host phase 1 active slots.
1948+
let nhost_phase1_active_slots = {
1949+
use nexus_db_schema::schema::inv_host_phase_1_active_slot::dsl;
1950+
diesel::delete(dsl::inv_host_phase_1_active_slot.filter(
1951+
dsl::inv_collection_id.eq(db_collection_id),
1952+
))
1953+
.execute_async(&conn)
1954+
.await?
1955+
};
1956+
18861957
// Remove rows for host phase 1 flash hashes.
18871958
let nhost_phase1_flash_hashes = {
18881959
use nexus_db_schema::schema::inv_host_phase_1_flash_hash::dsl;
@@ -2152,6 +2223,7 @@ impl DataStore {
21522223
Ok(NumRowsDeleted {
21532224
ncollections,
21542225
nsps,
2226+
nhost_phase1_active_slots,
21552227
nhost_phase1_flash_hashes,
21562228
nrots,
21572229
ncabooses,
@@ -2191,6 +2263,7 @@ impl DataStore {
21912263
"collection_id" => collection_id.to_string(),
21922264
"ncollections" => ncollections,
21932265
"nsps" => nsps,
2266+
"nhost_phase1_active_slots" => nhost_phase1_active_slots,
21942267
"nhost_phase1_flash_hashes" => nhost_phase1_flash_hashes,
21952268
"nrots" => nrots,
21962269
"ncabooses" => ncabooses,
@@ -2698,6 +2771,45 @@ impl DataStore {
26982771
})
26992772
.collect::<Result<BTreeMap<_, _>, _>>()?;
27002773

2774+
// Fetch the host phase 1 active slots found.
2775+
let host_phase_1_active_slots = {
2776+
use nexus_db_schema::schema::inv_host_phase_1_active_slot::dsl;
2777+
2778+
let mut slots = BTreeMap::new();
2779+
2780+
let mut paginator = Paginator::new(
2781+
batch_size,
2782+
dropshot::PaginationOrder::Ascending,
2783+
);
2784+
while let Some(p) = paginator.next() {
2785+
let batch = paginated(
2786+
dsl::inv_host_phase_1_active_slot,
2787+
dsl::hw_baseboard_id,
2788+
&p.current_pagparams(),
2789+
)
2790+
.filter(dsl::inv_collection_id.eq(db_id))
2791+
.select(InvHostPhase1ActiveSlot::as_select())
2792+
.load_async(&*conn)
2793+
.await
2794+
.map_err(|e| {
2795+
public_error_from_diesel(e, ErrorHandler::Server)
2796+
})?;
2797+
paginator = p.found_batch(&batch, &|row| row.hw_baseboard_id);
2798+
for row in batch {
2799+
let bb = baseboards_by_id
2800+
.get(&row.hw_baseboard_id)
2801+
.ok_or_else(|| {
2802+
Error::internal_error(
2803+
"missing baseboard that we should have fetched",
2804+
)
2805+
})?;
2806+
slots.insert(Arc::clone(bb), row.into());
2807+
}
2808+
}
2809+
2810+
slots
2811+
};
2812+
27012813
// Fetch records of host phase 1 flash hashes found.
27022814
let inv_host_phase_1_flash_hash_rows = {
27032815
use nexus_db_schema::schema::inv_host_phase_1_flash_hash::dsl;
@@ -3941,6 +4053,7 @@ impl DataStore {
39414053
cabooses: cabooses_by_id.values().cloned().collect(),
39424054
rot_pages: rot_pages_by_id.values().cloned().collect(),
39434055
sps,
4056+
host_phase_1_active_slots,
39444057
host_phase_1_flash_hashes,
39454058
rots,
39464059
cabooses_found,

nexus/db-schema/src/schema.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1559,6 +1559,17 @@ table! {
15591559
}
15601560
}
15611561

1562+
// Active host phase 1 slot reported per baseboard, per inventory collection.
//
// The primary key is (inv_collection_id, hw_baseboard_id), so there is at
// most one active-slot row for a given baseboard within a collection.
table! {
    inv_host_phase_1_active_slot (inv_collection_id, hw_baseboard_id) {
        // Key columns.
        inv_collection_id -> Uuid,
        hw_baseboard_id -> Uuid,
        // When and from where the value was collected.
        time_collected -> Timestamptz,
        source -> Text,

        // The slot that was reported as active.
        slot -> crate::enums::HwM2SlotEnum,
    }
}
1572+
15621573
table! {
15631574
inv_host_phase_1_flash_hash (inv_collection_id, hw_baseboard_id, slot) {
15641575
inv_collection_id -> Uuid,

nexus/inventory/src/builder.rs

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ use nexus_types::inventory::CabooseFound;
2525
use nexus_types::inventory::CabooseWhich;
2626
use nexus_types::inventory::CockroachStatus;
2727
use nexus_types::inventory::Collection;
28+
use nexus_types::inventory::HostPhase1ActiveSlot;
2829
use nexus_types::inventory::HostPhase1FlashHash;
2930
use nexus_types::inventory::InternalDnsGenerationStatus;
3031
use nexus_types::inventory::RotPage;
@@ -116,6 +117,7 @@ pub struct CollectionBuilder {
116117
cabooses: BTreeSet<Arc<Caboose>>,
117118
rot_pages: BTreeSet<Arc<RotPage>>,
118119
sps: BTreeMap<Arc<BaseboardId>, ServiceProcessor>,
120+
host_phase_1_active_slots: BTreeMap<Arc<BaseboardId>, HostPhase1ActiveSlot>,
119121
host_phase_1_flash_hashes:
120122
BTreeMap<M2Slot, BTreeMap<Arc<BaseboardId>, HostPhase1FlashHash>>,
121123
rots: BTreeMap<Arc<BaseboardId>, RotState>,
@@ -153,6 +155,7 @@ impl CollectionBuilder {
153155
cabooses: BTreeSet::new(),
154156
rot_pages: BTreeSet::new(),
155157
sps: BTreeMap::new(),
158+
host_phase_1_active_slots: BTreeMap::new(),
156159
host_phase_1_flash_hashes: BTreeMap::new(),
157160
rots: BTreeMap::new(),
158161
cabooses_found: BTreeMap::new(),
@@ -178,6 +181,7 @@ impl CollectionBuilder {
178181
cabooses: self.cabooses,
179182
rot_pages: self.rot_pages,
180183
sps: self.sps,
184+
host_phase_1_active_slots: self.host_phase_1_active_slots,
181185
host_phase_1_flash_hashes: self.host_phase_1_flash_hashes,
182186
rots: self.rots,
183187
cabooses_found: self.cabooses_found,
@@ -318,6 +322,63 @@ impl CollectionBuilder {
318322
Some(baseboard)
319323
}
320324

325+
/// Returns true if we already found the active host phase 1 flash slot for
326+
/// baseboard `baseboard`
327+
///
328+
/// This is used to avoid requesting it multiple times (from multiple MGS
329+
/// instances).
330+
pub fn found_host_phase_1_active_slot_already(
331+
&self,
332+
baseboard: &BaseboardId,
333+
) -> bool {
334+
self.host_phase_1_active_slots.contains_key(baseboard)
335+
}
336+
337+
/// Record the given host phase 1 active slot found for the given baseboard
338+
///
339+
/// The baseboard must previously have been reported using
340+
/// `found_sp_state()`.
341+
///
342+
/// `source` is an arbitrary string for debugging that describes the MGS
343+
/// that reported this data (generally a URL string).
344+
pub fn found_host_phase_1_active_slot(
345+
&mut self,
346+
baseboard: &BaseboardId,
347+
source: &str,
348+
slot: M2Slot,
349+
) -> Result<(), CollectorBug> {
350+
let (baseboard, _) =
351+
self.sps.get_key_value(baseboard).ok_or_else(|| {
352+
anyhow!(
353+
"reporting host phase 1 active slot for unknown baseboard: \
354+
{baseboard:?} ({slot:?})",
355+
)
356+
})?;
357+
if let Some(previous) = self.host_phase_1_active_slots.insert(
358+
baseboard.clone(),
359+
HostPhase1ActiveSlot {
360+
time_collected: now_db_precision(),
361+
source: source.to_owned(),
362+
slot,
363+
},
364+
) {
365+
let error = if previous.slot == slot {
366+
anyhow!("reported multiple times (same value)")
367+
} else {
368+
anyhow!(
369+
"reported host phase 1 flash hash \
370+
(previously {}, now {slot})",
371+
previous.slot,
372+
)
373+
};
374+
Err(CollectorBug::from(
375+
error.context(format!("baseboard {baseboard:?}")),
376+
))
377+
} else {
378+
Ok(())
379+
}
380+
}
381+
321382
/// Returns true if we already found the host phase 1 flash hash for `slot`
322383
/// for baseboard `baseboard`
323384
///

0 commit comments

Comments
 (0)