Skip to content

Commit f2a3273

Browse files
authored
[reconfigurator] Refactor MGS driven updates (#9118)
Just a few style changes as suggested in #9001 Closes: #9068
1 parent 6731ce2 commit f2a3273

File tree

9 files changed

+1109
-246
lines changed

9 files changed

+1109
-246
lines changed

nexus/reconfigurator/planning/src/mgs_updates/host_phase_1.rs

Lines changed: 41 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,13 @@
66
77
use super::MgsUpdateStatus;
88
use super::MgsUpdateStatusError;
9+
use crate::mgs_updates::MgsUpdateOutcome;
910
use nexus_types::deployment::BlueprintArtifactVersion;
1011
use nexus_types::deployment::BlueprintHostPhase2DesiredContents;
1112
use nexus_types::deployment::PendingMgsUpdate;
1213
use nexus_types::deployment::PendingMgsUpdateDetails;
1314
use nexus_types::deployment::PendingMgsUpdateHostPhase1Details;
14-
use nexus_types::deployment::planning_report::FailedMgsUpdateReason;
15+
use nexus_types::deployment::planning_report::FailedHostOsUpdateReason;
1516
use nexus_types::inventory::BaseboardId;
1617
use nexus_types::inventory::Collection;
1718
use nexus_types::inventory::SpType;
@@ -22,7 +23,6 @@ use omicron_uuid_kinds::SledUuid;
2223
use slog::Logger;
2324
use slog::debug;
2425
use slog::error;
25-
use slog::warn;
2626
use std::collections::BTreeMap;
2727
use std::sync::Arc;
2828
use tufaceous_artifact::ArtifactHash;
@@ -273,39 +273,36 @@ pub(super) fn try_make_update(
273273
baseboard_id: &Arc<BaseboardId>,
274274
inventory: &Collection,
275275
current_artifacts: &TufRepoDescription,
276-
// TODO-K: Instead of this convoluted return type use an enum as suggested in
277-
// https://github.com/oxidecomputer/omicron/pull/9001#discussion_r2372837627
278-
) -> Result<
279-
Option<(PendingMgsUpdate, PendingHostPhase2Changes)>,
280-
FailedMgsUpdateReason,
281-
> {
276+
) -> Result<MgsUpdateOutcome, FailedHostOsUpdateReason> {
282277
let Some(sp_info) = inventory.sps.get(baseboard_id) else {
283-
return Err(FailedMgsUpdateReason::SpNotInInventory);
278+
return Err(FailedHostOsUpdateReason::SpNotInInventory);
284279
};
285280

286281
// Only configure host OS updates for sleds.
287282
//
288-
// We don't bother logging a return value of `None` for non-sleds, because
289-
// we will never attempt to configure an update for them (nor should we).
290-
// For the same reason, we do not return an error.
283+
// We don't bother logging a return value of `NoUpdateNeeded` for non-sleds,
284+
// because we will never attempt to configure an update for them (nor should
285+
// we). For the same reason, we do not return an error.
291286
match sp_info.sp_type {
292287
SpType::Sled => (),
293-
SpType::Power | SpType::Switch => return Ok(None),
288+
SpType::Power | SpType::Switch => {
289+
return Ok(MgsUpdateOutcome::NoUpdateNeeded);
290+
}
294291
}
295292

296293
let Some(sled_agent) = inventory.sled_agents.iter().find(|sled_agent| {
297294
sled_agent.baseboard_id.as_ref() == Some(baseboard_id)
298295
}) else {
299-
return Err(FailedMgsUpdateReason::SledAgentInfoNotInInventory);
296+
return Err(FailedHostOsUpdateReason::SledAgentInfoNotInInventory);
300297
};
301298
let Some(last_reconciliation) = sled_agent.last_reconciliation.as_ref()
302299
else {
303-
return Err(FailedMgsUpdateReason::LastReconciliationNotInInventory);
300+
return Err(FailedHostOsUpdateReason::LastReconciliationNotInInventory);
304301
};
305302
let boot_disk = match &last_reconciliation.boot_partitions.boot_disk {
306303
Ok(boot_disk) => *boot_disk,
307304
Err(err) => {
308-
return Err(FailedMgsUpdateReason::UnableToDetermineBootDisk(
305+
return Err(FailedHostOsUpdateReason::UnableToDetermineBootDisk(
309306
err.to_string(),
310307
));
311308
}
@@ -315,17 +312,19 @@ pub(super) fn try_make_update(
315312
Ok(details) => details.artifact_hash,
316313
Err(err) => {
317314
return Err(
318-
FailedMgsUpdateReason::UnableToRetrieveBootDiskPhase2Image(
319-
err.to_string(),
320-
),
321-
);
315+
FailedHostOsUpdateReason::UnableToRetrieveBootDiskPhase2Image(
316+
err.to_string(),
317+
),
318+
);
322319
}
323320
};
324321

325322
let Some(active_phase_1_slot) =
326323
inventory.host_phase_1_active_slot_for(baseboard_id).map(|s| s.slot)
327324
else {
328-
return Err(FailedMgsUpdateReason::ActiveHostPhase1SlotNotInInventory);
325+
return Err(
326+
FailedHostOsUpdateReason::ActiveHostPhase1SlotNotInInventory,
327+
);
329328
};
330329

331330
// TODO-correctness What should we do if the active phase 1 slot doesn't
@@ -341,7 +340,7 @@ pub(super) fn try_make_update(
341340
// this current implementation. As far as we know they shouldn't happen.
342341
if active_phase_1_slot != boot_disk {
343342
return Err(
344-
FailedMgsUpdateReason::ActiveHostPhase1SlotBootDiskMismatch(
343+
FailedHostOsUpdateReason::ActiveHostPhase1SlotBootDiskMismatch(
345344
active_phase_1_slot,
346345
),
347346
);
@@ -351,9 +350,11 @@ pub(super) fn try_make_update(
351350
.host_phase_1_flash_hash_for(active_phase_1_slot, baseboard_id)
352351
.map(|h| h.hash)
353352
else {
354-
return Err(FailedMgsUpdateReason::ActiveHostPhase1HashNotInInventory(
355-
active_phase_1_slot,
356-
));
353+
return Err(
354+
FailedHostOsUpdateReason::ActiveHostPhase1HashNotInInventory(
355+
active_phase_1_slot,
356+
),
357+
);
357358
};
358359

359360
let Some(inactive_phase_1_hash) = inventory
@@ -363,15 +364,8 @@ pub(super) fn try_make_update(
363364
)
364365
.map(|h| h.hash)
365366
else {
366-
warn!(
367-
log,
368-
"cannot configure host OS update for board \
369-
(missing inactive phase 1 hash from inventory)";
370-
baseboard_id,
371-
"slot" => ?active_phase_1_slot.toggled(),
372-
);
373367
return Err(
374-
FailedMgsUpdateReason::InactiveHostPhase1HashNotInInventory(
368+
FailedHostOsUpdateReason::InactiveHostPhase1HashNotInInventory(
375369
active_phase_1_slot.toggled(),
376370
),
377371
);
@@ -392,19 +386,26 @@ pub(super) fn try_make_update(
392386
match (phase_1_artifacts.as_slice(), phase_2_artifacts.as_slice()) {
393387
// Common case: Exactly 1 of each artifact.
394388
([p1], [p2]) => (p1, p2),
395-
// "TUF is broken" cases: missing one or the other.
389+
// "TUF is broken" cases: missing both, one or the other.
390+
([], []) => {
391+
return Err(FailedHostOsUpdateReason::NoMatchingArtifactsFound);
392+
}
396393
([], _) => {
397-
return Err(FailedMgsUpdateReason::NoMatchingArtifactFound);
394+
return Err(
395+
FailedHostOsUpdateReason::NoMatchingPhase1ArtifactFound,
396+
);
398397
}
399398
(_, []) => {
400-
return Err(FailedMgsUpdateReason::NoMatchingArtifactFound);
399+
return Err(
400+
FailedHostOsUpdateReason::NoMatchingPhase2ArtifactFound,
401+
);
401402
}
402403
// "TUF is broken" cases: have multiple of one or the other. This
403404
// should be impossible unless we shipped a TUF repo with multiple
404405
// host OS images. We can't proceed, because we don't know how to
405406
// pair up which phase 1 matches which phase 2.
406407
(_, _) => {
407-
return Err(FailedMgsUpdateReason::TooManyMatchingArtifacts);
408+
return Err(FailedHostOsUpdateReason::TooManyMatchingArtifacts);
408409
}
409410
};
410411

@@ -416,7 +417,7 @@ pub(super) fn try_make_update(
416417
// this sled will fail to boot if it were rebooted now.)
417418
if active_phase_2_hash == phase_2_artifact.hash {
418419
debug!(log, "no host OS update needed for board"; baseboard_id);
419-
return Ok(None);
420+
return Ok(MgsUpdateOutcome::NoUpdateNeeded);
420421
}
421422

422423
// Before we can proceed with the phase 1 update, we need sled-agent to
@@ -432,7 +433,7 @@ pub(super) fn try_make_update(
432433
phase_2_artifact,
433434
);
434435

435-
Ok(Some((
436+
Ok(MgsUpdateOutcome::Pending(
436437
PendingMgsUpdate {
437438
baseboard_id: baseboard_id.clone(),
438439
sp_type: sp_info.sp_type,
@@ -452,7 +453,7 @@ pub(super) fn try_make_update(
452453
artifact_version: phase_1_artifact.id.version.clone(),
453454
},
454455
pending_host_phase_2_changes,
455-
)))
456+
))
456457
}
457458

458459
#[cfg(test)]

0 commit comments

Comments
 (0)