Skip to content
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
d8cf4d5
first draft, incomplete and broken
jgallagher Aug 7, 2025
07dabfa
check active phase 1 against boot disk
jgallagher Aug 8, 2025
f5aa905
update sled configs for required phase 2 when planning OS updates
jgallagher Aug 8, 2025
99fba14
get active phase 1 slot from inventory
jgallagher Aug 8, 2025
edbff3f
update status errors specific to host OS
jgallagher Aug 8, 2025
a7a97f0
update target release test for host OS updates (and fix bugs it found)
jgallagher Aug 11, 2025
00dbedb
update mupdate-update-flow test
jgallagher Aug 11, 2025
25b6564
cargo fmt
jgallagher Aug 11, 2025
a9f800b
test fixups
jgallagher Aug 12, 2025
f523291
test fixups
jgallagher Aug 14, 2025
e2ee4c8
add host OS to test_whole_system_* planner tests
jgallagher Aug 14, 2025
4f9db9c
return a struct instead of a tuple
jgallagher Aug 15, 2025
94b32d6
whole system tests: also verify phase 2 changes
jgallagher Aug 15, 2025
c010d14
add test_basic_host_os()
jgallagher Aug 15, 2025
8deb34f
add test_sled_move()
jgallagher Aug 15, 2025
86e2fc6
expectorate (logs only)
jgallagher Aug 18, 2025
505da90
use get_sled_mut() helper throughout
jgallagher Aug 18, 2025
4aa14bb
remove sled_ prefix on method that can change more than one sled
jgallagher Aug 18, 2025
a51d00e
comment and log fixups from PR review
jgallagher Aug 18, 2025
388e4dc
fix commented-out line in target-release test
jgallagher Aug 18, 2025
69bd2d6
cargo fmt
jgallagher Aug 18, 2025
4f49c49
add link to issue for TODO
jgallagher Aug 19, 2025
d8eedd4
Merge branch 'main' into john/host-os-updates-planner
jgallagher Aug 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 130 additions & 0 deletions dev-tools/reconfigurator-cli/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,12 @@ fn process_command(
}
Commands::SledUpdateRot(args) => cmd_sled_update_rot(sim, args),
Commands::SledUpdateSp(args) => cmd_sled_update_sp(sim, args),
Commands::SledUpdateHostPhase1(args) => {
cmd_sled_update_host_phase_1(sim, args)
}
Commands::SledUpdateHostPhase2(args) => {
cmd_sled_update_host_phase_2(sim, args)
}
Commands::SledUpdateRotBootloader(args) => {
cmd_sled_update_rot_bootlaoder(sim, args)
}
Expand Down Expand Up @@ -300,6 +306,10 @@ enum Commands {
SledUpdateRotBootloader(SledUpdateRotBootloaderArgs),
/// simulate updating the sled's SP versions
SledUpdateSp(SledUpdateSpArgs),
/// simulate updating the sled's host OS phase 1 artifacts
SledUpdateHostPhase1(SledUpdateHostPhase1Args),
/// simulate updating the sled's host OS phase 2 artifacts
SledUpdateHostPhase2(SledUpdateHostPhase2Args),

/// list silos
SiloList,
Expand Down Expand Up @@ -545,6 +555,42 @@ struct SledUpdateRotArgs {
slot_b: Option<ExpectedVersion>,
}

// Arguments for the `sled-update-host-phase1` subcommand
// (`Commands::SledUpdateHostPhase1`), consumed by
// `cmd_sled_update_host_phase_1`.
//
// `active`, `slot_a` and `slot_b` are all optional here, but the handler
// rejects an invocation in which none of them is supplied.
//
// NOTE(review): the notes added in this struct are plain `//` comments on
// purpose; the `///` comments presumably feed clap's generated help text, so
// confirm before converting any of these to doc comments.
#[derive(Debug, Args)]
struct SledUpdateHostPhase1Args {
/// id of the sled
// No `long` attribute on this field; the test scripts pass it as the first
// bare argument (e.g. `serial0` or a sled UUID).
sled_id: SledOpt,

/// sets which phase 1 slot is active
// Parsed by `parse_m2_slot`; used in the test scripts as `--active B`.
#[clap(long, value_parser = parse_m2_slot)]
active: Option<M2Slot>,

/// sets the artifact hash reported for host OS phase 1 slot A
// `None` is forwarded unchanged to `sled_update_host_phase_1_artifacts`.
#[clap(long)]
slot_a: Option<ArtifactHash>,

/// sets the artifact hash reported for host OS phase 1 slot B
// `None` is forwarded unchanged to `sled_update_host_phase_1_artifacts`.
#[clap(long)]
slot_b: Option<ArtifactHash>,
}

// Arguments for the `sled-update-host-phase2` subcommand
// (`Commands::SledUpdateHostPhase2`), consumed by
// `cmd_sled_update_host_phase_2`.
//
// `boot_disk`, `slot_a` and `slot_b` are all optional here, but the handler
// rejects an invocation in which none of them is supplied.
//
// NOTE(review): the notes added in this struct are plain `//` comments on
// purpose; the `///` comments presumably feed clap's generated help text, so
// confirm before converting any of these to doc comments.
#[derive(Debug, Args)]
struct SledUpdateHostPhase2Args {
/// id of the sled
// No `long` attribute on this field; the test scripts pass it as the first
// bare argument (e.g. `serial0` or a sled UUID).
sled_id: SledOpt,

/// sets which phase 2 slot is the boot disk
// Parsed by `parse_m2_slot`; used in the test scripts as `--boot-disk B`.
#[clap(long, value_parser = parse_m2_slot)]
boot_disk: Option<M2Slot>,

/// sets the artifact hash reported for host OS phase 2 slot A
// `None` is forwarded unchanged to `sled_update_host_phase_2_artifacts`.
#[clap(long)]
slot_a: Option<ArtifactHash>,

/// sets the artifact hash reported for host OS phase 2 slot B
// `None` is forwarded unchanged to `sled_update_host_phase_2_artifacts`.
#[clap(long)]
slot_b: Option<ArtifactHash>,
}

#[derive(Debug, Args)]
struct SledSetMupdateOverrideArgs {
#[clap(flatten)]
Expand Down Expand Up @@ -1689,6 +1735,90 @@ fn cmd_sled_update_rot(
)))
}

/// Handles the `sled-update-host-phase1` command.
///
/// Any combination of the active phase 1 slot and the artifact hashes reported
/// for slots A and B may be supplied. If none is supplied this returns an
/// error before the simulator state is read. Otherwise the requested values
/// are forwarded to `sled_update_host_phase_1_artifacts`, the edited state is
/// handed to `sim.commit_and_bump`, and a one-line summary of the requested
/// changes is returned.
fn cmd_sled_update_host_phase_1(
    sim: &mut ReconfiguratorSim,
    args: SledUpdateHostPhase1Args,
) -> anyhow::Result<Option<String>> {
    let SledUpdateHostPhase1Args { sled_id, active, slot_a, slot_b } = args;

    // One label per requested change, always in the order: active, A, B.
    let labels: Vec<String> = active
        .map(|active| format!("active -> {active:?}"))
        .into_iter()
        .chain(slot_a.map(|slot_a| format!("A -> {slot_a}")))
        .chain(slot_b.map(|slot_b| format!("B -> {slot_b}")))
        .collect();
    if labels.is_empty() {
        bail!("sled-update-host-phase1 called with no changes");
    }
    // The same summary text is used for the commit message and the reply.
    let summary = labels.join(", ");

    let mut state = sim.current_state().to_mut();
    let system = state.system_mut();
    let sled_id = sled_id.to_sled_id(system.description())?;
    system
        .description_mut()
        .sled_update_host_phase_1_artifacts(sled_id, active, slot_a, slot_b)?;

    let commit_message = format!(
        "reconfigurator-cli sled-update-host-phase1: {sled_id}: {}",
        summary,
    );
    sim.commit_and_bump(commit_message, state);

    let reply =
        format!("set sled {} host phase 1 details: {}", sled_id, summary);
    Ok(Some(reply))
}

/// Handles the `sled-update-host-phase2` command.
///
/// Any combination of the boot disk slot and the artifact hashes reported for
/// phase 2 slots A and B may be supplied. If none is supplied this returns an
/// error before the simulator state is read. Otherwise the requested values
/// are forwarded to `sled_update_host_phase_2_artifacts`, the edited state is
/// handed to `sim.commit_and_bump`, and a one-line summary of the requested
/// changes is returned.
fn cmd_sled_update_host_phase_2(
    sim: &mut ReconfiguratorSim,
    args: SledUpdateHostPhase2Args,
) -> anyhow::Result<Option<String>> {
    let SledUpdateHostPhase2Args { sled_id, boot_disk, slot_a, slot_b } = args;

    // One label per requested change, always in the order: boot_disk, A, B.
    let labels: Vec<String> = boot_disk
        .map(|boot_disk| format!("boot_disk -> {boot_disk:?}"))
        .into_iter()
        .chain(slot_a.map(|slot_a| format!("A -> {slot_a}")))
        .chain(slot_b.map(|slot_b| format!("B -> {slot_b}")))
        .collect();
    if labels.is_empty() {
        bail!("sled-update-host-phase2 called with no changes");
    }
    // The same summary text is used for the commit message and the reply.
    let summary = labels.join(", ");

    let mut state = sim.current_state().to_mut();
    let system = state.system_mut();
    let sled_id = sled_id.to_sled_id(system.description())?;
    system
        .description_mut()
        .sled_update_host_phase_2_artifacts(sled_id, boot_disk, slot_a, slot_b)?;

    let commit_message = format!(
        "reconfigurator-cli sled-update-host-phase2: {sled_id}: {}",
        summary,
    );
    sim.commit_and_bump(commit_message, state);

    let reply =
        format!("set sled {} host phase 2 details: {}", sled_id, summary);
    Ok(Some(reply))
}

fn cmd_inventory_list(
sim: &mut ReconfiguratorSim,
) -> anyhow::Result<Option<String>> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,33 @@ blueprint-diff latest
# planner starts working.
sled-set serial1 mupdate-override unset
inventory-generate

# This will attempt to update the first sled's host OS. Walk through that update
# and the host OS of the two other sleds.
blueprint-plan latest latest
blueprint-show latest
blueprint-diff latest
sled-update-host-phase2 serial0 --boot-disk B --slot-b 0c0362b640cece5b9a5e86d8fa683bd2eb84c3e7f90731f597197d604ffa76e3
sled-update-host-phase1 serial0 --active B --slot-b 44714733af7600b30a50bfd2cbaf707ff7ee9724073ff70a6732e55a88864cf6
inventory-generate

# Second sled host OS
blueprint-plan latest latest
blueprint-diff latest
sled-update-host-phase2 serial1 --boot-disk B --slot-b 0c0362b640cece5b9a5e86d8fa683bd2eb84c3e7f90731f597197d604ffa76e3
sled-update-host-phase1 serial1 --active B --slot-b 44714733af7600b30a50bfd2cbaf707ff7ee9724073ff70a6732e55a88864cf6
inventory-generate

# Third sled host OS
blueprint-plan latest latest
blueprint-diff latest
sled-update-host-phase2 serial2 --boot-disk B --slot-b 0c0362b640cece5b9a5e86d8fa683bd2eb84c3e7f90731f597197d604ffa76e3
sled-update-host-phase1 serial2 --active B --slot-b 44714733af7600b30a50bfd2cbaf707ff7ee9724073ff70a6732e55a88864cf6
inventory-generate

# All host OS updates complete
blueprint-plan latest latest
blueprint-diff latest

# Test that the add-zones-with-mupdate-override chicken switch works as
# expected. We do this by:
Expand Down
84 changes: 74 additions & 10 deletions dev-tools/reconfigurator-cli/tests/input/cmds-target-release.txt
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for sharing this! It shouldn't clash too terribly with what I had before d4f31f9. I'll keep a lookout.

Original file line number Diff line number Diff line change
Expand Up @@ -42,26 +42,60 @@ blueprint-diff latest
# Now, update the simulated RoT bootloader to reflect that the update completed.
# Collect inventory from it and use that collection for another planning step.
# This should report that the update completed, remove that update, and add one
# for an SP on the same sled.
# for an RoT on the same sled.
sled-update-rot-bootloader 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 --stage0 1.0.0
inventory-generate
blueprint-plan latest latest
blueprint-diff latest

# After the RoT bootloader update has completed, we update the simulated RoT to
# reflect that update has completed as well.
# Like before, collect inventory from it and use that collection for the next
# step.
# This should report that the update completed, remove that update, and add one
# for another sled.
# reflect that update has completed as well. Like before, collect inventory from
# it and use that collection for the next step. This should report that the
# update completed, remove that update, and add one for another sled.
sled-update-rot 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 --slot-a 1.0.0
inventory-generate
blueprint-plan latest latest
blueprint-diff latest

# We repeat the same procedure with the SP
sled-update-sp 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 --active 1.0.0

# Collect inventory from it and use that collection for another planning step.
# This should report that the update completed, remove that update, and add a
# host OS update for this same sled.
inventory-generate
blueprint-plan latest latest
blueprint-diff latest

# If we generate another plan, there should be no change.
blueprint-plan latest latest
blueprint-diff latest

# Update only the simulated host phase 2; this is a prerequisite for updating
# the phase 1, and must be done first.
sled-update-host-phase2 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 --slot-b f3dd0c7a1bd4500ea0d8bcf67581f576d47752b2f1998a4cb0f0c3155c483008
inventory-generate

# Planning after only phase 2 has changed should make no changes. We're still
# waiting on phase 1 to change.
blueprint-plan latest latest
blueprint-diff latest

# Now update the simulated SP to reflect that the phase 1 update is done.
sled-update-host-phase1 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 --active B --slot-b 2053f8594971bbf0a7326c833e2ffc12b065b9d823b9c0b967d275fa595e4e89
inventory-generate

# Planning _still_ shouldn't make any new changes; the OS update as a whole
# isn't done until sled-agent reports it has booted from the new image.
blueprint-plan latest latest
blueprint-diff latest

# Update the sled's boot disk; this finishes the host OS update.
sled-update-host-phase2 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 --boot-disk B
inventory-generate

# Planning should now remove the host OS update and plan the next RoT bootloader
# update.
blueprint-plan latest latest
blueprint-diff latest

Expand Down Expand Up @@ -107,9 +141,30 @@ blueprint-plan latest latest
blueprint-diff latest

# Let's simulate the successful SP update as well.
# A few more planning steps should try to update the last sled.
sled-update-sp 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c --active 1.0.0
inventory-generate

# Planning should remove this update and add an OS update for this sled.
blueprint-plan latest latest
blueprint-diff latest

# Try an impossible host OS update replacement: write an unknown artifact to the
# sled's phase 1. The planner should realize the update is impossible and
# replace it. (As with the impossible SP update test above, we have to bump the
# "ignore impossible MGS updates" timestamp.)
set ignore-impossible-mgs-updates-since now
sled-update-host-phase1 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c --slot-b ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
inventory-generate
blueprint-plan latest latest
blueprint-diff latest

# Now simulate the update completing successfully.
sled-update-host-phase2 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c --boot-disk B --slot-b f3dd0c7a1bd4500ea0d8bcf67581f576d47752b2f1998a4cb0f0c3155c483008
sled-update-host-phase1 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c --active B --slot-b 2053f8594971bbf0a7326c833e2ffc12b065b9d823b9c0b967d275fa595e4e89
inventory-generate

# Another planning step should try to update the last sled, starting with the
# RoT bootloader.
blueprint-plan latest latest
blueprint-diff latest

Expand All @@ -127,13 +182,22 @@ inventory-generate
blueprint-plan latest latest
blueprint-diff latest

# Finish updating the last sled and do one more planning run.
# This should update one control plane zone.
# Finish updating the last sled's SP.
# There should be a pending host phase 1 update.
sled-update-sp d81c6a84-79b8-4958-ae41-ea46c9b19763 --active 1.0.0
inventory-generate
blueprint-plan latest latest
blueprint-diff latest

# Finish updating the last sled's host OS.
sled-update-host-phase2 d81c6a84-79b8-4958-ae41-ea46c9b19763 --boot-disk B --slot-b f3dd0c7a1bd4500ea0d8bcf67581f576d47752b2f1998a4cb0f0c3155c483008
sled-update-host-phase1 d81c6a84-79b8-4958-ae41-ea46c9b19763 --active B --slot-b 2053f8594971bbf0a7326c833e2ffc12b065b9d823b9c0b967d275fa595e4e89
inventory-generate

# Do one more planning run. This should update one control plane zone.
blueprint-plan latest latest
blueprint-diff latest

# We should continue walking through the update. We need to build out a
# reconfigurator-cli subcommand to simulate updated zone image sources (just
# like we have sled-update-sp for simulated SP updates).
# like we have sled-update-sp for simulated SP updates).
Loading
Loading