Skip to content

Commit 0d5d6c2

Browse files
authored
[Reconfigurator] Host OS update planner support (#8832)
1 parent a9d9f1e commit 0d5d6c2

File tree

12 files changed

+4852
-835
lines changed

12 files changed

+4852
-835
lines changed

dev-tools/reconfigurator-cli/src/lib.rs

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,12 @@ fn process_command(
237237
}
238238
Commands::SledUpdateRot(args) => cmd_sled_update_rot(sim, args),
239239
Commands::SledUpdateSp(args) => cmd_sled_update_sp(sim, args),
240+
Commands::SledUpdateHostPhase1(args) => {
241+
cmd_sled_update_host_phase_1(sim, args)
242+
}
243+
Commands::SledUpdateHostPhase2(args) => {
244+
cmd_sled_update_host_phase_2(sim, args)
245+
}
240246
Commands::SledUpdateRotBootloader(args) => {
241247
cmd_sled_update_rot_bootlaoder(sim, args)
242248
}
@@ -300,6 +306,10 @@ enum Commands {
300306
SledUpdateRotBootloader(SledUpdateRotBootloaderArgs),
301307
/// simulate updating the sled's SP versions
302308
SledUpdateSp(SledUpdateSpArgs),
309+
/// simulate updating the sled's host OS phase 1 artifacts
310+
SledUpdateHostPhase1(SledUpdateHostPhase1Args),
311+
/// simulate updating the sled's host OS phase 2 artifacts
312+
SledUpdateHostPhase2(SledUpdateHostPhase2Args),
303313

304314
/// list silos
305315
SiloList,
@@ -545,6 +555,42 @@ struct SledUpdateRotArgs {
545555
slot_b: Option<ExpectedVersion>,
546556
}
547557

558+
// Arguments for the `sled-update-host-phase1` subcommand
// (`Commands::SledUpdateHostPhase1`).
//
// Every flag other than the sled id is optional, but
// `cmd_sled_update_host_phase_1` bails out if none of them is supplied.
//
// NOTE(review): the `///` comments on the fields below are presumably picked
// up by clap's derive as `--help` text, so treat them as user-facing strings
// rather than ordinary comments -- confirm before rewording.
#[derive(Debug, Args)]
559+
struct SledUpdateHostPhase1Args {
560+
/// id of the sled
561+
sled_id: SledOpt,
562+
563+
/// sets which phase 1 slot is active
564+
#[clap(long, value_parser = parse_m2_slot)]
565+
active: Option<M2Slot>,
566+
567+
/// sets the artifact hash reported for host OS phase 1 slot A
568+
#[clap(long)]
569+
slot_a: Option<ArtifactHash>,
570+
571+
/// sets the artifact hash reported for host OS phase 1 slot B
572+
#[clap(long)]
573+
slot_b: Option<ArtifactHash>,
574+
}
575+
576+
// Arguments for the `sled-update-host-phase2` subcommand
// (`Commands::SledUpdateHostPhase2`).
//
// Every flag other than the sled id is optional, but
// `cmd_sled_update_host_phase_2` bails out if none of them is supplied.
//
// NOTE(review): the `///` comments on the fields below are presumably picked
// up by clap's derive as `--help` text, so treat them as user-facing strings
// rather than ordinary comments -- confirm before rewording.
#[derive(Debug, Args)]
577+
struct SledUpdateHostPhase2Args {
578+
/// id of the sled
579+
sled_id: SledOpt,
580+
581+
/// sets which phase 2 slot is the boot disk
582+
#[clap(long, value_parser = parse_m2_slot)]
583+
boot_disk: Option<M2Slot>,
584+
585+
/// sets the artifact hash reported for host OS phase 2 slot A
586+
#[clap(long)]
587+
slot_a: Option<ArtifactHash>,
588+
589+
/// sets the artifact hash reported for host OS phase 2 slot B
590+
#[clap(long)]
591+
slot_b: Option<ArtifactHash>,
592+
}
593+
548594
#[derive(Debug, Args)]
549595
struct SledSetMupdateOverrideArgs {
550596
#[clap(flatten)]
@@ -1689,6 +1735,90 @@ fn cmd_sled_update_rot(
16891735
)))
16901736
}
16911737

1738+
fn cmd_sled_update_host_phase_1(
1739+
sim: &mut ReconfiguratorSim,
1740+
args: SledUpdateHostPhase1Args,
1741+
) -> anyhow::Result<Option<String>> {
1742+
let SledUpdateHostPhase1Args { sled_id, active, slot_a, slot_b } = args;
1743+
1744+
let mut labels = Vec::new();
1745+
if let Some(active) = active {
1746+
labels.push(format!("active -> {active:?}"));
1747+
}
1748+
if let Some(slot_a) = slot_a {
1749+
labels.push(format!("A -> {slot_a}"));
1750+
}
1751+
if let Some(slot_b) = slot_b {
1752+
labels.push(format!("B -> {slot_b}"));
1753+
}
1754+
if labels.is_empty() {
1755+
bail!("sled-update-host-phase1 called with no changes");
1756+
}
1757+
1758+
let mut state = sim.current_state().to_mut();
1759+
let system = state.system_mut();
1760+
let sled_id = sled_id.to_sled_id(system.description())?;
1761+
system
1762+
.description_mut()
1763+
.sled_update_host_phase_1_artifacts(sled_id, active, slot_a, slot_b)?;
1764+
1765+
sim.commit_and_bump(
1766+
format!(
1767+
"reconfigurator-cli sled-update-host-phase1: {sled_id}: {}",
1768+
labels.join(", "),
1769+
),
1770+
state,
1771+
);
1772+
1773+
Ok(Some(format!(
1774+
"set sled {} host phase 1 details: {}",
1775+
sled_id,
1776+
labels.join(", ")
1777+
)))
1778+
}
1779+
1780+
fn cmd_sled_update_host_phase_2(
1781+
sim: &mut ReconfiguratorSim,
1782+
args: SledUpdateHostPhase2Args,
1783+
) -> anyhow::Result<Option<String>> {
1784+
let SledUpdateHostPhase2Args { sled_id, boot_disk, slot_a, slot_b } = args;
1785+
1786+
let mut labels = Vec::new();
1787+
if let Some(boot_disk) = boot_disk {
1788+
labels.push(format!("boot_disk -> {boot_disk:?}"));
1789+
}
1790+
if let Some(slot_a) = slot_a {
1791+
labels.push(format!("A -> {slot_a}"));
1792+
}
1793+
if let Some(slot_b) = slot_b {
1794+
labels.push(format!("B -> {slot_b}"));
1795+
}
1796+
if labels.is_empty() {
1797+
bail!("sled-update-host-phase2 called with no changes");
1798+
}
1799+
1800+
let mut state = sim.current_state().to_mut();
1801+
let system = state.system_mut();
1802+
let sled_id = sled_id.to_sled_id(system.description())?;
1803+
system.description_mut().sled_update_host_phase_2_artifacts(
1804+
sled_id, boot_disk, slot_a, slot_b,
1805+
)?;
1806+
1807+
sim.commit_and_bump(
1808+
format!(
1809+
"reconfigurator-cli sled-update-host-phase2: {sled_id}: {}",
1810+
labels.join(", "),
1811+
),
1812+
state,
1813+
);
1814+
1815+
Ok(Some(format!(
1816+
"set sled {} host phase 2 details: {}",
1817+
sled_id,
1818+
labels.join(", ")
1819+
)))
1820+
}
1821+
16921822
fn cmd_inventory_list(
16931823
sim: &mut ReconfiguratorSim,
16941824
) -> anyhow::Result<Option<String>> {

dev-tools/reconfigurator-cli/tests/input/cmds-mupdate-update-flow.txt

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,9 +123,33 @@ blueprint-diff latest
123123
# planner starts working.
124124
sled-set serial1 mupdate-override unset
125125
inventory-generate
126+
127+
# This will attempt to update the first sled's host OS. Walk through that update
128+
# and the host OS of the two other sleds.
126129
blueprint-plan latest latest
127130
blueprint-show latest
128131
blueprint-diff latest
132+
sled-update-host-phase2 serial0 --boot-disk B --slot-b 0c0362b640cece5b9a5e86d8fa683bd2eb84c3e7f90731f597197d604ffa76e3
133+
sled-update-host-phase1 serial0 --active B --slot-b 44714733af7600b30a50bfd2cbaf707ff7ee9724073ff70a6732e55a88864cf6
134+
inventory-generate
135+
136+
# Second sled host OS
137+
blueprint-plan latest latest
138+
blueprint-diff latest
139+
sled-update-host-phase2 serial1 --boot-disk B --slot-b 0c0362b640cece5b9a5e86d8fa683bd2eb84c3e7f90731f597197d604ffa76e3
140+
sled-update-host-phase1 serial1 --active B --slot-b 44714733af7600b30a50bfd2cbaf707ff7ee9724073ff70a6732e55a88864cf6
141+
inventory-generate
142+
143+
# Third sled host OS
144+
blueprint-plan latest latest
145+
blueprint-diff latest
146+
sled-update-host-phase2 serial2 --boot-disk B --slot-b 0c0362b640cece5b9a5e86d8fa683bd2eb84c3e7f90731f597197d604ffa76e3
147+
sled-update-host-phase1 serial2 --active B --slot-b 44714733af7600b30a50bfd2cbaf707ff7ee9724073ff70a6732e55a88864cf6
148+
inventory-generate
149+
150+
# All host OS updates complete
151+
blueprint-plan latest latest
152+
blueprint-diff latest
129153

130154
# Test that the add-zones-with-mupdate-override chicken switch works as
131155
# expected. We do this by:

dev-tools/reconfigurator-cli/tests/input/cmds-target-release.txt

Lines changed: 74 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -42,26 +42,60 @@ blueprint-diff latest
4242
# Now, update the simulated RoT bootloader to reflect that the update completed.
4343
# Collect inventory from it and use that collection for another planning step.
4444
# This should report that the update completed, remove that update, and add one
45-
# for an SP on the same sled.
45+
# for an RoT on the same sled.
4646
sled-update-rot-bootloader 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 --stage0 1.0.0
4747
inventory-generate
4848
blueprint-plan latest latest
4949
blueprint-diff latest
5050

5151
# After the RoT bootloader update has completed, we update the simulated RoT to
52-
# reflect that update has completed as well.
53-
# Like before, collect inventory from it and use that collection for the next
54-
# step.
55-
# This should report that the update completed, remove that update, and add one
56-
# for another sled.
52+
# reflect that update has completed as well. Like before, collect inventory from
53+
# it and use that collection for the next step. This should report that the
54+
# update completed, remove that update, and add one for another sled.
5755
sled-update-rot 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 --slot-a 1.0.0
5856
inventory-generate
5957
blueprint-plan latest latest
6058
blueprint-diff latest
6159

6260
# We repeat the same procedure with the SP
6361
sled-update-sp 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 --active 1.0.0
62+
63+
# Collect inventory from it and use that collection for another planning step.
64+
# This should report that the update completed, remove that update, and add a
65+
# host OS update for this same sled.
66+
inventory-generate
67+
blueprint-plan latest latest
68+
blueprint-diff latest
69+
70+
# If we generate another plan, there should be no change.
71+
blueprint-plan latest latest
72+
blueprint-diff latest
73+
74+
# Update only the simulated host phase 2; this is a prerequisite for updating
75+
# the phase 1, and must be done first.
76+
sled-update-host-phase2 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 --slot-b f3dd0c7a1bd4500ea0d8bcf67581f576d47752b2f1998a4cb0f0c3155c483008
77+
inventory-generate
78+
79+
# Planning after only phase 2 has changed should make no changes. We're still
80+
# waiting on phase 1 to change.
81+
blueprint-plan latest latest
82+
blueprint-diff latest
83+
84+
# Now update the simulated SP to reflect that the phase 1 update is done.
85+
sled-update-host-phase1 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 --active B --slot-b 2053f8594971bbf0a7326c833e2ffc12b065b9d823b9c0b967d275fa595e4e89
6486
inventory-generate
87+
88+
# Planning _still_ shouldn't make any new changes; the OS update as a whole
89+
# isn't done until sled-agent reports it has booted from the new image.
90+
blueprint-plan latest latest
91+
blueprint-diff latest
92+
93+
# Update the sled's boot disk; this finishes the host OS update.
94+
sled-update-host-phase2 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 --boot-disk B
95+
inventory-generate
96+
97+
# Planning should now remove the host OS update and plan the next RoT bootloader
98+
# update.
6599
blueprint-plan latest latest
66100
blueprint-diff latest
67101

@@ -107,9 +141,30 @@ blueprint-plan latest latest
107141
blueprint-diff latest
108142

109143
# Let's simulate the successful SP update as well.
110-
# A few more planning steps should try to update the last sled.
111144
sled-update-sp 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c --active 1.0.0
112145
inventory-generate
146+
147+
# Planning should remove this update and add an OS update for this sled.
148+
blueprint-plan latest latest
149+
blueprint-diff latest
150+
151+
# Try an impossible host OS update replacement: write an unknown artifact to the
152+
# sled's phase 1. The planner should realize the update is impossible and
153+
# replace it. (As with the impossible SP update test above, we have to bump the
154+
# "ignore impossible MGS updates" timestamp.)
155+
set ignore-impossible-mgs-updates-since now
156+
sled-update-host-phase1 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c --slot-b ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
157+
inventory-generate
158+
blueprint-plan latest latest
159+
blueprint-diff latest
160+
161+
# Now simulate the update completing successfully.
162+
sled-update-host-phase2 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c --boot-disk B --slot-b f3dd0c7a1bd4500ea0d8bcf67581f576d47752b2f1998a4cb0f0c3155c483008
163+
sled-update-host-phase1 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c --active B --slot-b 2053f8594971bbf0a7326c833e2ffc12b065b9d823b9c0b967d275fa595e4e89
164+
inventory-generate
165+
166+
# Another planning step should try to update the last sled, starting with the
167+
# RoT bootloader.
113168
blueprint-plan latest latest
114169
blueprint-diff latest
115170

@@ -127,13 +182,22 @@ inventory-generate
127182
blueprint-plan latest latest
128183
blueprint-diff latest
129184

130-
# Finish updating the last sled and do one more planning run.
131-
# This should update one control plane zone.
185+
# Finish updating the last sled's SP.
186+
# There should be a pending host phase 1 update.
132187
sled-update-sp d81c6a84-79b8-4958-ae41-ea46c9b19763 --active 1.0.0
133188
inventory-generate
134189
blueprint-plan latest latest
135190
blueprint-diff latest
136191

192+
# Finish updating the last sled's host OS.
193+
sled-update-host-phase2 d81c6a84-79b8-4958-ae41-ea46c9b19763 --boot-disk B --slot-b f3dd0c7a1bd4500ea0d8bcf67581f576d47752b2f1998a4cb0f0c3155c483008
194+
sled-update-host-phase1 d81c6a84-79b8-4958-ae41-ea46c9b19763 --active B --slot-b 2053f8594971bbf0a7326c833e2ffc12b065b9d823b9c0b967d275fa595e4e89
195+
inventory-generate
196+
197+
# Do one more planning run. This should update one control plane zone.
198+
blueprint-plan latest latest
199+
blueprint-diff latest
200+
137201
# We should continue walking through the update. We need to build out a
138202
# reconfigurator-cli subcommand to simulate updated zone image sources (just
139-
# like we have sled-update-sp for simulated SP updates).
203+
# like we have sled-update-sp for simulated SP updates).

0 commit comments

Comments
 (0)