Skip to content

Commit bacc044

Browse files
authored
[40/n] blueprint planner logic + sled agent code to honor mupdate overrides (#8456)
Implement the blueprint planner logic for mupdate overrides, as well as Sled Agent logic for honoring them. These two changes are combined into one atomic commit because system correctness depends on both honoring and clearing mupdate overrides happening within the same commit. See RFD 556 for more information.
1 parent 76fb94c commit bacc044

File tree

20 files changed

+4157
-104
lines changed

20 files changed

+4157
-104
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

clients/sled-agent-client/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ progenitor::generate_api!(
6262
InventoryDisk = nexus_sled_agent_shared::inventory::InventoryDisk,
6363
InventoryZpool = nexus_sled_agent_shared::inventory::InventoryZpool,
6464
MacAddr = omicron_common::api::external::MacAddr,
65+
MupdateOverrideBootInventory = nexus_sled_agent_shared::inventory::MupdateOverrideBootInventory,
6566
Name = omicron_common::api::external::Name,
6667
NetworkInterface = omicron_common::api::internal::shared::NetworkInterface,
6768
OmicronPhysicalDiskConfig = omicron_common::disk::OmicronPhysicalDiskConfig,

dev-tools/reconfigurator-cli/tests/input/cmds-mupdate-update-flow.txt

Lines changed: 133 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,18 +11,148 @@ set target-release repo-1.0.0.zip
1111
# (This populates the zone manifest, used for no-op conversions from
1212
# install dataset to artifact down the road.)
1313
sled-update-install-dataset serial0 --to-target-release
14+
15+
# Set the image source of one of sled 0's zones to a specific artifact, and
16+
# also set MGS and host phase 2 updates on the sled. Both should be
17+
# reset as part of this process.
18+
blueprint-edit latest set-zone-image 0c71b3b2-6ceb-4e8f-b020-b08675e83038 artifact 1.2.3 e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
19+
blueprint-edit latest set-sp-update serial0 e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 1.1.0 sp 1.0.0 1.0.1
20+
blueprint-edit latest set-host-phase2 serial0 A artifact 1.0.0 3a9607047b03ccaab6d222249d890e93ca51b94ad631c7ca38be74cba60802ff
21+
blueprint-edit latest set-host-phase2 serial0 B artifact 1.0.0 044d45ad681b44e89c10e056cabdedf19fd8b1e54bc95e6622bcdd23f16bc8f2
22+
1423
# Simulate a mupdate on sled 0 by setting the mupdate override field to a
1524
# new UUID (generated using uuidgen).
1625
sled-set serial0 mupdate-override 6123eac1-ec5b-42ba-b73f-9845105a9971
1726

1827
# On sled 1, simulate an error obtaining the mupdate override.
1928
sled-set serial1 mupdate-override --with-error
2029

30+
# Also set SP and host phase 2 updates on this sled. They will be cleared
31+
# because of the error (which reflects the reality that Sled Agent is not
32+
# going to proceed with updates until the situation is resolved).
33+
blueprint-edit latest set-sp-update serial1 e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 newest sp newer older
34+
blueprint-edit latest set-host-phase2 serial1 A artifact 1.0.0 3a9607047b03ccaab6d222249d890e93ca51b94ad631c7ca38be74cba60802ff
35+
blueprint-edit latest set-host-phase2 serial1 B artifact 1.0.0 044d45ad681b44e89c10e056cabdedf19fd8b1e54bc95e6622bcdd23f16bc8f2
36+
2137
# Simulate a mupdate on sled 2 as well.
2238
sled-set serial2 mupdate-override 203fa72c-85c1-466a-8ed3-338ee029530d
2339

24-
# Generate an inventory and display it.
25-
#
26-
# TODO: in the future, we'll plan against this inventory.
40+
# Generate a new inventory and plan against that.
2741
inventory-generate
2842
inventory-show latest
43+
blueprint-plan latest latest
44+
45+
# Diff the blueprints. This diff should show:
46+
#
47+
# * for sled 0:
48+
# * "+ will remove mupdate override"
49+
# * for zone 0c71b3b2-6ceb-4e8f-b020-b08675e83038, a change from artifact to install-dataset
50+
# * the pending MGS update cleared
51+
# * host phase 2 contents set to current contents
52+
# * for sled 1, no change, because the mupdate override field had an error
53+
# * for sled 2, "+ will remove mupdate override"
54+
# * the target release's minimum generation bumped from 1 to 3
55+
# (the 3 is because generation 2 is repo-1.0.0.zip)
56+
blueprint-diff latest
57+
58+
# Hide sled 0 from inventory temporarily -- this does two things:
59+
# 1. Tests that mupdate/update state transitions don't happen when
60+
# the sled isn't present in inventory.
61+
# 2. We don't want sled 0 to participate in the next few operations
62+
# below.
63+
sled-set serial0 inventory-hidden
64+
65+
# Set the target release again (to the same repo), causing a generation number bump
66+
# to 3.
67+
set target-release repo-1.0.0.zip
68+
69+
# Invoke the planner -- should not proceed with adding or updating zones
70+
# because sled 0 has a remove-mupdate-override set in the blueprint.
71+
inventory-generate
72+
blueprint-plan latest latest
73+
blueprint-diff latest
74+
75+
# Now simulate the new config being applied to sled 0, which would
76+
# cause the mupdate override to be removed.
77+
sled-set serial0 mupdate-override unset
78+
sled-set serial0 inventory-visible
79+
80+
# But simulate a second mupdate on sled 2. This should invalidate the existing
81+
# mupdate override on sled 2 and cause another target release minimum
82+
# generation bump.
83+
tuf-assemble ../../update-common/manifests/fake-non-semver.toml --allow-non-semver
84+
sled-update-install-dataset serial2 --from-repo repo-2.0.0.zip
85+
sled-set serial2 mupdate-override 1c0ce176-6dc8-4a90-adea-d4a8000751da
86+
87+
# Generate a new inventory and plan against that.
88+
inventory-generate
89+
blueprint-plan latest latest
90+
91+
# Diff the blueprints. This diff should show:
92+
# * on sled 0:
93+
# * the "remove mupdate override" line going away
94+
# * no-op image source switches from install dataset to artifact
95+
# * on sled 1, no changes
96+
# * on sled 2, a _change_ in the will-remove-mupdate-override field
97+
# * another bump to the target release minimum generation, this time to 4.
98+
blueprint-diff latest
99+
100+
# Clear the mupdate override on sled 2, signifying that the config has been
101+
# applied.
102+
sled-set serial2 mupdate-override unset
103+
104+
# Run the planner again. This will cause sled 2's blueprint
105+
# remove_mupdate_override to be unset. But no further planning steps will
106+
# happen because the target release generation is not new enough.
107+
#
108+
# TODO: we want to block further planning steps until the target release is
109+
# uploaded and all install-dataset zones have been converted to artifact ones.
110+
inventory-generate
111+
blueprint-plan latest latest
112+
blueprint-show latest
113+
blueprint-diff latest
114+
115+
# Now set the target release -- at this point, we're still waiting for the
116+
# mupdate override error on sled 1 to be cleared.
117+
set target-release repo-2.0.0.zip
118+
blueprint-plan latest latest
119+
blueprint-show latest
120+
blueprint-diff latest
121+
122+
# Now clear the mupdate override error. At this point, the rest of the
123+
# planner starts working.
124+
sled-set serial1 mupdate-override unset
125+
inventory-generate
126+
blueprint-plan latest latest
127+
blueprint-show latest
128+
blueprint-diff latest
129+
130+
# Test that the add-zones-with-mupdate-override chicken switch works as
131+
# expected. We do this by:
132+
# * setting the mupdate override on a sled
133+
# * adding a new sled
134+
#
135+
# With the chicken switch disabled (the current state), the planner will
136+
# not proceed with adding new zones. But with the chicken switch enabled,
137+
# new zones will be added.
138+
sled-set serial0 mupdate-override c8fba912-63ae-473a-9115-0495d10fb3bc
139+
sled-add c3bc4c6d-fdde-4fc4-8493-89d2a1e5ee6b
140+
inventory-generate
141+
142+
# This will *not* generate the datasets and internal NTP zone on the new
143+
# sled.
144+
blueprint-plan latest latest
145+
blueprint-diff latest
146+
147+
# This *will* generate the datasets and internal NTP zone on the new sled.
148+
set chicken-switches --add-zones-with-mupdate-override true
149+
blueprint-plan latest latest
150+
blueprint-diff latest
151+
152+
# Set the target release minimum generation to a large value -- we're going to
153+
# test that the planner bails if it attempts a rollback of the target release
154+
# minimum generation.
155+
blueprint-edit latest set-target-release-min-gen 1000
156+
sled-set serial1 mupdate-override cc724abe-80c1-47e6-9771-19e6540531a9
157+
inventory-generate
158+
blueprint-plan latest latest

dev-tools/reconfigurator-cli/tests/input/cmds-noop-image-source.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,10 @@ sled-update-install-dataset serial0 --to-target-release
2323
sled-update-install-dataset serial1 --with-manifest-error
2424

2525
# On a third sled, update the install dataset and simulate a mupdate override.
26-
# (Currently we do this in the blueprint, but with
27-
# https://github.com/oxidecomputer/omicron/pull/8456 we should update this test and
28-
# set a mupdate-override on the sled directly.)
26+
# Also set it in the blueprint -- this simulates the situation where the mupdate
27+
# override is in progress and will be cleared in the future.
2928
sled-update-install-dataset serial2 --to-target-release
29+
sled-set serial2 mupdate-override ffffffff-ffff-ffff-ffff-ffffffffffff
3030
blueprint-edit latest set-remove-mupdate-override serial2 ffffffff-ffff-ffff-ffff-ffffffffffff
3131

3232
# On a fourth sled, simulate an error validating the install dataset image on one zone.

0 commit comments

Comments
 (0)