Skip to content

Commit 9a650c3

Browse files
authored
[36/n] [sled-agent] factor out code to boot Omicron and switch zones (#8679)
I'm making some changes here, and this code is long and complex enough that it's worth (a) factoring it out into a couple of separate functions and (b) preserving git blame through judicious use of indentation. This is part 1 of 2. Part 2 is #8680.
1 parent 5cf5d6d commit 9a650c3

File tree

1 file changed

+99
-55
lines changed

1 file changed

+99
-55
lines changed

sled-agent/src/services.rs

Lines changed: 99 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1425,6 +1425,55 @@ impl ServiceManager {
14251425
.install()
14261426
.await?;
14271427

1428+
let running_zone = match &request {
1429+
ZoneArgs::Omicron(config) => {
1430+
self.boot_omicron_zone(config, installed_zone).await?
1431+
}
1432+
ZoneArgs::Switch(config) => {
1433+
self.boot_switch_zone(
1434+
config,
1435+
installed_zone,
1436+
&links_need_link_local,
1437+
bootstrap_name_and_address,
1438+
&device_names,
1439+
)
1440+
.await?
1441+
}
1442+
};
1443+
1444+
// Now that we've booted the zone, we'll notify the sled-agent about:
1445+
//
1446+
// - Its control VNIC (all zones have one)
1447+
// - Any bootstrap network VNIC (only the switch zone has one)
1448+
// - Any OPTE ports (instance zones, or Oxide zones with external
1449+
// connectivity).
1450+
//
1451+
// Note that we'll almost always have started the sled-agent at this
1452+
// point. The only exception is the switch zone, during bootstrapping
1453+
// but before we've either run RSS or unlocked the rack. In both those
1454+
// cases, we have a `StartSledAgentRequest`, and so a metrics queue.
1455+
if let Some(queue) = self.maybe_metrics_queue() {
1456+
match queue.track_zone_links(&running_zone) {
1457+
Ok(_) => debug!(self.inner.log, "Tracking zone datalinks"),
1458+
Err(errors) => {
1459+
error!(
1460+
self.inner.log,
1461+
"Failed to track one or more links in the zone, \
1462+
some metrics will not be produced";
1463+
"zone_name" => running_zone.name(),
1464+
"errors" => ?errors,
1465+
);
1466+
}
1467+
}
1468+
}
1469+
Ok(running_zone)
1470+
}
1471+
1472+
async fn boot_omicron_zone(
1473+
&self,
1474+
config: &OmicronZoneConfig,
1475+
installed_zone: InstalledZone,
1476+
) -> Result<RunningZone, Error> {
14281477
let disabled_ssh_service = ServiceBuilder::new("network/ssh")
14291478
.add_instance(ServiceInstanceBuilder::new("default").disable());
14301479

@@ -1436,11 +1485,11 @@ impl ServiceManager {
14361485
ServiceBuilder::new("network/dns/client")
14371486
.add_instance(ServiceInstanceBuilder::new("default"));
14381487

1439-
let running_zone = match &request {
1440-
ZoneArgs::Omicron(OmicronZoneConfig {
1488+
let running_zone = match config {
1489+
OmicronZoneConfig {
14411490
zone_type: OmicronZoneType::Clickhouse { address, .. },
14421491
..
1443-
}) => {
1492+
} => {
14441493
let Some(info) = self.inner.sled_info.get() else {
14451494
return Err(Error::SledAgentNotReady);
14461495
};
@@ -1522,10 +1571,10 @@ impl ServiceManager {
15221571
RunningZone::boot(installed_zone).await?
15231572
}
15241573

1525-
ZoneArgs::Omicron(OmicronZoneConfig {
1574+
OmicronZoneConfig {
15261575
zone_type: OmicronZoneType::ClickhouseServer { address, .. },
15271576
..
1528-
}) => {
1577+
} => {
15291578
let Some(info) = self.inner.sled_info.get() else {
15301579
return Err(Error::SledAgentNotReady);
15311580
};
@@ -1607,10 +1656,10 @@ impl ServiceManager {
16071656
RunningZone::boot(installed_zone).await?
16081657
}
16091658

1610-
ZoneArgs::Omicron(OmicronZoneConfig {
1659+
OmicronZoneConfig {
16111660
zone_type: OmicronZoneType::ClickhouseKeeper { address, .. },
16121661
..
1613-
}) => {
1662+
} => {
16141663
let Some(info) = self.inner.sled_info.get() else {
16151664
return Err(Error::SledAgentNotReady);
16161665
};
@@ -1685,11 +1734,11 @@ impl ServiceManager {
16851734
RunningZone::boot(installed_zone).await?
16861735
}
16871736

1688-
ZoneArgs::Omicron(OmicronZoneConfig {
1737+
OmicronZoneConfig {
16891738
id: zone_id,
16901739
zone_type: OmicronZoneType::CockroachDb { address, .. },
16911740
..
1692-
}) => {
1741+
} => {
16931742
let Some(info) = self.inner.sled_info.get() else {
16941743
return Err(Error::SledAgentNotReady);
16951744
};
@@ -1761,10 +1810,10 @@ impl ServiceManager {
17611810
RunningZone::boot(installed_zone).await?
17621811
}
17631812

1764-
ZoneArgs::Omicron(OmicronZoneConfig {
1813+
OmicronZoneConfig {
17651814
zone_type: OmicronZoneType::Crucible { address, dataset },
17661815
..
1767-
}) => {
1816+
} => {
17681817
let Some(info) = self.inner.sled_info.get() else {
17691818
return Err(Error::SledAgentNotReady);
17701819
};
@@ -1814,10 +1863,10 @@ impl ServiceManager {
18141863
RunningZone::boot(installed_zone).await?
18151864
}
18161865

1817-
ZoneArgs::Omicron(OmicronZoneConfig {
1866+
OmicronZoneConfig {
18181867
zone_type: OmicronZoneType::CruciblePantry { address },
18191868
..
1820-
}) => {
1869+
} => {
18211870
let Some(info) = self.inner.sled_info.get() else {
18221871
return Err(Error::SledAgentNotReady);
18231872
};
@@ -1857,11 +1906,11 @@ impl ServiceManager {
18571906
.map_err(|err| Error::io("crucible pantry profile", err))?;
18581907
RunningZone::boot(installed_zone).await?
18591908
}
1860-
ZoneArgs::Omicron(OmicronZoneConfig {
1909+
OmicronZoneConfig {
18611910
id,
18621911
zone_type: OmicronZoneType::Oximeter { address },
18631912
..
1864-
}) => {
1913+
} => {
18651914
let Some(info) = self.inner.sled_info.get() else {
18661915
return Err(Error::SledAgentNotReady);
18671916
};
@@ -1894,7 +1943,7 @@ impl ServiceManager {
18941943
})?;
18951944
RunningZone::boot(installed_zone).await?
18961945
}
1897-
ZoneArgs::Omicron(OmicronZoneConfig {
1946+
OmicronZoneConfig {
18981947
zone_type:
18991948
OmicronZoneType::ExternalDns {
19001949
http_address,
@@ -1903,7 +1952,7 @@ impl ServiceManager {
19031952
..
19041953
},
19051954
..
1906-
}) => {
1955+
} => {
19071956
let Some(info) = self.inner.sled_info.get() else {
19081957
return Err(Error::SledAgentNotReady);
19091958
};
@@ -1953,7 +2002,7 @@ impl ServiceManager {
19532002
})?;
19542003
RunningZone::boot(installed_zone).await?
19552004
}
1956-
ZoneArgs::Omicron(OmicronZoneConfig {
2005+
OmicronZoneConfig {
19572006
zone_type:
19582007
OmicronZoneType::BoundaryNtp {
19592008
address,
@@ -1963,7 +2012,7 @@ impl ServiceManager {
19632012
..
19642013
},
19652014
..
1966-
}) => {
2015+
} => {
19672016
let Some(info) = self.inner.sled_info.get() else {
19682017
return Err(Error::SledAgentNotReady);
19692018
};
@@ -2057,10 +2106,10 @@ impl ServiceManager {
20572106

20582107
RunningZone::boot(installed_zone).await?
20592108
}
2060-
ZoneArgs::Omicron(OmicronZoneConfig {
2109+
OmicronZoneConfig {
20612110
zone_type: OmicronZoneType::InternalNtp { address },
20622111
..
2063-
}) => {
2112+
} => {
20642113
let Some(info) = self.inner.sled_info.get() else {
20652114
return Err(Error::SledAgentNotReady);
20662115
};
@@ -2134,7 +2183,7 @@ impl ServiceManager {
21342183

21352184
RunningZone::boot(installed_zone).await?
21362185
}
2137-
ZoneArgs::Omicron(OmicronZoneConfig {
2186+
OmicronZoneConfig {
21382187
zone_type:
21392188
OmicronZoneType::InternalDns {
21402189
http_address,
@@ -2144,7 +2193,7 @@ impl ServiceManager {
21442193
..
21452194
},
21462195
..
2147-
}) => {
2196+
} => {
21482197
let underlay_ips = if http_address.ip() == dns_address.ip() {
21492198
vec![*http_address.ip()]
21502199
} else {
@@ -2225,7 +2274,7 @@ impl ServiceManager {
22252274
})?;
22262275
RunningZone::boot(installed_zone).await?
22272276
}
2228-
ZoneArgs::Omicron(OmicronZoneConfig {
2277+
OmicronZoneConfig {
22292278
zone_type:
22302279
OmicronZoneType::Nexus {
22312280
internal_address,
@@ -2235,7 +2284,7 @@ impl ServiceManager {
22352284
},
22362285
id,
22372286
..
2238-
}) => {
2287+
} => {
22392288
let Some(info) = self.inner.sled_info.get() else {
22402289
return Err(Error::SledAgentNotReady);
22412290
};
@@ -2369,7 +2418,29 @@ impl ServiceManager {
23692418
})?;
23702419
RunningZone::boot(installed_zone).await?
23712420
}
2372-
ZoneArgs::Switch(SwitchZoneConfig { id, services, addresses }) => {
2421+
};
2422+
2423+
Ok(running_zone)
2424+
}
2425+
2426+
async fn boot_switch_zone(
2427+
&self,
2428+
config: &SwitchZoneConfig,
2429+
installed_zone: InstalledZone,
2430+
links_need_link_local: &[bool],
2431+
bootstrap_name_and_address: Option<(String, Ipv6Addr)>,
2432+
device_names: &[String],
2433+
) -> Result<RunningZone, Error> {
2434+
// Temporary double-indent to avoid breaking blame.
2435+
{
2436+
{
2437+
let SwitchZoneConfig { id, services, addresses } = config;
2438+
2439+
let disabled_dns_client_service = ServiceBuilder::new(
2440+
"network/dns/client",
2441+
)
2442+
.add_instance(ServiceInstanceBuilder::new("default").disable());
2443+
23732444
let info = self.inner.sled_info.get();
23742445

23752446
let gw_addr = match info {
@@ -2420,7 +2491,7 @@ impl ServiceManager {
24202491
for (link, needs_link_local) in
24212492
installed_zone.links().iter().zip(links_need_link_local)
24222493
{
2423-
if needs_link_local {
2494+
if *needs_link_local {
24242495
switch_zone_setup_config = switch_zone_setup_config
24252496
.add_property(
24262497
"link_local_links",
@@ -3081,36 +3152,9 @@ impl ServiceManager {
30813152
.map_err(|err| {
30823153
Error::io("Failed to setup Switch zone profile", err)
30833154
})?;
3084-
RunningZone::boot(installed_zone).await?
3085-
}
3086-
};
3087-
3088-
// Now that we've booted the zone, we'll notify the sled-agent about:
3089-
//
3090-
// - Its control VNIC (all zones have one)
3091-
// - Any bootstrap network VNIC (only the switch zone has one)
3092-
// - Any OPTE ports (instance zones, or Oxide zones with external
3093-
// connectivity).
3094-
//
3095-
// Note that we'll almost always have started the sled-agent at this
3096-
// point. The only exception is the switch zone, during bootstrapping
3097-
// but before we've either run RSS or unlocked the rack. In both those
3098-
// cases, we have a `StartSledAgentRequest`, and so a metrics queue.
3099-
if let Some(queue) = self.maybe_metrics_queue() {
3100-
match queue.track_zone_links(&running_zone) {
3101-
Ok(_) => debug!(self.inner.log, "Tracking zone datalinks"),
3102-
Err(errors) => {
3103-
error!(
3104-
self.inner.log,
3105-
"Failed to track one or more links in the zone, \
3106-
some metrics will not be produced";
3107-
"zone_name" => running_zone.name(),
3108-
"errors" => ?errors,
3109-
);
3110-
}
3155+
Ok(RunningZone::boot(installed_zone).await?)
31113156
}
31123157
}
3113-
Ok(running_zone)
31143158
}
31153159

31163160
// Attempt to start a single Omicron zone.

0 commit comments

Comments
 (0)