Skip to content

Commit 94969cd

Browse files
committed
propolis: improve robustness of VM zone management
1 parent 2c9d720 commit 94969cd

File tree

5 files changed

+112
-23
lines changed

5 files changed

+112
-23
lines changed

factory/propolis/smf/site.xml

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
<?xml version='1.0'?>
2+
<!DOCTYPE service_bundle SYSTEM '/usr/share/lib/xml/dtd/service_bundle.dtd.1'>
3+
<service_bundle type='profile' name='default'>
4+
<!-- Disable a bunch of services that we don't need in the VM zone: -->
5+
<service name='network/ssh' version='1' type='service'>
6+
<instance name='default' enabled='false' />
7+
</service>
8+
<service name='system/cron' version='1' type='service'>
9+
<instance name='default' enabled='false' />
10+
</service>
11+
<service name='system/filesystem/autofs' version='1' type='service'>
12+
<instance name='default' enabled='false' />
13+
</service>
14+
<service name='network/rpc/bind' version='1' type='service'>
15+
<instance name='default' enabled='false' />
16+
</service>
17+
<service name='network/nfs/status' version='1' type='service'>
18+
<instance name='default' enabled='false' />
19+
</service>
20+
<service name='network/nfs/nlockmgr' version='1' type='service'>
21+
<instance name='default' enabled='false' />
22+
</service>
23+
<service name='network/nfs/client' version='1' type='service'>
24+
<instance name='default' enabled='false' />
25+
</service>
26+
<service name='network/nfs/cbd' version='1' type='service'>
27+
<instance name='default' enabled='false' />
28+
</service>
29+
<service name='network/nfs/mapid' version='1' type='service'>
30+
<instance name='default' enabled='false' />
31+
</service>
32+
<service name='network/nfs/rquota' version='1' type='service'>
33+
<instance name='default' enabled='false' />
34+
</service>
35+
<service name='network/nfs/server' version='1' type='service'>
36+
<instance name='default' enabled='false' />
37+
</service>
38+
39+
<!-- See: https://www.illumos.org/issues/14006 -->
40+
<service name='network/routing/route' version='1' type='service'>
41+
<instance name='default' enabled='false' />
42+
</service>
43+
<service name='network/routing-setup' version='1' type='service'>
44+
<instance name='default'>
45+
<property_group name='routeadm' type='application'>
46+
<propval name='ipv4-routing-set' type='boolean' value='true' />
47+
</property_group>
48+
</instance>
49+
</service>
50+
</service_bundle>

factory/propolis/src/main.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2023 Oxide Computer Company
2+
* Copyright 2024 Oxide Computer Company
33
*/
44

55
use std::{sync::Arc, time::Duration};
@@ -19,6 +19,7 @@ mod serial;
1919
mod svc;
2020
mod ucred;
2121
mod vm;
22+
mod zones;
2223

2324
struct Central {
2425
log: Logger,

factory/propolis/src/serial.rs

Lines changed: 5 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,18 @@
11
/*
2-
* Copyright 2023 Oxide Computer Company
2+
* Copyright 2024 Oxide Computer Company
33
*/
44

55
use std::{
66
collections::HashMap,
7-
ffi::CString,
87
os::{fd::AsRawFd, unix::prelude::PermissionsExt},
98
path::PathBuf,
109
sync::{Arc, Mutex},
1110
time::Duration,
1211
};
1312

1413
use crate::ucred::PeerUCred;
14+
use crate::zones::*;
1515
use anyhow::{anyhow, bail, Result};
16-
use libc::zoneid_t;
1716
use slog::{debug, error, info, o, trace, warn, Logger};
1817
use tokio::net::UnixListener;
1918
use tokio::{io::Interest, net::UnixStream};
@@ -181,7 +180,9 @@ impl Serial {
181180
}
182181

183182
pub fn zone_add(&self, name: &str) -> Result<SerialForZone> {
184-
let zoneid = zone_name_to_id(name)?;
183+
let Some(zoneid) = zone_name_to_id(name)? else {
184+
bail!("zone {name:?} not found!");
185+
};
185186

186187
let (tx, rx) = tokio::sync::mpsc::channel(100);
187188
let (shut_tx, shut_rx) = tokio::sync::watch::channel(false);
@@ -214,23 +215,6 @@ impl Serial {
214215
}
215216
}
216217

217-
#[link(name = "c")]
218-
extern "C" {
219-
fn getzoneidbyname(name: *const libc::c_char) -> zoneid_t;
220-
}
221-
222-
fn zone_name_to_id(name: &str) -> Result<zoneid_t> {
223-
let cs = CString::new(name)?;
224-
225-
let id = unsafe { getzoneidbyname(cs.as_ptr()) };
226-
if id < 0 {
227-
let e = std::io::Error::last_os_error();
228-
bail!("getzoneidbyname({name}): {e}");
229-
}
230-
231-
Ok(id)
232-
}
233-
234218
fn clean_line(linebuf: &[u8]) -> Option<String> {
235219
let s = String::from_utf8_lossy(linebuf);
236220
let s = s.replace('\x1b', "^[");

factory/propolis/src/vm.rs

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2023 Oxide Computer Company
2+
* Copyright 2024 Oxide Computer Company
33
*/
44

55
use std::{
@@ -15,6 +15,7 @@ use crate::{
1515
config::ImageSource,
1616
db::types::*,
1717
net::{dladm_create_vnic, dladm_delete_vnic, dladm_vnic_get, Vnic},
18+
zones::*,
1819
Central,
1920
};
2021
use anyhow::{anyhow, bail, Result};
@@ -437,6 +438,8 @@ async fn instance_worker_one(
437438
let vmdir = root.join("vm");
438439
let smfdir =
439440
root.join("var").join("svc").join("manifest").join("site");
441+
let siteprofile =
442+
root.join("var").join("svc").join("profile").join("site.xml");
440443

441444
info!(log, "add /vm files to zone {zn}...");
442445
if vmdir.exists() {
@@ -462,6 +465,8 @@ async fn instance_worker_one(
462465
std::fs::write(&smfdir.join(format!("{name}.xml")), bundle)?;
463466
}
464467

468+
std::fs::write(&siteprofile, include_str!("../smf/site.xml"))?;
469+
465470
info!(log, "make root disk...");
466471
match targ.source()? {
467472
ImageSource::File(image_path) => {
@@ -523,6 +528,17 @@ async fn instance_worker_one(
523528
Ok(DoNext::Immediate)
524529
}
525530
InstanceState::ZoneOnline => {
531+
/*
532+
* Confirm that the zone exists. It's possible that the host, which
533+
* boots from a ramdisk, has rebooted and taken some partial zones
534+
* along with it.
535+
*/
536+
if !zone_exists(&zn)? {
537+
warn!(log, "zone {zn}: no longer exists; giving up!");
538+
c.db.instance_new_state(&id, InstanceState::Destroying)?;
539+
return Ok(DoNext::Immediate);
540+
}
541+
526542
let ser = if let Some(ser) = ser.as_mut() {
527543
ser
528544
} else {

factory/propolis/src/zones.rs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
/*
2+
* Copyright 2024 Oxide Computer Company
3+
*/
4+
5+
use std::ffi::CString;
6+
7+
use anyhow::{bail, Result};
8+
pub use libc::zoneid_t;
9+
10+
/*
 * Foreign declaration of getzoneidbyname(), resolved from the C library
 * named in the link attribute.  It accepts a pointer to a C string holding
 * a zone name and returns a zoneid_t; the wrapper in this file treats a
 * negative return value as failure and then inspects errno.
 */
#[link(name = "c")]
11+
extern "C" {
12+
fn getzoneidbyname(name: *const libc::c_char) -> zoneid_t;
13+
}
14+
15+
pub fn zone_name_to_id(name: &str) -> Result<Option<zoneid_t>> {
16+
let cs = CString::new(name)?;
17+
18+
let id = unsafe { getzoneidbyname(cs.as_ptr()) };
19+
if id < 0 {
20+
let e = unsafe { *libc::___errno() };
21+
if e == libc::EINVAL {
22+
/*
23+
* According to the documentation, this actually means the zone does
24+
* not exist on the system.
25+
*/
26+
return Ok(None);
27+
}
28+
29+
let e = std::io::Error::from_raw_os_error(e);
30+
bail!("getzoneidbyname({name}): {e}");
31+
}
32+
33+
Ok(Some(id))
34+
}
35+
36+
pub fn zone_exists(name: &str) -> Result<bool> {
37+
Ok(zone_name_to_id(name)?.is_some())
38+
}

0 commit comments

Comments
 (0)