Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -569,7 +569,10 @@ progenitor-client = "0.9.1"
bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "95d6a559890c94e3aa62c8adcd7c4e123ec4c6dc" }
propolis_api_types = { git = "https://github.com/oxidecomputer/propolis", rev = "95d6a559890c94e3aa62c8adcd7c4e123ec4c6dc" }
propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "95d6a559890c94e3aa62c8adcd7c4e123ec4c6dc" }
propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "95d6a559890c94e3aa62c8adcd7c4e123ec4c6dc" }
# NOTE: propolis-mock-server is currently out of sync with other propolis deps,
# but when we update propolis to a commit after 2652487, please just bump this
# as well and delete this comment.
propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "2652487c19ede2db2cbc634b0dee3a78848dfea1" }
# NOTE: see above!
proptest = "1.5.0"
qorb = "0.2.1"
Expand Down
277 changes: 274 additions & 3 deletions sled-agent/src/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,9 @@ use sled_storage::manager::StorageHandle;
use slog::Logger;
use std::net::IpAddr;
use std::net::SocketAddr;
use std::pin::Pin;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::{mpsc, oneshot};
use uuid::Uuid;

Expand Down Expand Up @@ -472,8 +474,31 @@ struct InstanceRunner {
}

impl InstanceRunner {
/// How long to wait for VMM shutdown to complete before forcefully
/// terminating the zone.
const STOP_GRACE_PERIOD: Duration = Duration::from_secs(60 * 10);

async fn run(mut self, mut terminate_rx: mpsc::Receiver<TerminateRequest>) {
use InstanceRequest::*;

// Timeout for stopping the instance gracefully.
//
// When we send Propolis a put-state request to transition to
`Stopped`, we start this timer. If Propolis does not report back
// that it has stopped the instance within `STOP_GRACE_PERIOD`, we
// will forcibly terminate the VMM. This is to ensure that a totally
// stuck VMM does not prevent the Propolis zone from being cleaned up.
let mut stop_timeout = None;
async fn stop_timeout_completed(
stop_timeout: &mut Option<Pin<Box<tokio::time::Sleep>>>,
) {
if let Some(ref mut timeout) = stop_timeout {
timeout.await
} else {
std::future::pending().await
}
}

while !self.should_terminate {
tokio::select! {
biased;
Expand Down Expand Up @@ -539,6 +564,25 @@ impl InstanceRunner {
},
}
},

// We are waiting for the VMM to stop, and the grace period has
// elapsed without us hearing back from Propolis that it has
// stopped the instance. Time to terminate it violently!
//
// Note that this must be a lower priority in the `select!` than
// instance monitor requests, as we would prefer to honor a
// message from the instance monitor indicating a successful
// stop, even if we have reached the timeout.
_ = stop_timeout_completed(&mut stop_timeout) => {
warn!(
self.log,
"Instance failed to stop within the grace period, \
terminating it violently!",
);
let mark_failed = false;
self.terminate(mark_failed).await;
}

// Requests to terminate the instance take priority over any
// other request to the instance.
request = terminate_rx.recv() => {
Expand Down Expand Up @@ -584,7 +628,22 @@ impl InstanceRunner {
tx.send(Ok(self.current_state()))
.map_err(|_| Error::FailedSendClientClosed)
},
PutState{ state, tx } => {
PutState { state, tx } => {
// If we're going to stop the instance, start
// the timeout after which we will forcefully
// terminate the VMM.
if let VmmStateRequested::Stopped = state {
// Only start the stop timeout if there
// isn't one already, so that additional
// requests to stop coming in don't reset
// the clock.
Comment on lines +636 to +639
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It does seem that we'd call put_state again in this case though -- is that desirable?

(a valid answer may be: "Sure!" - I'm just confirming)

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🤷‍♀️ we would call it twice before this change, so I didn't mess with it. I don't see any obvious reason why it would be bad to do it again, and maybe it's useful for e.g. a higher-level component retrying?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW my general philosophy here has been to have sled agent pass all state change requests right through to Propolis and let Propolis decide how to deal with any duplicates. (In this case it should just decide the second stop request can be ignored because it's already put one on its queue.)

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that makes sense to me. I thought not resetting the timeout was worthwhile, though: you can imagine a scenario where an instance fails to stop, and a frustrated user or retrying client keeps sending more stop requests, each of which resets the grace period and stops the stuck VMM from being killed.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I definitely agree--once the sled agent finds out about the control plane's intent to stop the instance, it should arm the timer and not reset it if the same intent is expressed again. Mostly I just wanted to provide a bit of color about calling Propolis twice; the Propolis state machine is designed to handle that kind of thing precisely so that sled agent doesn't have to worry about it.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup, thanks for that! I figured we were on the same page about the timer but felt like it was worth clarifying further.

if stop_timeout.is_none() {
stop_timeout = Some(Box::pin(tokio::time::sleep(
Self::STOP_GRACE_PERIOD
)));
}
}

tx.send(self.put_state(state).await
.map(|r| VmmPutStateResponse { updated_runtime: Some(r) })
.map_err(|e| e.into()))
Expand Down Expand Up @@ -639,6 +698,7 @@ impl InstanceRunner {

}
}

self.publish_state_to_nexus().await;

// Okay, now that we've terminated the instance, drain any outstanding
Expand Down Expand Up @@ -2383,8 +2443,6 @@ mod tests {

// note the "mock" here is different from the vnic/zone contexts above.
// this is actually running code for a dropshot server from propolis.
// (might we want a locally-defined fake whose behavior we can control
// more directly from the test driver?)
// TODO: factor out, this is also in sled-agent-sim.
fn propolis_mock_server(
log: &Logger,
Expand All @@ -2407,6 +2465,30 @@ mod tests {
(srv, client)
}

/// Switches the mock Propolis server into the given `MockMode` by
/// PUTting to its `/mock/mode` control endpoint.
///
/// Panics if the HTTP request fails, since the test cannot proceed
/// without control over the mock's state machine.
async fn propolis_mock_set_mode(
    client: &PropolisClient,
    mode: propolis_mock_server::MockMode,
) {
    let url = format!("{}/mock/mode", client.baseurl());
    let request = client.client().put(url).json(&mode);
    request
        .send()
        .await
        .expect("setting mock mode failed unexpectedly");
}

/// Advances the mock Propolis server's state machine by a single step
/// via its `/mock/step` control endpoint. Only meaningful once the mock
/// has been placed in single-step mode.
///
/// Panics if the HTTP request fails.
async fn propolis_mock_step(client: &PropolisClient) {
    let url = format!("{}/mock/step", client.baseurl());
    let response = client.client().put(url).send().await;
    response.expect("single-stepping mock server failed unexpectedly");
}

async fn setup_storage_manager(log: &Logger) -> StorageManagerTestHarness {
let mut harness = StorageManagerTestHarness::new(log).await;
let raw_disks =
Expand Down Expand Up @@ -2957,4 +3039,193 @@ mod tests {
storage_harness.cleanup().await;
logctx.cleanup_successful();
}

// Tests a scenario in which a propolis-server process fails to stop in a
// timely manner (i.e. it's gotten stuck somehow). This test asserts that
// the sled-agent will eventually forcibly terminate the VMM process, tear
// down the zone, and tell Nexus that the VMM has been destroyed, even
// if Propolis can't shut itself down nicely.
#[tokio::test]
async fn test_instance_manager_stop_timeout() {
    let logctx = omicron_test_utils::dev::test_setup_log(
        "test_instance_manager_stop_timeout",
    );
    let log = logctx.log.new(o!(FileKv));

    // automock'd things used during this test
    let _mock_vnic_contexts = mock_vnic_contexts();
    let _mock_zone_contexts = mock_zone_contexts();

    let mut storage_harness = setup_storage_manager(&logctx.log).await;
    let storage_handle = storage_harness.handle().clone();

    let FakeNexusParts {
        nexus_client,
        mut state_rx,
        _dns_server,
        _nexus_server,
    } = FakeNexusParts::new(&log).await;

    let temp_guard = Utf8TempDir::new().unwrap();
    let temp_dir = temp_guard.path().to_string();

    let (services, _metrics_rx) = fake_instance_manager_services(
        &log,
        storage_handle,
        nexus_client,
        &temp_dir,
    );
    let InstanceManagerServices {
        nexus_client,
        vnic_allocator: _,
        port_manager,
        storage,
        zone_bundler,
        zone_builder_factory,
        metrics_queue,
    } = services;

    let etherstub = Etherstub("mystub".to_string());

    let vmm_reservoir_manager = VmmReservoirManagerHandle::stub_for_test();

    let mgr = crate::instance_manager::InstanceManager::new(
        logctx.log.new(o!("component" => "InstanceManager")),
        nexus_client,
        etherstub,
        port_manager,
        storage,
        zone_bundler,
        zone_builder_factory,
        vmm_reservoir_manager,
        metrics_queue,
    )
    .unwrap();

    let (propolis_server, propolis_client) =
        propolis_mock_server(&logctx.log);
    let propolis_addr = propolis_server.local_addr();

    let instance_id = InstanceUuid::new_v4();
    let propolis_id = PropolisUuid::from_untyped_uuid(PROPOLIS_ID);
    let zone_name = propolis_zone_name(&propolis_id);
    let InstanceInitialState {
        hardware,
        vmm_runtime,
        propolis_addr,
        migration_id: _,
    } = fake_instance_initial_state(propolis_addr);

    let metadata = InstanceMetadata {
        silo_id: Uuid::new_v4(),
        project_id: Uuid::new_v4(),
    };
    let sled_identifiers = SledIdentifiers {
        rack_id: Uuid::new_v4(),
        sled_id: Uuid::new_v4(),
        model: "fake-model".into(),
        revision: 1,
        serial: "fake-serial".into(),
    };

    mgr.ensure_registered(
        propolis_id,
        InstanceEnsureBody {
            instance_id,
            migration_id: None,
            hardware,
            vmm_runtime,
            propolis_addr,
            metadata,
        },
        sled_identifiers,
    )
    .await
    .unwrap();

    mgr.ensure_state(propolis_id, VmmStateRequested::Running)
        .await
        .unwrap();

    // Wait for the fake Nexus to observe the VMM running before we try
    // to stop it.
    timeout(
        TIMEOUT_DURATION,
        state_rx.wait_for(|maybe_state| match maybe_state {
            ReceivedInstanceState::InstancePut(sled_inst_state) => {
                sled_inst_state.vmm_state.state == VmmState::Running
            }
            _ => false,
        }),
    )
    .await
    .expect("timed out waiting for VmmState::Running in FakeNexus")
    .expect("failed to receive FakeNexus' InstanceState");

    // Place the mock propolis in single-step mode.
    propolis_mock_set_mode(
        &propolis_client,
        propolis_mock_server::MockMode::SingleStep,
    )
    .await;

    // Request the VMM stop
    mgr.ensure_state(propolis_id, VmmStateRequested::Stopped)
        .await
        .unwrap();

    // Single-step once.
    propolis_mock_step(&propolis_client).await;

    timeout(
        TIMEOUT_DURATION,
        state_rx.wait_for(|maybe_state| match maybe_state {
            ReceivedInstanceState::InstancePut(sled_inst_state) => {
                sled_inst_state.vmm_state.state == VmmState::Stopping
            }
            _ => false,
        }),
    )
    .await
    .expect("timed out waiting for VmmState::Stopping in FakeNexus")
    .expect("failed to receive FakeNexus' InstanceState");

    // NOW WE STOP ADVANCING THE MOCK PROPOLIS STATE MACHINE --- IT WILL
    // NEVER REACH `Stopped`.

    // Expect that the `InstanceRunner` will attempt to halt and remove the
    // zone.
    let halt_ctx = MockZones::halt_and_remove_logged_context();
    halt_ctx.expect().returning(move |_, name| {
        assert_eq!(name, &zone_name);
        Ok(())
    });

    // Now, pause time and advance the Tokio clock past the stop grace
    // period. This should cause the stop timeout to fire, without requiring
    // the test to actually wait for ten minutes.
    tokio::time::pause();
    tokio::time::advance(
        super::InstanceRunner::STOP_GRACE_PERIOD + Duration::from_secs(1),
    )
    .await;
    tokio::time::resume();

    // The timeout should now fire and sled-agent will murder propolis,
    // allowing the zone to be destroyed.
    timeout(
        TIMEOUT_DURATION,
        state_rx.wait_for(|maybe_state| match maybe_state {
            ReceivedInstanceState::InstancePut(sled_inst_state) => {
                sled_inst_state.vmm_state.state == VmmState::Destroyed
            }
            _ => false,
        }),
    )
    .await
    .expect("timed out waiting for VmmState::Destroyed in FakeNexus")
    .expect("failed to receive FakeNexus' InstanceState");

    storage_harness.cleanup().await;
    logctx.cleanup_successful();
}
}
Loading