diff --git a/Cargo.lock b/Cargo.lock index 5e8e429d45e..2aa47368150 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9289,7 +9289,7 @@ dependencies = [ [[package]] name = "propolis-mock-server" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=95d6a559890c94e3aa62c8adcd7c4e123ec4c6dc#95d6a559890c94e3aa62c8adcd7c4e123ec4c6dc" +source = "git+https://github.com/oxidecomputer/propolis?rev=2652487c19ede2db2cbc634b0dee3a78848dfea1#2652487c19ede2db2cbc634b0dee3a78848dfea1" dependencies = [ "anyhow", "atty", @@ -9299,7 +9299,7 @@ dependencies = [ "futures", "hyper", "progenitor 0.9.1", - "propolis_types", + "propolis_types 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=2652487c19ede2db2cbc634b0dee3a78848dfea1)", "rand 0.8.5", "reqwest", "schemars", @@ -9334,7 +9334,7 @@ version = "0.0.0" source = "git+https://github.com/oxidecomputer/propolis?rev=95d6a559890c94e3aa62c8adcd7c4e123ec4c6dc#95d6a559890c94e3aa62c8adcd7c4e123ec4c6dc" dependencies = [ "crucible-client-types", - "propolis_types", + "propolis_types 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=95d6a559890c94e3aa62c8adcd7c4e123ec4c6dc)", "schemars", "serde", "serde_with", @@ -9342,6 +9342,15 @@ dependencies = [ "uuid", ] +[[package]] +name = "propolis_types" +version = "0.0.0" +source = "git+https://github.com/oxidecomputer/propolis?rev=2652487c19ede2db2cbc634b0dee3a78848dfea1#2652487c19ede2db2cbc634b0dee3a78848dfea1" +dependencies = [ + "schemars", + "serde", +] + [[package]] name = "propolis_types" version = "0.0.0" diff --git a/Cargo.toml b/Cargo.toml index 6b4adcd3895..39cd2fb300a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -569,7 +569,10 @@ progenitor-client = "0.9.1" bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "95d6a559890c94e3aa62c8adcd7c4e123ec4c6dc" } propolis_api_types = { git = "https://github.com/oxidecomputer/propolis", rev = "95d6a559890c94e3aa62c8adcd7c4e123ec4c6dc" } propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "95d6a559890c94e3aa62c8adcd7c4e123ec4c6dc" } -propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "95d6a559890c94e3aa62c8adcd7c4e123ec4c6dc" } +# NOTE: propolis-mock-server is currently out of sync with other propolis deps, +# but when we update propolis to a commit after 2652487, please just bump this +# as well and delete this comment. +propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "2652487c19ede2db2cbc634b0dee3a78848dfea1" } # NOTE: see above! proptest = "1.5.0" qorb = "0.2.1" diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 39b6ea967e1..08dc5466256 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -47,7 +47,9 @@ use sled_storage::manager::StorageHandle; use slog::Logger; use std::net::IpAddr; use std::net::SocketAddr; +use std::pin::Pin; use std::sync::Arc; +use std::time::Duration; use tokio::sync::{mpsc, oneshot}; use uuid::Uuid; @@ -472,8 +474,31 @@ struct InstanceRunner { } impl InstanceRunner { + /// How long to wait for VMM shutdown to complete before forcefully + /// terminating the zone. + const STOP_GRACE_PERIOD: Duration = Duration::from_secs(60 * 10); + async fn run(mut self, mut terminate_rx: mpsc::Receiver) { use InstanceRequest::*; + + // Timeout for stopping the instance gracefully. + // + // When we send Propolis a put-state request to transition to + // Stopped`, we start this timer. If Propolis does not report back + // that it has stopped the instance within `STOP_GRACE_PERIOD`, we + // will forcibly terminate the VMM. This is to ensure that a totally + // stuck VMM does not prevent the Propolis zone from being cleaned up. + let mut stop_timeout = None; + async fn stop_timeout_completed( + stop_timeout: &mut Option>>, + ) { + if let Some(ref mut timeout) = stop_timeout { + timeout.await + } else { + std::future::pending().await + } + } + while !self.should_terminate { tokio::select! { biased; @@ -539,6 +564,25 @@ impl InstanceRunner { }, } }, + + // We are waiting for the VMM to stop, and the grace period has + // elapsed without us hearing back from Propolis that it has + // stopped the instance. Time to terminate it violently! + // + // Note that this must be a lower priority in the `select!` than + // instance monitor requests, as we would prefer to honor a + // message from the instance monitor indicating a successful + // stop, even if we have reached the timeout. + _ = stop_timeout_completed(&mut stop_timeout) => { + warn!( + self.log, + "Instance failed to stop within the grace period, \ + terminating it violently!", + ); + let mark_failed = false; + self.terminate(mark_failed).await; + } + // Requests to terminate the instance take priority over any // other request to the instance. request = terminate_rx.recv() => { @@ -584,7 +628,22 @@ impl InstanceRunner { tx.send(Ok(self.current_state())) .map_err(|_| Error::FailedSendClientClosed) }, - PutState{ state, tx } => { + PutState { state, tx } => { + // If we're going to stop the instance, start + // the timeout after which we will forcefully + // terminate the VMM. + if let VmmStateRequested::Stopped = state { + // Only start the stop timeout if there + // isn't one already, so that additional + // requests to stop coming in don't reset + // the clock. + if stop_timeout.is_none() { + stop_timeout = Some(Box::pin(tokio::time::sleep( + Self::STOP_GRACE_PERIOD + ))); + } + } + tx.send(self.put_state(state).await .map(|r| VmmPutStateResponse { updated_runtime: Some(r) }) .map_err(|e| e.into())) @@ -639,6 +698,7 @@ impl InstanceRunner { } } + self.publish_state_to_nexus().await; // Okay, now that we've terminated the instance, drain any outstanding @@ -2383,8 +2443,6 @@ mod tests { // note the "mock" here is different from the vnic/zone contexts above. // this is actually running code for a dropshot server from propolis. - // (might we want a locally-defined fake whose behavior we can control - // more directly from the test driver?) // TODO: factor out, this is also in sled-agent-sim. fn propolis_mock_server( log: &Logger, @@ -2407,6 +2465,30 @@ mod tests { (srv, client) } + async fn propolis_mock_set_mode( + client: &PropolisClient, + mode: propolis_mock_server::MockMode, + ) { + let url = format!("{}/mock/mode", client.baseurl()); + client + .client() + .put(url) + .json(&mode) + .send() + .await + .expect("setting mock mode failed unexpectedly"); + } + + async fn propolis_mock_step(client: &PropolisClient) { + let url = format!("{}/mock/step", client.baseurl()); + client + .client() + .put(url) + .send() + .await + .expect("single-stepping mock server failed unexpectedly"); + } + async fn setup_storage_manager(log: &Logger) -> StorageManagerTestHarness { let mut harness = StorageManagerTestHarness::new(log).await; let raw_disks = @@ -2957,4 +3039,193 @@ mod tests { storage_harness.cleanup().await; logctx.cleanup_successful(); } + + // Tests a scenario in which a propolis-server process fails to stop in a + // timely manner (i.e. it's gotten stuck somehow). This test asserts that + // the sled-agent will eventually forcibly terminate the VMM process, tear + // down the zone, and tell Nexus that the VMM has been destroyed, even + // if Propolis can't shut itself down nicely. + #[tokio::test] + async fn test_instance_manager_stop_timeout() { + let logctx = omicron_test_utils::dev::test_setup_log( + "test_instance_manager_stop_timeout", + ); + let log = logctx.log.new(o!(FileKv)); + + // automock'd things used during this test + let _mock_vnic_contexts = mock_vnic_contexts(); + let _mock_zone_contexts = mock_zone_contexts(); + + let mut storage_harness = setup_storage_manager(&logctx.log).await; + let storage_handle = storage_harness.handle().clone(); + + let FakeNexusParts { + nexus_client, + mut state_rx, + _dns_server, + _nexus_server, + } = FakeNexusParts::new(&log).await; + + let temp_guard = Utf8TempDir::new().unwrap(); + let temp_dir = temp_guard.path().to_string(); + + let (services, _metrics_rx) = fake_instance_manager_services( + &log, + storage_handle, + nexus_client, + &temp_dir, + ); + let InstanceManagerServices { + nexus_client, + vnic_allocator: _, + port_manager, + storage, + zone_bundler, + zone_builder_factory, + metrics_queue, + } = services; + + let etherstub = Etherstub("mystub".to_string()); + + let vmm_reservoir_manager = VmmReservoirManagerHandle::stub_for_test(); + + let mgr = crate::instance_manager::InstanceManager::new( + logctx.log.new(o!("component" => "InstanceManager")), + nexus_client, + etherstub, + port_manager, + storage, + zone_bundler, + zone_builder_factory, + vmm_reservoir_manager, + metrics_queue, + ) + .unwrap(); + + let (propolis_server, propolis_client) = + propolis_mock_server(&logctx.log); + let propolis_addr = propolis_server.local_addr(); + + let instance_id = InstanceUuid::new_v4(); + let propolis_id = PropolisUuid::from_untyped_uuid(PROPOLIS_ID); + let zone_name = propolis_zone_name(&propolis_id); + let InstanceInitialState { + hardware, + vmm_runtime, + propolis_addr, + migration_id: _, + } = fake_instance_initial_state(propolis_addr); + + let metadata = InstanceMetadata { + silo_id: Uuid::new_v4(), + project_id: Uuid::new_v4(), + }; + let sled_identifiers = SledIdentifiers { + rack_id: Uuid::new_v4(), + sled_id: Uuid::new_v4(), + model: "fake-model".into(), + revision: 1, + serial: "fake-serial".into(), + }; + + mgr.ensure_registered( + propolis_id, + InstanceEnsureBody { + instance_id, + migration_id: None, + hardware, + vmm_runtime, + propolis_addr, + metadata, + }, + sled_identifiers, + ) + .await + .unwrap(); + + mgr.ensure_state(propolis_id, VmmStateRequested::Running) + .await + .unwrap(); + + timeout( + TIMEOUT_DURATION, + state_rx.wait_for(|maybe_state| match maybe_state { + ReceivedInstanceState::InstancePut(sled_inst_state) => { + sled_inst_state.vmm_state.state == VmmState::Running + } + _ => false, + }), + ) + .await + .expect("timed out waiting for InstanceState::Running in FakeNexus") + .expect("failed to receive VmmState' InstanceState"); + + // Place the mock propolis in single-step mode. + propolis_mock_set_mode( + &propolis_client, + propolis_mock_server::MockMode::SingleStep, + ) + .await; + + // Request the VMM stop + mgr.ensure_state(propolis_id, VmmStateRequested::Stopped) + .await + .unwrap(); + + // Single-step once. + propolis_mock_step(&propolis_client).await; + + timeout( + TIMEOUT_DURATION, + state_rx.wait_for(|maybe_state| match maybe_state { + ReceivedInstanceState::InstancePut(sled_inst_state) => { + sled_inst_state.vmm_state.state == VmmState::Stopping + } + _ => false, + }), + ) + .await + .expect("timed out waiting for VmmState::Stopping in FakeNexus") + .expect("failed to receive FakeNexus' InstanceState"); + + // NOW WE STOP ADVANCING THE MOCK PROPOLIS STATE MACHINE --- IT WILL + // NEVER REACH `Stopped`. + + // Expect that the `InstanceRunner` will attempt to halt and remove the + // zone. + let halt_ctx = MockZones::halt_and_remove_logged_context(); + halt_ctx.expect().returning(move |_, name| { + assert_eq!(name, &zone_name); + Ok(()) + }); + + // Now, pause time and advance the Tokio clock past the stop grace + // period. This should cause the stop timeout to fire, without requiring + // the test to actually wait for ten minutes. + tokio::time::pause(); + tokio::time::advance( + super::InstanceRunner::STOP_GRACE_PERIOD + Duration::from_secs(1), + ) + .await; + tokio::time::resume(); + + // The timeout should now fire and sled-agent will murder propolis, + // allowing the zone to be destroyed. + + timeout( + TIMEOUT_DURATION, + state_rx.wait_for(|maybe_state| match maybe_state { + ReceivedInstanceState::InstancePut(sled_inst_state) => { + sled_inst_state.vmm_state.state == VmmState::Destroyed + } + _ => false, + }), + ) + .await + .expect("timed out waiting for VmmState::Stopped in FakeNexus") + .expect("failed to receive FakeNexus' InstanceState"); + + storage_harness.cleanup().await; + logctx.cleanup_successful(); + } }