@@ -47,7 +47,9 @@ use sled_storage::manager::StorageHandle;
4747use slog:: Logger ;
4848use std:: net:: IpAddr ;
4949use std:: net:: SocketAddr ;
50+ use std:: pin:: Pin ;
5051use std:: sync:: Arc ;
52+ use std:: time:: Duration ;
5153use tokio:: sync:: { mpsc, oneshot} ;
5254use uuid:: Uuid ;
5355
@@ -472,8 +474,31 @@ struct InstanceRunner {
472474}
473475
474476impl InstanceRunner {
477+ /// How long to wait for VMM shutdown to complete before forcefully
478+ /// terminating the zone.
479+ const STOP_GRACE_PERIOD : Duration = Duration :: from_secs ( 60 * 10 ) ;
480+
475481 async fn run ( mut self , mut terminate_rx : mpsc:: Receiver < TerminateRequest > ) {
476482 use InstanceRequest :: * ;
483+
484+ // Timeout for stopping the instance gracefully.
485+ //
486+ // When we send Propolis a put-state request to transition to
487+ // Stopped`, we start this timer. If Propolis does not report back
488+ // that it has stopped the instance within `STOP_GRACE_PERIOD`, we
489+ // will forcibly terminate the VMM. This is to ensure that a totally
490+ // stuck VMM does not prevent the Propolis zone from being cleaned up.
491+ let mut stop_timeout = None ;
492+ async fn stop_timeout_completed (
493+ stop_timeout : & mut Option < Pin < Box < tokio:: time:: Sleep > > > ,
494+ ) {
495+ if let Some ( ref mut timeout) = stop_timeout {
496+ timeout. await
497+ } else {
498+ std:: future:: pending ( ) . await
499+ }
500+ }
501+
477502 while !self . should_terminate {
478503 tokio:: select! {
479504 biased;
@@ -539,6 +564,25 @@ impl InstanceRunner {
539564 } ,
540565 }
541566 } ,
567+
568+ // We are waiting for the VMM to stop, and the grace period has
569+ // elapsed without us hearing back from Propolis that it has
570+ // stopped the instance. Time to terminate it violently!
571+ //
572+ // Note that this must be a lower priority in the `select!` than
573+ // instance monitor requests, as we would prefer to honor a
574+ // message from the instance monitor indicating a successful
575+ // stop, even if we have reached the timeout.
576+ _ = stop_timeout_completed( & mut stop_timeout) => {
577+ warn!(
578+ self . log,
579+ "Instance failed to stop within the grace period, \
580+ terminating it violently!",
581+ ) ;
582+ let mark_failed = false ;
583+ self . terminate( mark_failed) . await ;
584+ }
585+
542586 // Requests to terminate the instance take priority over any
543587 // other request to the instance.
544588 request = terminate_rx. recv( ) => {
@@ -584,7 +628,22 @@ impl InstanceRunner {
584628 tx. send( Ok ( self . current_state( ) ) )
585629 . map_err( |_| Error :: FailedSendClientClosed )
586630 } ,
587- PutState { state, tx } => {
631+ PutState { state, tx } => {
632+ // If we're going to stop the instance, start
633+ // the timeout after which we will forcefully
634+ // terminate the VMM.
635+ if let VmmStateRequested :: Stopped = state {
636+ // Only start the stop timeout if there
637+ // isn't one already, so that additional
638+ // requests to stop coming in don't reset
639+ // the clock.
640+ if stop_timeout. is_none( ) {
641+ stop_timeout = Some ( Box :: pin( tokio:: time:: sleep(
642+ Self :: STOP_GRACE_PERIOD
643+ ) ) ) ;
644+ }
645+ }
646+
588647 tx. send( self . put_state( state) . await
589648 . map( |r| VmmPutStateResponse { updated_runtime: Some ( r) } )
590649 . map_err( |e| e. into( ) ) )
@@ -639,6 +698,7 @@ impl InstanceRunner {
639698
640699 }
641700 }
701+
642702 self . publish_state_to_nexus ( ) . await ;
643703
644704 // Okay, now that we've terminated the instance, drain any outstanding
@@ -2383,8 +2443,6 @@ mod tests {
23832443
23842444 // note the "mock" here is different from the vnic/zone contexts above.
23852445 // this is actually running code for a dropshot server from propolis.
2386- // (might we want a locally-defined fake whose behavior we can control
2387- // more directly from the test driver?)
23882446 // TODO: factor out, this is also in sled-agent-sim.
23892447 fn propolis_mock_server (
23902448 log : & Logger ,
@@ -2407,6 +2465,30 @@ mod tests {
24072465 ( srv, client)
24082466 }
24092467
2468+ async fn propolis_mock_set_mode (
2469+ client : & PropolisClient ,
2470+ mode : propolis_mock_server:: MockMode ,
2471+ ) {
2472+ let url = format ! ( "{}/mock/mode" , client. baseurl( ) ) ;
2473+ client
2474+ . client ( )
2475+ . put ( url)
2476+ . json ( & mode)
2477+ . send ( )
2478+ . await
2479+ . expect ( "setting mock mode failed unexpectedly" ) ;
2480+ }
2481+
2482+ async fn propolis_mock_step ( client : & PropolisClient ) {
2483+ let url = format ! ( "{}/mock/step" , client. baseurl( ) ) ;
2484+ client
2485+ . client ( )
2486+ . put ( url)
2487+ . send ( )
2488+ . await
2489+ . expect ( "single-stepping mock server failed unexpectedly" ) ;
2490+ }
2491+
24102492 async fn setup_storage_manager ( log : & Logger ) -> StorageManagerTestHarness {
24112493 let mut harness = StorageManagerTestHarness :: new ( log) . await ;
24122494 let raw_disks =
@@ -2957,4 +3039,193 @@ mod tests {
29573039 storage_harness. cleanup ( ) . await ;
29583040 logctx. cleanup_successful ( ) ;
29593041 }
3042+
3043+ // Tests a scenario in which a propolis-server process fails to stop in a
3044+ // timely manner (i.e. it's gotten stuck somehow). This test asserts that
3045+ // the sled-agent will eventually forcibly terminate the VMM process, tear
3046+ // down the zone, and tell Nexus that the VMM has been destroyed, even
3047+ // if Propolis can't shut itself down nicely.
3048+ #[ tokio:: test]
3049+ async fn test_instance_manager_stop_timeout ( ) {
3050+ let logctx = omicron_test_utils:: dev:: test_setup_log (
3051+ "test_instance_manager_stop_timeout" ,
3052+ ) ;
3053+ let log = logctx. log . new ( o ! ( FileKv ) ) ;
3054+
3055+ // automock'd things used during this test
3056+ let _mock_vnic_contexts = mock_vnic_contexts ( ) ;
3057+ let _mock_zone_contexts = mock_zone_contexts ( ) ;
3058+
3059+ let mut storage_harness = setup_storage_manager ( & logctx. log ) . await ;
3060+ let storage_handle = storage_harness. handle ( ) . clone ( ) ;
3061+
3062+ let FakeNexusParts {
3063+ nexus_client,
3064+ mut state_rx,
3065+ _dns_server,
3066+ _nexus_server,
3067+ } = FakeNexusParts :: new ( & log) . await ;
3068+
3069+ let temp_guard = Utf8TempDir :: new ( ) . unwrap ( ) ;
3070+ let temp_dir = temp_guard. path ( ) . to_string ( ) ;
3071+
3072+ let ( services, _metrics_rx) = fake_instance_manager_services (
3073+ & log,
3074+ storage_handle,
3075+ nexus_client,
3076+ & temp_dir,
3077+ ) ;
3078+ let InstanceManagerServices {
3079+ nexus_client,
3080+ vnic_allocator : _,
3081+ port_manager,
3082+ storage,
3083+ zone_bundler,
3084+ zone_builder_factory,
3085+ metrics_queue,
3086+ } = services;
3087+
3088+ let etherstub = Etherstub ( "mystub" . to_string ( ) ) ;
3089+
3090+ let vmm_reservoir_manager = VmmReservoirManagerHandle :: stub_for_test ( ) ;
3091+
3092+ let mgr = crate :: instance_manager:: InstanceManager :: new (
3093+ logctx. log . new ( o ! ( "component" => "InstanceManager" ) ) ,
3094+ nexus_client,
3095+ etherstub,
3096+ port_manager,
3097+ storage,
3098+ zone_bundler,
3099+ zone_builder_factory,
3100+ vmm_reservoir_manager,
3101+ metrics_queue,
3102+ )
3103+ . unwrap ( ) ;
3104+
3105+ let ( propolis_server, propolis_client) =
3106+ propolis_mock_server ( & logctx. log ) ;
3107+ let propolis_addr = propolis_server. local_addr ( ) ;
3108+
3109+ let instance_id = InstanceUuid :: new_v4 ( ) ;
3110+ let propolis_id = PropolisUuid :: from_untyped_uuid ( PROPOLIS_ID ) ;
3111+ let zone_name = propolis_zone_name ( & propolis_id) ;
3112+ let InstanceInitialState {
3113+ hardware,
3114+ vmm_runtime,
3115+ propolis_addr,
3116+ migration_id : _,
3117+ } = fake_instance_initial_state ( propolis_addr) ;
3118+
3119+ let metadata = InstanceMetadata {
3120+ silo_id : Uuid :: new_v4 ( ) ,
3121+ project_id : Uuid :: new_v4 ( ) ,
3122+ } ;
3123+ let sled_identifiers = SledIdentifiers {
3124+ rack_id : Uuid :: new_v4 ( ) ,
3125+ sled_id : Uuid :: new_v4 ( ) ,
3126+ model : "fake-model" . into ( ) ,
3127+ revision : 1 ,
3128+ serial : "fake-serial" . into ( ) ,
3129+ } ;
3130+
3131+ mgr. ensure_registered (
3132+ propolis_id,
3133+ InstanceEnsureBody {
3134+ instance_id,
3135+ migration_id : None ,
3136+ hardware,
3137+ vmm_runtime,
3138+ propolis_addr,
3139+ metadata,
3140+ } ,
3141+ sled_identifiers,
3142+ )
3143+ . await
3144+ . unwrap ( ) ;
3145+
3146+ mgr. ensure_state ( propolis_id, VmmStateRequested :: Running )
3147+ . await
3148+ . unwrap ( ) ;
3149+
3150+ timeout (
3151+ TIMEOUT_DURATION ,
3152+ state_rx. wait_for ( |maybe_state| match maybe_state {
3153+ ReceivedInstanceState :: InstancePut ( sled_inst_state) => {
3154+ sled_inst_state. vmm_state . state == VmmState :: Running
3155+ }
3156+ _ => false ,
3157+ } ) ,
3158+ )
3159+ . await
3160+ . expect ( "timed out waiting for InstanceState::Running in FakeNexus" )
3161+ . expect ( "failed to receive VmmState' InstanceState" ) ;
3162+
3163+ // Place the mock propolis in single-step mode.
3164+ propolis_mock_set_mode (
3165+ & propolis_client,
3166+ propolis_mock_server:: MockMode :: SingleStep ,
3167+ )
3168+ . await ;
3169+
3170+ // Request the VMM stop
3171+ mgr. ensure_state ( propolis_id, VmmStateRequested :: Stopped )
3172+ . await
3173+ . unwrap ( ) ;
3174+
3175+ // Single-step once.
3176+ propolis_mock_step ( & propolis_client) . await ;
3177+
3178+ timeout (
3179+ TIMEOUT_DURATION ,
3180+ state_rx. wait_for ( |maybe_state| match maybe_state {
3181+ ReceivedInstanceState :: InstancePut ( sled_inst_state) => {
3182+ sled_inst_state. vmm_state . state == VmmState :: Stopping
3183+ }
3184+ _ => false ,
3185+ } ) ,
3186+ )
3187+ . await
3188+ . expect ( "timed out waiting for VmmState::Stopping in FakeNexus" )
3189+ . expect ( "failed to receive FakeNexus' InstanceState" ) ;
3190+
3191+ // NOW WE STOP ADVANCING THE MOCK PROPOLIS STATE MACHINE --- IT WILL
3192+ // NEVER REACH `Stopped`.
3193+
3194+ // Expect that the `InstanceRunner` will attempt to halt and remove the
3195+ // zone.
3196+ let halt_ctx = MockZones :: halt_and_remove_logged_context ( ) ;
3197+ halt_ctx. expect ( ) . returning ( move |_, name| {
3198+ assert_eq ! ( name, & zone_name) ;
3199+ Ok ( ( ) )
3200+ } ) ;
3201+
3202+ // Now, pause time and advance the Tokio clock past the stop grace
3203+ // period. This should cause the stop timeout to fire, without requiring
3204+ // the test to actually wait for ten minutes.
3205+ tokio:: time:: pause ( ) ;
3206+ tokio:: time:: advance (
3207+ super :: InstanceRunner :: STOP_GRACE_PERIOD + Duration :: from_secs ( 1 ) ,
3208+ )
3209+ . await ;
3210+ tokio:: time:: resume ( ) ;
3211+
3212+ // The timeout should now fire and sled-agent will murder propolis,
3213+ // allowing the zone to be destroyed.
3214+
3215+ timeout (
3216+ TIMEOUT_DURATION ,
3217+ state_rx. wait_for ( |maybe_state| match maybe_state {
3218+ ReceivedInstanceState :: InstancePut ( sled_inst_state) => {
3219+ sled_inst_state. vmm_state . state == VmmState :: Destroyed
3220+ }
3221+ _ => false ,
3222+ } ) ,
3223+ )
3224+ . await
3225+ . expect ( "timed out waiting for VmmState::Stopped in FakeNexus" )
3226+ . expect ( "failed to receive FakeNexus' InstanceState" ) ;
3227+
3228+ storage_harness. cleanup ( ) . await ;
3229+ logctx. cleanup_successful ( ) ;
3230+ }
29603231}
0 commit comments