Skip to content
1 change: 1 addition & 0 deletions nexus/external-api/output/nexus_tags.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ instance_disk_list GET /v1/instances/{instance}/disks
instance_ephemeral_ip_attach POST /v1/instances/{instance}/external-ips/ephemeral
instance_ephemeral_ip_detach DELETE /v1/instances/{instance}/external-ips/ephemeral
instance_external_ip_list GET /v1/instances/{instance}/external-ips
instance_force_terminate POST /v1/instances/{instance}/force-terminate
instance_list GET /v1/instances
instance_network_interface_create POST /v1/network-interfaces
instance_network_interface_delete DELETE /v1/network-interfaces/{interface}
Expand Down
18 changes: 18 additions & 0 deletions nexus/external-api/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1166,6 +1166,24 @@ pub trait NexusExternalApi {
path_params: Path<params::InstancePath>,
) -> Result<HttpResponseAccepted<Instance>, HttpError>;

/// Terminate instance
///
/// Immediately halts a running instance by rudely terminating its
/// virtual machine process. This immediately moves the instance to the
/// "stopped" state without transitioning through the "stopping" state.
/// This operation can be used to recover an instance that is not
/// responding to requests to stop issued through the instance stop API.
Comment on lines +1173 to +1175
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

putting my mildly-uninformed-user hat on, is there something important i could be missing out on by not transitioning through "stopping"? resources that could be leaked (seems unlikely), or internal instance state that could get weird (seems more likely)? from what's here it doesn't seem unreasonable for a user to always /force-terminate on the assumption that it's more like yanking power, and i dunno how much anyone would be disturbed by that. i recognize this is also kind of the ambiguity @gjcolombo was trying to address, sorry :)

putting my Oxide engineer hat on, it feels like any reason to use /force-terminate is a result of a user trying to unwedge themselves from an Oxide bug. so maybe that's the kind of warding this documentation deserves? though i'm still not sure how load-bearing stopping is..

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The instance will still transition through the Stopping state if you are querying it in the meantime, which will be visible in e.g. the console. It's just that the force-terminate API endpoint does not return until the instance has advanced to Stopped.

#[endpoint {
method = POST,
path = "/v1/instances/{instance}/force-terminate",
tags = ["instances"],
}]
async fn instance_force_terminate(
rqctx: RequestContext<Self::Context>,
query_params: Query<params::OptionalProjectSelector>,
path_params: Path<params::InstancePath>,
) -> Result<HttpResponseAccepted<Instance>, HttpError>;

/// Fetch instance serial console
#[endpoint {
method = GET,
Expand Down
197 changes: 192 additions & 5 deletions nexus/src/app/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -774,6 +774,190 @@ impl super::Nexus {
.map_err(Into::into)
}

/// Forcefully stop a running instance, causing its sled-agent to rudely
/// terminate its VMM process and unregister the instance.
///
/// Both the active VMM and, if a migration is in flight, the target VMM
/// are torn down. A failure tearing down one VMM does not skip the
/// other; errors are deferred until both teardowns have been attempted.
/// On success, the instance's freshly re-fetched state is returned.
pub(crate) async fn instance_force_terminate(
    &self,
    opctx: &OpContext,
    instance_lookup: &lookup::Instance<'_>,
) -> Result<InstanceAndActiveVmm, InstanceStateChangeError> {
    let (.., authz_instance) =
        instance_lookup.lookup_for(authz::Action::Modify).await?;

    // Snapshot the instance record and any VMMs currently incarnating it.
    let db::datastore::InstanceGestalt {
        instance,
        active_vmm,
        target_vmm,
        migration: _,
    } = self
        .db_datastore
        .instance_fetch_all(opctx, &authz_instance)
        .await?;

    // Tear down the active VMM first, if one exists. Note that the two
    // teardowns below run one after the other, not concurrently.
    let active_result = if let Some(vmm) = active_vmm {
        self.instance_force_terminate_vmm(
            opctx,
            &authz_instance,
            vmm,
            "active",
        )
        .await
    } else {
        debug!(
            opctx.log,
            "asked to force terminate an instance that has no active VMM";
            "instance_id" => %authz_instance.id(),
            "instance_state" => ?instance.runtime(),
        );
        Ok(())
    };

    // Then hunt down a migration-target VMM, should one exist.
    let target_result = if let Some(vmm) = target_vmm {
        self.instance_force_terminate_vmm(
            opctx,
            &authz_instance,
            vmm,
            "target",
        )
        .await
    } else {
        Ok(())
    };

    // Only now surface a teardown failure, so that an error terminating
    // the active VMM did not prevent the target-VMM attempt above.
    active_result?;
    target_result?;

    // Ladies and gentlemen, we got him! Return the instance's current
    // state, re-read from the datastore.
    self.db_datastore
        .instance_fetch_with_vmm(opctx, &authz_instance)
        .await
        .map_err(Into::into)
}

/// Forcefully terminate a VMM associated with an instance (by calling
/// [`Self::instance_ensure_unregistered`]), and then update the instance's
/// state to reflect that the VMM has been unregistered.
///
/// # Arguments
///
/// - `opctx`: the [`OpContext`] for this action
/// - `authz_instance`: the instance associated with the VMM, so that the
/// instance can be updated to reflect the new VMM state.
/// - `vmm`: the VMM to forcefully terminate
/// - `vmm_role`: a string ("active" or "target") for logging which VMM is
/// being terminated.
async fn instance_force_terminate_vmm(
&self,
opctx: &OpContext,
authz_instance: &authz::Instance,
vmm: db::model::Vmm,
vmm_role: &str,
) -> Result<(), InstanceStateChangeError> {
let propolis_id = PropolisUuid::from_untyped_uuid(vmm.id);
let sled_id = SledUuid::from_untyped_uuid(vmm.sled_id);
let unregister_result =
self.instance_ensure_unregistered(&propolis_id, &sled_id).await;
Comment on lines +877 to +878
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does the start saga clean up properly if this happens between its ensure_registered and ensure_running steps? I think it works out: sis_ensure_running will fail; sis_ensure_registered_undo will also fail and try to move the VMM to Failed, but this doesn't block the saga from continuing to unwind; then I think we'll end up in SagaUnwound and end up with a VM that can be started again. Does that sound about right? If so, it might be worthwhile to add a comment mentioning this case.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think so, but I would like to figure out whether it's possible to test this...

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, upon further inspection, I believe your assessment is correct and this is fine.

I think that sis_ensure_registered_undo failing to move the VMM to Failed will get the saga stuck:

match e {
InstanceStateChangeError::SledAgent(inner) if inner.vmm_gone() => {
error!(osagactx.log(),
"start saga: failing instance after unregister failure";
"instance_id" => %instance_id,
"start_reason" => ?params.reason,
"error" => ?inner);
if let Err(set_failed_error) = osagactx
.nexus()
.mark_vmm_failed(&opctx, authz_instance, &db_vmm, &inner)
.await
{
error!(osagactx.log(),
"start saga: failed to mark instance as failed";
"instance_id" => %instance_id,
"start_reason" => ?params.reason,
"error" => ?set_failed_error);
Err(set_failed_error.into())

However, mark_vmm_failed won't actually fail here if the VMM's state generation is stale, it will just scream about it:

// XXX: It's not clear what to do with this error; should it be
// bubbled back up to the caller?
Err(e) => error!(self.log,
"failed to write Failed instance state to DB";
"instance_id" => %instance_id,
"vmm_id" => %vmm_id,
"error" => ?e),

So, I think we're in the clear here. But, I feel a bit uncomfortable about this, because it seems like a change to the mark_vmm_failed error path to actually return an error could introduce a bug here, and I'm not immediately sure if there's an obvious way to write a regression test that would fail on such a change. Any ideas?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we do something with error types instead? That is: have mark_vmm_failed return its own error enum with "DB query failed" and "update too old" variants, and have sis_ensure_registered_undo match on specific error codes from this callee, such that a new failure mode will break the match. WDYT? It'd be nice to have an integration test, too, but I'm similarly having trouble figuring out how to inject a failure into this specific call, since we don't have a CRDB test double that I know of.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, actually, the write to CRDB in mark_vmm_failed is done by a call to vmm_update_runtime, which returns Ok(false) if the VMM exists but wasn't updated (e.g. if the generation has advanced). So, we don't even hit the error path (which gets ignored anyway) in mark_vmm_failed in that case...

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Which, now that I look at it, indicates a tiny possible inefficiency in mark_vmm_failed --- currently, we try to run an update saga even if the VMM did not move to failed (because it was already failed/destroyed), which means we will probably start spurious update sagas there. I'll clean that up.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I'm probably fine with having mark_vmm_failed stick with the convention of returning Ok(false) if the generation has changed, instead of an error variant, since it seems like we generally follow that convention for similar code. On one hand, I do have a sort of ideological preference for an Ok(_) to always mean "yes, we actually wrote the desired update to CRDB", but on the other hand, a lot of saga undo actions probably just ? these functions, and would probably prefer to get an Ok in any case that doesn't mean they should unwind. I dunno.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mark_vmm_failed actually has the type signature of async fn(...) -> Result<(), Error> but there are actually no conditions in which it will ever currently return an error. That's cool.

I might just change the type signature to not return Result

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Upon further perusal of sis_ensure_registered_undo, I noticed that it was actually handling sled-agent unregister errors in a pretty outdated way: it was treating vmm_gone errors as a probable failure, and just ignoring any other errors communicating with the sled-agent, as described in this comment:

// If the failure came from talking to sled agent, and the error code
// indicates the instance or sled might be unhealthy, manual
// intervention is likely to be needed, so try to mark the instance as
// Failed and then bail on unwinding.
//
// If sled agent is in good shape but just doesn't know about the
// instance, this saga still owns the instance's state, so allow
// unwinding to continue.
//
// If some other Nexus error occurred, this saga is in bad shape, so
// return an error indicating that intervention is needed without trying
// to modify the instance further.
//
// TODO(#3238): `instance_unhealthy` does not take an especially nuanced
// view of the meanings of the error codes sled agent could return, so
// assuming that an error that isn't `instance_unhealthy` means
// that everything is hunky-dory and it's OK to continue unwinding may
// be a bit of a stretch. See the definition of `instance_unhealthy` for
// more details.

This predates the changes to vmm_gone/instance_unhealthy semantics from RFD 486/#6455, and isn't quite correct. I've changed it to make the "other error from sled agent" be the case that fails the saga, although we should probably retry communication errors eventually.

The changes to mark_vmm_failed and sis_ensure_registered_undo are in a923f2a. Arguably, it's not really in scope for this PR at that point; I'd be happy to cherry-pick it out to a separate branch if you think that's worth doing?

match unregister_result {
// VMM unregistered, now process the state transition.
Ok(Some(state)) => {
info!(
opctx.log,
"instance's {vmm_role} VMM terminated with extreme \
prejudice";
"instance_id" => %authz_instance.id(),
"vmm_id" => %propolis_id,
"sled_id" => %sled_id,
);

// We would like the caller to see the instance they are
// attempting to terminate go to "Stopped", so run the
// instance-update saga synchronously if possible. This is
// particularly important in the case where a migrating instance
// is force-terminated, as an instance with a migration ID will
// remain "Migrating" (not "Stopping") until its migration ID is
// unset, and it seems a bit sad to return a "Migrating"
// instance to a caller who tries to force-kill it.
//
// TODO(eliza): in the case where we are terminating both an
// active VMM and migration target, we will unregister them in
// sequence, running separate update sagas for both VMMs being
// terminated. It would be a bit more efficient to terminate
// both VMMs, update CRDB, and then run a *single* update saga
// to process both VMMs being destroyed. However, this requires
// a bit of annoying refactoring to existing APIs, and I'm not
// sure if improving the `instance-force-terminate` endpoint's
// latency is particularly important...
if let Some((_, saga)) = process_vmm_update(
&self.db_datastore,
opctx,
propolis_id,
&state,
)
.await?
{
self.sagas
.saga_prepare(saga)
.await?
.start()
.await?
.wait_until_stopped()
.await
.into_omicron_result()?;
}
}
// If the returned state from sled-agent is `None`, then the
// instance was already unregistered. This may have been from a
// prior attempt to stop the instance (either normally or
// forcefully). But, since we observed an active VMM above, the
// current observed VMM generation doesn't know that the VMM is
// gone, so it is possible that the sled-agent has misplaced this
// instance. Therefore, we will attempt to mark the VMM as `Failed`
// at the generation after which we observed the VMM. This is safe
// to do here, because if the instance has been unregistered due to
// a race with another instance-ensure-unregistered request (rather
// than a sled-agent failure), that other call will have advanced
// the state generation, and our attempt to write the failed state
// will not succeed, which is fine.
//
// Either way, the caller should not observe a returned instance
// state that believes itself to be running.
Ok(None) => {
info!(
opctx.log,
"asked to force terminate an instance's {vmm_role} VMM that \
 was already unregistered";
"instance_id" => %authz_instance.id(),
"vmm_id" => %propolis_id,
"sled_id" => %sled_id,
);
let _ = self
.mark_vmm_failed(
&opctx,
authz_instance.clone(),
&vmm,
&"instance already unregistered",
)
.await;
}
// If the error indicates that the VMM wasn't there to terminate,
// mark it as Failed instead.
Err(InstanceStateChangeError::SledAgent(e)) if e.vmm_gone() => {
let _ = self
.mark_vmm_failed(&opctx, authz_instance.clone(), &vmm, &e)
.await;
}
Err(e) => return Err(e),
}
Ok(())
}

/// Idempotently ensures that the sled specified in `db_instance` does not
/// have a record of the instance. If the instance is currently running on
/// this sled, this operation rudely terminates it.
Expand Down Expand Up @@ -1407,19 +1591,22 @@ impl super::Nexus {
/// execute, as this may just mean that another saga is already updating the
/// instance. The update will be performed eventually even if this method
/// could not update the instance.
pub(crate) async fn mark_vmm_failed(
pub(crate) async fn mark_vmm_failed<R>(
&self,
opctx: &OpContext,
authz_instance: authz::Instance,
vmm: &db::model::Vmm,
reason: &SledAgentInstanceError,
) -> Result<(), Error> {
reason: &R,
) -> Result<(), Error>
where
R: std::fmt::Display,
{
let instance_id = InstanceUuid::from_untyped_uuid(authz_instance.id());
let vmm_id = PropolisUuid::from_untyped_uuid(vmm.id);
error!(self.log, "marking VMM failed due to sled agent API error";
"instance_id" => %instance_id,
"vmm_id" => %vmm_id,
"error" => ?reason);
"error" => %reason);

let new_runtime = VmmRuntimeState {
state: db::model::VmmState::Failed,
Expand All @@ -1433,7 +1620,7 @@ impl super::Nexus {
info!(self.log, "marked VMM as Failed, preparing update saga";
"instance_id" => %instance_id,
"vmm_id" => %vmm_id,
"reason" => ?reason,
"reason" => %reason,
);
let saga = instance_update::SagaInstanceUpdate::prepare(
&instance_update::Params {
Expand Down
30 changes: 30 additions & 0 deletions nexus/src/external_api/http_entrypoints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2271,6 +2271,36 @@ impl NexusExternalApi for NexusExternalApiImpl {
.await
}

// External API entrypoint for POST /v1/instances/{instance}/force-terminate:
// forcefully terminates an instance's VMM process(es) and returns the
// resulting instance state.
async fn instance_force_terminate(
    rqctx: RequestContext<Self::Context>,
    query_params: Query<params::OptionalProjectSelector>,
    path_params: Path<params::InstancePath>,
) -> Result<HttpResponseAccepted<Instance>, HttpError> {
    let apictx = rqctx.context();
    let nexus = &apictx.context.nexus;
    // Fold the path and query parameters into a single selector
    // identifying the instance to terminate.
    let instance_selector = params::InstanceSelector {
        project: query_params.into_inner().project,
        instance: path_params.into_inner().instance,
    };
    let handler = async {
        let opctx =
            crate::context::op_context_for_external_api(&rqctx).await?;
        let instance_lookup =
            nexus.instance_lookup(&opctx, instance_selector)?;
        let terminated = nexus
            .instance_force_terminate(&opctx, &instance_lookup)
            .await?;
        Ok(HttpResponseAccepted(terminated.into()))
    };
    // Run the handler under the latency-instrumentation wrapper so this
    // endpoint is recorded in the external API metrics.
    apictx
        .context
        .external_latencies
        .instrument_dropshot_handler(&rqctx, handler)
        .await
}

async fn instance_serial_console(
rqctx: RequestContext<ApiContext>,
path_params: Path<params::InstancePath>,
Expand Down
Loading