
Commit 9190d05

Merge pull request ceph#60096 from adk3798/maintenance-mode-dead-host
mgr/cephadm: offline host handling for maintenance exit command

Reviewed-by: John Mulligan <[email protected]>
2 parents 4365808 + 565bb14 commit 9190d05

File tree: 5 files changed, 82 additions (+) and 36 deletions (-)

doc/cephadm/host-management.rst
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/cephadm/tests/test_cephadm.py
src/pybind/mgr/orchestrator/_interface.py
src/pybind/mgr/orchestrator/module.py


doc/cephadm/host-management.rst

Lines changed: 10 additions & 5 deletions
@@ -233,11 +233,16 @@ Place a host in and out of maintenance mode (stops all Ceph daemons on host):
 .. prompt:: bash #
 
    ceph orch host maintenance enter <hostname> [--force] [--yes-i-really-mean-it]
-   ceph orch host maintenance exit <hostname>
-
-The ``--force`` flag allows the user to bypass warnings (but not alerts). The ``--yes-i-really-mean-it``
-flag bypasses all safety checks and will attempt to force the host into maintenance mode no
-matter what.
+   ceph orch host maintenance exit <hostname> [--force] [--offline]
+
+The ``--force`` flag on the ``enter`` command allows the user to bypass warnings (but not alerts).
+The ``--yes-i-really-mean-it`` flag bypasses all safety checks and will attempt to force the
+host into maintenance mode no matter what. The ``--force`` and ``--offline`` flags to the ``exit`` command
+can be used to have cephadm mark a host that is in maintenance mode and offline as no longer
+in maintenance mode. Note that in this case, if the host comes online, the Ceph daemons
+on the host will remain in the stopped state. The ``--force`` and ``--offline`` flags to the ``exit``
+command are intended to be run for hosts in maintenance mode that are permanently offline
+before removing the host entirely from cephadm management using the ``ceph orch host rm`` command.
 
 .. warning:: Using the --yes-i-really-mean-it flag to force the host to enter maintenance
    mode can potentially cause loss of data availability, the mon quorum to break down due
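
The documentation change above describes the intended workflow for a host that is permanently offline while in maintenance mode: force it out of maintenance, then remove it from cephadm management. As a rough illustration of that sequence (the hostname host01 is hypothetical, and the flags that ceph orch host rm accepts for an offline host should be checked against the cephadm host-management docs):

    ceph orch host maintenance exit host01 --offline --force
    ceph orch host rm host01 --offline --force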

src/pybind/mgr/cephadm/module.py

Lines changed: 68 additions & 28 deletions
@@ -1903,7 +1903,7 @@ def _add_host(self, spec):
         self.inventory.add_host(spec)
         self.offline_hosts_remove(spec.hostname)
         if spec.status == 'maintenance':
-            self._set_maintenance_healthcheck()
+            self.set_maintenance_healthcheck()
         self.event.set()  # refresh stray health check
         self.log.info('Added host %s' % spec.hostname)
         return "Added host '{}' with addr '{}'".format(spec.hostname, spec.addr)
@@ -2074,6 +2074,7 @@ def run_cmd(cmd_args: dict) -> None:
         self.ssh.reset_con(host)
         # if host was in offline host list, we should remove it now.
         self.offline_hosts_remove(host)
+        self.set_maintenance_healthcheck()
         self.event.set()  # refresh stray health check
         self.log.info('Removed host %s' % host)
         return "Removed {} host '{}'".format('offline' if offline else '', host)
@@ -2188,7 +2189,7 @@ def host_ok_to_stop(self, hostname: str) -> str:
         self.log.info(msg)
         return msg

-    def _set_maintenance_healthcheck(self) -> None:
+    def set_maintenance_healthcheck(self) -> None:
         """Raise/update or clear the maintenance health check as needed"""

         in_maintenance = self.inventory.get_host_with_state("maintenance")
@@ -2272,19 +2273,21 @@ def enter_host_maintenance(self, hostname: str, force: bool = False, yes_i_reall
         self.inventory._inventory[hostname] = tgt_host
         self.inventory.save()

-        self._set_maintenance_healthcheck()
+        self.set_maintenance_healthcheck()
         return f'Daemons for Ceph cluster {self._cluster_fsid} stopped on host {hostname}. Host {hostname} moved to maintenance mode'

     @handle_orch_error
     @host_exists()
-    def exit_host_maintenance(self, hostname: str) -> str:
+    def exit_host_maintenance(self, hostname: str, force: bool = False, offline: bool = False) -> str:
         """Exit maintenance mode and return a host to an operational state

         Returning from maintenance will enable the clusters systemd target and
         start it, and remove any noout that has been added for the host if the
         host has osd daemons

         :param hostname: (str) host name
+        :param force: (bool) force removal of the host from maintenance mode
+        :param offline: (bool) to remove hosts that are offline from maintenance mode

         :raises OrchestratorError: Unable to return from maintenance, or unset the
                 noout flag
@@ -2293,37 +2296,74 @@
         if tgt_host['status'] != "maintenance":
             raise OrchestratorError(f"Host {hostname} is not in maintenance mode")

-        with self.async_timeout_handler(hostname, 'cephadm host-maintenance exit'):
-            outs, errs, _code = self.wait_async(
-                CephadmServe(self)._run_cephadm(hostname, cephadmNoImage,
-                                                'host-maintenance', ['exit'], error_ok=True))
-        returned_msg = errs[0].split('\n')[-1]
-        if returned_msg.startswith('failed') or returned_msg.startswith('ERROR'):
-            raise OrchestratorError(
-                f"Failed to exit maintenance state for host {hostname}, cluster {self._cluster_fsid}")
-
-        if "osd" in self.cache.get_daemon_types(hostname):
-            crush_node = hostname if '.' not in hostname else hostname.split('.')[0]
-            rc, _out, _err = self.mon_command({
-                'prefix': 'osd unset-group',
-                'flags': 'noout',
-                'who': [crush_node],
-                'format': 'json'
-            })
-            if rc:
+        # Given we do not regularly check maintenance mode hosts for being offline,
+        # we have no idea at this point whether the host is online or not.
+        # Keep in mind this goes both ways, as users could have run
+        # "ceph cephadm check-host <hostname>" when the host was in maintenance
+        # mode and offline and the host could have since come online. This following
+        # "cephadm check-host" command is being run purely so we know if the host
+        # is online or offline, as those should be handled differently
+        try:
+            with self.async_timeout_handler(hostname, 'cephadm check-host'):
+                outs, errs, _code = self.wait_async(
+                    CephadmServe(self)._run_cephadm(
+                        hostname, cephadmNoImage,
+                        'check-host', [], error_ok=False
+                    )
+                )
+        except OrchestratorError:
+            pass
+
+        host_offline = hostname in self.offline_hosts
+
+        if host_offline and not offline:
+            raise OrchestratorValidationError(
+                f'{hostname} is offline, please use --offline and --force to take this host out of maintenance mode')
+
+        if not host_offline and offline:
+            raise OrchestratorValidationError(
+                f'{hostname} is online, please take host out of maintenance mode without --offline.')
+
+        if offline and not force:
+            raise OrchestratorValidationError("Taking an offline host out of maintenance mode requires --force")
+
+        # no point trying these parts if we know the host is offline
+        if not host_offline:
+            with self.async_timeout_handler(hostname, 'cephadm host-maintenance exit'):
+                outs, errs, _code = self.wait_async(
+                    CephadmServe(self)._run_cephadm(hostname, cephadmNoImage,
+                                                    'host-maintenance', ['exit'], error_ok=True))
+            returned_msg = errs[0].split('\n')[-1]
+            if (returned_msg.startswith('failed') or returned_msg.startswith('ERROR')):
                 self.log.warning(
-                    f"exit maintenance request failed to UNSET the noout group for {hostname}, (rc={rc})")
-                raise OrchestratorError(f"Unable to set the osds on {hostname} to noout (rc={rc})")
-            else:
-                self.log.info(
-                    f"exit maintenance request has UNSET for the noout group on host {hostname}")
+                    f"Failed to exit maintenance state for host {hostname}, cluster {self._cluster_fsid}")
+                if not force:
+                    raise OrchestratorError(
+                        f"Failed to exit maintenance state for host {hostname}, cluster {self._cluster_fsid}")
+
+            if "osd" in self.cache.get_daemon_types(hostname):
+                crush_node = hostname if '.' not in hostname else hostname.split('.')[0]
+                rc, _out, _err = self.mon_command({
+                    'prefix': 'osd unset-group',
+                    'flags': 'noout',
+                    'who': [crush_node],
+                    'format': 'json'
+                })
+                if rc:
+                    self.log.warning(
+                        f"exit maintenance request failed to UNSET the noout group for {hostname}, (rc={rc})")
+                    if not force:
+                        raise OrchestratorError(f"Unable to set the osds on {hostname} to noout (rc={rc})")
+                else:
+                    self.log.info(
+                        f"exit maintenance request has UNSET for the noout group on host {hostname}")

         # update the host record status
         tgt_host['status'] = ""
         self.inventory._inventory[hostname] = tgt_host
         self.inventory.save()

-        self._set_maintenance_healthcheck()
+        self.set_maintenance_healthcheck()

         return f"Ceph cluster {self._cluster_fsid} on {hostname} has exited maintenance mode"
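
The new exit path boils down to a small decision table over three booleans: whether cephadm currently sees the host as offline, and whether the user passed --offline and --force. As a standalone sketch of just that validation (validate_exit_flags is a made-up name for illustration, not a cephadm API):

    # Sketch of the flag validation performed by exit_host_maintenance above.
    def validate_exit_flags(host_offline: bool, offline: bool, force: bool) -> None:
        if host_offline and not offline:
            # an offline host must be acknowledged explicitly
            raise ValueError("host is offline, use --offline and --force")
        if not host_offline and offline:
            # --offline on a reachable host is rejected as misleading
            raise ValueError("host is online, drop --offline")
        if offline and not force:
            # skipping the normal exit steps additionally requires --force
            raise ValueError("--offline requires --force")

    # Accepted combinations: both flags for an offline host, neither for an online one.
    validate_exit_flags(host_offline=True, offline=True, force=True)
    validate_exit_flags(host_offline=False, offline=False, force=False)

When validation passes for an offline host, the handler skips the cephadm host-maintenance exit call and the noout unset, clears the maintenance status in the inventory, and refreshes the health check, which is why daemons on such a host stay stopped if it ever comes back online.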

src/pybind/mgr/cephadm/tests/test_cephadm.py

Lines changed: 1 addition & 0 deletions
@@ -2720,6 +2720,7 @@ def test_tuned_profiles_settings_validation(self, facts, settings, expected_valu
         cephadm_module.cache.facts = facts
         assert cephadm_module._validate_tunedprofile_settings(spec) == expected_value

+    @mock.patch("cephadm.CephadmOrchestrator.set_maintenance_healthcheck", lambda _: None)
     @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
     def test_tuned_profiles_validation(self, cephadm_module):
         with with_host(cephadm_module, 'test'):
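
The added decorator stubs out set_maintenance_healthcheck with a no-op lambda, presumably because host add/remove inside with_host now reaches that method and would otherwise touch the health-check machinery in an unrelated test. A minimal, self-contained illustration of that patching pattern with a toy class (not the cephadm test fixtures):

    # Toy demonstration of replacing a method with a lambda,
    # mirroring the @mock.patch(..., lambda _: None) decorator added above.
    from unittest import mock

    class Orch:
        def set_health(self) -> None:
            raise RuntimeError("would touch cluster state")

        def add_host(self) -> str:
            self.set_health()  # side effect we want to skip in tests
            return "added"

    # The lambda receives the bound instance ("self") and does nothing.
    with mock.patch.object(Orch, "set_health", lambda _self: None):
        assert Orch().add_host() == "added"

Because the patch target is the class attribute, every call made while the patch is active goes to the no-op version.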

src/pybind/mgr/orchestrator/_interface.py

Lines changed: 1 addition & 1 deletion
@@ -503,7 +503,7 @@ def enter_host_maintenance(self, hostname: str, force: bool = False, yes_i_reall
         """
         raise NotImplementedError()

-    def exit_host_maintenance(self, hostname: str) -> OrchResult:
+    def exit_host_maintenance(self, hostname: str, force: bool = False, offline: bool = False) -> OrchResult:
         """
         Return a host from maintenance, restarting the clusters systemd target
         """

src/pybind/mgr/orchestrator/module.py

Lines changed: 2 additions & 2 deletions
@@ -799,11 +799,11 @@ def _host_maintenance_enter(self, hostname: str, force: bool = False, yes_i_real
         return HandleCommandResult(stdout=completion.result_str())

     @_cli_write_command('orch host maintenance exit')
-    def _host_maintenance_exit(self, hostname: str) -> HandleCommandResult:
+    def _host_maintenance_exit(self, hostname: str, force: bool = False, offline: bool = False) -> HandleCommandResult:
         """
         Return a host from maintenance, restarting all Ceph daemons (cephadm only)
         """
-        completion = self.exit_host_maintenance(hostname)
+        completion = self.exit_host_maintenance(hostname, force, offline)
         raise_if_exception(completion)

         return HandleCommandResult(stdout=completion.result_str())
