-
Notifications
You must be signed in to change notification settings - Fork 18
DRAFT + WIP [DPE-5661] patch stateful set to ensure pod isn't removed prematurely #348
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: 6/edge
Are you sure you want to change the base?
Changes from 2 commits
9c88b59
fc992f0
1d822c1
f726e56
1e09c4e
45912cf
b1b35a1
ec82c90
77e3ff4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -42,6 +42,9 @@ | |
CrossAppVersionChecker, | ||
get_charm_revision, | ||
) | ||
from lightkube import Client | ||
from lightkube.resources.apps_v1 import StatefulSet | ||
from lightkube.types import PatchType | ||
from ops.charm import ( | ||
ActionEvent, | ||
CharmBase, | ||
|
@@ -75,7 +78,11 @@ | |
) | ||
|
||
from config import Config | ||
from exceptions import AdminUserCreationError, MissingSecretError | ||
from exceptions import ( | ||
AdminUserCreationError, | ||
EarlyRemovalOfConfigServerError, | ||
MissingSecretError, | ||
) | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
@@ -85,6 +92,8 @@ | |
UNIT_SCOPE = Config.Relations.UNIT_SCOPE | ||
Scopes = Config.Relations.Scopes | ||
|
||
ONE_MINUTE = 60 | ||
ONE_YEAR = 31540000 | ||
USER_CREATING_MAX_ATTEMPTS = 5 | ||
USER_CREATION_COOLDOWN = 30 | ||
REPLICA_SET_INIT_CHECK_TIMEOUT = 10 | ||
|
@@ -95,10 +104,10 @@ class MongoDBCharm(CharmBase): | |
|
||
def __init__(self, *args): | ||
super().__init__(*args) | ||
|
||
self.framework.observe(self.on.mongod_pebble_ready, self._on_mongod_pebble_ready) | ||
self.framework.observe(self.on.config_changed, self._on_config_changed) | ||
self.framework.observe(self.on.start, self._on_start) | ||
self.framework.observe(self.on.stop, self._on_stop) | ||
self.framework.observe(self.on.update_status, self._on_update_status) | ||
self.framework.observe( | ||
self.on[Config.Relations.PEERS].relation_joined, self._relation_changes_handler | ||
|
@@ -117,7 +126,7 @@ def __init__(self, *args): | |
|
||
self.framework.observe(self.on.get_password_action, self._on_get_password) | ||
self.framework.observe(self.on.set_password_action, self._on_set_password) | ||
self.framework.observe(self.on.stop, self._on_stop) | ||
self.framework.observe(self.on.mongodb_storage_detaching, self.mongodb_storage_detaching) | ||
|
||
self.framework.observe(self.on.secret_remove, self._on_secret_remove) | ||
self.framework.observe(self.on.secret_changed, self._on_secret_changed) | ||
|
@@ -153,6 +162,10 @@ def __init__(self, *args): | |
) | ||
|
||
# BEGIN: properties | ||
@property | ||
def _is_removing_last_replica(self) -> bool: | ||
"""Returns True if the last replica (juju unit) is getting removed.""" | ||
return self.app.planned_units() == 0 and len(self.peers_units) == 0 | ||
|
||
@property | ||
def monitoring_jobs(self) -> list[dict[str, Any]]: | ||
|
@@ -549,6 +562,7 @@ def _compare_secret_ids(secret_id1: str, secret_id2: str) -> bool: | |
return False | ||
|
||
# BEGIN: charm events | ||
|
||
def _on_mongod_pebble_ready(self, event) -> None: | ||
"""Configure MongoDB pebble layer specification.""" | ||
# Get a reference the container attribute | ||
|
@@ -646,6 +660,19 @@ def _on_start(self, event) -> None: | |
It is needed to install mongodb-clients inside the charm container | ||
to make this function work correctly. | ||
""" | ||
# Patch the stateful set to have an increased termination period to prevent data loss on | ||
# removed shards. As Juju gives us a termination period of 30 seconds: | ||
# https://bugs.launchpad.net/juju/+bug/2035102 | ||
|
||
# It doesn't matter if we patch the stateful set before or after the charm has started. | ||
# The usual start hooks emitted by juju will have already been emitted, so we can expect | ||
# two rounds of restarts on one or more units (some units that get initialised late will | ||
# only have one round of restarts). The second round of start hooks will be emitted | ||
# **only after the replica set has been initialized**, we have 0 control over that. | ||
|
||
if self.unit.is_leader() and self.get_current_termination_period() != ONE_YEAR: | ||
self.update_termination_grace_period(ONE_YEAR) | ||
MiaAltieri marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
container = self.unit.get_container(Config.CONTAINER_NAME) | ||
if not container.can_connect(): | ||
logger.debug("mongod container is not ready yet.") | ||
|
@@ -691,10 +718,6 @@ def _relation_changes_handler(self, event: RelationEvent) -> None: | |
self._connect_mongodb_exporter() | ||
self._connect_pbm_agent() | ||
|
||
if isinstance(event, RelationDepartedEvent): | ||
if event.departing_unit.name == self.unit.name: | ||
self.unit_peer_data.setdefault("unit_departed", "True") | ||
|
||
if not self.unit.is_leader(): | ||
return | ||
|
||
|
@@ -759,19 +782,107 @@ def _reconcile_mongo_hosts_and_users(self, event: RelationEvent) -> None: | |
logger.info("Deferring reconfigure: error=%r", e) | ||
event.defer() | ||
|
||
def _on_stop(self, event) -> None: | ||
if "True" == self.unit_peer_data.get("unit_departed", "False"): | ||
logger.debug(f"{self.unit.name} blocking on_stop") | ||
is_in_replica_set = True | ||
timeout = UNIT_REMOVAL_TIMEOUT | ||
while is_in_replica_set and timeout > 0: | ||
is_in_replica_set = self.is_unit_in_replica_set() | ||
time.sleep(1) | ||
timeout -= 1 | ||
if timeout < 0: | ||
raise Exception(f"{self.unit.name}.on_stop timeout exceeded") | ||
logger.debug(f"{self.unit.name} releasing on_stop") | ||
self.unit_peer_data["unit_departed"] = "" | ||
def get_current_termination_period(self) -> int:
    """Return the termination grace period set on this app's StatefulSet.

    On Kubernetes the Juju application name matches the StatefulSet name and
    the Juju model name matches the namespace.
    """
    kube_client = Client()
    stateful_set = kube_client.get(StatefulSet, name=self.app.name, namespace=self.model.name)
    return stateful_set.spec.template.spec.terminationGracePeriodSeconds
|
||
def update_termination_grace_period(self, seconds: int) -> None:
    """Patch this app's StatefulSet to give pods `seconds` to terminate gracefully.

    Args:
        seconds: the new value for terminationGracePeriodSeconds on the pod
            template spec.

    Only applies to shards: their sudden removal can result in data loss, while
    other roles are safe with the default grace period.
    """
    # updating the termination grace period is only useful for shards, whose sudden removal
    # can result in data-loss
    if not self.is_role(Config.Role.SHARD):
        return

    kube_client = Client()
    patch_data = {
        "spec": {
            "template": {
                # BUG FIX: honour the `seconds` argument — previously this
                # always patched in the hard-coded ONE_YEAR constant.
                "spec": {"terminationGracePeriodSeconds": seconds},
                # NOTE(review): presumably this annotation bump forces the pod
                # template to change so the patch always rolls out — confirm
                # whether it is actually needed.
                "metadata": {"annotations": {"force-update": str(int(time.time()))}},
            }
        }
    }
    kube_client.patch(
        StatefulSet,
        name=self.app.name,
        namespace=self.model.name,
        obj=patch_data,
        patch_type=PatchType.MERGE,
    )
|
||
def mongodb_storage_detaching(self, event) -> None:
    """Before storage detaches, allow the removing unit to leave the replica set.

    If the removing unit is primary it is also allowed to step down and elect
    another unit as primary while it still has access to its storage.

    Raises:
        EarlyRemovalOfConfigServerError: if the last config-server replica is
            removed while shards are still related to it.
    """
    if self.upgrade_in_progress:
        # We cannot defer and prevent a user from removing a unit, log a warning instead.
        logger.warning(
            "Removing replicas during an upgrade is not supported. The charm may be in a broken, unrecoverable state"
        )

    # A single replica cannot step down as primary and we cannot reconfigure the replica set to
    # have 0 members.
    if self._is_removing_last_replica:
        # removing the config-server from a sharded cluster can be disastrous
        if self.is_role(Config.Role.CONFIG_SERVER) and self.config_server.has_shards():
            related_shards = self.config_server.get_related_shards()
            error_message = f"Cannot remove config-server, still related to shards {', '.join(related_shards)}"
            logger.error(error_message)
            # NOTE(review): what happens in k8s if this raises during storage
            # detaching? presumably the pod is still removed — confirm
            raise EarlyRemovalOfConfigServerError(error_message)

        # a shard cannot be drained after its storage has detached
        if self.is_role(Config.Role.SHARD) and self.shard.has_config_server():
            logger.info("Wait for shard to drain before detaching storage.")
            self.status.set_and_share_status(MaintenanceStatus("Draining shard from cluster"))
            hosts = self.shard.get_mongos_hosts()
            # TODO need to update this function to attempt to patch the statefulset
            self.shard.wait_for_draining(hosts)
            logger.info("Shard successfully drained storage.")

    try:
        # storage-detached cannot be deferred, so retry in an attempt to ride
        # out transient race conditions
        logger.debug("Removing %s from replica set", self.unit_host(self.unit))
        for attempt in Retrying(
            stop=stop_after_attempt(10),
            wait=wait_fixed(1),
            reraise=True,
        ):
            with attempt:
                # remove_replset_member retries for 60 seconds
                with MongoDBConnection(self.mongodb_config) as mongo:
                    mongo.remove_replset_member(self.unit_host(self.unit))
    except NotReadyError:
        logger.info(
            "Failed to remove %s from replica set, another member is syncing", self.unit.name
        )
    except PyMongoError as e:
        logger.error("Failed to remove %s from replica set, error=%r", self.unit.name, e)
|
||
def _on_stop(self, _) -> None:
    """Handle the stop event.

    Stop can occur after a user has refreshed, after a unit has been removed,
    or when a pod is getting restarted. While this unit is still primary and
    other replicas remain, repeatedly ask it to step down (for up to a minute)
    so another unit gets elected before we go away.
    """
    # NOTE(review): this could move into the mongodb lib as a
    # wait_for_new_primary helper; kept inline here as a POC.
    for _elapsed in range(ONE_MINUTE):
        if self.unit.name != self.primary or len(self.peers_units) <= 1:
            break
        logger.debug("Stepping down current primary, before stopping.")
        with MongoDBConnection(self.mongodb_config) as mongo:
            mongo.step_down_primary()
        time.sleep(1)
|
||
def _on_update_status(self, event: UpdateStatusEvent): | ||
# user-made mistakes might result in other incorrect statues. Prioritise informing users of | ||
|
@@ -807,6 +918,17 @@ def _on_update_status(self, event: UpdateStatusEvent): | |
|
||
self.status.set_and_share_status(self.status.process_statuses()) | ||
|
||
# We must ensure that juju does not overwrite our termination period, so we should update | ||
# it as needed. However, updating the termination period can result in an onslaught of | ||
# events, including the upgrade event. To prevent this from messing with upgrades do not | ||
# update the termination period when an upgrade is occurring. | ||
if ( | ||
self.unit.is_leader() | ||
and self.get_current_termination_period() != ONE_YEAR | ||
and not self.upgrade_in_progress | ||
): | ||
self.update_termination_grace_period(ONE_YEAR) | ||
|
||
# END: charm events | ||
|
||
# BEGIN: actions | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Need to add lightkube as a dependency in pyproject.toml.