Skip to content

Commit f726e56

Browse files
committed
revert storage detached changes and save for a separate PR
1 parent 1d822c1 commit f726e56

File tree

2 files changed

+19
-70
lines changed

2 files changed

+19
-70
lines changed

src/charm.py

Lines changed: 19 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -78,11 +78,7 @@
7878
)
7979

8080
from config import Config
81-
from exceptions import (
82-
AdminUserCreationError,
83-
EarlyRemovalOfConfigServerError,
84-
MissingSecretError,
85-
)
81+
from exceptions import AdminUserCreationError, MissingSecretError
8682

8783
logger = logging.getLogger(__name__)
8884

@@ -107,7 +103,6 @@ def __init__(self, *args):
107103
self.framework.observe(self.on.mongod_pebble_ready, self._on_mongod_pebble_ready)
108104
self.framework.observe(self.on.config_changed, self._on_config_changed)
109105
self.framework.observe(self.on.start, self._on_start)
110-
self.framework.observe(self.on.stop, self._on_stop)
111106
self.framework.observe(self.on.update_status, self._on_update_status)
112107
self.framework.observe(
113108
self.on[Config.Relations.PEERS].relation_joined, self._relation_changes_handler
@@ -126,7 +121,7 @@ def __init__(self, *args):
126121

127122
self.framework.observe(self.on.get_password_action, self._on_get_password)
128123
self.framework.observe(self.on.set_password_action, self._on_set_password)
129-
self.framework.observe(self.on.mongodb_storage_detaching, self.mongodb_storage_detaching)
124+
self.framework.observe(self.on.stop, self._on_stop)
130125

131126
self.framework.observe(self.on.secret_remove, self._on_secret_remove)
132127
self.framework.observe(self.on.secret_changed, self._on_secret_changed)
@@ -162,11 +157,6 @@ def __init__(self, *args):
162157
)
163158

164159
# BEGIN: properties
165-
@property
166-
def _is_removing_last_replica(self) -> bool:
167-
"""Returns True if the last replica (juju unit) is getting removed."""
168-
return self.app.planned_units() == 0 and len(self.peers_units) == 0
169-
170160
@property
171161
def monitoring_jobs(self) -> list[dict[str, Any]]:
172162
"""Defines the labels and targets for metrics."""
@@ -718,6 +708,10 @@ def _relation_changes_handler(self, event: RelationEvent) -> None:
718708
self._connect_mongodb_exporter()
719709
self._connect_pbm_agent()
720710

711+
if isinstance(event, RelationDepartedEvent):
712+
if event.departing_unit.name == self.unit.name:
713+
self.unit_peer_data.setdefault("unit_departed", "True")
714+
721715
if not self.unit.is_leader():
722716
return
723717

@@ -812,66 +806,25 @@ def update_termination_grace_period(self, seconds: int) -> None:
812806
patch_type=PatchType.MERGE,
813807
)
814808

815-
def mongodb_storage_detaching(self, event) -> None:
816-
"""Before storage detaches, allow removing unit to remove itself from the set.
817-
818-
If the removing unit is primary also allow it to step down and elect another unit as
819-
primary while it still has access to its storage.
820-
"""
821-
if self.upgrade_in_progress:
822-
# We cannot defer and prevent a user from removing a unit, log a warning instead.
823-
logger.warning(
824-
"Removing replicas during an upgrade is not supported. The charm may be in a broken, unrecoverable state"
825-
)
826-
827-
# A single replica cannot step down as primary and we cannot reconfigure the replica set to
828-
# have 0 members.
829-
if self._is_removing_last_replica:
830-
# removing config-server from a sharded cluster can be disastrous.
831-
if self.is_role(Config.Role.CONFIG_SERVER) and self.config_server.has_shards():
832-
current_shards = self.config_server.get_related_shards()
833-
early_removal_message = f"Cannot remove config-server, still related to shards {', '.join(current_shards)}"
834-
logger.error(early_removal_message)
835-
# question: what happens in k8s if you raise in storage detached? I assume the pod
836-
# is still removed
837-
raise EarlyRemovalOfConfigServerError(early_removal_message)
838-
839-
# cannot drain shard after storage detached.
840-
if self.is_role(Config.Role.SHARD) and self.shard.has_config_server():
841-
logger.info("Wait for shard to drain before detaching storage.")
842-
self.status.set_and_share_status(MaintenanceStatus("Draining shard from cluster"))
843-
mongos_hosts = self.shard.get_mongos_hosts()
844-
# TODO need to update this function to attempt to patch the statefulset
845-
self.shard.wait_for_draining(mongos_hosts)
846-
logger.info("Shard successfully drained storage.")
847-
848-
try:
849-
# retries over a period of 10 minutes in an attempt to resolve race conditions; it is
850-
# not possible to defer in storage detached.
851-
logger.debug("Removing %s from replica set", self.unit_host(self.unit))
852-
for attempt in Retrying(
853-
stop=stop_after_attempt(10),
854-
wait=wait_fixed(1),
855-
reraise=True,
856-
):
857-
with attempt:
858-
# remove_replset_member retries for 60 seconds
859-
with MongoDBConnection(self.mongodb_config) as mongo:
860-
mongo.remove_replset_member(self.unit_host(self.unit))
861-
862-
except NotReadyError:
863-
logger.info(
864-
"Failed to remove %s from replica set, another member is syncing", self.unit.name
865-
)
866-
except PyMongoError as e:
867-
logger.error("Failed to remove %s from replica set, error=%r", self.unit.name, e)
868-
869809
def _on_stop(self, _) -> None:
870810
"""Handle on_stop event.
871811
872812
On stop can occur after a user has refreshed, after a unit has been removed, or when a pod
873813
is getting restarted.
874814
"""
815+
if "True" == self.unit_peer_data.get("unit_departed", "False"):
816+
logger.debug(f"{self.unit.name} blocking on_stop")
817+
is_in_replica_set = True
818+
timeout = UNIT_REMOVAL_TIMEOUT
819+
while is_in_replica_set and timeout > 0:
820+
is_in_replica_set = self.is_unit_in_replica_set()
821+
time.sleep(1)
822+
timeout -= 1
823+
if timeout < 0:
824+
raise Exception(f"{self.unit.name}.on_stop timeout exceeded")
825+
logger.debug(f"{self.unit.name} releasing on_stop")
826+
self.unit_peer_data["unit_departed"] = ""
827+
875828
# I can add this functionality to mongodb lib - i.e. a function wait_for_new_primary, but
876829
# this is just a POC
877830
waiting = 0

src/exceptions.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,6 @@ class ApplicationHostNotFoundError(MongoError):
1616
"""Raised when a queried host is not in the application peers or the current host."""
1717

1818

19-
class EarlyRemovalOfConfigServerError(Exception):
20-
"""Raised when there is an attempt to remove a config-server, while related to a shard."""
21-
22-
2319
class MongoSecretError(MongoError):
2420
"""Common parent for all Mongo Secret Exceptions."""
2521

0 commit comments

Comments
 (0)