@@ -126,7 +126,7 @@ def __init__(self, *args):

         self.framework.observe(self.on.get_password_action, self._on_get_password)
         self.framework.observe(self.on.set_password_action, self._on_set_password)
-        self.framework.observe(self.on.mongodb_storage_detaching, self.mongodb_storage_detaching)
+        self.framework.observe(self.on.stop, self._on_stop)

         self.framework.observe(self.on.secret_remove, self._on_secret_remove)
         self.framework.observe(self.on.secret_changed, self._on_secret_changed)
@@ -162,11 +162,6 @@ def __init__(self, *args):
         )

     # BEGIN: properties
-    @property
-    def _is_removing_last_replica(self) -> bool:
-        """Returns True if the last replica (juju unit) is getting removed."""
-        return self.app.planned_units() == 0 and len(self.peers_units) == 0
-
     @property
     def monitoring_jobs(self) -> list[dict[str, Any]]:
         """Defines the labels and targets for metrics."""
@@ -718,6 +713,10 @@ def _relation_changes_handler(self, event: RelationEvent) -> None:
         self._connect_mongodb_exporter()
         self._connect_pbm_agent()

+        if isinstance(event, RelationDepartedEvent):
+            if event.departing_unit.name == self.unit.name:
+                self.unit_peer_data.setdefault("unit_departed", "True")
+
         if not self.unit.is_leader():
             return

@@ -812,66 +811,25 @@ def update_termination_grace_period(self, seconds: int) -> None:
             patch_type=PatchType.MERGE,
         )

-    def mongodb_storage_detaching(self, event) -> None:
-        """Before storage detaches, allow removing unit to remove itself from the set.
-
-        If the removing unit is primary also allow it to step down and elect another unit as
-        primary while it still has access to its storage.
-        """
-        if self.upgrade_in_progress:
-            # We cannot defer and prevent a user from removing a unit, log a warning instead.
-            logger.warning(
-                "Removing replicas during an upgrade is not supported. The charm may be in a broken, unrecoverable state"
-            )
-
-        # A single replica cannot step down as primary and we cannot reconfigure the replica set to
-        # have 0 members.
-        if self._is_removing_last_replica:
-            # removing config-server from a sharded cluster can be disaterous.
-            if self.is_role(Config.Role.CONFIG_SERVER) and self.config_server.has_shards():
-                current_shards = self.config_server.get_related_shards()
-                early_removal_message = f"Cannot remove config-server, still related to shards {', '.join(current_shards)}"
-                logger.error(early_removal_message)
-                # question: what happens in ks if you raise in storage detached? I assume the pod
-                # is still removed
-                raise EarlyRemovalOfConfigServerError(early_removal_message)
-
-            # cannot drain shard after storage detached.
-            if self.is_role(Config.Role.SHARD) and self.shard.has_config_server():
-                logger.info("Wait for shard to drain before detaching storage.")
-                self.status.set_and_share_status(MaintenanceStatus("Draining shard from cluster"))
-                mongos_hosts = self.shard.get_mongos_hosts()
-                # TODO need to update this function to attempt to patch the statefulset
-                self.shard.wait_for_draining(mongos_hosts)
-                logger.info("Shard successfully drained storage.")
-
-        try:
-            # retries over a period of 10 minutes in an attempt to resolve race conditions it is
-            # not possible to defer in storage detached.
-            logger.debug("Removing %s from replica set", self.unit_host(self.unit))
-            for attempt in Retrying(
-                stop=stop_after_attempt(10),
-                wait=wait_fixed(1),
-                reraise=True,
-            ):
-                with attempt:
-                    # remove_replset_member retries for 60 seconds
-                    with MongoDBConnection(self.mongodb_config) as mongo:
-                        mongo.remove_replset_member(self.unit_host(self.unit))
-
-        except NotReadyError:
-            logger.info(
-                "Failed to remove %s from replica set, another member is syncing", self.unit.name
-            )
-        except PyMongoError as e:
-            logger.error("Failed to remove %s from replica set, error=%r", self.unit.name, e)
-
     def _on_stop(self, _) -> None:
         """Handle on_stop event.

         On stop can occur after a user has refreshed, after a unit has been removed, or when a pod
         is getting restarted.
         """
+        if "True" == self.unit_peer_data.get("unit_departed", "False"):
+            logger.debug(f"{self.unit.name} blocking on_stop")
+            is_in_replica_set = True
+            timeout = UNIT_REMOVAL_TIMEOUT
+            while is_in_replica_set and timeout > 0:
+                is_in_replica_set = self.is_unit_in_replica_set()
+                time.sleep(1)
+                timeout -= 1
+                if timeout < 0:
+                    raise Exception(f"{self.unit.name}.on_stop timeout exceeded")
+            logger.debug(f"{self.unit.name} releasing on_stop")
+            self.unit_peer_data["unit_departed"] = ""
+
         # I can add this functionality to mongodb lib - i.e. a function wait_for_new_primary, but
         # this is just a POC
         waiting = 0
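
Taken together, the diff replaces the storage-detaching hook with a two-step handshake: the departing unit flags itself in peer data when it sees its own RelationDepartedEvent, and _on_stop then blocks until the unit has actually left the replica set. Below is a minimal, self-contained sketch of that handshake; FakeMongo, the plain peer_data dict, and the small timeout are illustrative stand-ins, not the charm's real API.

import time

UNIT_REMOVAL_TIMEOUT = 5  # illustrative; the charm's real constant is defined elsewhere


class FakeMongo:
    """Stand-in for the MongoDB helper: the unit 'leaves' after a few polls."""

    def __init__(self, polls_until_removed: int = 2):
        self._remaining = polls_until_removed

    def is_unit_in_replica_set(self) -> bool:
        self._remaining -= 1
        return self._remaining > 0


def on_relation_departed(peer_data: dict, departing_unit: str, my_unit: str) -> None:
    # Mirrors the RelationDepartedEvent branch: only the unit that is leaving
    # marks itself, so peers restarting for other reasons stop immediately.
    if departing_unit == my_unit:
        peer_data.setdefault("unit_departed", "True")


def on_stop(peer_data: dict, mongo: FakeMongo) -> None:
    # Mirrors the new _on_stop: poll until the unit is no longer a replica-set
    # member (or the timeout lapses), then clear the flag.
    if peer_data.get("unit_departed", "False") == "True":
        timeout = UNIT_REMOVAL_TIMEOUT
        while mongo.is_unit_in_replica_set() and timeout > 0:
            time.sleep(1)
            timeout -= 1
        peer_data["unit_departed"] = ""


peer_data: dict = {}
on_relation_departed(peer_data, departing_unit="mongodb-k8s/1", my_unit="mongodb-k8s/1")
on_stop(peer_data, FakeMongo())
assert peer_data["unit_departed"] == ""

One subtlety of the diff's use of setdefault: it only writes when the key is absent, so once _on_stop has cleared the flag to the empty string, a later departed event will not re-set it.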