)

from config import Config
-from exceptions import (
-    AdminUserCreationError,
-    EarlyRemovalOfConfigServerError,
-    MissingSecretError,
-)
+from exceptions import AdminUserCreationError, MissingSecretError

logger = logging.getLogger(__name__)

@@ -107,7 +103,6 @@ def __init__(self, *args):
        self.framework.observe(self.on.mongod_pebble_ready, self._on_mongod_pebble_ready)
        self.framework.observe(self.on.config_changed, self._on_config_changed)
        self.framework.observe(self.on.start, self._on_start)
-        self.framework.observe(self.on.stop, self._on_stop)
        self.framework.observe(self.on.update_status, self._on_update_status)
        self.framework.observe(
            self.on[Config.Relations.PEERS].relation_joined, self._relation_changes_handler
@@ -126,7 +121,7 @@ def __init__(self, *args):

        self.framework.observe(self.on.get_password_action, self._on_get_password)
        self.framework.observe(self.on.set_password_action, self._on_set_password)
-        self.framework.observe(self.on.mongodb_storage_detaching, self.mongodb_storage_detaching)
+        self.framework.observe(self.on.stop, self._on_stop)

        self.framework.observe(self.on.secret_remove, self._on_secret_remove)
        self.framework.observe(self.on.secret_changed, self._on_secret_changed)
@@ -162,11 +157,6 @@ def __init__(self, *args):
        )

    # BEGIN: properties
-    @property
-    def _is_removing_last_replica(self) -> bool:
-        """Returns True if the last replica (juju unit) is getting removed."""
-        return self.app.planned_units() == 0 and len(self.peers_units) == 0
-
    @property
    def monitoring_jobs(self) -> list[dict[str, Any]]:
        """Defines the labels and targets for metrics."""
@@ -718,6 +708,10 @@ def _relation_changes_handler(self, event: RelationEvent) -> None:
        self._connect_mongodb_exporter()
        self._connect_pbm_agent()

+        if isinstance(event, RelationDepartedEvent):
+            if event.departing_unit.name == self.unit.name:
+                self.unit_peer_data.setdefault("unit_departed", "True")
+
        if not self.unit.is_leader():
            return

@@ -812,66 +806,25 @@ def update_termination_grace_period(self, seconds: int) -> None:
            patch_type=PatchType.MERGE,
        )

-    def mongodb_storage_detaching(self, event) -> None:
-        """Before storage detaches, allow removing unit to remove itself from the set.
-
-        If the removing unit is primary also allow it to step down and elect another unit as
-        primary while it still has access to its storage.
-        """
-        if self.upgrade_in_progress:
-            # We cannot defer and prevent a user from removing a unit, log a warning instead.
-            logger.warning(
-                "Removing replicas during an upgrade is not supported. The charm may be in a broken, unrecoverable state"
-            )
-
-        # A single replica cannot step down as primary and we cannot reconfigure the replica set to
-        # have 0 members.
-        if self._is_removing_last_replica:
-            # removing config-server from a sharded cluster can be disastrous.
-            if self.is_role(Config.Role.CONFIG_SERVER) and self.config_server.has_shards():
-                current_shards = self.config_server.get_related_shards()
-                early_removal_message = f"Cannot remove config-server, still related to shards {', '.join(current_shards)}"
-                logger.error(early_removal_message)
-                # question: what happens in k8s if you raise in storage detached? I assume the pod
-                # is still removed
-                raise EarlyRemovalOfConfigServerError(early_removal_message)
-
-            # cannot drain shard after storage detached.
-            if self.is_role(Config.Role.SHARD) and self.shard.has_config_server():
-                logger.info("Wait for shard to drain before detaching storage.")
-                self.status.set_and_share_status(MaintenanceStatus("Draining shard from cluster"))
-                mongos_hosts = self.shard.get_mongos_hosts()
-                # TODO need to update this function to attempt to patch the statefulset
-                self.shard.wait_for_draining(mongos_hosts)
-                logger.info("Shard successfully drained storage.")
-
-        try:
-            # retries over a period of 10 minutes in an attempt to resolve race conditions it is
-            # not possible to defer in storage detached.
-            logger.debug("Removing %s from replica set", self.unit_host(self.unit))
-            for attempt in Retrying(
-                stop=stop_after_attempt(10),
-                wait=wait_fixed(1),
-                reraise=True,
-            ):
-                with attempt:
-                    # remove_replset_member retries for 60 seconds
-                    with MongoDBConnection(self.mongodb_config) as mongo:
-                        mongo.remove_replset_member(self.unit_host(self.unit))
-
-        except NotReadyError:
-            logger.info(
-                "Failed to remove %s from replica set, another member is syncing", self.unit.name
-            )
-        except PyMongoError as e:
-            logger.error("Failed to remove %s from replica set, error=%r", self.unit.name, e)
-
    def _on_stop(self, _) -> None:
        """Handle on_stop event.

        On stop can occur after a user has refreshed, after a unit has been removed, or when a pod
        is getting restarted.
        """
+        if "True" == self.unit_peer_data.get("unit_departed", "False"):
+            logger.debug(f"{self.unit.name} blocking on_stop")
+            is_in_replica_set = True
+            timeout = UNIT_REMOVAL_TIMEOUT
+            while is_in_replica_set and timeout > 0:
+                is_in_replica_set = self.is_unit_in_replica_set()
+                time.sleep(1)
+                timeout -= 1
+            if timeout < 0:
+                raise Exception(f"{self.unit.name}.on_stop timeout exceeded")
+            logger.debug(f"{self.unit.name} releasing on_stop")
+            self.unit_peer_data["unit_departed"] = ""
+
        # I can add this functionality to mongodb lib - i.e. a function wait_for_new_primary, but
        # this is just a POC
        waiting = 0