-
Notifications
You must be signed in to change notification settings - Fork 18
DRAFT + WIP [DPE-5661] patch stateful set to ensure pod isn't removed prematurely #348
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: 6/edge
Are you sure you want to change the base?
Changes from 2 commits
9c88b59
fc992f0
1d822c1
f726e56
1e09c4e
45912cf
b1b35a1
ec82c90
77e3ff4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -42,6 +42,9 @@ | |
CrossAppVersionChecker, | ||
get_charm_revision, | ||
) | ||
from lightkube import Client | ||
from lightkube.resources.apps_v1 import StatefulSet | ||
from lightkube.types import PatchType | ||
from ops.charm import ( | ||
ActionEvent, | ||
CharmBase, | ||
|
@@ -75,7 +78,11 @@ | |
) | ||
|
||
from config import Config | ||
from exceptions import AdminUserCreationError, MissingSecretError | ||
from exceptions import ( | ||
AdminUserCreationError, | ||
EarlyRemovalOfConfigServerError, | ||
MissingSecretError, | ||
) | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
@@ -85,6 +92,8 @@ | |
UNIT_SCOPE = Config.Relations.UNIT_SCOPE | ||
Scopes = Config.Relations.Scopes | ||
|
||
ONE_MINUTE = 60 | ||
ONE_YEAR = 31540000 | ||
USER_CREATING_MAX_ATTEMPTS = 5 | ||
USER_CREATION_COOLDOWN = 30 | ||
REPLICA_SET_INIT_CHECK_TIMEOUT = 10 | ||
|
@@ -95,10 +104,10 @@ class MongoDBCharm(CharmBase): | |
|
||
def __init__(self, *args): | ||
super().__init__(*args) | ||
|
||
self.framework.observe(self.on.mongod_pebble_ready, self._on_mongod_pebble_ready) | ||
self.framework.observe(self.on.config_changed, self._on_config_changed) | ||
self.framework.observe(self.on.start, self._on_start) | ||
self.framework.observe(self.on.stop, self._on_stop) | ||
self.framework.observe(self.on.update_status, self._on_update_status) | ||
self.framework.observe( | ||
self.on[Config.Relations.PEERS].relation_joined, self._relation_changes_handler | ||
|
@@ -117,7 +126,7 @@ def __init__(self, *args): | |
|
||
self.framework.observe(self.on.get_password_action, self._on_get_password) | ||
self.framework.observe(self.on.set_password_action, self._on_set_password) | ||
self.framework.observe(self.on.stop, self._on_stop) | ||
self.framework.observe(self.on.mongodb_storage_detaching, self.mongodb_storage_detaching) | ||
|
||
self.framework.observe(self.on.secret_remove, self._on_secret_remove) | ||
self.framework.observe(self.on.secret_changed, self._on_secret_changed) | ||
|
@@ -153,6 +162,10 @@ def __init__(self, *args): | |
) | ||
|
||
# BEGIN: properties | ||
@property | ||
def _is_removing_last_replica(self) -> bool: | ||
"""Returns True if the last replica (juju unit) is getting removed.""" | ||
return self.app.planned_units() == 0 and len(self.peers_units) == 0 | ||
|
||
@property | ||
def monitoring_jobs(self) -> list[dict[str, Any]]: | ||
|
@@ -549,6 +562,7 @@ def _compare_secret_ids(secret_id1: str, secret_id2: str) -> bool: | |
return False | ||
|
||
# BEGIN: charm events | ||
|
||
def _on_mongod_pebble_ready(self, event) -> None: | ||
"""Configure MongoDB pebble layer specification.""" | ||
# Get a reference the container attribute | ||
|
@@ -646,6 +660,19 @@ def _on_start(self, event) -> None: | |
It is needed to install mongodb-clients inside the charm container | ||
to make this function work correctly. | ||
""" | ||
# Patch the stateful set to have an increased termination period to prevent data loss on | ||
# removed shards. As Juju gives us a termination period of 30 seconds: | ||
# https://bugs.launchpad.net/juju/+bug/2035102 | ||
|
||
# It doesn't matter if we patch the stateful set before or after the charm has started. | ||
# The usual start hooks emitted by juju will have already been emitted, so we can expect | ||
# two rounds of restarts on one or more units (some units that get initialised late will | ||
# only have one round of restarts). The second round of start hooks will be emitted | ||
# **only after the replica set has been initialized**, we have 0 control over that. | ||
|
||
if self.unit.is_leader() and self.get_current_termination_period() != ONE_YEAR: | ||
self.update_termination_grace_period(ONE_YEAR) | ||
MiaAltieri marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
container = self.unit.get_container(Config.CONTAINER_NAME) | ||
if not container.can_connect(): | ||
logger.debug("mongod container is not ready yet.") | ||
|
@@ -691,10 +718,6 @@ def _relation_changes_handler(self, event: RelationEvent) -> None: | |
self._connect_mongodb_exporter() | ||
self._connect_pbm_agent() | ||
|
||
if isinstance(event, RelationDepartedEvent): | ||
if event.departing_unit.name == self.unit.name: | ||
self.unit_peer_data.setdefault("unit_departed", "True") | ||
|
||
if not self.unit.is_leader(): | ||
return | ||
|
||
|
@@ -759,19 +782,107 @@ def _reconcile_mongo_hosts_and_users(self, event: RelationEvent) -> None: | |
logger.info("Deferring reconfigure: error=%r", e) | ||
event.defer() | ||
|
||
def _on_stop(self, event) -> None: | ||
if "True" == self.unit_peer_data.get("unit_departed", "False"): | ||
logger.debug(f"{self.unit.name} blocking on_stop") | ||
is_in_replica_set = True | ||
timeout = UNIT_REMOVAL_TIMEOUT | ||
while is_in_replica_set and timeout > 0: | ||
is_in_replica_set = self.is_unit_in_replica_set() | ||
time.sleep(1) | ||
timeout -= 1 | ||
if timeout < 0: | ||
raise Exception(f"{self.unit.name}.on_stop timeout exceeded") | ||
logger.debug(f"{self.unit.name} releasing on_stop") | ||
self.unit_peer_data["unit_departed"] = "" | ||
def get_current_termination_period(self) -> int:
    """Return the termination grace period set on this app's StatefulSet.

    On Kubernetes the Juju application name matches the StatefulSet name and
    the Juju model name matches the namespace.
    """
    kube_client = Client()
    stateful_set = kube_client.get(StatefulSet, name=self.app.name, namespace=self.model.name)
    return stateful_set.spec.template.spec.terminationGracePeriodSeconds
|
||
def update_termination_grace_period(self, seconds: int) -> None:
    """Patch this app's StatefulSet to give pods `seconds` to terminate gracefully.

    Args:
        seconds: the new value for terminationGracePeriodSeconds on the pod
            template spec.

    Only applies to shards: their sudden removal can result in data loss, while
    other roles are safe with the default grace period.
    """
    # updating the termination grace period is only useful for shards, whose sudden removal
    # can result in data-loss
    if not self.is_role(Config.Role.SHARD):
        return

    kube_client = Client()
    patch_data = {
        "spec": {
            "template": {
                # BUG FIX: honour the `seconds` argument — previously this
                # always patched in the hard-coded ONE_YEAR constant.
                "spec": {"terminationGracePeriodSeconds": seconds},
                # NOTE(review): presumably this annotation bump forces the pod
                # template to change so the patch always rolls out — confirm
                # whether it is actually needed.
                "metadata": {"annotations": {"force-update": str(int(time.time()))}},
            }
        }
    }
    kube_client.patch(
        StatefulSet,
        name=self.app.name,
        namespace=self.model.name,
        obj=patch_data,
        patch_type=PatchType.MERGE,
    )
|
||
def mongodb_storage_detaching(self, event) -> None:
    """Before storage detaches, allow the removing unit to leave the replica set.

    If the removing unit is primary it is also allowed to step down and elect
    another unit as primary while it still has access to its storage.

    Raises:
        EarlyRemovalOfConfigServerError: if the last config-server replica is
            removed while shards are still related to it.
    """
    if self.upgrade_in_progress:
        # We cannot defer and prevent a user from removing a unit, log a warning instead.
        logger.warning(
            "Removing replicas during an upgrade is not supported. The charm may be in a broken, unrecoverable state"
        )

    # A single replica cannot step down as primary and we cannot reconfigure the replica set to
    # have 0 members.
    if self._is_removing_last_replica:
        # removing the config-server from a sharded cluster can be disastrous
        if self.is_role(Config.Role.CONFIG_SERVER) and self.config_server.has_shards():
            related_shards = self.config_server.get_related_shards()
            error_message = f"Cannot remove config-server, still related to shards {', '.join(related_shards)}"
            logger.error(error_message)
            # NOTE(review): what happens in k8s if this raises during storage
            # detaching? presumably the pod is still removed — confirm
            raise EarlyRemovalOfConfigServerError(error_message)

        # a shard cannot be drained after its storage has detached
        if self.is_role(Config.Role.SHARD) and self.shard.has_config_server():
            logger.info("Wait for shard to drain before detaching storage.")
            self.status.set_and_share_status(MaintenanceStatus("Draining shard from cluster"))
            hosts = self.shard.get_mongos_hosts()
            # TODO need to update this function to attempt to patch the statefulset
            self.shard.wait_for_draining(hosts)
            logger.info("Shard successfully drained storage.")

    try:
        # storage-detached cannot be deferred, so retry in an attempt to ride
        # out transient race conditions
        logger.debug("Removing %s from replica set", self.unit_host(self.unit))
        for attempt in Retrying(
            stop=stop_after_attempt(10),
            wait=wait_fixed(1),
            reraise=True,
        ):
            with attempt:
                # remove_replset_member retries for 60 seconds
                with MongoDBConnection(self.mongodb_config) as mongo:
                    mongo.remove_replset_member(self.unit_host(self.unit))
    except NotReadyError:
        logger.info(
            "Failed to remove %s from replica set, another member is syncing", self.unit.name
        )
    except PyMongoError as e:
        logger.error("Failed to remove %s from replica set, error=%r", self.unit.name, e)
|
||
def _on_stop(self, _) -> None:
    """Handle the stop event.

    Stop can occur after a user has refreshed, after a unit has been removed,
    or when a pod is getting restarted. While this unit is still primary and
    other replicas remain, repeatedly ask it to step down (for up to a minute)
    so another unit gets elected before we go away.
    """
    # NOTE(review): this could move into the mongodb lib as a
    # wait_for_new_primary helper; kept inline here as a POC.
    for _elapsed in range(ONE_MINUTE):
        if self.unit.name != self.primary or len(self.peers_units) <= 1:
            break
        logger.debug("Stepping down current primary, before stopping.")
        with MongoDBConnection(self.mongodb_config) as mongo:
            mongo.step_down_primary()
        time.sleep(1)
|
||
def _on_update_status(self, event: UpdateStatusEvent): | ||
# user-made mistakes might result in other incorrect statues. Prioritise informing users of | ||
|
@@ -807,6 +918,17 @@ def _on_update_status(self, event: UpdateStatusEvent): | |
|
||
self.status.set_and_share_status(self.status.process_statuses()) | ||
|
||
# We must ensure that juju does not overwrite our termination period, so we should update | ||
# it as needed. However, updating the termination period can result in an onslaught of | ||
# events, including the upgrade event. To prevent this from messing with upgrades do not | ||
# update the termination period when an upgrade is occurring. | ||
if ( | ||
self.unit.is_leader() | ||
and self.get_current_termination_period() != ONE_YEAR | ||
and not self.upgrade_in_progress | ||
): | ||
self.update_termination_grace_period(ONE_YEAR) | ||
|
||
# END: charm events | ||
|
||
# BEGIN: actions | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Need to add lightkube as a dependency in pyproject.toml.