
Commit 3fc7400

MiaAltieri, Mehdi-Bendriss, and Gu1nness authored
update to latest libs (#365)
* update to latest libs
* update libs
* fix HA tests
* fix mongos tests
* fix metrics test
* update channel for revision check
* wait for permissions to be resolved
* wait for model to settle before creating backup
* adding missing return
* chore: remove useless comma
* remove block until
* fix: Use context manager to stop looping forever
* revert earlier change
* Revert "remove block until"

  This reverts commit 22446d1.

* fix: Delay setting the partition in case the leader is the unit to upgrade

---------

Co-authored-by: Mehdi-Bendriss <[email protected]>
Co-authored-by: Neha Oudin <[email protected]>
1 parent ab4e9e7 commit 3fc7400

Showing 14 changed files with 101 additions and 69 deletions.


lib/charms/mongodb/v0/config_server_interface.py

Lines changed: 1 addition & 1 deletion
@@ -51,7 +51,7 @@
 
 # Increment this PATCH version before using `charmcraft publish-lib` or reset
 # to 0 if you are raising the major API version
-LIBPATCH = 14
+LIBPATCH = 15
 
 
 class ClusterProvider(Object):

lib/charms/mongodb/v1/mongodb_backups.py

Lines changed: 1 addition & 1 deletion
@@ -41,7 +41,7 @@
 
 # Increment this PATCH version before using `charmcraft publish-lib` or reset
 # to 0 if you are raising the major API version
-LIBPATCH = 6
+LIBPATCH = 5
 
 logger = logging.getLogger(__name__)
 

lib/charms/mongodb/v1/mongodb_provider.py

Lines changed: 1 addition & 1 deletion
@@ -37,7 +37,7 @@
 
 # Increment this PATCH version before using `charmcraft publish-lib` or reset
 # to 0 if you are raising the major API version
-LIBPATCH = 15
+LIBPATCH = 16
 
 logger = logging.getLogger(__name__)
 REL_NAME = "database"

lib/charms/mongodb/v1/mongodb_tls.py

Lines changed: 13 additions & 11 deletions
@@ -12,7 +12,7 @@
 import logging
 import re
 import socket
-from typing import Dict, List, Optional, Tuple
+from typing import Optional, Tuple
 
 from charms.tls_certificates_interface.v3.tls_certificates import (
     CertificateAvailableEvent,
@@ -42,7 +42,7 @@
 
 # Increment this PATCH version before using `charmcraft publish-lib` or reset
 # to 0 if you are raising the major API version
-LIBPATCH = 4
+LIBPATCH = 5
 
 WAIT_CERT_UPDATE = "wait-cert-updated"
 
@@ -105,9 +105,6 @@ def request_certificate(
         internal: bool,
     ):
         """Request TLS certificate."""
-        if not self.charm.model.get_relation(Config.TLS.TLS_PEER_RELATION):
-            return
-
         if param is None:
             key = generate_private_key()
         else:
@@ -234,7 +231,7 @@ def _on_certificate_available(self, event: CertificateAvailableEvent) -> None:
         self.charm.cluster.update_ca_secret(new_ca=event.ca)
         self.charm.config_server.update_ca_secret(new_ca=event.ca)
 
-        if self.waiting_for_both_certs():
+        if self.is_waiting_for_both_certs():
             logger.debug(
                 "Defer till both internal and external TLS certificates available to avoid second restart."
             )
@@ -256,7 +253,7 @@ def _on_certificate_available(self, event: CertificateAvailableEvent) -> None:
         # clear waiting status if db service is ready
         self.charm.status.set_and_share_status(ActiveStatus())
 
-    def waiting_for_both_certs(self):
+    def is_waiting_for_both_certs(self) -> bool:
         """Returns a boolean indicating whether additional certs are needed."""
         if not self.get_tls_secret(internal=True, label_name=Config.TLS.SECRET_CERT_LABEL):
             logger.debug("Waiting for internal certificate.")
@@ -295,6 +292,10 @@ def _on_certificate_expiring(self, event: CertificateExpiringEvent) -> None:
             return
 
         logger.debug("Generating a new Certificate Signing Request.")
+        self.request_new_certificates(internal)
+
+    def request_new_certificates(self, internal: bool) -> None:
+        """Requests the renewel of a new certificate."""
         key = self.get_tls_secret(internal, Config.TLS.SECRET_KEY_LABEL).encode("utf-8")
         old_csr = self.get_tls_secret(internal, Config.TLS.SECRET_CSR_LABEL).encode("utf-8")
         sans = self.get_new_sans()
@@ -313,8 +314,9 @@ def _on_certificate_expiring(self, event: CertificateExpiringEvent) -> None:
         )
 
         self.set_tls_secret(internal, Config.TLS.SECRET_CSR_LABEL, new_csr.decode("utf-8"))
+        self.set_waiting_for_cert_to_update(waiting=True, internal=internal)
 
-    def get_new_sans(self) -> Dict:
+    def get_new_sans(self) -> dict[str, list[str]]:
         """Create a list of DNS names for a MongoDB unit.
 
         Returns:
@@ -341,7 +343,7 @@ def get_new_sans(self) -> Dict:
 
         return sans
 
-    def get_current_sans(self, internal: bool) -> List[str] | None:
+    def get_current_sans(self, internal: bool) -> dict[str, list[str]] | None:
         """Gets the current SANs for the unit cert."""
         # if unit has no certificates do not proceed.
         if not self.is_tls_enabled(internal=internal):
@@ -411,9 +413,9 @@ def _get_subject_name(self) -> str:
 
     def is_set_waiting_for_cert_to_update(
         self,
-        internal=False,
+        internal: bool = False,
     ) -> bool:
-        """Returns True we are waiting for a cert to update."""
+        """Returns True if we are waiting for a cert to update."""
         scope = "int" if internal else "ext"
         label_name = f"{scope}-{WAIT_CERT_UPDATE}"
 

lib/charms/mongodb/v1/shards_interface.py

Lines changed: 2 additions & 2 deletions
@@ -58,7 +58,7 @@
 
 # Increment this PATCH version before using `charmcraft publish-lib` or reset
 # to 0 if you are raising the major API version
-LIBPATCH = 11
+LIBPATCH = 12
 
 KEYFILE_KEY = "key-file"
 HOSTS_KEY = "host"
@@ -711,7 +711,7 @@ def _on_relation_changed(self, event):
 
         self.update_member_auth(event, (key_file_enabled, tls_enabled))
 
-        if tls_enabled and self.charm.tls.waiting_for_both_certs():
+        if tls_enabled and self.charm.tls.is_waiting_for_both_certs():
             logger.info("Waiting for requested certs, before restarting and adding to cluster.")
             event.defer()
             return

lib/charms/tls_certificates_interface/v3/tls_certificates.py

Lines changed: 26 additions & 4 deletions
@@ -318,7 +318,7 @@ def _on_all_certificates_invalidated(self, event: AllCertificatesInvalidatedEven
 
 # Increment this PATCH version before using `charmcraft publish-lib` or reset
 # to 0 if you are raising the major API version
-LIBPATCH = 20
+LIBPATCH = 23
 
 PYDEPS = ["cryptography", "jsonschema"]
 
@@ -1902,10 +1902,20 @@ def _on_relation_changed(self, event: RelationChangedEvent) -> None:
                     )
                 else:
                     try:
+                        secret = self.model.get_secret(label=f"{LIBID}-{csr_in_sha256_hex}")
                         logger.debug(
                             "Setting secret with label %s", f"{LIBID}-{csr_in_sha256_hex}"
                         )
-                        secret = self.model.get_secret(label=f"{LIBID}-{csr_in_sha256_hex}")
+                        # Juju < 3.6 will create a new revision even if the content is the same
+                        if (
+                            secret.get_content(refresh=True).get("certificate", "")
+                            == certificate.certificate
+                        ):
+                            logger.debug(
+                                "Secret %s with correct certificate already exists",
+                                f"{LIBID}-{csr_in_sha256_hex}",
+                            )
+                            continue
                         secret.set_content(
                             {"certificate": certificate.certificate, "csr": certificate.csr}
                         )
@@ -1986,11 +1996,19 @@ def _on_secret_expired(self, event: SecretExpiredEvent) -> None:
         provider_certificate = self._find_certificate_in_relation_data(csr)
         if not provider_certificate:
             # A secret expired but we did not find matching certificate. Cleaning up
+            logger.warning(
+                "Failed to find matching certificate for csr, cleaning up secret %s",
+                event.secret.label,
+            )
             event.secret.remove_all_revisions()
             return
 
         if not provider_certificate.expiry_time:
             # A secret expired but matching certificate is invalid. Cleaning up
+            logger.warning(
+                "Certificate matching csr is invalid, cleaning up secret %s",
+                event.secret.label,
+            )
             event.secret.remove_all_revisions()
             return
 
@@ -2023,14 +2041,18 @@ def _find_certificate_in_relation_data(self, csr: str) -> Optional[ProviderCerti
                 return provider_certificate
         return None
 
-    def _get_csr_from_secret(self, secret: Secret) -> str:
+    def _get_csr_from_secret(self, secret: Secret) -> Union[str, None]:
        """Extract the CSR from the secret label or content.
 
        This function is a workaround to maintain backwards compatibility
        and fix the issue reported in
        https://github.com/canonical/tls-certificates-interface/issues/228
        """
-        if not (csr := secret.get_content().get("csr", "")):
+        try:
+            content = secret.get_content(refresh=True)
+        except SecretNotFoundError:
+            return None
+        if not (csr := content.get("csr", None)):
             # In versions <14 of the Lib we were storing the CSR in the label of the secret
             # The CSR now is stored int the content of the secret, which was a breaking change
             # Here we get the CSR if the secret was created by an app using libpatch 14 or lower

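The first requirer-side hunk above avoids needless secret revisions: on Juju versions before 3.6, writing identical content to a secret still creates a new revision, so the library now compares the stored certificate before calling set_content. A minimal standalone sketch of that pattern with the ops secret API (the store_certificate helper and its label handling are illustrative, not part of the library):

import logging

from ops import Model, SecretNotFoundError

logger = logging.getLogger(__name__)


def store_certificate(model: Model, label: str, certificate: str, csr: str) -> None:
    """Store a certificate in a labelled secret without churning revisions."""
    try:
        secret = model.get_secret(label=label)
    except SecretNotFoundError:
        # First time this CSR is answered: create the secret on the application.
        model.app.add_secret({"certificate": certificate, "csr": csr}, label=label)
        return
    # Juju < 3.6 creates a new revision even for identical content,
    # so skip the write when nothing changed.
    if secret.get_content(refresh=True).get("certificate", "") == certificate:
        logger.debug("Secret %s already holds this certificate, skipping update", label)
        return
    secret.set_content({"certificate": certificate, "csr": csr})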
src/charm.py

Lines changed: 7 additions & 3 deletions
@@ -636,15 +636,17 @@ def _configure_container(self, container: Container) -> None:
         except FailedToUpdateFilesystem as err:
             raise ContainerNotReadyError from err
 
-        self._configure_layers(container)
-
-        # when a network cuts and the pod restarts - reconnect to the exporter
         try:
+            self._configure_layers(container)
+            # when a network cuts and the pod restarts - reconnect to the exporter and pbm
             self._connect_mongodb_exporter()
             self._connect_pbm_agent()
         except MissingSecretError as e:
             logger.error("Cannot connect mongodb exporter: %r", e)
             raise ContainerNotReadyError
+        except ChangeError as e:
+            logger.error("Cannot configure container layers %r", e)
+            raise ContainerNotReadyError
 
     # BEGIN: charm events
     def _on_upgrade(self, event: UpgradeCharmEvent) -> None:
@@ -927,6 +929,8 @@ def mongodb_storage_detaching(self, event) -> None:
             self.shard.wait_for_draining(mongos_hosts)
             logger.info("Shard successfully drained storage.")
 
+            return
+
         try:
             # retries over a period of 10 minutes in an attempt to resolve race conditions it is
             logger.debug("Removing %s from replica set", self.unit_host(self.unit))

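The _configure_container hunk folds the Pebble layer setup into the same try block as the exporter/pbm reconnects, so a failed plan change is reported through the charm's usual "container not ready" path instead of crashing the hook. A hedged sketch of that guard; the ContainerNotReadyError class and the "mongod" layer label below stand in for the charm's own definitions:

import logging

from ops import Container
from ops.pebble import ChangeError, Layer

logger = logging.getLogger(__name__)


class ContainerNotReadyError(Exception):
    """The workload container cannot be (re)configured yet."""


def configure_workload(container: Container, layer: Layer) -> None:
    """Apply the Pebble layer; treat a failed plan change as 'not ready'."""
    try:
        container.add_layer("mongod", layer, combine=True)
        # replan restarts services whose configuration changed and raises
        # ChangeError if the change fails to apply
        container.replan()
    except ChangeError as e:
        logger.error("Cannot configure container layers %r", e)
        raise ContainerNotReadyError from e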
src/upgrades/kubernetes_upgrades.py

Lines changed: 24 additions & 21 deletions
@@ -224,33 +224,36 @@ def reconcile_partition(
         # This does not address the situation where another unit > 1 restarts and sets the
         # partition during the `stop` event, but that is unlikely to occur in the small time window
         # that causes the unit to hang.
-        if partition_ < self._partition:
-            self._partition = partition_
-            logger.debug(
-                f"Lowered partition to {partition_} {action_event=} {force=} {self.in_progress=}"
-            )
         if action_event:
             assert len(units) >= 2
-            if self._partition > unit_number(units[1]):
+            if partition_ > unit_number(units[1]):
                 message = "Highest number unit is unhealthy. Refresh will not resume."
                 logger.debug(f"Resume upgrade event failed: {message}")
                 action_event.fail(message)
-                return
-            if force:
-                # If a unit was unhealthy and the upgrade was forced, only the next unit will
-                # upgrade. As long as 1 or more units are unhealthy, the upgrade will need to be
-                # forced for each unit.
-
-                # Include "Attempting to" because (on Kubernetes) we only control the partition,
-                # not which units upgrade. Kubernetes may not upgrade a unit even if the partition
-                # allows it (e.g. if the charm container of a higher unit is not ready). This is
-                # also applicable `if not force`, but is unlikely to happen since all units are
-                # healthy `if not force`.
-                message = f"Attempting to refresh unit {self._partition}."
             else:
-                message = f"Refresh resumed. Unit {self._partition} is refreshing next."
-            action_event.set_results({"result": message})
-            logger.debug(f"Resume refresh succeeded: {message}")
+                if force:
+                    # If a unit was unhealthy and the upgrade was forced, only
+                    # the next unit will upgrade. As long as 1 or more units
+                    # are unhealthy, the upgrade will need to be forced for
+                    # each unit.
+
+                    # Include "Attempting to" because (on Kubernetes) we only
+                    # control the partition, not which units upgrade.
+                    # Kubernetes may not upgrade a unit even if the partition
+                    # allows it (e.g. if the charm container of a higher unit
+                    # is not ready). This is also applicable `if not force`,
+                    # but is unlikely to happen since all units are healthy `if
+                    # not force`.
+                    message = f"Attempting to refresh unit {self._partition}."
+                else:
+                    message = f"Refresh resumed. Unit {self._partition} is refreshing next."
+                action_event.set_results({"result": message})
+                logger.debug(f"Resume refresh succeeded: {message}")
+        if partition_ < self._partition:
+            self._partition = partition_
+            logger.debug(
+                f"Lowered partition to {partition_} {action_event=} {force=} {self.in_progress=}"
+            )
 
 
 partition = _Partition()

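This reordering is the "Delay setting the partition in case the leader is the unit to upgrade" fix from the commit message: the resume-refresh action result is recorded first, and only afterwards is the StatefulSet partition lowered, so a leader that is itself next in line cannot be restarted before the action completes. A simplified, hypothetical sketch of that ordering, with the charm's state lookups reduced to plain parameters:

import logging

logger = logging.getLogger(__name__)


def reconcile_partition(
    current_partition: int,
    target_partition: int,
    highest_unit_healthy: bool,
    action_event=None,
) -> int:
    """Return the partition to apply, answering any resume action first."""
    if action_event:
        if not highest_unit_healthy:
            action_event.fail("Highest number unit is unhealthy. Refresh will not resume.")
        else:
            action_event.set_results(
                {"result": f"Refresh resumed. Unit {current_partition} is refreshing next."}
            )
    # Lower the partition only after the action outcome has been recorded,
    # since lowering it may immediately restart the unit running this code.
    if target_partition < current_partition:
        logger.debug("Lowering partition to %s", target_partition)
        return target_partition
    return current_partition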
tests/integration/backup_tests/test_backups.py

Lines changed: 13 additions & 18 deletions
@@ -12,13 +12,7 @@
 import pytest_asyncio
 import yaml
 from pytest_operator.plugin import OpsTest
-from tenacity import (
-    RetryError,
-    Retrying,
-    stop_after_attempt,
-    stop_after_delay,
-    wait_fixed,
-)
+from tenacity import RetryError, Retrying, stop_after_delay, wait_fixed
 
 from ..ha_tests import helpers as ha_helpers
 from ..helpers import (
@@ -244,7 +238,7 @@ async def test_multi_backup(ops_test: OpsTest, github_secrets, continuous_writes
     db_unit = await helpers.get_leader_unit(ops_test)
 
     # create first backup once ready
-    await ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20),
+    await ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20)
 
     action = await db_unit.run_action(action_name="create-backup")
     first_backup = await action.wait()
@@ -262,7 +256,7 @@ async def test_multi_backup(ops_test: OpsTest, github_secrets, continuous_writes
     }
     await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters)
 
-    await ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20),
+    await ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20)
 
     # create a backup as soon as possible. might not be immediately possible since only one backup
     # can happen at a time.
@@ -279,7 +273,7 @@ async def test_multi_backup(ops_test: OpsTest, github_secrets, continuous_writes
     # backup can take a lot of time so this function returns once the command was successfully
     # sent to pbm. Therefore before checking, wait for Charmed MongoDB to finish creating the
     # backup
-    await ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20),
+    await ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20)
 
     # verify that backups was made in GCP bucket
     try:
@@ -298,7 +292,7 @@ async def test_multi_backup(ops_test: OpsTest, github_secrets, continuous_writes
         "endpoint": "https://s3.amazonaws.com",
     }
     await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters)
-    await ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20),
+    await ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20)
 
     # verify that backups was made on the AWS bucket
     try:
@@ -448,13 +442,14 @@ async def test_restore_new_cluster(
     ), "Backups from old cluster are listed as failed"
 
     # find most recent backup id and restore
-    for attempt in Retrying(stop=stop_after_attempt(120), wait=wait_fixed(1), reraise=True):
-        action = await leader_unit.run_action(action_name="list-backups")
-        list_result = await action.wait()
-        list_result = list_result.results["backups"]
-        most_recent_backup = list_result.split("\n")[-1]
-        backup_id = most_recent_backup.split()[0]
-        assert "-----" not in backup_id, "list of backups are empty."
+    action = await leader_unit.run_action(action_name="list-backups")
+    list_result = await action.wait()
+    list_result = list_result.results["backups"]
+    most_recent_backup = list_result.split("\n")[-1]
+    backup_id = most_recent_backup.split()[0]
+    action = await leader_unit.run_action(action_name="restore", **{"backup-id": backup_id})
+    restore = await action.wait()
+    assert restore.results["restore-status"] == "restore started", "restore not successful"
 
     # verify all writes are present
     try:

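For reference, the restore flow the rewritten test uses boils down to running Juju actions through python-libjuju and asserting on their results. A small self-contained sketch of that flow; the action names and result keys mirror the test above, while the helper itself is hypothetical:

async def restore_latest_backup(leader_unit) -> None:
    # "list-backups" returns a table of backups; the last row is the newest one
    action = await leader_unit.run_action(action_name="list-backups")
    list_result = await action.wait()
    backups_table = list_result.results["backups"]
    backup_id = backups_table.split("\n")[-1].split()[0]

    # trigger the restore and check that pbm accepted it
    action = await leader_unit.run_action(action_name="restore", **{"backup-id": backup_id})
    restore = await action.wait()
    assert restore.results["restore-status"] == "restore started", "restore not successful"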
tests/integration/backup_tests/test_sharding_backups.py

Lines changed: 1 addition & 1 deletion
@@ -107,7 +107,7 @@ async def test_set_credentials_in_cluster(ops_test: OpsTest, github_secrets) ->
 
     # apply new configuration options
     await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters)
-    await ops_test.model.wait_for_idle(apps=[S3_APP_NAME], status="active", timeout=TIMEOUT)
+    await ops_test.model.wait_for_idle(apps=CLUSTER_APPS, status="active", timeout=TIMEOUT)
     await setup_cluster_and_s3(ops_test)
 
 
