Skip to content

Commit b2aa4b2

Browse files
[DPE-5237] implement revision check for sharding components (#331)
* update deps for revision checker * copy over infra from vm * adding infra for local charm revision * update set status lib * add integration tests * fmt + lint * update unit tests * remove unused bits * Apply suggestions from code review Co-authored-by: Mehdi Bendriss <[email protected]> --------- Co-authored-by: Mehdi Bendriss <[email protected]>
1 parent aa27779 commit b2aa4b2

File tree

13 files changed

+299
-130
lines changed

13 files changed

+299
-130
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,4 @@ share/
1717

1818
/requirements.txt
1919
/requirements-last-build.txt
20+
/charm_internal_version

charmcraft.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,11 @@ parts:
2020
override-build: |
2121
rustup default stable
2222
craftctl default
23+
files:
24+
plugin: dump
25+
source: .
26+
prime:
27+
- charm_internal_version
2328
bases:
2429
- build-on:
2530
- name: "ubuntu"

lib/charms/mongodb/v0/set_status.py

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from typing import Optional, Tuple
88

99
from charms.mongodb.v1.mongodb import MongoConfiguration, MongoDBConnection
10+
from data_platform_helpers.version_check import NoVersionError, get_charm_revision
1011
from ops.charm import CharmBase
1112
from ops.framework import Object
1213
from ops.model import ActiveStatus, BlockedStatus, StatusBase, WaitingStatus
@@ -22,7 +23,7 @@
2223

2324
# Increment this PATCH version before using `charmcraft publish-lib` or reset
2425
# to 0 if you are raising the major API version
25-
LIBPATCH = 3
26+
LIBPATCH = 4
2627

2728
AUTH_FAILED_CODE = 18
2829
UNAUTHORISED_CODE = 13
@@ -88,7 +89,7 @@ def is_status_related_to_mismatched_revision(self, status_type: str) -> bool:
8889
"goal state" which processes data differently and the other via the ".status" property.
8990
Hence we have to be flexible to handle each.
9091
"""
91-
if not self.charm.get_cluster_mismatched_revision_status():
92+
if not self.get_cluster_mismatched_revision_status():
9293
return False
9394

9495
if "waiting" in status_type and self.charm.is_role(Config.Role.CONFIG_SERVER):
@@ -153,7 +154,7 @@ def is_unit_status_ready_for_upgrade(self) -> bool:
153154
if isinstance(current_status, ActiveStatus):
154155
return True
155156

156-
if not isinstance(current_status, WaitingStatus):
157+
if not isinstance(current_status, BlockedStatus):
157158
return False
158159

159160
if status_message and "is not up-to date with config-server" in status_message:
@@ -235,7 +236,49 @@ def get_invalid_integration_status(self) -> Optional[StatusBase]:
235236
"Relation to s3-integrator is not supported, config role must be config-server"
236237
)
237238

238-
return self.charm.get_cluster_mismatched_revision_status()
239+
return self.get_cluster_mismatched_revision_status()
240+
241+
def get_cluster_mismatched_revision_status(self) -> Optional[StatusBase]:
242+
"""Returns a Status if the cluster has mismatched revisions."""
243+
# check for invalid versions in sharding integrations, i.e. a shard running on
244+
# revision 88 and a config-server running on revision 110
245+
current_charms_version = get_charm_revision(
246+
self.charm.unit, local_version=self.charm.get_charm_internal_revision
247+
)
248+
local_identifier = (
249+
"-locally built"
250+
if self.charm.version_checker.is_local_charm(self.charm.app.name)
251+
else ""
252+
)
253+
try:
254+
if self.charm.version_checker.are_related_apps_valid():
255+
return
256+
except NoVersionError as e:
257+
# relations to shards/config-server are expected to provide a version number. If they
258+
# do not, it is because they are from an earlier charm revision, i.e. pre-revision X.
259+
logger.debug(e)
260+
if self.charm.is_role(Config.Role.SHARD):
261+
return BlockedStatus(
262+
f"Charm revision ({current_charms_version}{local_identifier}) is not up-to date with config-server."
263+
)
264+
265+
if self.charm.is_role(Config.Role.SHARD):
266+
config_server_revision = self.charm.version_checker.get_version_of_related_app(
267+
self.get_config_server_name()
268+
)
269+
remote_local_identifier = (
270+
"-locally built"
271+
if self.charm.version_checker.is_local_charm(self.get_config_server_name())
272+
else ""
273+
)
274+
return BlockedStatus(
275+
f"Charm revision ({current_charms_version}{local_identifier}) is not up-to date with config-server ({config_server_revision}{remote_local_identifier})."
276+
)
277+
278+
if self.charm.is_role(Config.Role.CONFIG_SERVER):
279+
return WaitingStatus(
280+
f"Waiting for shards to upgrade/downgrade to revision {current_charms_version}{local_identifier}."
281+
)
239282

240283

241284
def build_unit_status(mongodb_config: MongoConfiguration, unit_host: str) -> StatusBase:

poetry.lock

Lines changed: 96 additions & 95 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ tenacity = "^8.2.3"
2121
pyyaml = "^6.0.1"
2222
jinja2 = "^3.1.3"
2323
poetry-core = "^1.9.0"
24-
data-platform-helpers = "^0.1.2"
24+
data-platform-helpers = "^0.1.3"
2525
pyOpenSSL = "^24.2.1"
2626
setuptools = "^72.0.0"
2727

@@ -72,7 +72,7 @@ juju = "^3.5.0"
7272
pytest = "^8.1.1"
7373
pytest-asyncio = "^0.21.1"
7474
pytest-mock = "^3.14.0"
75-
pytest-operator = "^0.34.0"
75+
pytest-operator = "^0.36.0"
7676
pytest-operator-cache = {git = "https://github.com/canonical/data-platform-workflows", tag = "v21.0.0", subdirectory = "python/pytest_plugins/pytest_operator_cache"}
7777
pytest-operator-groups = {git = "https://github.com/canonical/data-platform-workflows", tag = "v21.0.0", subdirectory = "python/pytest_plugins/pytest_operator_groups"}
7878
pytest-github-secrets = {git = "https://github.com/canonical/data-platform-workflows", tag = "v21.0.0", subdirectory = "python/pytest_plugins/github_secrets"}

src/charm.py

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@
3838
OperatorUser,
3939
)
4040
from charms.prometheus_k8s.v0.prometheus_scrape import MetricsEndpointProvider
41+
from data_platform_helpers.version_check import (
42+
CrossAppVersionChecker,
43+
get_charm_revision,
44+
)
4145
from ops.charm import (
4246
ActionEvent,
4347
CharmBase,
@@ -56,7 +60,6 @@
5660
ModelError,
5761
Relation,
5862
RelationDataContent,
59-
StatusBase,
6063
Unit,
6164
WaitingStatus,
6265
)
@@ -140,6 +143,15 @@ def __init__(self, *args):
140143
self.config_server = ShardingProvider(self)
141144
self.cluster = ClusterProvider(self)
142145

146+
self.version_checker = CrossAppVersionChecker(
147+
self,
148+
version=get_charm_revision(self.unit, local_version=self.get_charm_internal_revision),
149+
relations_to_check=[
150+
Config.Relations.SHARDING_RELATIONS_NAME,
151+
Config.Relations.CONFIG_SERVER_RELATIONS_NAME,
152+
],
153+
)
154+
143155
# BEGIN: properties
144156

145157
@property
@@ -470,16 +482,15 @@ def primary(self) -> str | None:
470482

471483
return None
472484

485+
@property
486+
def get_charm_internal_revision(self) -> str:
487+
"""Returns the contents of the get_charm_internal_revision file."""
488+
with open(Config.CHARM_INTERNAL_VERSION_FILE, "r") as f:
489+
return f.read().strip()
490+
473491
# END: properties
474492

475493
# BEGIN: generic helper methods
476-
def get_cluster_mismatched_revision_status(self) -> Optional[StatusBase]:
477-
"""Returns a Status if the cluster has mismatched revisions.
478-
479-
TODO implement this method as a part of sharding upgrades.
480-
"""
481-
return None
482-
483494
def remote_mongos_config(self, hosts) -> MongoConfiguration:
484495
"""Generates a MongoConfiguration object for mongos in the deployment of MongoDB."""
485496
# mongos that are part of the cluster have the same username and password, but different
@@ -1568,7 +1579,7 @@ def is_relation_feasible(self, rel_interface: str) -> bool:
15681579
)
15691580
return False
15701581

1571-
if revision_mismatch_status := self.get_cluster_mismatched_revision_status():
1582+
if revision_mismatch_status := self.status.get_cluster_mismatched_revision_status():
15721583
self.status.set_and_share_status(revision_mismatch_status)
15731584
return False
15741585

src/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ class Config:
2222
MONGOD_CONF_DIR = "/etc/mongod"
2323
MONGODB_LOG_FILENAME = "mongodb.log"
2424

25+
CHARM_INTERNAL_VERSION_FILE = "charm_internal_version"
2526
LICENSE_PATH = "/licenses/LICENSE"
2627
CONTAINER_NAME = "mongod"
2728
SERVICE_NAME = "mongod"

src/exceptions.py

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,40 +7,26 @@
77
class MongoError(Exception):
88
"""Common parent for Mongo errors, allowing to catch them all at once."""
99

10-
pass
11-
1210

1311
class AdminUserCreationError(MongoError):
1412
"""Raised when a commands to create an admin user on MongoDB fail."""
1513

16-
pass
17-
1814

1915
class ApplicationHostNotFoundError(MongoError):
2016
"""Raised when a queried host is not in the application peers or the current host."""
2117

22-
pass
23-
2418

2519
class MongoSecretError(MongoError):
2620
"""Common parent for all Mongo Secret Exceptions."""
2721

28-
pass
29-
3022

3123
class SecretNotAddedError(MongoSecretError):
3224
"""Raised when a Juju 3 secret couldn't be set or re-set."""
3325

34-
pass
35-
3626

3727
class MissingSecretError(MongoSecretError):
3828
"""Could be raised when a Juju 3 mandatory secret couldn't be found."""
3929

40-
pass
41-
4230

4331
class SecretAlreadyExistsError(MongoSecretError):
4432
"""A secret that we want to create already exists."""
45-
46-
pass
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
#!/usr/bin/env python3
2+
# Copyright 2024 Canonical Ltd.
3+
# See LICENSE file for licensing details.
4+
import pytest
5+
from pytest_operator.plugin import OpsTest
6+
7+
from ..helpers import METADATA, wait_for_mongodb_units_blocked
8+
9+
MONGODB_K8S_CHARM = "mongodb-k8s"
10+
SHARD_REL_NAME = "sharding"
11+
CONFIG_SERVER_REL_NAME = "config-server"
12+
13+
LOCAL_SHARD_APP_NAME = "local-shard"
14+
REMOTE_SHARD_APP_NAME = "remote-shard"
15+
LOCAL_CONFIG_SERVER_APP_NAME = "local-config-server"
16+
REMOTE_CONFIG_SERVER_APP_NAME = "remote-config-server"
17+
18+
CLUSTER_COMPONENTS = [
19+
LOCAL_SHARD_APP_NAME,
20+
REMOTE_SHARD_APP_NAME,
21+
LOCAL_CONFIG_SERVER_APP_NAME,
22+
REMOTE_CONFIG_SERVER_APP_NAME,
23+
]
24+
25+
26+
@pytest.mark.group(1)
27+
@pytest.mark.abort_on_fail
28+
async def test_build_and_deploy(ops_test: OpsTest) -> None:
29+
my_charm = await ops_test.build_charm(".")
30+
resources = {"mongodb-image": METADATA["resources"]["mongodb-image"]["upstream-source"]}
31+
32+
await ops_test.model.deploy(
33+
MONGODB_K8S_CHARM,
34+
application_name=REMOTE_SHARD_APP_NAME,
35+
config={"role": "shard"},
36+
channel="edge",
37+
)
38+
39+
await ops_test.model.deploy(
40+
MONGODB_K8S_CHARM,
41+
application_name=REMOTE_CONFIG_SERVER_APP_NAME,
42+
config={"role": "config-server"},
43+
channel="edge",
44+
)
45+
await ops_test.model.deploy(
46+
my_charm,
47+
resources=resources,
48+
config={"role": "config-server"},
49+
application_name=LOCAL_CONFIG_SERVER_APP_NAME,
50+
)
51+
await ops_test.model.deploy(
52+
my_charm,
53+
resources=resources,
54+
config={"role": "shard"},
55+
application_name=LOCAL_SHARD_APP_NAME,
56+
)
57+
58+
await ops_test.model.wait_for_idle(apps=CLUSTER_COMPONENTS, idle_period=20)
59+
60+
61+
@pytest.mark.group(1)
62+
@pytest.mark.abort_on_fail
63+
async def test_local_config_server_reports_remote_shard(ops_test: OpsTest) -> None:
64+
"""Tests that the local config server reports remote shard."""
65+
await ops_test.model.integrate(
66+
f"{REMOTE_SHARD_APP_NAME}:{SHARD_REL_NAME}",
67+
f"{LOCAL_CONFIG_SERVER_APP_NAME}:{CONFIG_SERVER_REL_NAME}",
68+
)
69+
70+
await ops_test.model.wait_for_idle(
71+
apps=[LOCAL_CONFIG_SERVER_APP_NAME],
72+
status="waiting",
73+
raise_on_blocked=False,
74+
idle_period=20,
75+
)
76+
77+
config_server_unit = ops_test.model.applications[LOCAL_CONFIG_SERVER_APP_NAME].units[0]
78+
79+
assert (
80+
"Waiting for shards to upgrade/downgrade to revision"
81+
in config_server_unit.workload_status_message
82+
), "Config server does not correctly report mismatch in revision"
83+
84+
85+
@pytest.mark.group(1)
86+
@pytest.mark.abort_on_fail
87+
async def test_local_shard_reports_remote_config_server(ops_test: OpsTest) -> None:
88+
"""Tests that the local shard reports remote config-server."""
89+
await ops_test.model.integrate(
90+
f"{LOCAL_SHARD_APP_NAME}:{SHARD_REL_NAME}",
91+
f"{REMOTE_CONFIG_SERVER_APP_NAME}:{CONFIG_SERVER_REL_NAME}",
92+
)
93+
94+
await wait_for_mongodb_units_blocked(
95+
ops_test,
96+
LOCAL_SHARD_APP_NAME,
97+
timeout=300,
98+
)
99+
100+
shard_unit = ops_test.model.applications[LOCAL_SHARD_APP_NAME].units[0]
101+
assert (
102+
"is not up-to date with config-server." in shard_unit.workload_status_message
103+
), "Shard does not correctly report mismatch in revision"

tests/unit/test_charm.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,9 @@
3636

3737

3838
class TestCharm(unittest.TestCase):
39+
@patch("charm.get_charm_revision")
3940
@patch_network_get(private_address="1.1.1.1")
40-
def setUp(self):
41+
def setUp(self, *unused):
4142
self.maxDiff = None
4243
self.harness = Harness(MongoDBCharm)
4344
mongo_resource = {
@@ -583,9 +584,11 @@ def test_reconfigure_remove_member_failure(self, connection, defer):
583584
connection.return_value.__enter__.return_value.remove_replset_member.assert_called()
584585
defer.assert_called()
585586

587+
@patch("charms.mongodb.v0.set_status.get_charm_revision")
588+
@patch("charm.CrossAppVersionChecker.is_local_charm")
586589
@patch("ops.framework.EventBase.defer")
587590
@patch("charm.MongoDBConnection")
588-
def test_reconfigure_peer_not_ready(self, connection, defer):
591+
def test_reconfigure_peer_not_ready(self, connection, defer, *unused):
589592
"""Tests reconfigure does not proceed when the adding member is not ready.
590593
591594
Verifies in relation joined events, that when the adding member is not ready that the event

0 commit comments

Comments
 (0)