Skip to content

Commit c58d0d4

Browse files
Add health check that MySQL Router is part of InnoDB cluster (#59)
Ported from canonical/mysql-router-k8s-operator#116
1 parent df4e28f commit c58d0d4

File tree

4 files changed

+42
-13
lines changed

4 files changed

+42
-13
lines changed

src/abstract_charm.py

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,11 @@ def __init__(self, *args) -> None:
3434
self._authenticated_workload_type = workload.AuthenticatedWorkload
3535
self._database_requires = relations.database_requires.RelationEndpoint(self)
3636
self._database_provides = relations.database_provides.RelationEndpoint(self)
37-
self.framework.observe(self.on.start, self._on_start)
38-
self.framework.observe(self.on.leader_elected, self._on_leader_elected)
37+
self.framework.observe(self.on.update_status, self.reconcile_database_relations)
38+
# Set status on first start if no relations active
39+
self.framework.observe(self.on.start, self.reconcile_database_relations)
40+
# Update app status
41+
self.framework.observe(self.on.leader_elected, self.reconcile_database_relations)
3942

4043
@property
4144
@abc.abstractmethod
@@ -107,8 +110,8 @@ def _determine_app_status(self, *, event) -> ops.StatusBase:
107110
def _determine_unit_status(self, *, event) -> ops.StatusBase:
108111
"""Report unit status."""
109112
statuses = []
110-
if not self.get_workload(event=event).container_ready:
111-
statuses.append(ops.MaintenanceStatus("Waiting for container"))
113+
workload_ = self.get_workload(event=event)
114+
statuses.append(workload_.get_status(event))
112115
return self._prioritize_statuses(statuses)
113116

114117
def set_status(self, *, event) -> None:
@@ -173,11 +176,3 @@ def reconcile_database_relations(self, event=None) -> None:
173176
elif workload_.container_ready:
174177
workload_.disable()
175178
self.set_status(event=event)
176-
177-
def _on_start(self, _) -> None:
178-
# Set status on first start if no relations active
179-
self.set_status(event=None)
180-
181-
def _on_leader_elected(self, _) -> None:
182-
# Update app status
183-
self.set_status(event=None)

src/mysql_shell.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,3 +169,16 @@ def delete_user(self, username: str) -> None:
169169
logger.debug(f"Deleting {username=}")
170170
self._run_sql([f"DROP USER `{username}`"])
171171
logger.debug(f"Deleted {username=}")
172+
173+
def is_router_in_cluster_set(self, router_id: str) -> bool:
    """Return whether `router_id` is listed among the ClusterSet's routers.

    Runs `dba.get_cluster_set()` followed by `list_routers()` through
    `_run_commands`, parses the printed output as JSON, and looks `router_id` up
    among the keys of its "routers" entry.
    """
    logger.debug(f"Checking if {router_id=} in cluster set")
    commands = [
        "cluster_set = dba.get_cluster_set()",
        "print(cluster_set.list_routers())",
    ]
    raw_output = self._run_commands(commands)
    # This local keeps its name on purpose: the `=` specifier in the f-strings below
    # echoes the expression text into the log message.
    cluster_set_router_ids = json.loads(raw_output)["routers"].keys()
    logger.debug(f"{cluster_set_router_ids=}")
    logger.debug(f"Checked if {router_id in cluster_set_router_ids=}")
    return router_id in cluster_set_router_ids

src/workload.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
import string
1111
import typing
1212

13+
import ops
14+
1315
import container
1416
import mysql_shell
1517

@@ -92,6 +94,11 @@ def disable_tls(self) -> None:
9294
file.unlink(missing_ok=True)
9395
logger.debug("Disabled TLS")
9496

97+
def get_status(self, event) -> typing.Optional[ops.StatusBase]:
    """Return a non-active status to report, or `None` when there is none.

    `event` is accepted but not used by this implementation.
    """
    if self.container_ready:
        # Nothing non-active to report
        return None
    return ops.MaintenanceStatus("Waiting for container")
101+
95102

96103
class AuthenticatedWorkload(Workload):
97104
"""Workload with connection to MySQL cluster"""
@@ -122,7 +129,7 @@ def shell(self) -> mysql_shell.Shell:
122129
def _router_id(self) -> str:
123130
"""MySQL Router ID in InnoDB Cluster metadata
124131
125-
Used to remove MySQL Router metadata from InnoDB cluster
132+
Used to remove MySQL Router metadata from InnoDB Cluster
126133
"""
127134
# MySQL Router is bootstrapped without `--directory`—there is one system-wide instance.
128135
return f"{socket.getfqdn()}::system"
@@ -235,3 +242,16 @@ def disable_tls(self) -> None:
235242
super().disable_tls()
236243
if self._container.mysql_router_service_enabled:
237244
self._restart(tls=False)
245+
246+
def get_status(self, event) -> typing.Optional[ops.StatusBase]:
    """Return the first non-active status found, or `None` when there is none.

    A truthy status from the parent class takes precedence. Otherwise the shell is
    asked whether this router's ID is still part of the ClusterSet.
    """
    inherited_status = super().get_status(event)
    if inherited_status:
        return inherited_status
    if self.shell.is_router_in_cluster_set(self._router_id):
        return None
    # Once bootstrapped, the router is only expected to leave the ClusterSet when the
    # MySQL charm removes it because the MySQL Router unit departed the relation.
    # If it is missing after bootstrap, the most likely cause is manual removal.
    return ops.BlockedStatus(
        "Router was manually removed from MySQL ClusterSet. Remove & re-deploy unit"
    )

tests/unit/conftest.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ def patch(monkeypatch):
3535
monkeypatch.setattr(
3636
"workload.AuthenticatedWorkload._router_username", lambda *args, **kwargs: ""
3737
)
38+
monkeypatch.setattr("mysql_shell.Shell.is_router_in_cluster_set", lambda *args, **kwargs: True)
3839

3940

4041
@pytest.fixture(autouse=True)

0 commit comments

Comments
 (0)