Skip to content

Commit a8c9a73

Browse files
authored
DPE-2919 update pod labels on preemptive switchover (#367)
* primary switchover update pod labels immediately * shorter version * fix pod labeling detail and removed duplicated method * fix log and safe exception
1 parent e9f291c commit a8c9a73

File tree

5 files changed

+69
-70
lines changed

5 files changed

+69
-70
lines changed

src/k8s_helpers.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -113,21 +113,26 @@ def label_pod(self, role: str, pod_name: Optional[str] = None) -> None:
113113
pod_name: (optional) name of the pod to label, defaults to the current pod
114114
"""
115115
try:
116+
if not pod_name:
117+
pod_name = self.pod_name
116118
pod = self.client.get(Pod, pod_name or self.pod_name, namespace=self.namespace)
117119

118120
if not pod.metadata.labels:
119121
pod.metadata.labels = {}
120122

121123
if pod.metadata.labels.get("role") == role:
122124
return
125+
logger.debug(f"Patching {pod_name=} with {role=}")
123126

124127
pod.metadata.labels["cluster-name"] = self.cluster_name
125128
pod.metadata.labels["role"] = role
126-
self.client.patch(Pod, pod_name or self.pod_name, pod)
127-
logger.info(f"Kubernetes pod label {role} created")
129+
self.client.patch(Pod, pod_name, pod)
128130
except ApiError as e:
129131
if e.status.code == 404:
130-
logger.warning(f"Kubernetes pod {pod_name} not found. Scaling in?")
132+
logger.warning(f"Kubernetes {pod_name=} not found. Scaling in?")
133+
return
134+
if e.status.code == 409:
135+
logger.warning(f"Kubernetes {pod_name=} changed. Labeling skipped")
131136
return
132137
if e.status.code == 403:
133138
logger.error("Kubernetes pod label creation failed: `juju trust` needed")

src/mysql_k8s_helpers.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
MySQLClientError,
1616
MySQLConfigureMySQLUsersError,
1717
MySQLExecError,
18+
MySQLGetClusterEndpointsError,
1819
MySQLStartMySQLDError,
1920
MySQLStopMySQLDError,
2021
)
@@ -47,7 +48,7 @@
4748
ROOT_SYSTEM_USER,
4849
XTRABACKUP_PLUGIN_DIR,
4950
)
50-
from k8s_helpers import KubernetesHelpers
51+
from k8s_helpers import KubernetesClientError, KubernetesHelpers
5152
from utils import any_memory_to_bytes
5253

5354
logger = logging.getLogger(__name__)
@@ -613,6 +614,7 @@ def _execute_commands(
613614
user: Optional[str] = None,
614615
group: Optional[str] = None,
615616
env_extra: Optional[Dict] = None,
617+
timeout: Optional[float] = None,
616618
) -> Tuple[str, str]:
617619
"""Execute commands on the server where MySQL is running."""
618620
try:
@@ -624,6 +626,7 @@ def _execute_commands(
624626
user=user,
625627
group=group,
626628
environment=env_extra,
629+
timeout=timeout,
627630
)
628631
stdout, stderr = process.wait_output()
629632
return (stdout, stderr or "")
@@ -820,3 +823,27 @@ def is_data_dir_initialised(self) -> bool:
820823
return expected_content <= content_set
821824
except ExecError:
822825
return False
826+
827+
def update_endpoints(self) -> None:
828+
"""Updates pod labels to reflect role of the unit."""
829+
logger.debug("Updating pod labels")
830+
try:
831+
rw_endpoints, ro_endpoints, offline = self.get_cluster_endpoints(get_ips=False)
832+
833+
for endpoints, label in (
834+
(rw_endpoints, "primary"),
835+
(ro_endpoints, "replicas"),
836+
(offline, "offline"),
837+
):
838+
for pod in (p.split(".")[0] for p in endpoints.split(",")):
839+
if pod:
840+
self.k8s_helper.label_pod(label, pod)
841+
except MySQLGetClusterEndpointsError:
842+
logger.exception("Failed to get cluster endpoints")
843+
except KubernetesClientError:
844+
logger.exception("Can't update pod labels")
845+
846+
def set_cluster_primary(self, new_primary_address: str) -> None:
    """Set the cluster primary and update pod labels.

    Overrides the parent implementation so that pod role labels are refreshed
    immediately after a switchover (per this commit, rather than waiting for a
    later status event).

    Args:
        new_primary_address: address of the unit to promote to primary,
            forwarded unchanged to the parent implementation
    """
    super().set_cluster_primary(new_primary_address)
    self.update_endpoints()

src/relations/mysql_provider.py

Lines changed: 3 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
import logging
77
import socket
88
import typing
9-
from typing import List
109

1110
from charms.data_platform_libs.v0.data_interfaces import (
1211
DatabaseProvides,
@@ -16,7 +15,6 @@
1615
MySQLCreateApplicationDatabaseAndScopedUserError,
1716
MySQLDeleteUserError,
1817
MySQLDeleteUsersForRelationError,
19-
MySQLGetClusterEndpointsError,
2018
MySQLGetMySQLVersionError,
2119
MySQLGrantPrivilegesToUserError,
2220
MySQLRemoveRouterFromMetadataError,
@@ -84,65 +82,6 @@ def _get_or_set_password(self, relation) -> str:
8482
self.database.update_relation_data(relation.id, {"password": password})
8583
return password
8684

87-
@staticmethod
88-
def _endpoints_to_pod_list(endpoints: str) -> List[str]:
89-
"""Converts a comma separated list of endpoints to a list of pods."""
90-
return [p.split(".")[0] for p in endpoints.split(",")]
91-
92-
def _update_endpoints(self) -> None:
93-
"""Updates pod labels to reflect role of the unit."""
94-
logger.debug("Updating pod labels")
95-
try:
96-
rw_endpoints, ro_endpoints, offline = self.charm._mysql.get_cluster_endpoints(
97-
get_ips=False
98-
)
99-
100-
# rw pod labels
101-
if rw_endpoints:
102-
for pod in self._endpoints_to_pod_list(rw_endpoints):
103-
self.charm.k8s_helpers.label_pod("primary", pod)
104-
# ro pod labels
105-
if ro_endpoints:
106-
for pod in self._endpoints_to_pod_list(ro_endpoints):
107-
self.charm.k8s_helpers.label_pod("replicas", pod)
108-
# offline pod labels
109-
if offline:
110-
for pod in self._endpoints_to_pod_list(offline):
111-
self.charm.k8s_helpers.label_pod("offline", pod)
112-
except MySQLGetClusterEndpointsError:
113-
logger.exception("Failed to get cluster endpoints")
114-
except KubernetesClientError:
115-
logger.debug("Can't update pod labels")
116-
self.charm.unit.status = BlockedStatus("Can't update pod labels")
117-
118-
def _update_pod_endpoint(self) -> None:
119-
"""Update pod label to reflect the role of the unit."""
120-
logger.debug(f"Updating pod endpoint for {self.charm.unit.name}")
121-
122-
pod = self.charm.unit.name.replace("/", "-")
123-
124-
try:
125-
cluster_status = self.charm._mysql.get_cluster_status()
126-
if not cluster_status:
127-
self.charm.k8s_helpers.label_pod("error", pod)
128-
return
129-
130-
for hostname, properties in cluster_status["defaultreplicaset"]["topology"].items():
131-
if hostname.split(".")[0] == pod:
132-
if properties["status"] != "online":
133-
label = "offline"
134-
elif properties["memberrole"] == "secondary":
135-
label = "replicas"
136-
elif properties["memberrole"] == "primary":
137-
label = "primary"
138-
else:
139-
label = "none"
140-
141-
logger.debug(f"Labeling pod {pod} with label {label}")
142-
self.charm.k8s_helpers.label_pod(label, pod)
143-
except KubernetesClientError:
144-
logger.error("Error updating pod label. Traffic may not be properly routed.")
145-
14685
# =============
14786
# Handlers
14887
# =============
@@ -171,7 +110,7 @@ def _on_database_requested(self, event: DatabaseRequestedEvent) -> None:
171110

172111
try:
173112
# make sure pods are labeled before adding service
174-
self._update_endpoints()
113+
self.charm._mysql.update_endpoints()
175114

176115
# create k8s services for endpoints
177116
self.charm.k8s_helpers.create_endpoint_services(["primary", "replicas"])
@@ -267,7 +206,7 @@ def _configure_endpoints(self, _) -> None:
267206
if relation.id not in relation_data:
268207
continue
269208

270-
self._update_pod_endpoint()
209+
self.charm._mysql.update_endpoints()
271210

272211
def _on_update_status(self, _) -> None:
273212
"""Handle the update status event.
@@ -285,7 +224,7 @@ def _on_update_status(self, _) -> None:
285224
):
286225
return
287226

288-
self._update_pod_endpoint()
227+
self.charm._mysql.update_endpoints()
289228

290229
def _on_database_broken(self, event: RelationBrokenEvent) -> None:
291230
"""Handle the removal of database relation.

tests/unit/test_database.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ def tearDown(self) -> None:
5151
self.patcher.stop()
5252

5353
@patch("k8s_helpers.KubernetesHelpers.wait_service_ready")
54-
@patch("relations.mysql_provider.MySQLProvider._update_endpoints")
54+
@patch("mysql_k8s_helpers.MySQL.update_endpoints")
5555
@patch("k8s_helpers.KubernetesHelpers.create_endpoint_services")
5656
@patch("mysql_k8s_helpers.MySQL.get_mysql_version", return_value="8.0.29-0ubuntu0.20.04.3")
5757
@patch("mysql_k8s_helpers.MySQL.create_application_database_and_scoped_user")

tests/unit/test_mysql_k8s_helpers.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
import json
55
import unittest
6-
from unittest.mock import MagicMock, patch
6+
from unittest.mock import MagicMock, call, patch
77

88
import tenacity
99
from charms.mysql.v0.mysql import MySQLClientError, MySQLConfigureMySQLUsersError
@@ -336,3 +336,31 @@ def test_wait_until_unit_removed_from_cluster_exception(self, _get_cluster_statu
336336

337337
with self.assertRaises(MySQLWaitUntilUnitRemovedFromClusterError):
338338
self.mysql._wait_until_unit_removed_from_cluster("mysql-0.mysql-endpoints")
339+
340+
@patch(
    "mysql_k8s_helpers.MySQL.get_cluster_endpoints",
    return_value=(
        "mysql-0.mysql-endpoints",
        "mysql-1.mysql-endpoints,mysql-2.mysql-endpoints",
        "mysql-3.mysql-endpoints",
    ),
)
def test_update_endpoints(self, _get_cluster_endpoints):
    """Test the successful execution of update_endpoints."""
    # A bare MagicMock auto-creates label_pod, so no explicit stub is needed.
    self.mysql.k8s_helper = MagicMock()

    self.mysql.update_endpoints()

    # Endpoints are fetched exactly once, and every pod from each endpoint
    # group is labeled with the role matching its group.
    _get_cluster_endpoints.assert_called_once()
    expected_labelings = [
        call("primary", "mysql-0"),
        call("replicas", "mysql-1"),
        call("replicas", "mysql-2"),
        call("offline", "mysql-3"),
    ]
    self.mysql.k8s_helper.label_pod.assert_has_calls(expected_labelings)

0 commit comments

Comments
 (0)