From 144ce8475d067c794bb0bafd1fd1fc3a3b0b96e3 Mon Sep 17 00:00:00 2001 From: Paulo Machado Date: Tue, 26 Mar 2024 12:22:28 -0300 Subject: [PATCH 1/4] faster endpoint updates and preemptive switchover on upgrade --- src/mysql_vm_helpers.py | 21 ++++++++-- src/relations/mysql_provider.py | 68 +++++++++++++++++++++++++++------ src/upgrade.py | 21 ++++++++++ 3 files changed, 94 insertions(+), 16 deletions(-) diff --git a/src/mysql_vm_helpers.py b/src/mysql_vm_helpers.py index d9ced4f7ac..4b7b823a45 100644 --- a/src/mysql_vm_helpers.py +++ b/src/mysql_vm_helpers.py @@ -9,6 +9,7 @@ import shutil import subprocess import tempfile +import typing from typing import Dict, List, Optional, Tuple import jinja2 @@ -26,7 +27,6 @@ MySQLStopMySQLDError, ) from charms.operator_libs_linux.v2 import snap -from ops.charm import CharmBase from tenacity import RetryError, Retrying, retry, stop_after_attempt, stop_after_delay, wait_fixed from typing_extensions import override @@ -53,6 +53,9 @@ logger = logging.getLogger(__name__) +if typing.TYPE_CHECKING: + from charm import MySQLOperatorCharm + class MySQLResetRootPasswordAndStartMySQLDError(Error): """Exception raised when there's an error resetting root password and starting mysqld.""" @@ -103,7 +106,7 @@ def __init__( monitoring_password: str, backups_user: str, backups_password: str, - charm: CharmBase, + charm: "MySQLOperatorCharm", ): """Initialize the MySQL class. @@ -233,6 +236,16 @@ def get_available_memory(self) -> int: logger.error("Failed to query system memory") raise MySQLGetAvailableMemoryError + @override + def set_cluster_primary(self, new_primary_address: str) -> None: + """Set the primary instance of the cluster. + + Args: + new_primary_address: the address of the new primary instance + """ + super().set_cluster_primary(new_primary_address) + self.charm.database_relation._update_endpoints_all_relations(None) + def write_mysqld_config(self, profile: str, memory_limit: Optional[int]) -> None: """Create custom mysql config file. @@ -356,11 +369,11 @@ def reset_root_password_and_start_mysqld(self) -> None: except MySQLServiceNotRunningError: raise MySQLResetRootPasswordAndStartMySQLDError("mysqld service not running") - @retry(reraise=True, stop=stop_after_delay(120), wait=wait_fixed(5)) + @retry(reraise=True, stop=stop_after_delay(300), wait=wait_fixed(5)) def wait_until_mysql_connection(self, check_port: bool = True) -> None: """Wait until a connection to MySQL has been obtained. - Retry every 5 seconds for 120 seconds if there is an issue obtaining a connection. + Retry every 5 seconds for 300 seconds if there is an issue obtaining a connection. """ logger.debug("Waiting for MySQL connection") diff --git a/src/relations/mysql_provider.py b/src/relations/mysql_provider.py index 5b85918e38..c4da3c460a 100644 --- a/src/relations/mysql_provider.py +++ b/src/relations/mysql_provider.py @@ -20,7 +20,7 @@ ) from ops.charm import RelationBrokenEvent, RelationDepartedEvent, RelationJoinedEvent from ops.framework import Object -from ops.model import BlockedStatus +from ops.model import BlockedStatus, Relation, Unit from constants import DB_RELATION_NAME, PASSWORD_LENGTH, PEER from utils import generate_random_password @@ -55,15 +55,20 @@ def __init__(self, charm: "MySQLOperatorCharm"): self.framework.observe(self.charm.on.leader_elected, self._update_endpoints_all_relations) self.framework.observe(self.charm.on.update_status, self._update_endpoints_all_relations) + @property + def active_relations(self) -> list[Relation]: + """Return the active relations.""" + relation_data = self.database.fetch_relation_data() + return [ + rel + for rel in self.model.relations[DB_RELATION_NAME] + if rel.id in relation_data # rel.id in relation data after on_database_requested + ] + def _update_endpoints_all_relations(self, _): """Update endpoints for all relations.""" if not self.charm.unit.is_leader(): return - # get all relations involving the database relation - relations = list(self.model.relations[DB_RELATION_NAME]) - # check if there are relations in place - if len(relations) == 0: - return if not self.charm.cluster_initialized or not self.charm.unit_peer_data.get( "unit-initialized" @@ -71,13 +76,9 @@ def _update_endpoints_all_relations(self, _): logger.debug("Waiting cluster/unit to be initialized") return - relation_data = self.database.fetch_relation_data() # for all relations update the read-only-endpoints - for relation in relations: + for relation in self.active_relations: # check if the on_database_requested has been executed - if relation.id not in relation_data: - logger.debug("On database requested not happened yet! Nothing to do in this case") - continue self._update_endpoints(relation.id, relation.app.name) def _on_relation_departed(self, event: RelationDepartedEvent): @@ -207,6 +208,7 @@ def _on_database_requested(self, event: DatabaseRequestedEvent): # get base relation data relation_id = event.relation.id db_name = event.database + assert db_name, "Database name must be provided" extra_user_roles = [] if event.extra_user_roles: extra_user_roles = event.extra_user_roles.split(",") @@ -272,8 +274,8 @@ def _on_database_broken(self, event: RelationBrokenEvent) -> None: # https://github.com/canonical/mysql-operator/issues/32 return + relation_id = event.relation.id try: - relation_id = event.relation.id self.charm._mysql.delete_users_for_relation(relation_id) logger.info(f"Removed user for relation {relation_id}") except (MySQLDeleteUsersForRelationError, KeyError): @@ -309,3 +311,45 @@ def _on_database_provides_relation_departed(self, event: RelationDepartedEvent) logger.info(f"Removed router from metadata {user.router_id}") except MySQLRemoveRouterFromMetadataError: logger.error(f"Failed to remove router from metadata with ID {user.router_id}") + + def remove_unit_from_endpoints(self, unit: Unit) -> None: + """Remove a unit from the endpoints for related applications. + + Args: + unit (ops.Unit): The the unit to be removed. + """ + if not self.charm.unit.is_leader(): + return + + if not self.charm.cluster_initialized: + logger.debug("Waiting cluster to be initialized") + return + + unit_address = self.charm.get_unit_ip(unit) + + # filter out the unit address from the (ro)endpoints + for relation in self.active_relations: + # rw endpoints + endpoints = ( + self.database.fetch_my_relation_field(relation.id, "endpoints", DB_RELATION_NAME) + or "" + ) + if unit_address in endpoints: + self.database.set_endpoints( + relation.id, + ",".join([e for e in endpoints.split(",") if unit_address not in e]), + ) + continue + + # ro endpoints + ro_endpoints = ( + self.database.fetch_my_relation_field( + relation.id, "read-only-endpoints", DB_RELATION_NAME + ) + or "" + ) + if unit_address in ro_endpoints: + self.database.set_read_only_endpoints( + relation.id, + ",".join([e for e in ro_endpoints.split(",") if unit_address not in e]), + ) diff --git a/src/upgrade.py b/src/upgrade.py index 5b9c861047..0c213f1d66 100644 --- a/src/upgrade.py +++ b/src/upgrade.py @@ -17,6 +17,7 @@ VersionError, ) from charms.mysql.v0.mysql import ( + MySQLGetClusterEndpointsError, MySQLGetMySQLVersionError, MySQLServerNotUpgradableError, MySQLSetClusterPrimaryError, @@ -171,6 +172,11 @@ def _on_upgrade_charm_check_legacy(self, event) -> None: @override def _on_upgrade_granted(self, event: UpgradeGrantedEvent) -> None: # noqa: C901 """Handle the upgrade granted event.""" + if self.charm.unit.is_leader(): + # preemptively change primary on leader unit + # we assume the leader is primary, since the switchover is done on pre-upgrade-check + self._primary_switchover() + try: self.charm.unit.status = MaintenanceStatus("stopping services..") self.charm._mysql.stop_mysqld() @@ -260,6 +266,21 @@ def _recover_single_unit_cluster(self) -> None: logger.debug("Recovering single unit cluster") self.charm._mysql.reboot_from_complete_outage() + def _primary_switchover(self) -> None: + """Switchover primary to the first available RO endpoint.""" + try: + _, ro_endpoints, _ = self.charm._mysql.get_cluster_endpoints(get_ips=False) + if not ro_endpoints: + # no ro endpoints, can't switchover + return + new_primary_address = ro_endpoints.split(",")[0] + self.charm._mysql.set_cluster_primary(new_primary_address) + except (MySQLSetClusterPrimaryError, MySQLGetClusterEndpointsError): + # If upgrading mysql version, older mysqlsh will fail to set primary + logger.warning( + "Failed to switchover primary. Endpoints will be updated after upgrade." + ) + def _on_upgrade_changed(self, _) -> None: """Handle the upgrade changed event. From b6c1233ff3581bece8ce1df1b0b75679d09caccf Mon Sep 17 00:00:00 2001 From: Paulo Machado Date: Mon, 15 Apr 2024 15:02:27 -0300 Subject: [PATCH 2/4] added coverage --- tests/unit/test_database.py | 52 +++++++++++++++++++++++++++++++++++++ tests/unit/test_upgrade.py | 8 ++++++ 2 files changed, 60 insertions(+) diff --git a/tests/unit/test_database.py b/tests/unit/test_database.py index 3f54cfb941..f393031574 100644 --- a/tests/unit/test_database.py +++ b/tests/unit/test_database.py @@ -7,6 +7,7 @@ from ops.testing import Harness from charm import MySQLOperatorCharm +from charms.mysql.v0.mysql import RouterUser from constants import DB_RELATION_NAME from .helpers import patch_network_get @@ -82,3 +83,54 @@ def test_database_requested( _create_application_database_and_scoped_user.assert_called_once() _get_cluster_endpoints.assert_called_once() _get_mysql_version.assert_called_once() + + @patch("relations.mysql_provider.MySQLProvider._on_database_broken") + @patch("mysql_vm_helpers.MySQL.remove_router_from_cluster_metadata") + @patch("mysql_vm_helpers.MySQL.delete_user") + @patch("mysql_vm_helpers.MySQL.get_mysql_router_users_for_unit") + def test_relation_departed( + self, + _get_users, + _delete_user, + _remove_router, + _on_database_broken, + ): + self.harness.set_leader(True) + + router_user = RouterUser(username="user1", router_id="router_id") + _get_users.return_value = [router_user] + + self.harness.remove_relation(self.database_relation_id) + _delete_user.assert_called_once_with("user1") + _remove_router.assert_called_once_with("router_id") + + def test_remove_unit_from_endpoints(self): + self.harness.set_leader(True) + self.charm.on.config_changed.emit() + self.harness.update_relation_data( + self.peer_relation_id, self.charm.app.name, {"units-added-to-cluster": "1"} + ) + + self.harness.update_relation_data( + self.database_relation_id, + self.charm.app.name, + { + "data": '{"database": "test_db"}', + "password": "super_secure_password", + "username": f"relation-{self.database_relation_id}", + "endpoints": "2.2.2.2:3306", + "version": "8.0.36-0ubuntu0.22.04.3", + "database": "test_db", + "read-only-endpoints": "2.2.2.1:3306", + }, + ) + + remove_unit = self.harness.model.get_unit("mysql/1") + with patch("charm.MySQLOperatorCharm.get_unit_ip", return_value="2.2.2.1"): + self.charm.database_relation.remove_unit_from_endpoints(remove_unit) + + relation_data = self.harness.get_relation_data( + self.database_relation_id, self.charm.app.name + ) + + self.assertNotIn("read-only-endpoints", relation_data.keys()) diff --git a/tests/unit/test_upgrade.py b/tests/unit/test_upgrade.py index 5c879f50a6..0fdfafdd12 100644 --- a/tests/unit/test_upgrade.py +++ b/tests/unit/test_upgrade.py @@ -294,3 +294,11 @@ def test_prepare_upgrade_from_legacy(self): self.harness.get_relation_data(self.upgrade_relation_id, "mysql")["upgrade-stack"], "[0, 1]", ) + + @patch("charm.MySQLOperatorCharm._mysql") + def test_primary_switchover(self, _mysql): + _mysql.get_cluster_endpoints.return_value = (None, "1.1.1.1:3306,1.1.1.2:3306", None) + + self.charm.upgrade._primary_switchover() + + _mysql.set_cluster_primary.assert_called_once_with("1.1.1.1:3306") From a513b99e5936b2d0cb76f0330cc6697e9feb6c79 Mon Sep 17 00:00:00 2001 From: Paulo Machado Date: Mon, 15 Apr 2024 15:03:54 -0300 Subject: [PATCH 3/4] fix import --- tests/unit/test_database.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_database.py b/tests/unit/test_database.py index f393031574..ecd23b1812 100644 --- a/tests/unit/test_database.py +++ b/tests/unit/test_database.py @@ -4,10 +4,10 @@ import unittest from unittest.mock import patch +from charms.mysql.v0.mysql import RouterUser from ops.testing import Harness from charm import MySQLOperatorCharm -from charms.mysql.v0.mysql import RouterUser from constants import DB_RELATION_NAME from .helpers import patch_network_get From b60ee9737f301114ab6206d30f992655afe7f639 Mon Sep 17 00:00:00 2001 From: Paulo Machado Date: Thu, 18 Apr 2024 16:37:25 -0300 Subject: [PATCH 4/4] remove unused method --- src/relations/mysql_provider.py | 44 +-------------------------------- tests/unit/test_database.py | 31 ----------------------- 2 files changed, 1 insertion(+), 74 deletions(-) diff --git a/src/relations/mysql_provider.py b/src/relations/mysql_provider.py index c4da3c460a..e5d19d61da 100644 --- a/src/relations/mysql_provider.py +++ b/src/relations/mysql_provider.py @@ -20,7 +20,7 @@ ) from ops.charm import RelationBrokenEvent, RelationDepartedEvent, RelationJoinedEvent from ops.framework import Object -from ops.model import BlockedStatus, Relation, Unit +from ops.model import BlockedStatus, Relation from constants import DB_RELATION_NAME, PASSWORD_LENGTH, PEER from utils import generate_random_password @@ -311,45 +311,3 @@ def _on_database_provides_relation_departed(self, event: RelationDepartedEvent) logger.info(f"Removed router from metadata {user.router_id}") except MySQLRemoveRouterFromMetadataError: logger.error(f"Failed to remove router from metadata with ID {user.router_id}") - - def remove_unit_from_endpoints(self, unit: Unit) -> None: - """Remove a unit from the endpoints for related applications. - - Args: - unit (ops.Unit): The the unit to be removed. - """ - if not self.charm.unit.is_leader(): - return - - if not self.charm.cluster_initialized: - logger.debug("Waiting cluster to be initialized") - return - - unit_address = self.charm.get_unit_ip(unit) - - # filter out the unit address from the (ro)endpoints - for relation in self.active_relations: - # rw endpoints - endpoints = ( - self.database.fetch_my_relation_field(relation.id, "endpoints", DB_RELATION_NAME) - or "" - ) - if unit_address in endpoints: - self.database.set_endpoints( - relation.id, - ",".join([e for e in endpoints.split(",") if unit_address not in e]), - ) - continue - - # ro endpoints - ro_endpoints = ( - self.database.fetch_my_relation_field( - relation.id, "read-only-endpoints", DB_RELATION_NAME - ) - or "" - ) - if unit_address in ro_endpoints: - self.database.set_read_only_endpoints( - relation.id, - ",".join([e for e in ro_endpoints.split(",") if unit_address not in e]), - ) diff --git a/tests/unit/test_database.py b/tests/unit/test_database.py index ecd23b1812..69487b9738 100644 --- a/tests/unit/test_database.py +++ b/tests/unit/test_database.py @@ -103,34 +103,3 @@ def test_relation_departed( self.harness.remove_relation(self.database_relation_id) _delete_user.assert_called_once_with("user1") _remove_router.assert_called_once_with("router_id") - - def test_remove_unit_from_endpoints(self): - self.harness.set_leader(True) - self.charm.on.config_changed.emit() - self.harness.update_relation_data( - self.peer_relation_id, self.charm.app.name, {"units-added-to-cluster": "1"} - ) - - self.harness.update_relation_data( - self.database_relation_id, - self.charm.app.name, - { - "data": '{"database": "test_db"}', - "password": "super_secure_password", - "username": f"relation-{self.database_relation_id}", - "endpoints": "2.2.2.2:3306", - "version": "8.0.36-0ubuntu0.22.04.3", - "database": "test_db", - "read-only-endpoints": "2.2.2.1:3306", - }, - ) - - remove_unit = self.harness.model.get_unit("mysql/1") - with patch("charm.MySQLOperatorCharm.get_unit_ip", return_value="2.2.2.1"): - self.charm.database_relation.remove_unit_from_endpoints(remove_unit) - - relation_data = self.harness.get_relation_data( - self.database_relation_id, self.charm.app.name - ) - - self.assertNotIn("read-only-endpoints", relation_data.keys())