Skip to content

Commit 0578606

Browse files
paulomach and Zvirovyi authored
DPE-6487 Pitr (#600)
* Add binlog_utils_udf plugin. * Enable gtid_mode and enforce_gtid_consistency for the MySQL. * Add S3 compatibility check based on the group replication id. * Point-in-time-recovery. * Fix constants. * Integration tests. * Binlogs collector service improvement. * Format restore function. * Use context manager for ca_file in s3_helpers. * Rename start_stop_binlogs_collecting to reconcile_binlogs_collection. * Delete binlogs collector config when not needed. * Improve update_binlogs_collector_config. * Add restore-to-time validation and format notice. * Sync lib changes from VM PR. * Improve binlogs collection service. * Increment LIBPATCH for libs. * Fix errors after main merge. * LIBPATCH * Move binlogs collector config to the env. * Fix occasional DNS errors on get_unit_address. * PITR improvements. * S3 improvements. * Format. * S3 improvements. * Sync mysql lib. * Lint. * split pitr tests * include pitr helper in test layer * libpatch bump * address pr comments --------- Co-authored-by: Vladyslav Tarasenko <[email protected]>
1 parent 64607cc commit 0578606

File tree

20 files changed

+1064
-80
lines changed

20 files changed

+1064
-80
lines changed

actions.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@ restore:
5555
backup-id:
5656
type: string
5757
description: A backup-id to identify the backup to restore (format = %Y-%m-%dT%H:%M:%SZ)
58+
restore-to-time:
59+
type: string
60+
description: Point-in-time-recovery target (format = %Y-%m-%d %H:%M:%S).
5861

5962
pre-upgrade-check:
6063
description: Run necessary pre-upgrade checks and preparations before executing a charm refresh.

lib/charms/mysql/v0/architecture.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,6 @@
4141
LIBAPI = 0
4242
LIBPATCH = 1
4343

44-
PYDEPS = ["ops>=2.0.0", "pyyaml>=5.0"]
45-
4644

4745
logger = logging.getLogger(__name__)
4846

lib/charms/mysql/v0/backups.py

Lines changed: 229 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,15 @@ def is_unit_blocked(self) -> bool:
4848
import datetime
4949
import logging
5050
import pathlib
51+
import re
5152
import typing
5253
from typing import Dict, List, Optional, Tuple
5354

54-
from charms.data_platform_libs.v0.s3 import S3Requirer
55+
from charms.data_platform_libs.v0.s3 import (
56+
CredentialsChangedEvent,
57+
CredentialsGoneEvent,
58+
S3Requirer,
59+
)
5560
from charms.mysql.v0.mysql import (
5661
MySQLConfigureInstanceError,
5762
MySQLCreateClusterError,
@@ -67,6 +72,7 @@ def is_unit_blocked(self) -> bool:
6772
MySQLPrepareBackupForRestoreError,
6873
MySQLRescanClusterError,
6974
MySQLRestoreBackupError,
75+
MySQLRestorePitrError,
7076
MySQLRetrieveBackupWithXBCloudError,
7177
MySQLServiceNotRunningError,
7278
MySQLSetInstanceOfflineModeError,
@@ -76,6 +82,8 @@ def is_unit_blocked(self) -> bool:
7682
MySQLUnableToGetMemberStateError,
7783
)
7884
from charms.mysql.v0.s3_helpers import (
85+
_construct_endpoint,
86+
ensure_s3_compatible_group_replication_id,
7987
fetch_and_check_existence_of_s3_path,
8088
list_backups_in_s3_path,
8189
upload_content_to_s3,
@@ -85,7 +93,11 @@ def is_unit_blocked(self) -> bool:
8593
from ops.jujuversion import JujuVersion
8694
from ops.model import BlockedStatus, MaintenanceStatus
8795

88-
from constants import MYSQL_DATA_DIR
96+
from constants import (
97+
MYSQL_DATA_DIR,
98+
SERVER_CONFIG_PASSWORD_KEY,
99+
SERVER_CONFIG_USERNAME,
100+
)
89101

90102
logger = logging.getLogger(__name__)
91103

@@ -100,8 +112,12 @@ def is_unit_blocked(self) -> bool:
100112

101113
# Increment this PATCH version before using `charmcraft publish-lib` or reset
102114
# to 0 if you are raising the major API version
103-
LIBPATCH = 12
115+
LIBPATCH = 13
104116

117+
ANOTHER_S3_CLUSTER_REPOSITORY_ERROR_MESSAGE = "S3 repository claimed by another cluster"
118+
MOVE_RESTORED_CLUSTER_TO_ANOTHER_S3_REPOSITORY_ERROR = (
119+
"Move restored cluster to another S3 repository"
120+
)
105121

106122
if typing.TYPE_CHECKING:
107123
from charm import MySQLOperatorCharm
@@ -119,6 +135,13 @@ def __init__(self, charm: "MySQLOperatorCharm", s3_integrator: S3Requirer) -> No
119135
self.framework.observe(self.charm.on.create_backup_action, self._on_create_backup)
120136
self.framework.observe(self.charm.on.list_backups_action, self._on_list_backups)
121137
self.framework.observe(self.charm.on.restore_action, self._on_restore)
138+
self.framework.observe(
139+
self.s3_integrator.on.credentials_changed, self._on_s3_credentials_changed
140+
)
141+
self.framework.observe(self.charm.on.leader_elected, self._on_s3_credentials_changed)
142+
self.framework.observe(
143+
self.s3_integrator.on.credentials_gone, self._on_s3_credentials_gone
144+
)
122145

123146
# ------------------ Helpers ------------------
124147
@property
@@ -235,18 +258,33 @@ def _on_list_backups(self, event: ActionEvent) -> None:
235258

236259
# ------------------ Create Backup ------------------
237260

238-
def _on_create_backup(self, event: ActionEvent) -> None:
239-
"""Handle the create backup action."""
240-
logger.info("A backup has been requested on unit")
261+
def _pre_create_backup_checks(self, event: ActionEvent) -> bool:
262+
"""Run some checks before creating the backup.
241263
264+
Returns: a boolean indicating whether operation should be run.
265+
"""
242266
if not self._s3_integrator_relation_exists:
243267
logger.error("Backup failed: missing relation with S3 integrator charm")
244268
event.fail("Missing relation with S3 integrator charm")
245-
return
269+
return False
270+
271+
if "s3-block-message" in self.charm.app_peer_data:
272+
logger.error("Backup failed: S3 relation is blocked for write")
273+
event.fail("S3 relation is blocked for write")
274+
return False
246275

247276
if not self.charm._mysql.is_mysqld_running():
248277
logger.error(f"Backup failed: process mysqld is not running on {self.charm.unit.name}")
249278
event.fail("Process mysqld not running")
279+
return False
280+
281+
return True
282+
283+
def _on_create_backup(self, event: ActionEvent) -> None:
284+
"""Handle the create backup action."""
285+
logger.info("A backup has been requested on unit")
286+
287+
if not self._pre_create_backup_checks(event):
250288
return
251289

252290
datetime_backup_requested = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")
@@ -455,6 +493,18 @@ def _pre_restore_checks(self, event: ActionEvent) -> bool:
455493
event.fail(error_message)
456494
return False
457495

496+
# Quick check for timestamp format
497+
restore_to_time = event.params.get("restore-to-time")
498+
if (
499+
restore_to_time
500+
and restore_to_time != "latest"
501+
and not self._is_mysql_timestamp(restore_to_time)
502+
):
503+
error_message = "Bad restore-to-time format"
504+
logger.error(f"Restore failed: {error_message}")
505+
event.fail(error_message)
506+
return False
507+
458508
if not self.charm._mysql.is_server_connectable():
459509
error_message = "Server running mysqld is not connectable"
460510
logger.error(f"Restore failed: {error_message}")
@@ -479,7 +529,7 @@ def _pre_restore_checks(self, event: ActionEvent) -> bool:
479529

480530
return True
481531

482-
def _on_restore(self, event: ActionEvent) -> None:
532+
def _on_restore(self, event: ActionEvent) -> None: # noqa: C901
483533
"""Handle the restore backup action event.
484534
485535
Restore a backup from S3 (parameters for which can retrieved from the
@@ -489,7 +539,12 @@ def _on_restore(self, event: ActionEvent) -> None:
489539
return
490540

491541
backup_id = event.params["backup-id"].strip().strip("/")
492-
logger.info(f"A restore with backup-id {backup_id} has been requested on unit")
542+
restore_to_time = event.params.get("restore-to-time")
543+
logger.info(
544+
f"A restore with backup-id {backup_id}"
545+
f"{f' to time point {restore_to_time}' if restore_to_time else ''}"
546+
f" has been requested on the unit"
547+
)
493548

494549
# Retrieve and validate missing S3 parameters
495550
s3_parameters, missing_parameters = self._retrieve_s3_parameters()
@@ -519,14 +574,28 @@ def _on_restore(self, event: ActionEvent) -> None:
519574
if not success:
520575
logger.error(f"Restore failed: {error_message}")
521576
event.fail(error_message)
522-
523577
if recoverable:
524578
self._clean_data_dir_and_start_mysqld()
525579
else:
526580
self.charm.unit.status = BlockedStatus(error_message)
527-
528581
return
529582

583+
if restore_to_time is not None:
584+
self.charm.unit.status = MaintenanceStatus("Running point-in-time-recovery operations")
585+
success, error_message = self._pitr_restore(restore_to_time, s3_parameters)
586+
if not success:
587+
logger.error(f"Restore failed: {error_message}")
588+
event.fail(error_message)
589+
self.charm.unit.status = BlockedStatus(error_message)
590+
return
591+
592+
self.charm.app_peer_data.update({
593+
"s3-block-message": MOVE_RESTORED_CLUSTER_TO_ANOTHER_S3_REPOSITORY_ERROR,
594+
"binlogs-collecting": "",
595+
})
596+
if not self.charm._mysql.reconcile_binlogs_collection():
597+
logger.error("Failed to stop binlogs collecting prior to restore")
598+
530599
# Run post-restore operations
531600
self.charm.unit.status = MaintenanceStatus("Running post-restore operations")
532601
success, error_message = self._post_restore()
@@ -611,6 +680,10 @@ def _restore(self, backup_id: str, s3_parameters: Dict[str, str]) -> Tuple[bool,
611680
except MySQLRestoreBackupError:
612681
return False, False, f"Failed to restore backup {backup_id}"
613682

683+
success, error_message = self._clean_data_dir_and_start_mysqld()
684+
if not success:
685+
return False, False, error_message
686+
614687
return True, True, ""
615688

616689
def _clean_data_dir_and_start_mysqld(self) -> Tuple[bool, str]:
@@ -636,15 +709,29 @@ def _clean_data_dir_and_start_mysqld(self) -> Tuple[bool, str]:
636709

637710
return True, ""
638711

712+
def _pitr_restore(
713+
self, restore_to_time: str, s3_parameters: Dict[str, str]
714+
) -> Tuple[bool, str]:
715+
try:
716+
logger.info("Restoring point-in-time-recovery")
717+
stdout, stderr = self.charm._mysql.restore_pitr(
718+
host=self.charm.get_unit_address(self.charm.unit),
719+
mysql_user=self.charm._mysql.server_config_user,
720+
password=self.charm._mysql.server_config_password,
721+
s3_parameters=s3_parameters,
722+
restore_to_time=restore_to_time,
723+
)
724+
logger.debug(f"Stdout of mysql-pitr-helper restore command: {stdout}")
725+
logger.debug(f"Stderr of mysql-pitr-helper restore command: {stderr}")
726+
except MySQLRestorePitrError:
727+
return False, f"Failed to restore point-in-time-recovery to the {restore_to_time}"
728+
return True, ""
729+
639730
def _post_restore(self) -> Tuple[bool, str]:
640731
"""Run operations required after restoring a backup.
641732
642733
Returns: tuple of (success, error_message)
643734
"""
644-
success, error_message = self._clean_data_dir_and_start_mysqld()
645-
if not success:
646-
return success, error_message
647-
648735
try:
649736
logger.info("Configuring instance to be part of an InnoDB cluster")
650737
self.charm._mysql.configure_instance(create_cluster_admin=False)
@@ -674,3 +761,130 @@ def _post_restore(self) -> Tuple[bool, str]:
674761
return False, "Failed to rescan the cluster"
675762

676763
return True, ""
764+
765+
def _on_s3_credentials_changed(self, event: CredentialsChangedEvent) -> None:
766+
if not self.charm.unit.is_leader():
767+
logger.debug("Early exit on _on_s3_credentials_changed: unit is not a leader")
768+
return
769+
770+
if not self._s3_integrator_relation_exists:
771+
logger.debug(
772+
"Early exit on _on_s3_credentials_changed: s3 integrator relation does not exist"
773+
)
774+
return
775+
776+
if (
777+
not self.charm._mysql.is_mysqld_running()
778+
or not self.charm.unit_initialized
779+
or not self.charm.upgrade.idle
780+
):
781+
logger.debug(
782+
"Deferring _on_s3_credentials_changed: mysql cluster is not started yet or upgrade is occurring"
783+
)
784+
event.defer()
785+
return
786+
787+
try:
788+
self.charm._mysql.wait_until_mysql_connection()
789+
except MySQLServiceNotRunningError:
790+
logger.debug(
791+
"Deferring _on_s3_credentials_changed: mysql cluster is not connectable yet"
792+
)
793+
event.defer()
794+
return
795+
796+
logger.info("Retrieving s3 parameters from the s3-integrator relation")
797+
s3_parameters, missing_parameters = self._retrieve_s3_parameters()
798+
if missing_parameters:
799+
logger.error(f"Missing S3 parameters: {missing_parameters}")
800+
return
801+
802+
logger.info("Ensuring compatibility with the provided S3 repository")
803+
if ensure_s3_compatible_group_replication_id(
804+
self.charm._mysql.get_current_group_replication_id(), s3_parameters
805+
):
806+
self.charm.app_peer_data.update({
807+
"s3-block-message": "",
808+
"binlogs-collecting": "true",
809+
})
810+
else:
811+
self.charm.app_peer_data.update({
812+
"s3-block-message": ANOTHER_S3_CLUSTER_REPOSITORY_ERROR_MESSAGE,
813+
"binlogs-collecting": "",
814+
})
815+
816+
if not self.charm._mysql.reconcile_binlogs_collection(
817+
force_restart=True, ignore_inactive_error=True
818+
):
819+
logger.error("Failed to restart binlogs collecting after S3 relation update")
820+
821+
def _on_s3_credentials_gone(self, event: CredentialsGoneEvent) -> None:
822+
if not self.charm.unit.is_leader():
823+
logger.debug("Early exit on _on_s3_credentials_gone: unit is not a leader")
824+
return
825+
826+
self.charm.app_peer_data.update({
827+
"s3-block-message": "",
828+
"binlogs-collecting": "",
829+
})
830+
try:
831+
if not self.charm._mysql.reconcile_binlogs_collection():
832+
logger.error("Failed to stop binlogs collecting after S3 relation depart")
833+
except Exception as e:
834+
logger.error(e)
835+
logger.error(
836+
"Exception is occurred when trying to stop binlogs collecting after S3 relation depart. It may be a leader departure"
837+
)
838+
839+
def get_binlogs_collector_config(self) -> Dict[str, str]:
840+
"""Return binlogs collector service config file.
841+
842+
Returns: dict of binlogs collector service config
843+
"""
844+
if not self._s3_integrator_relation_exists:
845+
logger.error(
846+
"Cannot get binlogs collector config: s3 integrator relation does not exist"
847+
)
848+
return {}
849+
850+
logger.info("Retrieving s3 parameters from the s3-integrator relation")
851+
s3_parameters, missing_parameters = self._retrieve_s3_parameters()
852+
if missing_parameters:
853+
logger.error(
854+
f"Cannot get binlogs collector config: Missing S3 parameters: {missing_parameters}"
855+
)
856+
return {}
857+
858+
binlogs_path = s3_parameters["path"].rstrip("/")
859+
bucket_url = f"{s3_parameters['bucket']}/{binlogs_path}/binlogs"
860+
861+
return {
862+
"ENDPOINT": _construct_endpoint(s3_parameters),
863+
"HOSTS": ",".join(self.charm._mysql.get_cluster_members()),
864+
"USER": SERVER_CONFIG_USERNAME,
865+
"PASS": self.charm.get_secret("app", SERVER_CONFIG_PASSWORD_KEY),
866+
"STORAGE_TYPE": "s3",
867+
"ACCESS_KEY_ID": s3_parameters["access-key"],
868+
"SECRET_ACCESS_KEY": s3_parameters["secret-key"],
869+
"S3_BUCKET_URL": bucket_url,
870+
"DEFAULT_REGION": s3_parameters["region"],
871+
}
872+
873+
def _is_mysql_timestamp(self, timestamp: str) -> bool:
874+
"""Validate the provided timestamp string."""
875+
if not re.match(
876+
r"^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}$",
877+
timestamp,
878+
):
879+
# regex validation necessary to enforce format is valid both here
880+
# and for the go `mysql-pitr-helper` binary
881+
# https://github.com/canonical/mysql-pitr-helper/blob/ed858df5c145b003c9d24223d44b6ea9c7d67888/recoverer/recoverer.go#L194
882+
return False
883+
try:
884+
self._parse_mysql_timestamp(timestamp)
885+
return True
886+
except ValueError:
887+
return False
888+
889+
def _parse_mysql_timestamp(self, timestamp: str) -> datetime.datetime:
890+
return datetime.datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S")

0 commit comments

Comments (0)