Skip to content

Commit 2897bab

Browse files
authored
DPE-4699 backup logging and other fixes (#471)
* port refactoring from k8s * port backup refactor from k8s * tests refactoring after changes * bump libs * remove duplicate call and missing mock * split backup in three (aws, ceph, gcp)
1 parent afe6651 commit 2897bab

File tree

13 files changed

+1456
-675
lines changed

13 files changed

+1456
-675
lines changed

lib/charms/mysql/v0/backups.py

Lines changed: 56 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -55,12 +55,14 @@ def is_unit_blocked(self) -> bool:
5555
from charms.mysql.v0.mysql import (
5656
MySQLConfigureInstanceError,
5757
MySQLCreateClusterError,
58+
MySQLCreateClusterSetError,
5859
MySQLDeleteTempBackupDirectoryError,
5960
MySQLDeleteTempRestoreDirectoryError,
6061
MySQLEmptyDataDirectoryError,
6162
MySQLExecuteBackupCommandsError,
6263
MySQLGetMemberStateError,
6364
MySQLInitializeJujuOperationsTableError,
65+
MySQLKillSessionError,
6466
MySQLOfflineModeAndHiddenInstanceExistsError,
6567
MySQLPrepareBackupForRestoreError,
6668
MySQLRescanClusterError,
@@ -80,13 +82,14 @@ def is_unit_blocked(self) -> bool:
8082
from ops.charm import ActionEvent
8183
from ops.framework import Object
8284
from ops.jujuversion import JujuVersion
83-
from ops.model import ActiveStatus, BlockedStatus
85+
from ops.model import BlockedStatus, MaintenanceStatus
8486

8587
from constants import MYSQL_DATA_DIR
8688

8789
logger = logging.getLogger(__name__)
8890

8991
MYSQL_BACKUPS = "mysql-backups"
92+
S3_INTEGRATOR_RELATION_NAME = "s3-parameters"
9093

9194
# The unique Charmhub library identifier, never change it
9295
LIBID = "183844304be247129572309a5fb1e47c"
@@ -96,7 +99,7 @@ def is_unit_blocked(self) -> bool:
9699

97100
# Increment this PATCH version before using `charmcraft publish-lib` or reset
98101
# to 0 if you are raising the major API version
99-
LIBPATCH = 8
102+
LIBPATCH = 9
100103

101104

102105
if typing.TYPE_CHECKING:
@@ -117,6 +120,10 @@ def __init__(self, charm: "MySQLOperatorCharm", s3_integrator: S3Requirer) -> No
117120
self.framework.observe(self.charm.on.restore_action, self._on_restore)
118121

119122
# ------------------ Helpers ------------------
123+
@property
124+
def _s3_integrator_relation_exists(self) -> bool:
125+
"""Returns whether a relation with the s3-integrator exists."""
126+
return bool(self.model.get_relation(S3_INTEGRATOR_RELATION_NAME))
120127

121128
def _retrieve_s3_parameters(self) -> Tuple[Dict[str, str], List[str]]:
122129
"""Retrieve S3 parameters from the S3 integrator relation.
@@ -176,11 +183,7 @@ def _upload_logs_to_s3(
176183
177184
Returns: bool indicating success
178185
"""
179-
logs = f"""Stdout:
180-
{stdout}
181-
182-
Stderr:
183-
{stderr}"""
186+
logs = f"Stdout:\n{stdout}\n\nStderr:\n{stderr}"
184187
logger.debug(f"Logs to upload to S3 at location {log_filename}:\n{logs}")
185188

186189
logger.info(
@@ -206,7 +209,7 @@ def _on_list_backups(self, event: ActionEvent) -> None:
206209
207210
List backups available to restore by this application.
208211
"""
209-
if not self.charm.s3_integrator_relation_exists:
212+
if not self._s3_integrator_relation_exists:
210213
event.fail("Missing relation with S3 integrator charm")
211214
return
212215

@@ -222,7 +225,9 @@ def _on_list_backups(self, event: ActionEvent) -> None:
222225
event.set_results({"backups": self._format_backups_list(backups)})
223226
except Exception as e:
224227
error_message = (
225-
e.message if hasattr(e, "message") else "Failed to retrieve backup ids from S3"
228+
getattr(e, "message")
229+
if hasattr(e, "message")
230+
else "Failed to retrieve backup ids from S3"
226231
)
227232
logger.error(error_message)
228233
event.fail(error_message)
@@ -233,7 +238,7 @@ def _on_create_backup(self, event: ActionEvent) -> None:
233238
"""Handle the create backup action."""
234239
logger.info("A backup has been requested on unit")
235240

236-
if not self.charm.s3_integrator_relation_exists:
241+
if not self._s3_integrator_relation_exists:
237242
logger.error("Backup failed: missing relation with S3 integrator charm")
238243
event.fail("Missing relation with S3 integrator charm")
239244
return
@@ -263,12 +268,13 @@ def _on_create_backup(self, event: ActionEvent) -> None:
263268

264269
# Test uploading metadata to S3 to test credentials before backup
265270
juju_version = JujuVersion.from_environ()
266-
metadata = f"""Date Backup Requested: {datetime_backup_requested}
267-
Model Name: {self.model.name}
268-
Application Name: {self.model.app.name}
269-
Unit Name: {self.charm.unit.name}
270-
Juju Version: {str(juju_version)}
271-
"""
271+
metadata = (
272+
f"Date Backup Requested: {datetime_backup_requested}\n"
273+
f"Model Name: {self.model.name}\n"
274+
f"Application Name: {self.model.app.name}\n"
275+
f"Unit Name: {self.charm.unit.name}\n"
276+
f"Juju Version: {str(juju_version)}\n"
277+
)
272278

273279
if not upload_content_to_s3(metadata, f"{backup_path}.metadata", s3_parameters):
274280
logger.error("Backup failed: Failed to upload metadata to provided S3")
@@ -313,6 +319,7 @@ def _on_create_backup(self, event: ActionEvent) -> None:
313319
"backup-id": datetime_backup_requested,
314320
}
315321
)
322+
self.charm._on_update_status(None)
316323

317324
def _can_unit_perform_backup(self) -> Tuple[bool, Optional[str]]:
318325
"""Validates whether this unit can perform a backup.
@@ -380,8 +387,9 @@ def _backup(self, backup_path: str, s3_parameters: Dict) -> Tuple[bool, Optional
380387
Returns: tuple of (success, error_message)
381388
"""
382389
try:
390+
self.charm.unit.status = MaintenanceStatus("Running backup...")
383391
logger.info("Running the xtrabackup commands")
384-
stdout = self.charm._mysql.execute_backup_commands(
392+
stdout, _ = self.charm._mysql.execute_backup_commands(
385393
backup_path,
386394
s3_parameters,
387395
)
@@ -436,7 +444,7 @@ def _pre_restore_checks(self, event: ActionEvent) -> bool:
436444
437445
Returns: a boolean indicating whether restore should be run
438446
"""
439-
if not self.charm.s3_integrator_relation_exists:
447+
if not self._s3_integrator_relation_exists:
440448
error_message = "Missing relation with S3 integrator charm"
441449
logger.error(f"Restore failed: {error_message}")
442450
event.fail(error_message)
@@ -481,7 +489,7 @@ def _on_restore(self, event: ActionEvent) -> None:
481489
if not self._pre_restore_checks(event):
482490
return
483491

484-
backup_id = event.params.get("backup-id").strip().strip("/")
492+
backup_id = event.params["backup-id"].strip().strip("/")
485493
logger.info(f"A restore with backup-id {backup_id} has been requested on unit")
486494

487495
# Retrieve and validate missing S3 parameters
@@ -500,6 +508,7 @@ def _on_restore(self, event: ActionEvent) -> None:
500508
return
501509

502510
# Run operations to prepare for the restore
511+
self.charm.unit.status = MaintenanceStatus("Running pre-restore operations")
503512
success, error_message = self._pre_restore()
504513
if not success:
505514
logger.error(f"Restore failed: {error_message}")
@@ -520,6 +529,7 @@ def _on_restore(self, event: ActionEvent) -> None:
520529
return
521530

522531
# Run post-restore operations
532+
self.charm.unit.status = MaintenanceStatus("Running post-restore operations")
523533
success, error_message = self._post_restore()
524534
if not success:
525535
logger.error(f"Restore failed: {error_message}")
@@ -533,22 +543,29 @@ def _on_restore(self, event: ActionEvent) -> None:
533543
"completed": "ok",
534544
}
535545
)
546+
# update status as soon as possible
547+
self.charm._on_update_status(None)
536548

537-
def _pre_restore(self) -> Tuple[bool, Optional[str]]:
549+
def _pre_restore(self) -> Tuple[bool, str]:
538550
"""Perform operations that need to be done before performing a restore.
539551
540552
Returns: tuple of (success, error_message)
541553
"""
554+
if not self.charm._mysql.is_mysqld_running():
555+
return True, ""
556+
542557
try:
558+
logger.info("Stopping mysqld before restoring the backup")
559+
self.charm._mysql.kill_client_sessions()
543560
self.charm._mysql.stop_mysqld()
561+
except MySQLKillSessionError:
562+
return False, "Failed to kill client sessions"
544563
except MySQLStopMySQLDError:
545564
return False, "Failed to stop mysqld"
546565

547-
return True, None
566+
return True, ""
548567

549-
def _restore(
550-
self, backup_id: str, s3_parameters: Dict[str, str]
551-
) -> Tuple[bool, bool, Optional[str]]:
568+
def _restore(self, backup_id: str, s3_parameters: Dict[str, str]) -> Tuple[bool, bool, str]:
552569
"""Run the restore operations.
553570
554571
Args:
@@ -558,18 +575,21 @@ def _restore(
558575
Returns: tuple of (success, recoverable_error, error_message)
559576
"""
560577
try:
561-
logger.info("Running xbcloud get commands to retrieve the backup")
578+
logger.info(
579+
"Running xbcloud get commands to retrieve the backup\n"
580+
"This operation can take long time depending on backup size and network speed"
581+
)
582+
self.charm.unit.status = MaintenanceStatus("Downloading backup...")
562583
stdout, stderr, backup_location = self.charm._mysql.retrieve_backup_with_xbcloud(
563584
backup_id,
564585
s3_parameters,
565586
)
566-
logger.debug(f"Stdout of xbcloud get commands: {stdout}")
567-
logger.debug(f"Stderr of xbcloud get commands: {stderr}")
568587
except MySQLRetrieveBackupWithXBCloudError:
569588
return False, True, f"Failed to retrieve backup {backup_id}"
570589

571590
try:
572591
logger.info("Preparing retrieved backup using xtrabackup prepare")
592+
self.charm.unit.status = MaintenanceStatus("Preparing for restore backup...")
573593
stdout, stderr = self.charm._mysql.prepare_backup_for_restore(backup_location)
574594
logger.debug(f"Stdout of xtrabackup prepare command: {stdout}")
575595
logger.debug(f"Stderr of xtrabackup prepare command: {stderr}")
@@ -583,16 +603,17 @@ def _restore(
583603
return False, False, "Failed to empty the data directory"
584604

585605
try:
606+
self.charm.unit.status = MaintenanceStatus("Restoring backup...")
586607
logger.info("Restoring the backup")
587608
stdout, stderr = self.charm._mysql.restore_backup(backup_location)
588609
logger.debug(f"Stdout of xtrabackup move-back command: {stdout}")
589610
logger.debug(f"Stderr of xtrabackup move-back command: {stderr}")
590611
except MySQLRestoreBackupError:
591612
return False, False, f"Failed to restore backup {backup_id}"
592613

593-
return True, True, None
614+
return True, True, ""
594615

595-
def _clean_data_dir_and_start_mysqld(self) -> Tuple[bool, Optional[str]]:
616+
def _clean_data_dir_and_start_mysqld(self) -> Tuple[bool, str]:
596617
"""Run idempotent operations run after restoring a backup.
597618
598619
Returns tuple of (success, error_message)
@@ -613,9 +634,9 @@ def _clean_data_dir_and_start_mysqld(self) -> Tuple[bool, Optional[str]]:
613634
except MySQLStartMySQLDError:
614635
return False, "Failed to start mysqld"
615636

616-
return True, None
637+
return True, ""
617638

618-
def _post_restore(self) -> Tuple[bool, Optional[str]]:
639+
def _post_restore(self) -> Tuple[bool, str]:
619640
"""Run operations required after restoring a backup.
620641
621642
Returns: tuple of (success, error_message)
@@ -638,24 +659,18 @@ def _post_restore(self) -> Tuple[bool, Optional[str]]:
638659
logger.info("Creating cluster on restored node")
639660
unit_label = self.charm.unit.name.replace("/", "-")
640661
self.charm._mysql.create_cluster(unit_label)
662+
self.charm._mysql.create_cluster_set()
641663
self.charm._mysql.initialize_juju_units_operations_table()
642664

643665
self.charm._mysql.rescan_cluster()
644666

645-
logger.info("Retrieving instance cluster state and role")
646-
state, role = self.charm._mysql.get_member_state()
647667
except MySQLCreateClusterError:
648668
return False, "Failed to create InnoDB cluster on restored instance"
669+
except MySQLCreateClusterSetError:
670+
return False, "Failed to create InnoDB cluster-set on restored instance"
649671
except MySQLInitializeJujuOperationsTableError:
650672
return False, "Failed to initialize the juju operations table"
651673
except MySQLRescanClusterError:
652674
return False, "Failed to rescan the cluster"
653-
except MySQLGetMemberStateError:
654-
return False, "Failed to retrieve member state in restored instance"
655675

656-
self.charm.unit_peer_data["member-role"] = role
657-
self.charm.unit_peer_data["member-state"] = state
658-
659-
self.charm.unit.status = ActiveStatus()
660-
661-
return True, None
676+
return True, ""

0 commit comments

Comments
 (0)