47
47
"failed to access/create the bucket, check your S3 settings"
48
48
)
49
49
FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE = "failed to initialize stanza, check your S3 settings"
50
+ CANNOT_RESTORE_PITR = "cannot restore PITR, juju debug-log for details"
51
+ MOVE_RESTORED_CLUSTER_TO_ANOTHER_BUCKET = "Move restored cluster to another S3 bucket"
50
52
51
53
S3_BLOCK_MESSAGES = [
52
54
ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE ,
53
55
FAILED_TO_ACCESS_CREATE_BUCKET_ERROR_MESSAGE ,
54
56
FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE ,
57
+ MOVE_RESTORED_CLUSTER_TO_ANOTHER_BUCKET ,
55
58
]
56
59
57
60
@@ -198,9 +201,29 @@ def can_use_s3_repository(self) -> Tuple[bool, Optional[str]]:
198
201
if self .charm ._patroni .member_started :
199
202
self .charm ._patroni .reload_patroni_configuration ()
200
203
return False , ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE
204
+ return self ._is_s3_wal_compatible (stanza )
201
205
202
206
return True , None
203
207
208
def _is_s3_wal_compatible(self, stanza) -> Tuple[bool, Optional[str]]:
    """Return whether the S3 stanza matches this cluster by last-archived-WAL parity.

    The last WAL archived by the local PostgreSQL, as reported by
    ``self.charm.postgresql.get_last_archived_wal()``, is compared with the
    ``max`` value of the first entry of the stanza's ``archive`` list. Only the
    part of the local WAL name before the first ``.`` takes part in the
    comparison.

    Args:
        stanza: mapping describing the stanza; only its ``archive`` key is read.
            NOTE(review): presumably one stanza entry of the pgBackRest ``info``
            output -- confirm against the caller.

    Returns:
        ``(True, None)`` when the stanza has no archive entries, when either WAL
        name is missing/empty, or when both names match. Otherwise
        ``(False, message)``, where ``message`` is
        ``MOVE_RESTORED_CLUSTER_TO_ANOTHER_BUCKET`` if the
        ``require-change-bucket-after-restore`` flag is set in the application
        peer data, and ``ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE`` if it is not.
    """
    charm_last_archived_wal = self.charm.postgresql.get_last_archived_wal()
    logger.debug("last archived wal: %s", charm_last_archived_wal)

    # A missing, null or empty archive list means there is nothing on S3 to
    # conflict with, so the stanza is treated as compatible.
    s3_archive = stanza.get("archive")
    if not s3_archive:
        return True, None

    s3_last_archived_wal = s3_archive[0].get("max")
    logger.debug("last s3 wal: %s", s3_last_archived_wal)

    # Parity can only be judged when both sides report a WAL name.
    if not charm_last_archived_wal or not s3_last_archived_wal:
        return True, None

    # Drop everything after the first "." of the local name before comparing.
    local_wal_name = charm_last_archived_wal.split(".", 1)[0]
    if local_wal_name == str(s3_last_archived_wal):
        return True, None

    # Mismatch: pick the block message depending on whether a restore has
    # flagged that the bucket must be changed.
    if self.charm.app_peer_data.get("require-change-bucket-after-restore"):
        return False, MOVE_RESTORED_CLUSTER_TO_ANOTHER_BUCKET
    return False, ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE
226
+
204
227
def _change_connectivity_to_database (self , connectivity : bool ) -> None :
205
228
"""Enable or disable the connectivity to the database."""
206
229
self .charm .unit_peer_data .update ({"connectivity" : "on" if connectivity else "off" })
@@ -423,11 +446,7 @@ def _initialise_stanza(self) -> None:
423
446
424
447
# Enable stanza initialisation if the backup settings were fixed after being invalid
425
448
# or pointing to a repository where there are backups from another cluster.
426
- if self .charm .is_blocked and self .charm .unit .status .message not in [
427
- ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE ,
428
- FAILED_TO_ACCESS_CREATE_BUCKET_ERROR_MESSAGE ,
429
- FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE ,
430
- ]:
449
+ if self .charm .is_blocked and self .charm .unit .status .message not in S3_BLOCK_MESSAGES :
431
450
logger .warning ("couldn't initialize stanza due to a blocked status" )
432
451
return
433
452
@@ -554,6 +573,18 @@ def _on_s3_credential_changed(self, event: CredentialsChangedEvent):
554
573
event .defer ()
555
574
return
556
575
576
+ # Prevents config change in bad state, so DB peer relations change event will not cause patroni related errors.
577
+ if self .charm .unit .status .message == CANNOT_RESTORE_PITR :
578
+ logger .info ("Cannot change S3 configuration in bad PITR restore status" )
579
+ event .defer ()
580
+ return
581
+
582
+ # Prevents S3 change in the middle of restoring backup and patroni / pgbackrest errors caused by that.
583
+ if "restoring-backup" in self .charm .app_peer_data :
584
+ logger .info ("Cannot change S3 configuration during restore" )
585
+ event .defer ()
586
+ return
587
+
557
588
if not self ._render_pgbackrest_conf_file ():
558
589
logger .debug ("Cannot set pgBackRest configurations, missing configurations." )
559
590
return
@@ -567,6 +598,8 @@ def _on_s3_credential_changed(self, event: CredentialsChangedEvent):
567
598
if not self .charm .is_primary :
568
599
return
569
600
601
+ self .charm .app_peer_data .pop ("require-change-bucket-after-restore" , None )
602
+
570
603
try :
571
604
self ._create_bucket_if_not_exists ()
572
605
except (ClientError , ValueError ):
@@ -582,7 +615,11 @@ def _on_s3_credential_changed(self, event: CredentialsChangedEvent):
582
615
583
616
def _on_s3_credential_gone (self , _ ) -> None :
584
617
if self .charm .unit .is_leader ():
585
- self .charm .app_peer_data .update ({"stanza" : "" , "init-pgbackrest" : "" })
618
+ self .charm .app_peer_data .update ({
619
+ "stanza" : "" ,
620
+ "init-pgbackrest" : "" ,
621
+ "require-change-bucket-after-restore" : "" ,
622
+ })
586
623
self .charm .unit_peer_data .update ({"stanza" : "" , "init-pgbackrest" : "" })
587
624
if self .charm .is_blocked and self .charm .unit .status .message in S3_BLOCK_MESSAGES :
588
625
self .charm .unit .status = ActiveStatus ()
@@ -753,20 +790,42 @@ def _on_restore_action(self, event):
753
790
return
754
791
755
792
backup_id = event .params .get ("backup-id" )
756
- logger .info (f"A restore with backup-id { backup_id } has been requested on unit" )
793
+ restore_to_time = event .params .get ("restore-to-time" )
794
+ logger .info (
795
+ f"A restore"
796
+ f"{ ' with backup-id ' + backup_id if backup_id else '' } "
797
+ f"{ ' to time point ' + restore_to_time if restore_to_time else '' } "
798
+ f" has been requested on the unit"
799
+ )
757
800
758
- # Validate the provided backup id.
759
- logger .info ("Validating provided backup-id" )
801
+ # Validate the provided backup id and restore to time .
802
+ logger .info ("Validating provided backup-id and restore-to-time " )
760
803
try :
761
804
backups = self ._list_backups (show_failed = False )
762
- if backup_id not in backups .keys ():
805
+ if backup_id and backup_id not in backups .keys ():
763
806
error_message = f"Invalid backup-id: { backup_id } "
764
807
logger .error (f"Restore failed: { error_message } " )
765
808
event .fail (error_message )
766
809
return
810
+ if not backup_id and restore_to_time and not backups :
811
+ error_message = "Cannot restore PITR without any backups created"
812
+ logger .error (f"Restore failed: { error_message } " )
813
+ event .fail (error_message )
814
+ return
767
815
except ListBackupsError as e :
768
816
logger .exception (e )
769
- error_message = "Failed to retrieve backup id"
817
+ error_message = "Failed to retrieve backups list"
818
+ logger .error (f"Restore failed: { error_message } " )
819
+ event .fail (error_message )
820
+ return
821
+
822
+ # Quick check for timestamp format
823
+ if (
824
+ restore_to_time
825
+ and restore_to_time != "latest"
826
+ and not re .match ("^[0-9-]+ [0-9:.+]+$" , restore_to_time )
827
+ ):
828
+ error_message = "Bad restore-to-time format"
770
829
logger .error (f"Restore failed: { error_message } " )
771
830
event .fail (error_message )
772
831
return
@@ -781,6 +840,17 @@ def _on_restore_action(self, event):
781
840
event .fail (error_message )
782
841
return
783
842
843
+ # Temporarily disabling patroni service auto-restart. This is required as point-in-time-recovery can fail
844
+ # on restore, therefore during cluster bootstrapping process. In this case, we need to be able to check patroni
845
+ # service status and logs. Disabling auto-restart feature is essential to prevent wrong status indicated
846
+ # and a race condition when reading logs (as logs are cleared / moved when the service restarts).
847
+ if not self .charm .override_patroni_restart_condition ("no" , "restore-backup" ):
848
+ error_message = "Failed to override Patroni restart condition"
849
+ logger .error (f"Restore failed: { error_message } " )
850
+ event .fail (error_message )
851
+ self ._restart_database ()
852
+ return
853
+
784
854
logger .info ("Removing the contents of the data directory" )
785
855
if not self ._empty_data_files ():
786
856
error_message = "Failed to remove contents of the data directory"
@@ -792,8 +862,12 @@ def _on_restore_action(self, event):
792
862
# Mark the cluster as in a restoring backup state and update the Patroni configuration.
793
863
logger .info ("Configuring Patroni to restore the backup" )
794
864
self .charm .app_peer_data .update ({
795
- "restoring-backup" : self ._fetch_backup_from_id (backup_id ),
796
- "restore-stanza" : backups [backup_id ],
865
+ "restoring-backup" : self ._fetch_backup_from_id (backup_id ) if backup_id else "" ,
866
+ "restore-stanza" : backups [backup_id ]
867
+ if backup_id
868
+ else self .charm .app_peer_data .get ("stanza" , self .stanza_name ),
869
+ "restore-to-time" : restore_to_time or "" ,
870
+ "require-change-bucket-after-restore" : "True" ,
797
871
})
798
872
self .charm .update_config ()
799
873
@@ -865,17 +939,20 @@ def _pre_restore_checks(self, event: ActionEvent) -> bool:
865
939
event .fail (validation_message )
866
940
return False
867
941
868
- if not event .params .get ("backup-id" ):
869
- error_message = "Missing backup-id to restore"
942
+ if not event .params .get ("backup-id" ) and not event .params .get ("restore-to-time" ):
943
+ error_message = (
944
+ "Missing backup-id or/and restore-to-time parameter to be able to do restore"
945
+ )
870
946
logger .error (f"Restore failed: { error_message } " )
871
947
event .fail (error_message )
872
948
return False
873
949
874
950
logger .info ("Checking if cluster is in blocked state" )
875
- if (
876
- self .charm .is_blocked
877
- and self .charm .unit .status .message != ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE
878
- ):
951
+ if self .charm .is_blocked and self .charm .unit .status .message not in [
952
+ ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE ,
953
+ CANNOT_RESTORE_PITR ,
954
+ MOVE_RESTORED_CLUSTER_TO_ANOTHER_BUCKET ,
955
+ ]:
879
956
error_message = "Cluster or unit is in a blocking state"
880
957
logger .error (f"Restore failed: { error_message } " )
881
958
event .fail (error_message )
@@ -941,7 +1018,7 @@ def _render_pgbackrest_conf_file(self) -> bool:
941
1018
942
1019
def _restart_database (self ) -> None :
943
1020
"""Removes the restoring backup flag and restart the database."""
944
- self .charm .app_peer_data .update ({"restoring-backup" : "" })
1021
+ self .charm .app_peer_data .update ({"restoring-backup" : "" , "restore-to-time" : "" })
945
1022
self .charm .update_config ()
946
1023
self .charm ._patroni .start_patroni ()
947
1024
0 commit comments