86
86
USER ,
87
87
USER_PASSWORD_KEY ,
88
88
)
89
+ from relations .async_replication import PostgreSQLAsyncReplication
89
90
from relations .db import EXTENSIONS_BLOCKING_MESSAGE , DbProvides
90
91
from relations .postgresql_provider import PostgreSQLProvider
91
92
from upgrade import PostgreSQLUpgrade , get_postgresql_dependencies_model
@@ -166,6 +167,7 @@ def __init__(self, *args):
166
167
self .legacy_db_admin_relation = DbProvides (self , admin = True )
167
168
self .backup = PostgreSQLBackups (self , "s3-parameters" )
168
169
self .tls = PostgreSQLTLS (self , PEER )
170
+ self .async_replication = PostgreSQLAsyncReplication (self )
169
171
self .restart_manager = RollingOpsManager (
170
172
charm = self , relation = "restart" , callback = self ._restart
171
173
)
@@ -321,6 +323,8 @@ def primary_endpoint(self) -> Optional[str]:
321
323
for attempt in Retrying (stop = stop_after_delay (60 ), wait = wait_fixed (3 )):
322
324
with attempt :
323
325
primary = self ._patroni .get_primary ()
326
+ if primary is None and (standby_leader := self ._patroni .get_standby_leader ()):
327
+ primary = standby_leader
324
328
primary_endpoint = self ._patroni .get_member_ip (primary )
325
329
# Force a retry if there is no primary or the member that was
326
330
# returned is not in the list of the current cluster members
@@ -420,6 +424,9 @@ def _on_peer_relation_departed(self, event: RelationDepartedEvent) -> None:
420
424
self .unit .status = WaitingStatus (PRIMARY_NOT_REACHABLE_MESSAGE )
421
425
return
422
426
427
+ # Update the sync-standby endpoint in the async replication data.
428
+ self .async_replication .update_async_replication_data ()
429
+
423
430
def _on_pgdata_storage_detaching (self , _ ) -> None :
424
431
# Change the primary if it's the unit that is being removed.
425
432
try :
@@ -513,9 +520,13 @@ def _on_peer_relation_changed(self, event: HookEvent):
513
520
514
521
# Restart the workload if it's stuck on the starting state after a timeline divergence
515
522
# due to a backup that was restored.
516
- if not self .is_primary and (
517
- self ._patroni .member_replication_lag == "unknown"
518
- or int (self ._patroni .member_replication_lag ) > 1000
523
+ if (
524
+ not self .is_primary
525
+ and not self .is_standby_leader
526
+ and (
527
+ self ._patroni .member_replication_lag == "unknown"
528
+ or int (self ._patroni .member_replication_lag ) > 1000
529
+ )
519
530
):
520
531
self ._patroni .reinitialize_postgresql ()
521
532
logger .debug ("Deferring on_peer_relation_changed: reinitialising replica" )
@@ -551,8 +562,7 @@ def _update_new_unit_status(self) -> None:
551
562
# a failed switchover, so wait until the primary is elected.
552
563
if self .primary_endpoint :
553
564
self ._update_relation_endpoints ()
554
- if not self .is_blocked :
555
- self .unit .status = ActiveStatus ()
565
+ self .async_replication .handle_read_only_mode ()
556
566
else :
557
567
self .unit .status = WaitingStatus (PRIMARY_NOT_REACHABLE_MESSAGE )
558
568
@@ -688,6 +698,7 @@ def _hosts(self) -> set:
688
698
def _patroni (self ) -> Patroni :
689
699
"""Returns an instance of the Patroni object."""
690
700
return Patroni (
701
+ self ,
691
702
self ._unit_ip ,
692
703
self .cluster_name ,
693
704
self ._member_name ,
@@ -704,6 +715,11 @@ def is_primary(self) -> bool:
704
715
"""Return whether this unit is the primary instance."""
705
716
return self .unit .name == self ._patroni .get_primary (unit_name_pattern = True )
706
717
718
+ @property
719
+ def is_standby_leader (self ) -> bool :
720
+ """Return whether this unit's name equals the standby leader name reported by Patroni."""
721
+ return self .unit .name == self ._patroni .get_standby_leader (unit_name_pattern = True )
722
+
707
723
@property
708
724
def is_tls_enabled (self ) -> bool :
709
725
"""Return whether TLS is enabled."""
@@ -902,6 +918,9 @@ def _on_config_changed(self, event) -> None:
902
918
if self .is_blocked and "Configuration Error" in self .unit .status .message :
903
919
self .unit .status = ActiveStatus ()
904
920
921
+ # Update the sync-standby endpoint in the async replication data.
922
+ self .async_replication .update_async_replication_data ()
923
+
905
924
if not self .unit .is_leader ():
906
925
return
907
926
@@ -929,6 +948,9 @@ def enable_disable_extensions(self, database: str = None) -> None:
929
948
Args:
930
949
database: optional database where to enable/disable the extension.
931
950
"""
951
+ if self ._patroni .get_primary () is None :
952
+ logger .debug ("Early exit enable_disable_extensions: standby cluster" )
953
+ return
932
954
spi_module = ["refint" , "autoinc" , "insert_username" , "moddatetime" ]
933
955
plugins_exception = {"uuid_ossp" : '"uuid-ossp"' }
934
956
original_status = self .unit .status
@@ -1188,6 +1210,9 @@ def _on_set_password(self, event: ActionEvent) -> None:
1188
1210
# Other units Patroni configuration will be reloaded in the peer relation changed event.
1189
1211
self .update_config ()
1190
1212
1213
+ # Update the password in the async replication data.
1214
+ self .async_replication .update_async_replication_data ()
1215
+
1191
1216
event .set_results ({"password" : password })
1192
1217
1193
1218
def _on_update_status (self , _ ) -> None :
@@ -1225,6 +1250,9 @@ def _on_update_status(self, _) -> None:
1225
1250
if self ._handle_workload_failures ():
1226
1251
return
1227
1252
1253
+ # Update the sync-standby endpoint in the async replication data.
1254
+ self .async_replication .update_async_replication_data ()
1255
+
1228
1256
self ._set_primary_status_message ()
1229
1257
1230
1258
# Restart topology observer if it is gone
@@ -1270,8 +1298,16 @@ def _handle_workload_failures(self) -> bool:
1270
1298
a bool indicating whether the charm performed any action.
1271
1299
"""
1272
1300
# Restart the workload if it's stuck on the starting state after a restart.
1301
+ try :
1302
+ is_primary = self .is_primary
1303
+ is_standby_leader = self .is_standby_leader
1304
+ except RetryError :
1305
+ return False
1306
+
1273
1307
if (
1274
- not self ._patroni .member_started
1308
+ not is_primary
1309
+ and not is_standby_leader
1310
+ and not self ._patroni .member_started
1275
1311
and "postgresql_restarted" in self ._peers .data [self .unit ]
1276
1312
and self ._patroni .member_replication_lag == "unknown"
1277
1313
):
@@ -1291,6 +1327,8 @@ def _set_primary_status_message(self) -> None:
1291
1327
try :
1292
1328
if self ._patroni .get_primary (unit_name_pattern = True ) == self .unit .name :
1293
1329
self .unit .status = ActiveStatus ("Primary" )
1330
+ elif self .is_standby_leader :
1331
+ self .unit .status = ActiveStatus ("Standby Leader" )
1294
1332
elif self ._patroni .member_started :
1295
1333
self .unit .status = ActiveStatus ()
1296
1334
except (RetryError , ConnectionError ) as e :
0 commit comments