@@ -219,7 +219,7 @@ def is_compatible(
     def refresh_snap(
         self, *, snap_name: str, snap_revision: str, refresh: charm_refresh.Machines
     ) -> None:
-        # Update the configuration.
+        logger.debug("Update Patroni config on snap refresh")
         self._charm.set_unit_status(MaintenanceStatus("updating configuration"), refresh=refresh)
         self._charm.update_config(refresh=refresh)
         self._charm.updated_synchronous_node_count()
@@ -407,6 +407,7 @@ def set_unit_status(
                 json.dumps(refresh_status.message)
             )
             return
+        logger.debug(f"Set unit status: {status}")
         self.unit.status = status
 
     def _reconcile_refresh_status(self, _=None):
@@ -444,8 +445,8 @@ def _reconcile_refresh_status(self, _=None):
 
     def _on_databases_change(self, _):
         """Handle databases change event."""
+        logger.debug("Update Patroni config on databases changed")
         self.update_config()
-        logger.debug("databases changed")
         timestamp = datetime.now()
         self.unit_peer_data.update({"pg_hba_needs_update_timestamp": str(timestamp)})
         logger.debug(f"authorisation rules changed at {timestamp}")
@@ -617,9 +618,12 @@ def primary_endpoint(self) -> str | None:
             return None
         try:
             primary = self._patroni.get_primary()
+            logger.debug(f"primary_endpoint: got primary '{primary}'")
             if primary is None and (standby_leader := self._patroni.get_standby_leader()):
+                logger.debug(f"Using standby_leader {standby_leader} as primary")
                 primary = standby_leader
             primary_endpoint = self._patroni.get_member_ip(primary) if primary else None
+            logger.debug(f"primary_endpoint: got primary endpoint '{primary_endpoint}'")
             # Force a retry if there is no primary or the member that was
             # returned is not in the list of the current cluster members
             # (like when the cluster was not updated yet after a failed switchover).
@@ -709,8 +713,9 @@ def _on_peer_relation_departed(self, event: RelationDepartedEvent) -> None:
             event.defer()
             return
 
-        # Update the list of the current members.
+        logger.debug("Update the list of the current members")
         self._remove_from_members_ips(member_ip)
+        logger.debug("Update Patroni config on peer relation departure")
         self.update_config()
 
         if self.primary_endpoint:
@@ -863,6 +868,8 @@ def has_raft_keys(self):
     def _peer_relation_changed_checks(self, event: HookEvent) -> bool:
         """Split of to reduce complexity."""
         # Prevents the cluster to be reconfigured before it's bootstrapped in the leader.
+        logger.debug(f"Calling on_peer_relation_changed, event: '{event}'")
+
         if not self.is_cluster_initialised:
             logger.debug("Early exit on_peer_relation_changed: cluster not initialized")
             return False
@@ -891,7 +898,7 @@ def _on_peer_relation_changed(self, event: HookEvent):
 
         # Update the list of the cluster members in the replicas to make them know each other.
         try:
-            # Update the members of the cluster in the Patroni configuration on this unit.
+            logger.debug("Update the members of the cluster in Patroni on this unit")
             self.update_config()
         except RetryError:
             self.set_unit_status(BlockedStatus("failed to update cluster members on member"))
@@ -1069,12 +1076,14 @@ def add_cluster_member(self, member: str) -> None:
             NotReadyError if either the new member or the current members are not ready.
         """
         unit = self.model.get_unit(label2name(member))
+        logger.debug(f"add_cluster_member: adding unit {unit} to the cluster")
         if member_ip := self._get_unit_ip(unit):
             if not self._patroni.are_all_members_ready():
                 logger.info("not all members are ready")
                 raise NotReadyError("not all members are ready")
 
-            # Add the member to the list that should be updated in each other member.
+            # It should be updated in each other member.
+            logger.debug(f"Add member {member_ip} to the members_ips list")
             self._add_to_members_ips(member_ip)
 
         # Update Patroni configuration file.
@@ -1190,6 +1199,7 @@ def _units_ips(self) -> set[str]:
         for unit in self._peers.units:
             if ip := self._get_unit_ip(unit):
                 addresses.add(ip)
+        logger.debug(f"_units_ips addresses: {addresses}")
         return addresses
 
     @property
@@ -1327,7 +1337,7 @@ def _on_install(self, event: InstallEvent) -> None:
             self._reboot_on_detached_storage(event)
             return
 
-        self.set_unit_status(MaintenanceStatus("installing PostgreSQL"))
+        self.set_unit_status(MaintenanceStatus("downloading & installing PostgreSQL"))
 
         # Install the charmed PostgreSQL snap.
         try:
@@ -1399,6 +1409,7 @@ def _on_leader_elected(self, event: LeaderElectedEvent) -> None:  # noqa: C901
         if not self.get_secret(APP_SCOPE, "internal-ca"):
             self.tls.generate_internal_peer_ca()
             self.tls.generate_internal_peer_cert()
+        logger.debug("Update Patroni config on leader elected")
         self.update_config()
 
         # Don't update connection endpoints in the first time this event run for
@@ -1443,10 +1454,10 @@ def _on_config_changed(self, event) -> None:  # noqa: C901
 
         try:
             self._validate_config_options()
-            # update config on every run
+            logger.debug("Update Patroni config on config changed")
             self.update_config()
-        except psycopg2.OperationalError:
-            logger.debug("Defer on_config_changed: Cannot connect to database")
+        except psycopg2.OperationalError as e:
+            logger.debug(f"Defer on_config_changed: Cannot connect to database ({e})")
             event.defer()
             return
         except ValueError as e:
@@ -1503,14 +1514,17 @@ def enable_disable_extensions(self, database: str | None = None) -> None:
                 continue
             extension = PLUGIN_OVERRIDES.get(extension, extension)
             if self._check_extension_dependencies(extension, enable):
+                logger.debug(f"Early exit: {extension} has broken dependencies")
                 self.set_unit_status(BlockedStatus(EXTENSIONS_DEPENDENCY_MESSAGE))
                 return
             extensions[extension] = enable
         if self.is_blocked and self.unit.status.message == EXTENSIONS_DEPENDENCY_MESSAGE:
+            logger.debug("Marking unit as Active")
             self.set_unit_status(ActiveStatus())
         original_status = self.unit.status
         self.set_unit_status(WaitingStatus("Updating extensions"))
         try:
+            logger.debug("Enabling/disabling PostgreSQL extensions...")
             self.postgresql.enable_disable_extensions(extensions, database)
         except psycopg2.errors.DependentObjectsStillExist as e:
             logger.error(
@@ -1522,8 +1536,10 @@ def enable_disable_extensions(self, database: str | None = None) -> None:
         except PostgreSQLEnableDisableExtensionError as e:
             logger.exception("failed to change plugins: %s", str(e))
         if original_status.message == EXTENSION_OBJECT_MESSAGE:
+            logger.debug("Marking unit as Active and finish with extensions")
             self.set_unit_status(ActiveStatus())
             return
+        logger.debug(f"Restoring original unit status to {original_status}")
         self.set_unit_status(original_status)
 
     def _check_extension_dependencies(self, extension: str, enable: bool) -> bool:
@@ -1604,16 +1620,19 @@ def _on_start(self, event: StartEvent) -> None:
         # Only the leader can bootstrap the cluster.
         # On replicas, only prepare for starting the instance later.
         if not self.unit.is_leader():
+            logger.debug("Prepare for starting replica instance later")
             self._start_replica(event)
             self._restart_services_after_reboot()
             return
 
-        # Bootstrap the cluster in the leader unit.
-        self._start_primary(event)
-        self._restart_services_after_reboot()
+        logger.debug("Bootstrap the cluster in the leader unit")
+        self._start_primary(event)  # start Patroni
+        self._restart_services_after_reboot()  # start Patroni #2
 
     def _restart_services_after_reboot(self):
         """Restart the Patroni and pgBackRest after a reboot."""
+        logger.debug(f"_restart_services_after_reboot: self._unit_ip: {self._unit_ip}")
+        logger.debug(f"_restart_services_after_reboot: self.members_ips: {self.members_ips}")
         if self._unit_ip in self.members_ips:
             self._patroni.start_patroni()
             self.backup.start_stop_pgbackrest_service()
@@ -1702,6 +1721,8 @@ def _setup_ldap_sync(self, postgres_snap: snap.Snap | None = None) -> None:
             postgres_snap.restart(services=["ldap-sync"])
 
     def _setup_users(self) -> None:
+        """Create PostgreSQL users used/operated by charm."""
+        logger.debug("Setup PostgreSQL users")
         self.postgresql.create_predefined_instance_roles()
 
         # Create the default postgres database user that is needed for some
@@ -1710,14 +1731,14 @@ def _setup_users(self) -> None:
         # This event can be run on a replica if the machines are restarted.
         # For that case, check whether the postgres user already exits.
         users = self.postgresql.list_users()
-        # Create the backup user.
         if BACKUP_USER not in users:
+            logger.debug(f"Creating user {BACKUP_USER}")
             self.postgresql.create_user(
                 BACKUP_USER, new_password(), extra_user_roles=[ROLE_BACKUP]
             )
             self.postgresql.grant_database_privileges_to_user(BACKUP_USER, "postgres", ["connect"])
         if MONITORING_USER not in users:
-            # Create the monitoring user.
+            logger.debug(f"Creating user {MONITORING_USER}")
             self.postgresql.create_user(
                 MONITORING_USER,
                 self.get_secret(APP_SCOPE, MONITORING_PASSWORD_KEY),
@@ -1785,11 +1806,10 @@ def _start_primary(self, event: StartEvent) -> None:
         # Flag to know if triggers need to be removed after refresh
         self.app_peer_data["refresh_remove_trigger"] = "True"
 
-        # Clear unit data if this unit became a replica after a failover/switchover.
+        logger.debug("Clear unit data if this unit became a replica after a failover/switchover")
         self._update_relation_endpoints()
 
-        # Enable/disable PostgreSQL extensions if they were set before the cluster
-        # was fully initialised.
+        # if extensions were set before the cluster was fully initialised.
         self.enable_disable_extensions()
 
         logger.debug("Active workload time: %s", datetime.now())
@@ -1880,6 +1900,7 @@ def _update_admin_password(self, admin_secret_id: str) -> None:
 
         # Update and reload Patroni configuration in this unit to use the new password.
         # Other units Patroni configuration will be reloaded in the peer relation changed event.
+        logger.debug("Update Patroni config on admin password update")
         self.update_config()
 
     def _on_promote_to_primary(self, event: ActionEvent) -> None:
def _on_promote_to_primary (self , event : ActionEvent ) -> None :
@@ -2015,6 +2036,7 @@ def _was_restore_successful(self) -> bool:
2015
2036
2016
2037
def _can_run_on_update_status (self ) -> bool :
2017
2038
if not self .is_cluster_initialised :
2039
+ logger .debug ("Early exit on_update_status: cluster is not initialised" )
2018
2040
return False
2019
2041
2020
2042
if self .has_raft_keys ():
@@ -2076,6 +2098,7 @@ def _set_primary_status_message(self) -> None:
             self.set_unit_status(
                 BlockedStatus(self.app_peer_data["s3-initialization-block-message"])
             )
+            logger.debug("Early exit _set_primary_status_message: s3 is blocked")
             return
         if self.unit.is_leader() and (
             self.app_peer_data.get("logical-replication-validation") == "error"
@@ -2213,6 +2236,7 @@ def push_tls_files_to_workload(self) -> bool:
         )
 
         try:
+            logger.debug("Update Patroni config on push tls files to workload")
             return self.update_config()
         except Exception:
             logger.exception("TLS files failed to push. Error in config update")
@@ -2227,6 +2251,7 @@ def push_ca_file_into_workload(self, secret_name: str) -> bool:
         subprocess.check_call([UPDATE_CERTS_BIN_PATH])  # noqa: S603
 
         try:
+            logger.debug("Update Patroni config on push CA file into workload")
             return self.update_config()
         except Exception:
             logger.exception("CA file failed to push. Error in config update")
@@ -2240,6 +2265,7 @@ def clean_ca_file_from_workload(self, secret_name: str) -> bool:
         subprocess.check_call([UPDATE_CERTS_BIN_PATH])  # noqa: S603
 
         try:
+            logger.debug("Update Patroni config on clean CA file from workload")
             return self.update_config()
         except Exception:
             logger.exception("CA file failed to clean. Error in config update")
@@ -2319,6 +2345,7 @@ def update_config(
         refresh: charm_refresh.Machines | None = None,
     ) -> bool:
         """Updates Patroni config file based on the existence of the TLS files."""
+        logger.debug("Updating Patroni config")
         if refresh is None:
             refresh = self.refresh
 
@@ -2351,6 +2378,7 @@ def update_config(
             slots=replication_slots,
         )
         if no_peers:
+            logger.debug("Early exit update_config: no peers")
             return True
 
         if not self._is_workload_running:
@@ -2387,6 +2415,7 @@ def update_config(
             else max(4 * self.cpu_count, 100)
         )
 
+        logger.debug("Bulk update parameters controller by patroni")
         self._patroni.bulk_update_parameters_controller_by_patroni({
             "max_connections": max_connections,
             "max_prepared_transactions": self.config.memory_max_prepared_transactions,
@@ -2412,6 +2441,7 @@ def update_config(
 
         self.unit_peer_data.update({"user_hash": self.generate_user_hash})
         if self.unit.is_leader():
+            logger.debug(f"Updating user_hash in app databag on leader: {self.generate_user_hash}")
             self.app_peer_data.update({"user_hash": self.generate_user_hash})
         return True
 
@@ -2433,6 +2463,7 @@ def _validate_config_options(self) -> None:
         ):
             raise ValueError("request_date_style config option has an invalid value")
 
+        logger.debug("Checking timezone config options")
         if self.config.request_time_zone not in self.postgresql.get_postgresql_timezones():
             raise ValueError("request_time_zone config option has an invalid value")
 
@@ -2446,6 +2477,7 @@ def _validate_config_options(self) -> None:
 
     def _handle_postgresql_restart_need(self) -> None:
         """Handle PostgreSQL restart need based on the TLS configuration and configuration changes."""
+        logger.debug("Checking for PostgreSQL restart necessity")
         if self._can_connect_to_postgresql:
             restart_postgresql = self.is_tls_enabled != self.postgresql.is_tls_enabled(
                 check_current_host=True