Skip to content

Commit 269ed18

Browse files
DPE-7726: Extra Debug (Remove?)
1 parent 1703639 commit 269ed18

File tree

2 files changed

+55
-20
lines changed

2 files changed

+55
-20
lines changed

src/charm.py

Lines changed: 49 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ def is_compatible(
219219
def refresh_snap(
220220
self, *, snap_name: str, snap_revision: str, refresh: charm_refresh.Machines
221221
) -> None:
222-
# Update the configuration.
222+
logger.debug("Update Patroni config on snap refresh")
223223
self._charm.set_unit_status(MaintenanceStatus("updating configuration"), refresh=refresh)
224224
self._charm.update_config(refresh=refresh)
225225
self._charm.updated_synchronous_node_count()
@@ -407,6 +407,7 @@ def set_unit_status(
407407
json.dumps(refresh_status.message)
408408
)
409409
return
410+
logger.debug(f"Set unit status: {status}")
410411
self.unit.status = status
411412

412413
def _reconcile_refresh_status(self, _=None):
@@ -444,8 +445,8 @@ def _reconcile_refresh_status(self, _=None):
444445

445446
def _on_databases_change(self, _):
446447
"""Handle databases change event."""
448+
logger.debug("Update Patroni config on databases changed")
447449
self.update_config()
448-
logger.debug("databases changed")
449450
timestamp = datetime.now()
450451
self.unit_peer_data.update({"pg_hba_needs_update_timestamp": str(timestamp)})
451452
logger.debug(f"authorisation rules changed at {timestamp}")
@@ -617,9 +618,12 @@ def primary_endpoint(self) -> str | None:
617618
return None
618619
try:
619620
primary = self._patroni.get_primary()
621+
logger.debug(f"primary_endpoint: got primary '{primary}'")
620622
if primary is None and (standby_leader := self._patroni.get_standby_leader()):
623+
logger.debug(f"Using standby_leader {standby_leader} as primary")
621624
primary = standby_leader
622625
primary_endpoint = self._patroni.get_member_ip(primary) if primary else None
626+
logger.debug(f"primary_endpoint: got primary endpoint '{primary_endpoint}'")
623627
# Force a retry if there is no primary or the member that was
624628
# returned is not in the list of the current cluster members
625629
# (like when the cluster was not updated yet after a failed switchover).
@@ -709,8 +713,9 @@ def _on_peer_relation_departed(self, event: RelationDepartedEvent) -> None:
709713
event.defer()
710714
return
711715

712-
# Update the list of the current members.
716+
logger.debug("Update the list of the current members")
713717
self._remove_from_members_ips(member_ip)
718+
logger.debug("Update Patroni config on peer relation departure")
714719
self.update_config()
715720

716721
if self.primary_endpoint:
@@ -863,6 +868,8 @@ def has_raft_keys(self):
863868
def _peer_relation_changed_checks(self, event: HookEvent) -> bool:
864869
"""Split of to reduce complexity."""
865870
# Prevents the cluster to be reconfigured before it's bootstrapped in the leader.
871+
logger.debug(f"Calling on_peer_relation_changed, event: '{event}'")
872+
866873
if not self.is_cluster_initialised:
867874
logger.debug("Early exit on_peer_relation_changed: cluster not initialized")
868875
return False
@@ -891,7 +898,7 @@ def _on_peer_relation_changed(self, event: HookEvent):
891898

892899
# Update the list of the cluster members in the replicas to make them know each other.
893900
try:
894-
# Update the members of the cluster in the Patroni configuration on this unit.
901+
logger.debug("Update the members of the cluster in Patroni on this unit")
895902
self.update_config()
896903
except RetryError:
897904
self.set_unit_status(BlockedStatus("failed to update cluster members on member"))
@@ -1069,12 +1076,14 @@ def add_cluster_member(self, member: str) -> None:
10691076
NotReadyError if either the new member or the current members are not ready.
10701077
"""
10711078
unit = self.model.get_unit(label2name(member))
1079+
logger.debug(f"add_cluster_member: adding unit {unit} to the cluster")
10721080
if member_ip := self._get_unit_ip(unit):
10731081
if not self._patroni.are_all_members_ready():
10741082
logger.info("not all members are ready")
10751083
raise NotReadyError("not all members are ready")
10761084

1077-
# Add the member to the list that should be updated in each other member.
1085+
# It should be updated in each other member.
1086+
logger.debug(f"Add member {member_ip} to the members_ips list")
10781087
self._add_to_members_ips(member_ip)
10791088

10801089
# Update Patroni configuration file.
@@ -1190,6 +1199,7 @@ def _units_ips(self) -> set[str]:
11901199
for unit in self._peers.units:
11911200
if ip := self._get_unit_ip(unit):
11921201
addresses.add(ip)
1202+
logger.debug(f"_units_ips addresses: {addresses}")
11931203
return addresses
11941204

11951205
@property
@@ -1327,7 +1337,7 @@ def _on_install(self, event: InstallEvent) -> None:
13271337
self._reboot_on_detached_storage(event)
13281338
return
13291339

1330-
self.set_unit_status(MaintenanceStatus("installing PostgreSQL"))
1340+
self.set_unit_status(MaintenanceStatus("downloading & installing PostgreSQL"))
13311341

13321342
# Install the charmed PostgreSQL snap.
13331343
try:
@@ -1399,6 +1409,7 @@ def _on_leader_elected(self, event: LeaderElectedEvent) -> None: # noqa: C901
13991409
if not self.get_secret(APP_SCOPE, "internal-ca"):
14001410
self.tls.generate_internal_peer_ca()
14011411
self.tls.generate_internal_peer_cert()
1412+
logger.debug("Update Patroni config on leader elected")
14021413
self.update_config()
14031414

14041415
# Don't update connection endpoints in the first time this event run for
@@ -1443,10 +1454,10 @@ def _on_config_changed(self, event) -> None: # noqa: C901
14431454

14441455
try:
14451456
self._validate_config_options()
1446-
# update config on every run
1457+
logger.debug("Update Patroni config on config changed")
14471458
self.update_config()
1448-
except psycopg2.OperationalError:
1449-
logger.debug("Defer on_config_changed: Cannot connect to database")
1459+
except psycopg2.OperationalError as e:
1460+
logger.debug(f"Defer on_config_changed: Cannot connect to database ({e})")
14501461
event.defer()
14511462
return
14521463
except ValueError as e:
@@ -1503,14 +1514,17 @@ def enable_disable_extensions(self, database: str | None = None) -> None:
15031514
continue
15041515
extension = PLUGIN_OVERRIDES.get(extension, extension)
15051516
if self._check_extension_dependencies(extension, enable):
1517+
logger.debug(f"Early exit: {extension} has broken dependencies")
15061518
self.set_unit_status(BlockedStatus(EXTENSIONS_DEPENDENCY_MESSAGE))
15071519
return
15081520
extensions[extension] = enable
15091521
if self.is_blocked and self.unit.status.message == EXTENSIONS_DEPENDENCY_MESSAGE:
1522+
logger.debug("Marking unit as Active")
15101523
self.set_unit_status(ActiveStatus())
15111524
original_status = self.unit.status
15121525
self.set_unit_status(WaitingStatus("Updating extensions"))
15131526
try:
1527+
logger.debug("Enabling/disabling PostgreSQL extensions...")
15141528
self.postgresql.enable_disable_extensions(extensions, database)
15151529
except psycopg2.errors.DependentObjectsStillExist as e:
15161530
logger.error(
@@ -1522,8 +1536,10 @@ def enable_disable_extensions(self, database: str | None = None) -> None:
15221536
except PostgreSQLEnableDisableExtensionError as e:
15231537
logger.exception("failed to change plugins: %s", str(e))
15241538
if original_status.message == EXTENSION_OBJECT_MESSAGE:
1539+
logger.debug("Marking unit as Active and finish with extensions")
15251540
self.set_unit_status(ActiveStatus())
15261541
return
1542+
logger.debug(f"Restoring original unit status to {original_status}")
15271543
self.set_unit_status(original_status)
15281544

15291545
def _check_extension_dependencies(self, extension: str, enable: bool) -> bool:
@@ -1604,16 +1620,19 @@ def _on_start(self, event: StartEvent) -> None:
16041620
# Only the leader can bootstrap the cluster.
16051621
# On replicas, only prepare for starting the instance later.
16061622
if not self.unit.is_leader():
1623+
logger.debug("Prepare for starting replica instance later")
16071624
self._start_replica(event)
16081625
self._restart_services_after_reboot()
16091626
return
16101627

1611-
# Bootstrap the cluster in the leader unit.
1612-
self._start_primary(event)
1613-
self._restart_services_after_reboot()
1628+
logger.debug("Bootstrap the cluster in the leader unit")
1629+
self._start_primary(event) # start Patroni
1630+
self._restart_services_after_reboot() # start Patroni #2
16141631

16151632
def _restart_services_after_reboot(self):
16161633
"""Restart the Patroni and pgBackRest after a reboot."""
1634+
logger.debug(f"_restart_services_after_reboot: self._unit_ip: {self._unit_ip}")
1635+
logger.debug(f"_restart_services_after_reboot: self.members_ips: {self.members_ips}")
16171636
if self._unit_ip in self.members_ips:
16181637
self._patroni.start_patroni()
16191638
self.backup.start_stop_pgbackrest_service()
@@ -1702,6 +1721,8 @@ def _setup_ldap_sync(self, postgres_snap: snap.Snap | None = None) -> None:
17021721
postgres_snap.restart(services=["ldap-sync"])
17031722

17041723
def _setup_users(self) -> None:
1724+
"""Create PostgreSQL users used/operated by charm."""
1725+
logger.debug("Setup PostgreSQL users")
17051726
self.postgresql.create_predefined_instance_roles()
17061727

17071728
# Create the default postgres database user that is needed for some
@@ -1710,14 +1731,14 @@ def _setup_users(self) -> None:
17101731
# This event can be run on a replica if the machines are restarted.
17111732
# For that case, check whether the postgres user already exits.
17121733
users = self.postgresql.list_users()
1713-
# Create the backup user.
17141734
if BACKUP_USER not in users:
1735+
logger.debug(f"Creating user {BACKUP_USER}")
17151736
self.postgresql.create_user(
17161737
BACKUP_USER, new_password(), extra_user_roles=[ROLE_BACKUP]
17171738
)
17181739
self.postgresql.grant_database_privileges_to_user(BACKUP_USER, "postgres", ["connect"])
17191740
if MONITORING_USER not in users:
1720-
# Create the monitoring user.
1741+
logger.debug(f"Creating user {MONITORING_USER}")
17211742
self.postgresql.create_user(
17221743
MONITORING_USER,
17231744
self.get_secret(APP_SCOPE, MONITORING_PASSWORD_KEY),
@@ -1785,11 +1806,10 @@ def _start_primary(self, event: StartEvent) -> None:
17851806
# Flag to know if triggers need to be removed after refresh
17861807
self.app_peer_data["refresh_remove_trigger"] = "True"
17871808

1788-
# Clear unit data if this unit became a replica after a failover/switchover.
1809+
logger.debug("Clear unit data if this unit became a replica after a failover/switchover")
17891810
self._update_relation_endpoints()
17901811

1791-
# Enable/disable PostgreSQL extensions if they were set before the cluster
1792-
# was fully initialised.
1812+
# if extensions were set before the cluster was fully initialised.
17931813
self.enable_disable_extensions()
17941814

17951815
logger.debug("Active workload time: %s", datetime.now())
@@ -1880,6 +1900,7 @@ def _update_admin_password(self, admin_secret_id: str) -> None:
18801900

18811901
# Update and reload Patroni configuration in this unit to use the new password.
18821902
# Other units Patroni configuration will be reloaded in the peer relation changed event.
1903+
logger.debug("Update Patroni config on admin password update")
18831904
self.update_config()
18841905

18851906
def _on_promote_to_primary(self, event: ActionEvent) -> None:
@@ -2015,6 +2036,7 @@ def _was_restore_successful(self) -> bool:
20152036

20162037
def _can_run_on_update_status(self) -> bool:
20172038
if not self.is_cluster_initialised:
2039+
logger.debug("Early exit on_update_status: cluster is not initialised")
20182040
return False
20192041

20202042
if self.has_raft_keys():
@@ -2076,6 +2098,7 @@ def _set_primary_status_message(self) -> None:
20762098
self.set_unit_status(
20772099
BlockedStatus(self.app_peer_data["s3-initialization-block-message"])
20782100
)
2101+
logger.debug("Early exit _set_primary_status_message: s3 is blocked")
20792102
return
20802103
if self.unit.is_leader() and (
20812104
self.app_peer_data.get("logical-replication-validation") == "error"
@@ -2213,6 +2236,7 @@ def push_tls_files_to_workload(self) -> bool:
22132236
)
22142237

22152238
try:
2239+
logger.debug("Update Patroni config on push tls files to workload")
22162240
return self.update_config()
22172241
except Exception:
22182242
logger.exception("TLS files failed to push. Error in config update")
@@ -2227,6 +2251,7 @@ def push_ca_file_into_workload(self, secret_name: str) -> bool:
22272251
subprocess.check_call([UPDATE_CERTS_BIN_PATH]) # noqa: S603
22282252

22292253
try:
2254+
logger.debug("Update Patroni config on push CA file into workload")
22302255
return self.update_config()
22312256
except Exception:
22322257
logger.exception("CA file failed to push. Error in config update")
@@ -2240,6 +2265,7 @@ def clean_ca_file_from_workload(self, secret_name: str) -> bool:
22402265
subprocess.check_call([UPDATE_CERTS_BIN_PATH]) # noqa: S603
22412266

22422267
try:
2268+
logger.debug("Update Patroni config on clean CA file from workload")
22432269
return self.update_config()
22442270
except Exception:
22452271
logger.exception("CA file failed to clean. Error in config update")
@@ -2319,6 +2345,7 @@ def update_config(
23192345
refresh: charm_refresh.Machines | None = None,
23202346
) -> bool:
23212347
"""Updates Patroni config file based on the existence of the TLS files."""
2348+
logger.debug("Updating Patroni config")
23222349
if refresh is None:
23232350
refresh = self.refresh
23242351

@@ -2351,6 +2378,7 @@ def update_config(
23512378
slots=replication_slots,
23522379
)
23532380
if no_peers:
2381+
logger.debug("Early exit update_config: no peers")
23542382
return True
23552383

23562384
if not self._is_workload_running:
@@ -2387,6 +2415,7 @@ def update_config(
23872415
else max(4 * self.cpu_count, 100)
23882416
)
23892417

2418+
logger.debug("Bulk update parameters controller by patroni")
23902419
self._patroni.bulk_update_parameters_controller_by_patroni({
23912420
"max_connections": max_connections,
23922421
"max_prepared_transactions": self.config.memory_max_prepared_transactions,
@@ -2412,6 +2441,7 @@ def update_config(
24122441

24132442
self.unit_peer_data.update({"user_hash": self.generate_user_hash})
24142443
if self.unit.is_leader():
2444+
logger.debug(f"Updating user_hash in app databag on leader: {self.generate_user_hash}")
24152445
self.app_peer_data.update({"user_hash": self.generate_user_hash})
24162446
return True
24172447

@@ -2433,6 +2463,7 @@ def _validate_config_options(self) -> None:
24332463
):
24342464
raise ValueError("request_date_style config option has an invalid value")
24352465

2466+
logger.debug("Checking timezone config options")
24362467
if self.config.request_time_zone not in self.postgresql.get_postgresql_timezones():
24372468
raise ValueError("request_time_zone config option has an invalid value")
24382469

@@ -2446,6 +2477,7 @@ def _validate_config_options(self) -> None:
24462477

24472478
def _handle_postgresql_restart_need(self) -> None:
24482479
"""Handle PostgreSQL restart need based on the TLS configuration and configuration changes."""
2480+
logger.debug("Checking for PostgreSQL restart necessity")
24492481
if self._can_connect_to_postgresql:
24502482
restart_postgresql = self.is_tls_enabled != self.postgresql.is_tls_enabled(
24512483
check_current_host=True

src/cluster.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ def _dict_to_hba_string(_dict: dict[str, Any]) -> str:
213213

214214
def bootstrap_cluster(self) -> bool:
215215
"""Bootstrap a PostgreSQL cluster using Patroni."""
216-
# Render the configuration files and start the cluster.
216+
logger.debug("bootstrap_cluster: render the configuration files and start Patroni")
217217
self.configure_patroni_on_unit()
218218
return self.start_patroni()
219219

@@ -603,7 +603,9 @@ def is_member_isolated(self) -> bool:
603603
auth=self._patroni_auth,
604604
)
605605
logger.debug(
606-
"API is_member_isolated: %s (%s)", r["members"], r.elapsed.total_seconds()
606+
"API is_member_isolated: %s (%s)",
607+
r.json()["members"],
608+
r.elapsed.total_seconds(),
607609
)
608610
except RetryError:
609611
# Return False if it was not possible to get the cluster info. Try again later.
@@ -850,7 +852,7 @@ def stop_patroni(self) -> bool:
850852

851853
def switchover(self, candidate: str | None = None) -> None:
852854
"""Trigger a switchover."""
853-
# Try to trigger the switchover.
855+
logger.debug("Triggering the switchover to {candidate}")
854856
for attempt in Retrying(stop=stop_after_delay(60), wait=wait_fixed(3)):
855857
with attempt:
856858
current_primary = self.get_primary()
@@ -1009,6 +1011,7 @@ def remove_raft_member(self, member_ip: str) -> None:
10091011

10101012
# Leader doesn't always trigger when changing it's own peer data.
10111013
if self.charm.unit.is_leader():
1014+
logger.debug("Emitting peer_relation_changed on leader")
10121015
self.charm.on[PEER].relation_changed.emit(
10131016
unit=self.charm.unit,
10141017
app=self.charm.app,

0 commit comments

Comments
 (0)