Skip to content

Commit 6a5b271

Browse files
DPE-7726: Add Patroni API logging
Also: * use commong logger everywhere * and add several useful log messaged (e.g. DB connection) * remove no longer necessary debug 'Init class PostgreSQL' * align Patroni API requests style everhywhere * add Patroni API duration to debug logs
1 parent addca8f commit 6a5b271

File tree

3 files changed

+61
-17
lines changed

3 files changed

+61
-17
lines changed

lib/charms/postgresql_k8s/v1/postgresql.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -273,8 +273,11 @@ def _connect_to_database(
273273
raise PostgreSQLUndefinedHostError("Host not set")
274274
if not self.password:
275275
raise PostgreSQLUndefinedPasswordError("Password not set")
276+
277+
dbname = database if database else self.database
278+
logger.debug(f"New DB connection: dbname='{dbname}' user='{self.user}' host='{host}' connect_timeout=1")
276279
connection = psycopg2.connect(
277-
f"dbname='{database if database else self.database}' user='{self.user}' host='{host}'"
280+
f"dbname='{dbname}' user='{self.user}' host='{host}'"
278281
f"password='{self.password}' connect_timeout=1"
279282
)
280283
connection.autocommit = True

src/charm.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ def unit_number(unit_name: str):
177177
# Lowest unit number is last to refresh
178178
last_unit_to_refresh = sorted(all_units, key=unit_number)[0].replace("/", "-")
179179
if self._charm._patroni.get_primary() == last_unit_to_refresh:
180-
logging.info(
180+
logger.info(
181181
f"Unit {last_unit_to_refresh} was already primary during pre-refresh check"
182182
)
183183
else:
@@ -187,7 +187,7 @@ def unit_number(unit_name: str):
187187
logger.warning(f"switchover failed with reason: {e}")
188188
raise charm_refresh.PrecheckFailed("Unable to switch primary")
189189
else:
190-
logging.info(
190+
logger.info(
191191
f"Switched primary to unit {last_unit_to_refresh} during pre-refresh check"
192192
)
193193

@@ -612,7 +612,6 @@ def postgresql(self) -> PostgreSQL:
612612
"""Returns an instance of the object used to interact with the database."""
613613
password = str(self.get_secret(APP_SCOPE, f"{USER}-password"))
614614
if self._postgresql is None or self._postgresql.primary_host is None:
615-
logger.debug("Init class PostgreSQL")
616615
self._postgresql = PostgreSQL(
617616
primary_host=self.primary_endpoint,
618617
current_host=self._unit_ip,
@@ -2475,7 +2474,9 @@ def _handle_postgresql_restart_need(self) -> None:
24752474
logger.error(f"Reload patroni call failed! error: {e!s}")
24762475

24772476
restart_pending = self._patroni.is_restart_pending()
2478-
logger.debug(f"Checking if restart pending: {restart_postgresql} or {restart_pending}")
2477+
logger.debug(
2478+
f"Checking if restart pending: TLS={restart_postgresql} or API={restart_pending}"
2479+
)
24792480
restart_postgresql = restart_postgresql or restart_pending
24802481

24812482
self.unit_peer_data.update({"tls": "enabled" if self.is_tls_enabled else ""})

src/cluster.py

Lines changed: 52 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,7 @@ def cluster_status(self, alternative_endpoints: list | None = None) -> list[Clus
274274
if response := self.parallel_patroni_get_request(
275275
f"/{PATRONI_CLUSTER_STATUS_ENDPOINT}", alternative_endpoints
276276
):
277-
logger.debug("Patroni cluster members: %s", response["members"])
277+
logger.debug("API cluster_status: %s", response["members"])
278278
return response["members"]
279279
raise RetryError(
280280
last_attempt=Future.construct(1, Exception("Unable to reach any units"), True)
@@ -462,6 +462,7 @@ def get_patroni_health(self) -> dict[str, str]:
462462
timeout=API_REQUEST_TIMEOUT,
463463
auth=self._patroni_auth,
464464
)
465+
logger.debug("API get_patroni_health: %s (%s)", r, r.elapsed.total_seconds())
465466

466467
return r.json()
467468

@@ -529,15 +530,20 @@ def is_replication_healthy(self) -> bool:
529530
for members_ip in members_ips:
530531
endpoint = "leader" if members_ip == primary_ip else "replica?lag=16kB"
531532
url = self._patroni_url.replace(self.unit_ip, members_ip)
532-
member_status = requests.get(
533+
r = requests.get(
533534
f"{url}/{endpoint}",
534535
verify=self.verify,
535536
auth=self._patroni_auth,
536537
timeout=PATRONI_TIMEOUT,
537538
)
538-
if member_status.status_code != 200:
539+
logger.debug(
540+
"API is_replication_healthy: %s (%s)",
541+
r,
542+
r.elapsed.total_seconds(),
543+
)
544+
if r.status_code != 200:
539545
logger.debug(
540-
f"Failed replication check for {members_ip} with code {member_status.status_code}"
546+
f"Failed replication check for {members_ip} with code {r.status_code}"
541547
)
542548
raise Exception
543549
except RetryError:
@@ -590,17 +596,20 @@ def is_member_isolated(self) -> bool:
590596
try:
591597
for attempt in Retrying(stop=stop_after_delay(10), wait=wait_fixed(3)):
592598
with attempt:
593-
cluster_status = requests.get(
599+
r = requests.get(
594600
f"{self._patroni_url}/{PATRONI_CLUSTER_STATUS_ENDPOINT}",
595601
verify=self.verify,
596602
timeout=API_REQUEST_TIMEOUT,
597603
auth=self._patroni_auth,
598604
)
605+
logger.debug(
606+
"API is_member_isolated: %s (%s)", r["members"], r.elapsed.total_seconds()
607+
)
599608
except RetryError:
600609
# Return False if it was not possible to get the cluster info. Try again later.
601610
return False
602611

603-
return len(cluster_status.json()["members"]) == 0
612+
return len(r.json()["members"]) == 0
604613

605614
def online_cluster_members(self) -> list[ClusterMember]:
606615
"""Return list of online cluster members."""
@@ -631,15 +640,21 @@ def promote_standby_cluster(self) -> None:
631640
auth=self._patroni_auth,
632641
timeout=PATRONI_TIMEOUT,
633642
)
643+
logger.debug(
644+
"API promote_standby_cluster: %s (%s)",
645+
config_response,
646+
config_response.elapsed.total_seconds(),
647+
)
634648
if "standby_cluster" not in config_response.json():
635649
raise StandbyClusterAlreadyPromotedError("standby cluster is already promoted")
636-
requests.patch(
650+
r = requests.patch(
637651
f"{self._patroni_url}/config",
638652
verify=self.verify,
639653
json={"standby_cluster": None},
640654
auth=self._patroni_auth,
641655
timeout=PATRONI_TIMEOUT,
642656
)
657+
logger.debug("API promote_standby_cluster patch: %s (%s)", r, r.elapsed.total_seconds())
643658
for attempt in Retrying(stop=stop_after_delay(60), wait=wait_fixed(3)):
644659
with attempt:
645660
if self.get_primary() is None:
@@ -846,6 +861,7 @@ def switchover(self, candidate: str | None = None) -> None:
846861
auth=self._patroni_auth,
847862
timeout=PATRONI_TIMEOUT,
848863
)
864+
logger.debug("API switchover: %s (%s)", r, r.elapsed.total_seconds())
849865

850866
# Check whether the switchover was unsuccessful.
851867
if r.status_code != 200:
@@ -1012,12 +1028,14 @@ def remove_raft_member(self, member_ip: str) -> None:
10121028
@retry(stop=stop_after_attempt(20), wait=wait_exponential(multiplier=1, min=2, max=10))
10131029
def reload_patroni_configuration(self):
10141030
"""Reload Patroni configuration after it was changed."""
1015-
requests.post(
1031+
logger.debug("Reloading Patroni configuration...")
1032+
r = requests.post(
10161033
f"{self._patroni_url}/reload",
10171034
verify=self.verify,
10181035
auth=self._patroni_auth,
10191036
timeout=PATRONI_TIMEOUT,
10201037
)
1038+
logger.debug("API reload_patroni_configuration: %s (%s)", r, r.elapsed.total_seconds())
10211039

10221040
def is_patroni_running(self) -> bool:
10231041
"""Check if the Patroni service is running."""
@@ -1048,36 +1066,45 @@ def restart_patroni(self) -> bool:
10481066
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10))
10491067
def restart_postgresql(self) -> None:
10501068
"""Restart PostgreSQL."""
1051-
requests.post(
1069+
logger.debug("Restarting PostgreSQL...")
1070+
r = requests.post(
10521071
f"{self._patroni_url}/restart",
10531072
verify=self.verify,
10541073
auth=self._patroni_auth,
10551074
timeout=PATRONI_TIMEOUT,
10561075
)
1076+
logger.debug("API restart_postgresql: %s (%s)", r, r.elapsed.total_seconds())
10571077

10581078
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10))
10591079
def reinitialize_postgresql(self) -> None:
10601080
"""Reinitialize PostgreSQL."""
1061-
requests.post(
1081+
logger.debug("Reinitializing PostgreSQL...")
1082+
r = requests.post(
10621083
f"{self._patroni_url}/reinitialize",
10631084
verify=self.verify,
10641085
auth=self._patroni_auth,
10651086
timeout=PATRONI_TIMEOUT,
10661087
)
1088+
logger.debug("API reinitialize_postgresql: %s (%s)", r, r.elapsed.total_seconds())
10671089

10681090
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10))
10691091
def bulk_update_parameters_controller_by_patroni(self, parameters: dict[str, Any]) -> None:
10701092
"""Update the value of a parameter controller by Patroni.
10711093
10721094
For more information, check https://patroni.readthedocs.io/en/latest/patroni_configuration.html#postgresql-parameters-controlled-by-patroni.
10731095
"""
1074-
requests.patch(
1096+
r = requests.patch(
10751097
f"{self._patroni_url}/config",
10761098
verify=self.verify,
10771099
json={"postgresql": {"parameters": parameters}},
10781100
auth=self._patroni_auth,
10791101
timeout=PATRONI_TIMEOUT,
10801102
)
1103+
logger.debug(
1104+
"API bulk_update_parameters_controller_by_patroni: %s (%s)",
1105+
r,
1106+
r.elapsed.total_seconds(),
1107+
)
10811108

10821109
def ensure_slots_controller_by_patroni(self, slots: dict[str, str]) -> None:
10831110
"""Synchronises slots controlled by Patroni with the provided state by removing unneeded slots and creating new ones.
@@ -1093,6 +1120,11 @@ def ensure_slots_controller_by_patroni(self, slots: dict[str, str]) -> None:
10931120
timeout=PATRONI_TIMEOUT,
10941121
auth=self._patroni_auth,
10951122
)
1123+
logger.debug(
1124+
"API ensure_slots_controller_by_patroni: %s (%s)",
1125+
current_config,
1126+
current_config.elapsed.total_seconds(),
1127+
)
10961128
if current_config.status_code != 200:
10971129
raise Exception(
10981130
f"Failed to get current Patroni config: {current_config.status_code} {current_config.text}"
@@ -1106,13 +1138,18 @@ def ensure_slots_controller_by_patroni(self, slots: dict[str, str]) -> None:
11061138
"plugin": "pgoutput",
11071139
"type": "logical",
11081140
}
1109-
requests.patch(
1141+
r = requests.patch(
11101142
f"{self._patroni_url}/config",
11111143
verify=self.verify,
11121144
json={"slots": slots_patch},
11131145
auth=self._patroni_auth,
11141146
timeout=PATRONI_TIMEOUT,
11151147
)
1148+
logger.debug(
1149+
"API ensure_slots_controller_by_patroni: %s (%s)",
1150+
r,
1151+
r.elapsed.total_seconds(),
1152+
)
11161153

11171154
@property
11181155
def _synchronous_node_count(self) -> int:
@@ -1140,6 +1177,9 @@ def update_synchronous_node_count(self) -> None:
11401177
auth=self._patroni_auth,
11411178
timeout=PATRONI_TIMEOUT,
11421179
)
1180+
logger.debug(
1181+
"API update_synchronous_node_count: %s (%s)", r, r.elapsed.total_seconds()
1182+
)
11431183

11441184
# Check whether the update was unsuccessful.
11451185
if r.status_code != 200:

0 commit comments

Comments
 (0)