Skip to content

Commit db93686

Browse files
DPE-7726: Add Patroni API logging
Also: * use commong logger everywhere * and add several useful log messaged (e.g. DB connection) * remove no longer necessary debug 'Init class PostgreSQL' * align Patroni API requests style everhywhere * add Patroni API duration to debug logs
1 parent addca8f commit db93686

File tree

3 files changed

+63
-17
lines changed

3 files changed

+63
-17
lines changed

lib/charms/postgresql_k8s/v1/postgresql.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -273,8 +273,11 @@ def _connect_to_database(
273273
raise PostgreSQLUndefinedHostError("Host not set")
274274
if not self.password:
275275
raise PostgreSQLUndefinedPasswordError("Password not set")
276+
277+
dbname = database if database else self.database
278+
logger.debug(f"New DB connection: dbname='{dbname}' user='{self.user}' host='{host}' connect_timeout=1")
276279
connection = psycopg2.connect(
277-
f"dbname='{database if database else self.database}' user='{self.user}' host='{host}'"
280+
f"dbname='{dbname}' user='{self.user}' host='{host}'"
278281
f"password='{self.password}' connect_timeout=1"
279282
)
280283
connection.autocommit = True

src/charm.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ def unit_number(unit_name: str):
177177
# Lowest unit number is last to refresh
178178
last_unit_to_refresh = sorted(all_units, key=unit_number)[0].replace("/", "-")
179179
if self._charm._patroni.get_primary() == last_unit_to_refresh:
180-
logging.info(
180+
logger.info(
181181
f"Unit {last_unit_to_refresh} was already primary during pre-refresh check"
182182
)
183183
else:
@@ -187,7 +187,7 @@ def unit_number(unit_name: str):
187187
logger.warning(f"switchover failed with reason: {e}")
188188
raise charm_refresh.PrecheckFailed("Unable to switch primary")
189189
else:
190-
logging.info(
190+
logger.info(
191191
f"Switched primary to unit {last_unit_to_refresh} during pre-refresh check"
192192
)
193193

@@ -612,7 +612,6 @@ def postgresql(self) -> PostgreSQL:
612612
"""Returns an instance of the object used to interact with the database."""
613613
password = str(self.get_secret(APP_SCOPE, f"{USER}-password"))
614614
if self._postgresql is None or self._postgresql.primary_host is None:
615-
logger.debug("Init class PostgreSQL")
616615
self._postgresql = PostgreSQL(
617616
primary_host=self.primary_endpoint,
618617
current_host=self._unit_ip,
@@ -2475,7 +2474,9 @@ def _handle_postgresql_restart_need(self) -> None:
24752474
logger.error(f"Reload patroni call failed! error: {e!s}")
24762475

24772476
restart_pending = self._patroni.is_restart_pending()
2478-
logger.debug(f"Checking if restart pending: {restart_postgresql} or {restart_pending}")
2477+
logger.debug(
2478+
f"Checking if restart pending: TLS={restart_postgresql} or API={restart_pending}"
2479+
)
24792480
restart_postgresql = restart_postgresql or restart_pending
24802481

24812482
self.unit_peer_data.update({"tls": "enabled" if self.is_tls_enabled else ""})

src/cluster.py

Lines changed: 54 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,7 @@ def cluster_status(self, alternative_endpoints: list | None = None) -> list[Clus
274274
if response := self.parallel_patroni_get_request(
275275
f"/{PATRONI_CLUSTER_STATUS_ENDPOINT}", alternative_endpoints
276276
):
277-
logger.debug("Patroni cluster members: %s", response["members"])
277+
logger.debug("API cluster_status: %s", response["members"])
278278
return response["members"]
279279
raise RetryError(
280280
last_attempt=Future.construct(1, Exception("Unable to reach any units"), True)
@@ -462,6 +462,7 @@ def get_patroni_health(self) -> dict[str, str]:
462462
timeout=API_REQUEST_TIMEOUT,
463463
auth=self._patroni_auth,
464464
)
465+
logger.debug("API get_patroni_health: %s (%s)", r, r.elapsed.total_seconds())
465466

466467
return r.json()
467468

@@ -529,15 +530,20 @@ def is_replication_healthy(self) -> bool:
529530
for members_ip in members_ips:
530531
endpoint = "leader" if members_ip == primary_ip else "replica?lag=16kB"
531532
url = self._patroni_url.replace(self.unit_ip, members_ip)
532-
member_status = requests.get(
533+
r = requests.get(
533534
f"{url}/{endpoint}",
534535
verify=self.verify,
535536
auth=self._patroni_auth,
536537
timeout=PATRONI_TIMEOUT,
537538
)
538-
if member_status.status_code != 200:
539+
logger.debug(
540+
"API is_replication_healthy: %s (%s)",
541+
r,
542+
r.elapsed.total_seconds(),
543+
)
544+
if r.status_code != 200:
539545
logger.debug(
540-
f"Failed replication check for {members_ip} with code {member_status.status_code}"
546+
f"Failed replication check for {members_ip} with code {r.status_code}"
541547
)
542548
raise Exception
543549
except RetryError:
@@ -590,17 +596,22 @@ def is_member_isolated(self) -> bool:
590596
try:
591597
for attempt in Retrying(stop=stop_after_delay(10), wait=wait_fixed(3)):
592598
with attempt:
593-
cluster_status = requests.get(
599+
r = requests.get(
594600
f"{self._patroni_url}/{PATRONI_CLUSTER_STATUS_ENDPOINT}",
595601
verify=self.verify,
596602
timeout=API_REQUEST_TIMEOUT,
597603
auth=self._patroni_auth,
598604
)
605+
logger.debug(
606+
"API is_member_isolated: %s (%s)",
607+
r.json()["members"],
608+
r.elapsed.total_seconds(),
609+
)
599610
except RetryError:
600611
# Return False if it was not possible to get the cluster info. Try again later.
601612
return False
602613

603-
return len(cluster_status.json()["members"]) == 0
614+
return len(r.json()["members"]) == 0
604615

605616
def online_cluster_members(self) -> list[ClusterMember]:
606617
"""Return list of online cluster members."""
@@ -631,15 +642,21 @@ def promote_standby_cluster(self) -> None:
631642
auth=self._patroni_auth,
632643
timeout=PATRONI_TIMEOUT,
633644
)
645+
logger.debug(
646+
"API promote_standby_cluster: %s (%s)",
647+
config_response,
648+
config_response.elapsed.total_seconds(),
649+
)
634650
if "standby_cluster" not in config_response.json():
635651
raise StandbyClusterAlreadyPromotedError("standby cluster is already promoted")
636-
requests.patch(
652+
r = requests.patch(
637653
f"{self._patroni_url}/config",
638654
verify=self.verify,
639655
json={"standby_cluster": None},
640656
auth=self._patroni_auth,
641657
timeout=PATRONI_TIMEOUT,
642658
)
659+
logger.debug("API promote_standby_cluster patch: %s (%s)", r, r.elapsed.total_seconds())
643660
for attempt in Retrying(stop=stop_after_delay(60), wait=wait_fixed(3)):
644661
with attempt:
645662
if self.get_primary() is None:
@@ -846,6 +863,7 @@ def switchover(self, candidate: str | None = None) -> None:
846863
auth=self._patroni_auth,
847864
timeout=PATRONI_TIMEOUT,
848865
)
866+
logger.debug("API switchover: %s (%s)", r, r.elapsed.total_seconds())
849867

850868
# Check whether the switchover was unsuccessful.
851869
if r.status_code != 200:
@@ -1012,12 +1030,14 @@ def remove_raft_member(self, member_ip: str) -> None:
10121030
@retry(stop=stop_after_attempt(20), wait=wait_exponential(multiplier=1, min=2, max=10))
10131031
def reload_patroni_configuration(self):
10141032
"""Reload Patroni configuration after it was changed."""
1015-
requests.post(
1033+
logger.debug("Reloading Patroni configuration...")
1034+
r = requests.post(
10161035
f"{self._patroni_url}/reload",
10171036
verify=self.verify,
10181037
auth=self._patroni_auth,
10191038
timeout=PATRONI_TIMEOUT,
10201039
)
1040+
logger.debug("API reload_patroni_configuration: %s (%s)", r, r.elapsed.total_seconds())
10211041

10221042
def is_patroni_running(self) -> bool:
10231043
"""Check if the Patroni service is running."""
@@ -1048,36 +1068,45 @@ def restart_patroni(self) -> bool:
10481068
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10))
10491069
def restart_postgresql(self) -> None:
10501070
"""Restart PostgreSQL."""
1051-
requests.post(
1071+
logger.debug("Restarting PostgreSQL...")
1072+
r = requests.post(
10521073
f"{self._patroni_url}/restart",
10531074
verify=self.verify,
10541075
auth=self._patroni_auth,
10551076
timeout=PATRONI_TIMEOUT,
10561077
)
1078+
logger.debug("API restart_postgresql: %s (%s)", r, r.elapsed.total_seconds())
10571079

10581080
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10))
10591081
def reinitialize_postgresql(self) -> None:
10601082
"""Reinitialize PostgreSQL."""
1061-
requests.post(
1083+
logger.debug("Reinitializing PostgreSQL...")
1084+
r = requests.post(
10621085
f"{self._patroni_url}/reinitialize",
10631086
verify=self.verify,
10641087
auth=self._patroni_auth,
10651088
timeout=PATRONI_TIMEOUT,
10661089
)
1090+
logger.debug("API reinitialize_postgresql: %s (%s)", r, r.elapsed.total_seconds())
10671091

10681092
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10))
10691093
def bulk_update_parameters_controller_by_patroni(self, parameters: dict[str, Any]) -> None:
10701094
"""Update the value of a parameter controller by Patroni.
10711095
10721096
For more information, check https://patroni.readthedocs.io/en/latest/patroni_configuration.html#postgresql-parameters-controlled-by-patroni.
10731097
"""
1074-
requests.patch(
1098+
r = requests.patch(
10751099
f"{self._patroni_url}/config",
10761100
verify=self.verify,
10771101
json={"postgresql": {"parameters": parameters}},
10781102
auth=self._patroni_auth,
10791103
timeout=PATRONI_TIMEOUT,
10801104
)
1105+
logger.debug(
1106+
"API bulk_update_parameters_controller_by_patroni: %s (%s)",
1107+
r,
1108+
r.elapsed.total_seconds(),
1109+
)
10811110

10821111
def ensure_slots_controller_by_patroni(self, slots: dict[str, str]) -> None:
10831112
"""Synchronises slots controlled by Patroni with the provided state by removing unneeded slots and creating new ones.
@@ -1093,6 +1122,11 @@ def ensure_slots_controller_by_patroni(self, slots: dict[str, str]) -> None:
10931122
timeout=PATRONI_TIMEOUT,
10941123
auth=self._patroni_auth,
10951124
)
1125+
logger.debug(
1126+
"API ensure_slots_controller_by_patroni: %s (%s)",
1127+
current_config,
1128+
current_config.elapsed.total_seconds(),
1129+
)
10961130
if current_config.status_code != 200:
10971131
raise Exception(
10981132
f"Failed to get current Patroni config: {current_config.status_code} {current_config.text}"
@@ -1106,13 +1140,18 @@ def ensure_slots_controller_by_patroni(self, slots: dict[str, str]) -> None:
11061140
"plugin": "pgoutput",
11071141
"type": "logical",
11081142
}
1109-
requests.patch(
1143+
r = requests.patch(
11101144
f"{self._patroni_url}/config",
11111145
verify=self.verify,
11121146
json={"slots": slots_patch},
11131147
auth=self._patroni_auth,
11141148
timeout=PATRONI_TIMEOUT,
11151149
)
1150+
logger.debug(
1151+
"API ensure_slots_controller_by_patroni: %s (%s)",
1152+
r,
1153+
r.elapsed.total_seconds(),
1154+
)
11161155

11171156
@property
11181157
def _synchronous_node_count(self) -> int:
@@ -1140,6 +1179,9 @@ def update_synchronous_node_count(self) -> None:
11401179
auth=self._patroni_auth,
11411180
timeout=PATRONI_TIMEOUT,
11421181
)
1182+
logger.debug(
1183+
"API update_synchronous_node_count: %s (%s)", r, r.elapsed.total_seconds()
1184+
)
11431185

11441186
# Check whether the update was unsuccessful.
11451187
if r.status_code != 200:

0 commit comments

Comments
 (0)