Skip to content

Commit 458e8c8

Browse files
authored
[MISC] Don't restart during initial sync (#1000)
* Don't restart during initial sync * Catch relations map exceptions
1 parent bdb1f4e commit 458e8c8

File tree

3 files changed

+55
-31
lines changed

3 files changed

+55
-31
lines changed

src/charm.py

Lines changed: 35 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2157,35 +2157,43 @@ def relations_user_databases_map(self) -> dict:
21572157
if not self.is_cluster_initialised or not self._patroni.member_started:
21582158
return {USER: "all", REPLICATION_USER: "all", REWIND_USER: "all"}
21592159
user_database_map = {}
2160-
for user in sorted(
2161-
self.postgresql.list_users_from_relation(current_host=self.is_connectivity_enabled)
2162-
):
2163-
user_database_map[user] = ",".join(
2164-
sorted(
2165-
self.postgresql.list_accessible_databases_for_user(
2166-
user, current_host=self.is_connectivity_enabled
2160+
try:
2161+
for user in sorted(
2162+
self.postgresql.list_users_from_relation(current_host=self.is_connectivity_enabled)
2163+
):
2164+
user_database_map[user] = ",".join(
2165+
sorted(
2166+
self.postgresql.list_accessible_databases_for_user(
2167+
user, current_host=self.is_connectivity_enabled
2168+
)
21672169
)
21682170
)
2169-
)
2170-
# Add "landscape" superuser by default to the list when the "db-admin" relation is present
2171-
# or when the "database" relation has "extra-user-roles" set to "SUPERUSER" (which may mean
2172-
# that PgBouncer is related to the database and there is the possibility that Landscape
2173-
# is related to it).
2174-
if any(
2175-
True
2176-
for relation in self.client_relations
2177-
if relation.name == "db-admin" # Possibly Landscape relation.
2178-
or (
2179-
relation.name == "database"
2180-
and relation.data[relation.app].get("extra-user-roles") == "SUPERUSER"
2181-
) # PgBouncer (which may be related to Landscape).
2182-
):
2183-
user_database_map["landscape"] = "all"
2184-
if self.postgresql.list_access_groups(current_host=self.is_connectivity_enabled) != set(
2185-
ACCESS_GROUPS
2186-
):
2187-
user_database_map.update({USER: "all", REPLICATION_USER: "all", REWIND_USER: "all"})
2188-
return user_database_map
2171+
# Add "landscape" superuser by default to the list when the "db-admin" relation is present
2172+
# or when the "database" relation has "extra-user-roles" set to "SUPERUSER" (which may mean
2173+
# that PgBouncer is related to the database and there is the possibility that Landscape
2174+
# is related to it).
2175+
if any(
2176+
True
2177+
for relation in self.client_relations
2178+
if relation.name == "db-admin" # Possibly Landscape relation.
2179+
or (
2180+
relation.name == "database"
2181+
and relation.data[relation.app].get("extra-user-roles") == "SUPERUSER"
2182+
) # PgBouncer (which may be related to Landscape).
2183+
):
2184+
user_database_map["landscape"] = "all"
2185+
if self.postgresql.list_access_groups(
2186+
current_host=self.is_connectivity_enabled
2187+
) != set(ACCESS_GROUPS):
2188+
user_database_map.update({
2189+
USER: "all",
2190+
REPLICATION_USER: "all",
2191+
REWIND_USER: "all",
2192+
})
2193+
return user_database_map
2194+
except PostgreSQLListUsersError:
2195+
logger.debug("relations_user_databases_map: Unable to get users")
2196+
return {USER: "all", REPLICATION_USER: "all", REWIND_USER: "all"}
21892197

21902198
def override_patroni_restart_condition(
21912199
self, new_condition: str, repeat_cause: str | None

src/cluster.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,8 @@
5353

5454
PG_BASE_CONF_PATH = f"{POSTGRESQL_CONF_PATH}/postgresql.conf"
5555

56-
RUNNING_STATES = ["running", "streaming"]
56+
STARTED_STATES = ["running", "streaming"]
57+
RUNNING_STATES = [*STARTED_STATES, "starting"]
5758

5859
PATRONI_TIMEOUT = 10
5960

@@ -336,7 +337,7 @@ def get_standby_leader(
336337
)
337338
for member in cluster_status.json()["members"]:
338339
if member["role"] == "standby_leader":
339-
if check_whether_is_running and member["state"] not in RUNNING_STATES:
340+
if check_whether_is_running and member["state"] not in STARTED_STATES:
340341
logger.warning(f"standby leader {member['name']} is not running")
341342
continue
342343
standby_leader = member["name"]
@@ -416,7 +417,7 @@ def are_all_members_ready(self) -> bool:
416417
# a standby leader, because sometimes there may exist (for some period of time)
417418
# only replicas after a failed switchover.
418419
return all(
419-
member["state"] in RUNNING_STATES for member in cluster_status.json()["members"]
420+
member["state"] in STARTED_STATES for member in cluster_status.json()["members"]
420421
) and any(
421422
member["role"] in ["leader", "standby_leader"]
422423
for member in cluster_status.json()["members"]
@@ -496,6 +497,8 @@ def member_started(self) -> bool:
496497
True if services is ready False otherwise. Retries over a period of 60 seconds times to
497498
allow server time to start up.
498499
"""
500+
if not self.is_patroni_running():
501+
return False
499502
try:
500503
response = self.get_patroni_health()
501504
except RetryError:
@@ -517,7 +520,7 @@ def member_inactive(self) -> bool:
517520
return True
518521

519522
return response["state"] not in [
520-
*RUNNING_STATES,
523+
*STARTED_STATES,
521524
"creating replica",
522525
"starting",
523526
"restarting",
@@ -965,6 +968,16 @@ def reload_patroni_configuration(self):
965968
timeout=PATRONI_TIMEOUT,
966969
)
967970

971+
def is_patroni_running(self) -> bool:
972+
"""Check if the Patroni service is running."""
973+
try:
974+
cache = snap.SnapCache()
975+
selected_snap = cache["charmed-postgresql"]
976+
return selected_snap.services["patroni"]["active"]
977+
except snap.SnapError as e:
978+
logger.debug(f"Failed to check Patroni service: {e}")
979+
return False
980+
968981
def restart_patroni(self) -> bool:
969982
"""Restart Patroni.
970983

tests/unit/test_cluster.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -571,6 +571,7 @@ def test_member_started_true(peers_ips, patroni):
571571
patch("cluster.requests.get") as _get,
572572
patch("cluster.stop_after_delay", return_value=stop_after_delay(0)),
573573
patch("cluster.wait_fixed", return_value=wait_fixed(0)),
574+
patch("charm.Patroni.is_patroni_running", return_value=True),
574575
):
575576
_get.return_value.json.return_value = {"state": "running"}
576577

@@ -586,6 +587,7 @@ def test_member_started_false(peers_ips, patroni):
586587
patch("cluster.requests.get") as _get,
587588
patch("cluster.stop_after_delay", return_value=stop_after_delay(0)),
588589
patch("cluster.wait_fixed", return_value=wait_fixed(0)),
590+
patch("charm.Patroni.is_patroni_running", return_value=True),
589591
):
590592
_get.return_value.json.return_value = {"state": "stopped"}
591593

@@ -601,6 +603,7 @@ def test_member_started_error(peers_ips, patroni):
601603
patch("cluster.requests.get") as _get,
602604
patch("cluster.stop_after_delay", return_value=stop_after_delay(0)),
603605
patch("cluster.wait_fixed", return_value=wait_fixed(0)),
606+
patch("charm.Patroni.is_patroni_running", return_value=True),
604607
):
605608
_get.side_effect = Exception
606609

0 commit comments

Comments
 (0)