Skip to content

Commit 1a1c2d4

Browse files
[DPE-4257] Async replication UX Improvements (#481)
* Syncing the UX with MySQL
  Signed-off-by: Marcelo Henrique Neppel <[email protected]>
* Fix failover and set-secret behaviour
  Signed-off-by: Marcelo Henrique Neppel <[email protected]>
* Improve statuses
  Signed-off-by: Marcelo Henrique Neppel <[email protected]>
* Fix app status set
  Signed-off-by: Marcelo Henrique Neppel <[email protected]>
* Fix model switch
  Signed-off-by: Marcelo Henrique Neppel <[email protected]>
* Fix config integration test
  Signed-off-by: Marcelo Henrique Neppel <[email protected]>
* Fix backups integration test
  Signed-off-by: Marcelo Henrique Neppel <[email protected]>
---------
Signed-off-by: Marcelo Henrique Neppel <[email protected]>
1 parent 882f06b commit 1a1c2d4

File tree

9 files changed

+323
-158
lines changed

9 files changed

+323
-158
lines changed

actions.yaml

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,13 @@ create-backup:
1111
Differential backup is a copy only of changed data since the last full backup.
1212
Incremental backup is a copy only of changed data since the last backup (any type).
1313
Possible values - full, differential, incremental.
14+
create-replication:
15+
description: Set up asynchronous replication between two clusters.
16+
params:
17+
name:
18+
type: string
19+
description: The name of the replication (defaults to 'default').
20+
default: default
1421
get-primary:
1522
description: Get the unit which is the primary/leader in the replication.
1623
get-password:
@@ -25,10 +32,10 @@ list-backups:
2532
description: Lists backups in s3 storage.
2633
pre-upgrade-check:
2734
description: Run necessary pre-upgrade checks and preparations before executing a charm refresh.
28-
promote-cluster:
35+
promote-to-primary:
2936
description: Promotes the cluster of choice to a primary cluster. Must be run against the leader unit.
3037
params:
31-
force-promotion:
38+
force:
3239
type: boolean
3340
description: Force the promotion of a cluster when there is already a primary cluster.
3441
restore:

metadata.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ peers:
2626
interface: upgrade
2727

2828
provides:
29-
async-primary:
30-
interface: async_replication
29+
replication-offer:
30+
interface: postgresql_async
3131
limit: 1
3232
optional: true
3333
database:
@@ -41,8 +41,8 @@ provides:
4141
limit: 1
4242

4343
requires:
44-
async-replica:
45-
interface: async_replication
44+
replication:
45+
interface: postgresql_async
4646
limit: 1
4747
optional: true
4848
certificates:

src/charm.py

Lines changed: 39 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,11 @@
9090
USER,
9191
USER_PASSWORD_KEY,
9292
)
93-
from relations.async_replication import PostgreSQLAsyncReplication
93+
from relations.async_replication import (
94+
REPLICATION_CONSUMER_RELATION,
95+
REPLICATION_OFFER_RELATION,
96+
PostgreSQLAsyncReplication,
97+
)
9498
from relations.db import EXTENSIONS_BLOCKING_MESSAGE, DbProvides
9599
from relations.postgresql_provider import PostgreSQLProvider
96100
from upgrade import PostgreSQLUpgrade, get_postgresql_dependencies_model
@@ -1222,15 +1226,42 @@ def _on_set_password(self, event: ActionEvent) -> None:
12221226
)
12231227
return
12241228

1225-
# Update the password in the PostgreSQL instance.
1226-
try:
1227-
self.postgresql.update_user_password(username, password)
1228-
except PostgreSQLUpdateUserPasswordError as e:
1229-
logger.exception(e)
1229+
replication_offer_relation = self.model.get_relation(REPLICATION_OFFER_RELATION)
1230+
if (
1231+
replication_offer_relation is not None
1232+
and not self.async_replication.is_primary_cluster()
1233+
):
1234+
# Update the password in the other cluster PostgreSQL primary instance.
1235+
other_cluster_endpoints = self.async_replication.get_all_primary_cluster_endpoints()
1236+
other_cluster_primary = self._patroni.get_primary(
1237+
alternative_endpoints=other_cluster_endpoints
1238+
)
1239+
other_cluster_primary_ip = [
1240+
replication_offer_relation.data[unit].get("private-address")
1241+
for unit in replication_offer_relation.units
1242+
if unit.name.replace("/", "-") == other_cluster_primary
1243+
][0]
1244+
try:
1245+
self.postgresql.update_user_password(
1246+
username, password, database_host=other_cluster_primary_ip
1247+
)
1248+
except PostgreSQLUpdateUserPasswordError as e:
1249+
logger.exception(e)
1250+
event.fail("Failed changing the password.")
1251+
return
1252+
elif self.model.get_relation(REPLICATION_CONSUMER_RELATION) is not None:
12301253
event.fail(
1231-
"Failed changing the password: Not all members healthy or finished initial sync."
1254+
"Failed changing the password: This action can be ran only in the cluster from the offer side."
12321255
)
12331256
return
1257+
else:
1258+
# Update the password in this cluster PostgreSQL primary instance.
1259+
try:
1260+
self.postgresql.update_user_password(username, password)
1261+
except PostgreSQLUpdateUserPasswordError as e:
1262+
logger.exception(e)
1263+
event.fail("Failed changing the password.")
1264+
return
12341265

12351266
# Update the password in the secret store.
12361267
self.set_secret(APP_SCOPE, f"{username}-password", password)
@@ -1239,9 +1270,6 @@ def _on_set_password(self, event: ActionEvent) -> None:
12391270
# Other units Patroni configuration will be reloaded in the peer relation changed event.
12401271
self.update_config()
12411272

1242-
# Update the password in the async replication data.
1243-
self.async_replication.update_async_replication_data()
1244-
12451273
event.set_results({"password": password})
12461274

12471275
def _on_update_status(self, _) -> None:
@@ -1357,7 +1385,7 @@ def _set_primary_status_message(self) -> None:
13571385
if self._patroni.get_primary(unit_name_pattern=True) == self.unit.name:
13581386
self.unit.status = ActiveStatus("Primary")
13591387
elif self.is_standby_leader:
1360-
self.unit.status = ActiveStatus("Standby Leader")
1388+
self.unit.status = ActiveStatus("Standby")
13611389
elif self._patroni.member_started:
13621390
self.unit.status = ActiveStatus()
13631391
except (RetryError, ConnectionError) as e:

src/cluster.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -230,19 +230,20 @@ def get_member_status(self, member_name: str) -> str:
230230
return member["state"]
231231
return ""
232232

233-
def get_primary(self, unit_name_pattern=False) -> str:
233+
def get_primary(self, unit_name_pattern=False, alternative_endpoints: List[str] = None) -> str:
234234
"""Get primary instance.
235235
236236
Args:
237237
unit_name_pattern: whether to convert pod name to unit name
238+
alternative_endpoints: list of alternative endpoints to check for the primary.
238239
239240
Returns:
240241
primary pod or unit name.
241242
"""
242243
# Request info from cluster endpoint (which returns all members of the cluster).
243244
for attempt in Retrying(stop=stop_after_attempt(2 * len(self.peers_ips) + 1)):
244245
with attempt:
245-
url = self._get_alternative_patroni_url(attempt)
246+
url = self._get_alternative_patroni_url(attempt, alternative_endpoints)
246247
cluster_status = requests.get(
247248
f"{url}/{PATRONI_CLUSTER_STATUS_ENDPOINT}",
248249
verify=self.verify,
@@ -301,12 +302,18 @@ def get_sync_standby_names(self) -> List[str]:
301302
sync_standbys.append("/".join(member["name"].rsplit("-", 1)))
302303
return sync_standbys
303304

304-
def _get_alternative_patroni_url(self, attempt: AttemptManager) -> str:
305+
def _get_alternative_patroni_url(
306+
self, attempt: AttemptManager, alternative_endpoints: List[str] = None
307+
) -> str:
305308
"""Get an alternative REST API URL from another member each time.
306309
307310
When the Patroni process is not running in the current unit it's needed
308311
to use a URL from another cluster member REST API to do some operations.
309312
"""
313+
if alternative_endpoints is not None:
314+
return self._patroni_url.replace(
315+
self.unit_ip, alternative_endpoints[attempt.retry_state.attempt_number - 1]
316+
)
310317
attempt_number = attempt.retry_state.attempt_number
311318
if attempt_number > 1:
312319
url = self._patroni_url

0 commit comments

Comments
 (0)