Skip to content

Commit 73d3660

Browse files
authored
[DPE-6189] Manage passwords with user secrets (#926)
This PR introduces juju user secrets for managing passwords. It contains the following changes: **Functionality:** - no longer support `get-password` and `set-password` actions - new config option `system-users` to configure a secret that includes the system users' password(s) - it is no longer possible to leave out the `password` parameter to trigger a password rotation as secrets cannot have empty values **Implementation:** - add handler for `secret_changed` event to `charm.py` - add method `get_secret_from_id()` to `charm.py` - trigger updating the system user passwords on `config_changed` - consider pre-configured system user passwords on `leader_elected` - remove `get_password` handler - replace `set_password` handler with `_update_admin_passwords()` method - `_update_admin_passwords()` is responsible for the actual business logic: - retrieving the passwords from the configured secret - checking if password updates need to be performed - calling the `postgresql.update_user_password()` method **Testing:** - remove integration tests that are no longer required, such as testing for empty password or testing if the output of `get-password` is the same as the output of `juju show-secret` - adjust integration test helpers for `get_password()` and `set_password` to use secrets - remove unit tests for `get-/set-password` actions
1 parent 53885dd commit 73d3660

File tree

11 files changed

+186
-316
lines changed

11 files changed

+186
-316
lines changed

actions.yaml

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,6 @@ create-replication:
2020
default: default
2121
get-primary:
2222
description: Get the unit with is the primary/leader in the replication.
23-
get-password:
24-
description: Get a charm system user's password.
25-
Useful for manual troubleshooting and for backing up cluster credentials.
26-
It cannot be used for application integration relations.
27-
params:
28-
username:
29-
type: string
30-
description: The username, the default value 'operator'.
31-
Possible values - backup, operator, replication, rewind, patroni.
3223
list-backups:
3324
description: Lists backups in s3 storage in AWS.
3425
pre-upgrade-check:
@@ -56,17 +47,6 @@ restore:
5647
description: Point-in-time-recovery target in PSQL format.
5748
resume-upgrade:
5849
description: Resume a rolling upgrade after asserting successful upgrade of a new revision.
59-
set-password:
60-
description: Change the system user's password, which is used by charm.
61-
It is for internal charm users and SHOULD NOT be used by applications.
62-
params:
63-
username:
64-
type: string
65-
description: The username, the default value 'operator'.
66-
Possible values - backup, operator, replication rewind.
67-
password:
68-
type: string
69-
description: The password will be auto-generated if this option is not specified.
7050
set-tls-private-key:
7151
description: Set the private key, which will be used for certificate signing requests (CSR). Run for each unit separately.
7252
params:

config.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -773,6 +773,14 @@ options:
773773
Allowed values are: from -1 to 86400.
774774
type: int
775775
default: -1
776+
system-users:
777+
type: secret
778+
description: |
779+
Configure the internal system users and their passwords. The passwords will
780+
be auto-generated if this option is not set. It is for internal use only
781+
and SHOULD NOT be used by applications. This needs to be a Juju Secret URI pointing
782+
to a secret that contains the following content: `<username>: <password>`.
783+
Possible users: backup, monitoring, operator, replication, rewind.
776784
vacuum_autovacuum_analyze_scale_factor:
777785
description: |
778786
Specifies a fraction of the table size to add to autovacuum_vacuum_threshold when

src/charm.py

Lines changed: 102 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
HookEvent,
6060
LeaderElectedEvent,
6161
RelationDepartedEvent,
62+
SecretChangedEvent,
6263
WorkloadEvent,
6364
)
6465
from ops.model import (
@@ -68,6 +69,7 @@
6869
MaintenanceStatus,
6970
ModelError,
7071
Relation,
72+
SecretNotFoundError,
7173
Unit,
7274
UnknownStatus,
7375
WaitingStatus,
@@ -211,12 +213,12 @@ def __init__(self, *args):
211213
self.framework.observe(self.on.leader_elected, self._on_leader_elected)
212214
self.framework.observe(self.on[PEER].relation_changed, self._on_peer_relation_changed)
213215
self.framework.observe(self.on.secret_changed, self._on_peer_relation_changed)
216+
# add specific handler for updated system-user secrets
217+
self.framework.observe(self.on.secret_changed, self._on_secret_changed)
214218
self.framework.observe(self.on[PEER].relation_departed, self._on_peer_relation_departed)
215219
self.framework.observe(self.on.postgresql_pebble_ready, self._on_postgresql_pebble_ready)
216220
self.framework.observe(self.on.pgdata_storage_detaching, self._on_pgdata_storage_detaching)
217221
self.framework.observe(self.on.stop, self._on_stop)
218-
self.framework.observe(self.on.get_password_action, self._on_get_password)
219-
self.framework.observe(self.on.set_password_action, self._on_set_password)
220222
self.framework.observe(self.on.promote_to_primary_action, self._on_promote_to_primary)
221223
self.framework.observe(self.on.get_primary_action, self._on_get_primary)
222224
self.framework.observe(self.on.update_status, self._on_update_status)
@@ -380,6 +382,25 @@ def remove_secret(self, scope: Scopes, key: str) -> None:
380382

381383
self.peer_relation_data(scope).delete_relation_data(peers.id, [secret_key])
382384

385+
def get_secret_from_id(self, secret_id: str) -> dict[str, str]:
386+
"""Resolve the given id of a Juju secret and return the content as a dict.
387+
388+
This method can be used to retrieve any secret, not just those used via the peer relation.
389+
If the secret is not owned by the charm, it has to be granted access to it.
390+
391+
Args:
392+
secret_id (str): The id of the secret.
393+
394+
Returns:
395+
dict: The content of the secret.
396+
"""
397+
try:
398+
secret_content = self.model.get_secret(id=secret_id).get_content(refresh=True)
399+
except (SecretNotFoundError, ModelError):
400+
raise
401+
402+
return secret_content
403+
383404
@property
384405
def is_cluster_initialised(self) -> bool:
385406
"""Returns whether the cluster is already initialised."""
@@ -662,6 +683,17 @@ def _on_peer_relation_changed(self, event: HookEvent) -> None: # noqa: C901
662683

663684
self.async_replication.handle_read_only_mode()
664685

686+
def _on_secret_changed(self, event: SecretChangedEvent) -> None:
687+
"""Handle the secret_changed event."""
688+
if not self.unit.is_leader():
689+
return
690+
691+
if (admin_secret_id := self.config.system_users) and admin_secret_id == event.secret.id:
692+
try:
693+
self._update_admin_password(admin_secret_id)
694+
except PostgreSQLUpdateUserPasswordError:
695+
event.defer()
696+
665697
def _on_config_changed(self, event) -> None:
666698
"""Handle configuration changes, like enabling plugins."""
667699
if not self.is_cluster_initialised:
@@ -703,6 +735,12 @@ def _on_config_changed(self, event) -> None:
703735
# Enable and/or disable the extensions.
704736
self.enable_disable_extensions()
705737

738+
if admin_secret_id := self.config.system_users:
739+
try:
740+
self._update_admin_password(admin_secret_id)
741+
except PostgreSQLUpdateUserPasswordError:
742+
event.defer()
743+
706744
def enable_disable_extensions(self, database: str | None = None) -> None:
707745
"""Enable/disable PostgreSQL extensions set through config options.
708746
@@ -858,6 +896,17 @@ def _get_hostname_from_unit(self, member: str) -> str:
858896

859897
def _on_leader_elected(self, event: LeaderElectedEvent) -> None:
860898
"""Handle the leader-elected event."""
899+
# consider configured system user passwords
900+
system_user_passwords = {}
901+
if admin_secret_id := self.config.system_users:
902+
try:
903+
system_user_passwords = self.get_secret_from_id(secret_id=admin_secret_id)
904+
except (ModelError, SecretNotFoundError) as e:
905+
# only display the error but don't return to make sure all users have passwords
906+
logger.error(f"Error setting internal passwords: {e}")
907+
self.unit.status = BlockedStatus("Password setting for system users failed.")
908+
event.defer()
909+
861910
for password in {
862911
USER_PASSWORD_KEY,
863912
REPLICATION_PASSWORD_KEY,
@@ -866,7 +915,14 @@ def _on_leader_elected(self, event: LeaderElectedEvent) -> None:
866915
PATRONI_PASSWORD_KEY,
867916
}:
868917
if self.get_secret(APP_SCOPE, password) is None:
869-
self.set_secret(APP_SCOPE, password, new_password())
918+
if password in system_user_passwords:
919+
# use provided passwords for system-users if available
920+
self.set_secret(APP_SCOPE, password, system_user_passwords[password])
921+
logger.info(f"Using configured password for {password}")
922+
else:
923+
# generate a password for this user if not provided
924+
self.set_secret(APP_SCOPE, password, new_password())
925+
logger.info(f"Generated new password for {password}")
870926

871927
# Add this unit to the list of cluster members
872928
# (the cluster should start with only this member).
@@ -1202,66 +1258,22 @@ def _has_non_restore_waiting_status(self) -> bool:
12021258
and not self.is_cluster_restoring_to_time
12031259
)
12041260

1205-
def _on_get_password(self, event: ActionEvent) -> None:
1206-
"""Returns the password for a user as an action response.
1207-
1208-
If no user is provided, the password of the operator user is returned.
1209-
"""
1210-
username = event.params.get("username", USER)
1211-
if username not in PASSWORD_USERS and self.is_ldap_enabled:
1212-
event.fail("The action can be run only for system users when LDAP is enabled")
1213-
return
1214-
if username not in PASSWORD_USERS:
1215-
event.fail(
1216-
f"The action can be run only for system users or Patroni:"
1217-
f" {', '.join(PASSWORD_USERS)} not {username}"
1218-
)
1219-
return
1220-
1221-
event.set_results({"password": self.get_secret(APP_SCOPE, f"{username}-password")})
1222-
1223-
def _on_set_password(self, event: ActionEvent) -> None: # noqa: C901
1224-
"""Set the password for the specified user."""
1225-
# Only leader can write the new password into peer relation.
1226-
if not self.unit.is_leader():
1227-
event.fail("The action can be run only on leader unit")
1228-
return
1229-
1230-
username = event.params.get("username", USER)
1231-
if username not in SYSTEM_USERS and self.is_ldap_enabled:
1232-
event.fail("The action can be run only for system users when LDAP is enabled")
1233-
return
1234-
if username not in SYSTEM_USERS:
1235-
event.fail(
1236-
f"The action can be run only for system users:"
1237-
f" {', '.join(SYSTEM_USERS)} not {username}"
1238-
)
1239-
return
1240-
1241-
password = new_password()
1242-
if "password" in event.params:
1243-
password = event.params["password"]
1244-
1245-
if password == self.get_secret(APP_SCOPE, f"{username}-password"):
1246-
event.log("The old and new passwords are equal.")
1247-
event.set_results({"password": password})
1248-
return
1249-
1250-
# Ensure all members are ready before trying to reload Patroni
1251-
# configuration to avoid errors (like the API not responding in
1252-
# one instance because PostgreSQL and/or Patroni are not ready).
1261+
def _update_admin_password(self, admin_secret_id: str) -> None:
1262+
"""Check if the password of a system user was changed and update it in the database."""
12531263
if not self._patroni.are_all_members_ready():
1254-
event.fail(
1264+
# Ensure all members are ready before reloading Patroni configuration to avoid errors
1265+
# e.g. API not responding in one instance because PostgreSQL / Patroni are not ready
1266+
raise PostgreSQLUpdateUserPasswordError(
12551267
"Failed changing the password: Not all members healthy or finished initial sync."
12561268
)
1257-
return
12581269

1270+
# cross-cluster replication: extract the database host on which to update the passwords
12591271
replication_offer_relation = self.model.get_relation(REPLICATION_OFFER_RELATION)
1272+
other_cluster_primary_ip = ""
12601273
if (
12611274
replication_offer_relation is not None
12621275
and not self.async_replication.is_primary_cluster()
12631276
):
1264-
# Update the password in the other cluster PostgreSQL primary instance.
12651277
other_cluster_endpoints = self.async_replication.get_all_primary_cluster_endpoints()
12661278
other_cluster_primary = self._patroni.get_primary(
12671279
alternative_endpoints=other_cluster_endpoints
@@ -1271,37 +1283,51 @@ def _on_set_password(self, event: ActionEvent) -> None: # noqa: C901
12711283
for unit in replication_offer_relation.units
12721284
if unit.name.replace("/", "-") == other_cluster_primary
12731285
)
1274-
try:
1275-
self.postgresql.update_user_password(
1276-
username, password, database_host=other_cluster_primary_ip
1277-
)
1278-
except PostgreSQLUpdateUserPasswordError as e:
1279-
logger.exception(e)
1280-
event.fail("Failed changing the password.")
1281-
return
12821286
elif self.model.get_relation(REPLICATION_CONSUMER_RELATION) is not None:
1283-
event.fail(
1284-
"Failed changing the password: This action can be ran only in the cluster from the offer side."
1287+
logger.error(
1288+
"Failed changing the password: This can be ran only in the cluster from the offer side."
12851289
)
1290+
self.unit.status = BlockedStatus("Password update for system users failed.")
12861291
return
1287-
else:
1288-
# Update the password in this cluster PostgreSQL primary instance.
1289-
try:
1290-
self.postgresql.update_user_password(username, password)
1291-
except PostgreSQLUpdateUserPasswordError as e:
1292-
logger.exception(e)
1293-
event.fail("Failed changing the password.")
1294-
return
12951292

1296-
# Update the password in the secret store.
1297-
self.set_secret(APP_SCOPE, f"{username}-password", password)
1293+
try:
1294+
# get the secret content and check each user configured there
1295+
# only SYSTEM_USERS with changed passwords are processed, all others ignored
1296+
updated_passwords = self.get_secret_from_id(secret_id=admin_secret_id)
1297+
for user, password in list(updated_passwords.items()):
1298+
if user not in SYSTEM_USERS:
1299+
logger.error(
1300+
f"Can only update system users: {', '.join(SYSTEM_USERS)} not {user}"
1301+
)
1302+
updated_passwords.pop(user)
1303+
continue
1304+
if password == self.get_secret(APP_SCOPE, f"{user}-password"):
1305+
updated_passwords.pop(user)
1306+
except (ModelError, SecretNotFoundError) as e:
1307+
logger.error(f"Error updating internal passwords: {e}")
1308+
self.unit.status = BlockedStatus("Password update for system users failed.")
1309+
return
1310+
1311+
try:
1312+
# perform the actual password update for the remaining users
1313+
for user, password in updated_passwords.items():
1314+
logger.info(f"Updating password for user {user}")
1315+
self.postgresql.update_user_password(
1316+
user,
1317+
password,
1318+
database_host=other_cluster_primary_ip if other_cluster_primary_ip else None,
1319+
)
1320+
# Update the password in the secret store after updating it in the database
1321+
self.set_secret(APP_SCOPE, f"{user}-password", password)
1322+
except PostgreSQLUpdateUserPasswordError as e:
1323+
logger.exception(e)
1324+
self.unit.status = BlockedStatus("Password update for system users failed.")
1325+
return
12981326

12991327
# Update and reload Patroni configuration in this unit to use the new password.
13001328
# Other units Patroni configuration will be reloaded in the peer relation changed event.
13011329
self.update_config()
13021330

1303-
event.set_results({"password": password})
1304-
13051331
def _on_promote_to_primary(self, event: ActionEvent) -> None:
13061332
if event.params.get("scope") == "cluster":
13071333
return self.async_replication.promote_to_primary(event)

src/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ class CharmConfig(BaseConfigModel):
165165
storage_default_table_access_method: str | None
166166
storage_gin_pending_list_limit: int | None
167167
storage_old_snapshot_threshold: int | None
168+
system_users: str | None
168169
vacuum_autovacuum_analyze_scale_factor: float | None
169170
vacuum_autovacuum_analyze_threshold: int | None
170171
vacuum_autovacuum_freeze_max_age: int | None

src/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
SECRET_CACHE_LABEL = "cache" # noqa: S105
3939
SECRET_INTERNAL_LABEL = "internal-secret" # noqa: S105
4040
SECRET_DELETED_LABEL = "None" # noqa: S105
41+
SYSTEM_USERS_PASSWORD_CONFIG = "system-users" # noqa: S105
4142

4243
APP_SCOPE = "app"
4344
UNIT_SCOPE = "unit"

tests/integration/ha_tests/helpers.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@
4343
db_connect,
4444
execute_query_on_unit,
4545
get_password,
46-
get_password_on_unit,
4746
get_primary,
4847
get_unit_address,
4948
run_command_on_unit,
@@ -317,7 +316,7 @@ async def count_writes(
317316
) -> tuple[dict[str, int], dict[str, int]]:
318317
"""Count the number of writes in the database."""
319318
app = await app_name(ops_test)
320-
password = await get_password(ops_test, database_app_name=app, down_unit=down_unit)
319+
password = await get_password(ops_test, database_app_name=app)
321320
members = []
322321
for model in [ops_test.model, extra_model]:
323322
if model is None:
@@ -1019,7 +1018,7 @@ async def create_db(ops_test: OpsTest, app: str, db: str) -> None:
10191018
"""Creates database with specified name."""
10201019
unit = ops_test.model.applications[app].units[0]
10211020
unit_address = await get_unit_address(ops_test, unit.name)
1022-
password = await get_password_on_unit(ops_test, "operator", unit, app)
1021+
password = await get_password(ops_test, "operator", app)
10231022

10241023
conn = db_connect(unit_address, password)
10251024
conn.autocommit = True
@@ -1034,7 +1033,7 @@ async def check_db(ops_test: OpsTest, app: str, db: str) -> bool:
10341033
"""Returns True if database with specified name already exists."""
10351034
unit = ops_test.model.applications[app].units[0]
10361035
unit_address = await get_unit_address(ops_test, unit.name)
1037-
password = await get_password_on_unit(ops_test, "operator", unit, app)
1036+
password = await get_password(ops_test, "operator", app)
10381037

10391038
query = await execute_query_on_unit(
10401039
unit_address,

tests/integration/ha_tests/test_self_healing_1.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@ async def test_forceful_restart_without_data_and_transaction_logs(
284284
logger.info(f"rotating WAL segments on {new_primary_name}")
285285
files = await list_wal_files(ops_test, app)
286286
host = await get_unit_address(ops_test, new_primary_name)
287-
password = await get_password(ops_test, down_unit=primary_name)
287+
password = await get_password(ops_test)
288288
with db_connect(host, password) as connection:
289289
connection.autocommit = True
290290
with connection.cursor() as cursor:

0 commit comments

Comments
 (0)