Skip to content

Commit 713f6e4

Browse files
authored
[MISC] Suppress oversee users in standby clusters (#507)
* Suppress oversee users in standby clusters
* Add suppression test
* Wait for model to idle
* Test fixes
* Fix test
* Remove action
1 parent 165d04c commit 713f6e4

File tree

4 files changed

+66
-11
lines changed

4 files changed

+66
-11
lines changed

src/relations/async_replication.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,7 @@ def _configure_standby_cluster(self, event: RelationChangedEvent) -> bool:
193193
filename = f"{POSTGRESQL_DATA_PATH}-{str(datetime.now()).replace(' ', '-').replace(':', '-')}.tar.gz"
194194
subprocess.check_call(f"tar -zcf {filename} {POSTGRESQL_DATA_PATH}".split())
195195
logger.warning("Please review the backup file %s and handle its removal", filename)
196+
self.charm.app_peer_data["suppress-oversee-users"] = "true"
196197
return True
197198

198199
def get_all_primary_cluster_endpoints(self) -> List[str]:
@@ -481,7 +482,7 @@ def is_primary_cluster(self) -> bool:
481482
return self.charm.app == self._get_primary_cluster()
482483

483484
def _on_async_relation_broken(self, _) -> None:
484-
if "departing" in self.charm._peers.data[self.charm.unit]:
485+
if not self.charm._peers or "departing" in self.charm._peers.data[self.charm.unit]:
485486
logger.debug("Early exit on_async_relation_broken: Skipping departing unit.")
486487
return
487488

src/relations/postgresql_provider.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,8 @@ def oversee_users(self) -> None:
137137
if not self.charm.unit.is_leader():
138138
return
139139

140+
delete_user = "suppress-oversee-users" not in self.charm.app_peer_data
141+
140142
# Retrieve database users.
141143
try:
142144
database_users = {
@@ -159,13 +161,16 @@ def oversee_users(self) -> None:
159161

160162
# Delete the users that exist in the database but not in the active relations.
161163
for user in database_users - relation_users:
162-
try:
163-
logger.info("Remove relation user: %s", user)
164-
self.charm.set_secret(APP_SCOPE, user, None)
165-
self.charm.set_secret(APP_SCOPE, f"{user}-database", None)
166-
self.charm.postgresql.delete_user(user)
167-
except PostgreSQLDeleteUserError:
168-
logger.error(f"Failed to delete user {user}")
164+
if delete_user:
165+
try:
166+
logger.info("Remove relation user: %s", user)
167+
self.charm.set_secret(APP_SCOPE, user, None)
168+
self.charm.set_secret(APP_SCOPE, f"{user}-database", None)
169+
self.charm.postgresql.delete_user(user)
170+
except PostgreSQLDeleteUserError:
171+
logger.error("Failed to delete user %s", user)
172+
else:
173+
logger.info("Stale relation user detected: %s", user)
169174

170175
def update_endpoints(self, event: DatabaseRequestedEvent = None) -> None:
171176
"""Set the read/write and read-only endpoints."""

tests/integration/ha_tests/test_async_replication.py

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@
4141
IDLE_PERIOD = 5
4242
TIMEOUT = 2000
4343

44+
DATA_INTEGRATOR_APP_NAME = "data-integrator"
45+
4446

4547
@contextlib.asynccontextmanager
4648
async def fast_forward(
@@ -115,6 +117,14 @@ async def test_deploy_async_replication_setup(
115117
num_units=CLUSTER_SIZE,
116118
config={"profile": "testing"},
117119
)
120+
if not await app_name(ops_test, DATA_INTEGRATOR_APP_NAME):
121+
await ops_test.model.deploy(
122+
DATA_INTEGRATOR_APP_NAME,
123+
num_units=1,
124+
channel="latest/edge",
125+
config={"database-name": "testdb"},
126+
)
127+
await ops_test.model.relate(DATABASE_APP_NAME, DATA_INTEGRATOR_APP_NAME)
118128
if not await app_name(ops_test, model=second_model):
119129
charm = await ops_test.build_charm(".")
120130
await second_model.deploy(
@@ -128,7 +138,7 @@ async def test_deploy_async_replication_setup(
128138
async with ops_test.fast_forward(), fast_forward(second_model):
129139
await gather(
130140
first_model.wait_for_idle(
131-
apps=[DATABASE_APP_NAME, APPLICATION_NAME],
141+
apps=[DATABASE_APP_NAME, APPLICATION_NAME, DATA_INTEGRATOR_APP_NAME],
132142
status="active",
133143
timeout=TIMEOUT,
134144
),
@@ -218,6 +228,19 @@ async def test_async_replication(
218228
await check_writes(ops_test, extra_model=second_model)
219229

220230

231+
@pytest.mark.group(1)
232+
@markers.juju3
233+
@pytest.mark.abort_on_fail
234+
async def test_get_data_integrator_credentials(
235+
ops_test: OpsTest,
236+
):
237+
unit = ops_test.model.applications[DATA_INTEGRATOR_APP_NAME].units[0]
238+
action = await unit.run_action(action_name="get-credentials")
239+
result = await action.wait()
240+
global data_integrator_credentials
241+
data_integrator_credentials = result.results
242+
243+
221244
@pytest.mark.group(1)
222245
@markers.juju3
223246
@pytest.mark.abort_on_fail
@@ -273,6 +296,29 @@ async def test_switchover(
273296
await are_writes_increasing(ops_test, extra_model=second_model)
274297

275298

299+
@pytest.mark.group(1)
300+
@markers.juju3
301+
@pytest.mark.abort_on_fail
302+
async def test_data_integrator_creds_keep_on_working(
303+
ops_test: OpsTest,
304+
second_model: Model,
305+
) -> None:
306+
user = data_integrator_credentials["postgresql"]["username"]
307+
password = data_integrator_credentials["postgresql"]["password"]
308+
database = data_integrator_credentials["postgresql"]["database"]
309+
310+
any_unit = second_model.applications[DATABASE_APP_NAME].units[0].name
311+
primary = await get_primary(ops_test, any_unit, second_model)
312+
address = second_model.units.get(primary).public_address
313+
314+
connstr = f"dbname='{database}' user='{user}' host='{address}' port='5432' password='{password}' connect_timeout=1"
315+
try:
316+
with psycopg2.connect(connstr) as connection:
317+
pass
318+
finally:
319+
connection.close()
320+
321+
276322
@pytest.mark.group(1)
277323
@markers.juju3
278324
@pytest.mark.abort_on_fail

tests/integration/helpers.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -621,17 +621,20 @@ async def get_password(ops_test: OpsTest, unit_name: str, username: str = "opera
621621
stop=stop_after_attempt(10),
622622
wait=wait_exponential(multiplier=1, min=2, max=30),
623623
)
624-
async def get_primary(ops_test: OpsTest, unit_name: str) -> str:
624+
async def get_primary(ops_test: OpsTest, unit_name: str, model=None) -> str:
625625
"""Get the primary unit.
626626
627627
Args:
628628
ops_test: ops_test instance.
629629
unit_name: the name of the unit.
630+
model: Model to use.
630631
631632
Returns:
632633
the current primary unit.
633634
"""
634-
action = await ops_test.model.units.get(unit_name).run_action("get-primary")
635+
if not model:
636+
model = ops_test.model
637+
action = await model.units.get(unit_name).run_action("get-primary")
635638
action = await action.wait()
636639
return action.results["primary"]
637640

0 commit comments

Comments
 (0)