Skip to content

Commit 576806d

Browse files
[DPE-7129] Fix network cut without ip change test (#900)
* Move _update_member_ip call to correctly remove Raft cluster member when network is cut
  Signed-off-by: Marcelo Henrique Neppel <[email protected]>
* Fix coverage
  Signed-off-by: Marcelo Henrique Neppel <[email protected]>
* Update refresh tests to modify charm to ensure refresh off edge or stable
* Fix lint warnings
* Store temporary charms in /tmp for upgrade_from_stable tests
* Use force-refresh-start instead of forcing refresh by updating versions
* Handle same snap revision situation in upgrade tests
  Signed-off-by: Marcelo Henrique Neppel <[email protected]>
* Improvement to avoid replica restart while syncing from primary
  Signed-off-by: Marcelo Henrique Neppel <[email protected]>
* Run stop-continuous-writes action only once
  Signed-off-by: Marcelo Henrique Neppel <[email protected]>
---------
Signed-off-by: Marcelo Henrique Neppel <[email protected]>
Co-authored-by: Shayan Patel <[email protected]>
1 parent 3ee6bd0 commit 576806d

File tree

4 files changed

+46
-31
lines changed

4 files changed

+46
-31
lines changed

src/charm.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -851,9 +851,6 @@ def _peer_relation_changed_checks(self, event: HookEvent) -> bool:
851851
event.defer()
852852
return False
853853

854-
if self._update_member_ip():
855-
return False
856-
857854
# Don't update this member before it's part of the members list.
858855
if self._unit_ip not in self.members_ips:
859856
logger.debug("Early exit on_peer_relation_changed: Unit not in the members list")
@@ -1446,6 +1443,11 @@ def _on_config_changed(self, event) -> None: # noqa: C901
14461443
if self.is_blocked and "Configuration Error" in self.unit.status.message:
14471444
self.set_unit_status(ActiveStatus())
14481445

1446+
if self._update_member_ip():
1447+
# Update the sync-standby endpoint in the async replication data.
1448+
self.async_replication.update_async_replication_data()
1449+
return
1450+
14491451
# Update the sync-standby endpoint in the async replication data.
14501452
self.async_replication.update_async_replication_data()
14511453

src/cluster.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -532,7 +532,12 @@ def member_inactive(self) -> bool:
532532
except RetryError:
533533
return True
534534

535-
return response["state"] not in [*RUNNING_STATES, "starting", "restarting"]
535+
return response["state"] not in [
536+
*RUNNING_STATES,
537+
"creating replica",
538+
"starting",
539+
"restarting",
540+
]
536541

537542
@property
538543
def member_replication_lag(self) -> str:

tests/integration/ha_tests/helpers.py

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -241,9 +241,7 @@ async def is_cluster_updated(
241241

242242
# Verify that no writes to the database were missed after stopping the writes.
243243
logger.info("checking that no writes to the database were missed after stopping the writes")
244-
for attempt in Retrying(stop=stop_after_attempt(3), wait=wait_fixed(5), reraise=True):
245-
with attempt:
246-
total_expected_writes = await check_writes(ops_test, use_ip_from_inside)
244+
total_expected_writes = await check_writes(ops_test, use_ip_from_inside)
247245

248246
# Verify that old primary is up-to-date.
249247
logger.info("checking that the former primary is up to date with the cluster after restarting")
@@ -259,17 +257,19 @@ async def check_writes(
259257
) -> int:
260258
"""Gets the total writes from the test charm and compares to the writes from db."""
261259
total_expected_writes = await stop_continuous_writes(ops_test)
262-
actual_writes, max_number_written = await count_writes(
263-
ops_test, use_ip_from_inside=use_ip_from_inside, extra_model=extra_model
264-
)
265-
for member, count in actual_writes.items():
266-
print(
267-
f"member: {member}, count: {count}, max_number_written: {max_number_written[member]}, total_expected_writes: {total_expected_writes}"
268-
)
269-
assert count == max_number_written[member], (
270-
f"{member}: writes to the db were missed: count of actual writes different from the max number written."
271-
)
272-
assert total_expected_writes == count, f"{member}: writes to the db were missed."
260+
for attempt in Retrying(stop=stop_after_attempt(3), wait=wait_fixed(5), reraise=True):
261+
with attempt:
262+
actual_writes, max_number_written = await count_writes(
263+
ops_test, use_ip_from_inside=use_ip_from_inside, extra_model=extra_model
264+
)
265+
for member, count in actual_writes.items():
266+
logger.info(
267+
f"member: {member}, count: {count}, max_number_written: {max_number_written[member]}, total_expected_writes: {total_expected_writes}"
268+
)
269+
assert count == max_number_written[member], (
270+
f"{member}: writes to the db were missed: count of actual writes different from the max number written."
271+
)
272+
assert total_expected_writes == count, f"{member}: writes to the db were missed."
273273
return total_expected_writes
274274

275275

tests/unit/test_charm.py

Lines changed: 21 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -267,6 +267,9 @@ def test_is_cluster_initialised(harness):
267267

268268
def test_on_config_changed(harness):
269269
with (
270+
patch(
271+
"charm.PostgresqlOperatorCharm._update_member_ip", return_value=False
272+
) as _update_member_ip,
270273
patch(
271274
"charm.PostgresqlOperatorCharm._validate_config_options"
272275
) as _validate_config_options,
@@ -316,6 +319,23 @@ def test_on_config_changed(harness):
316319
harness.charm.on.config_changed.emit()
317320
_enable_disable_extensions.assert_called_once()
318321

322+
# Test when there is an error related to the config options.
323+
_update_member_ip.reset_mock()
324+
_enable_disable_extensions.reset_mock()
325+
harness.charm.unit.status = BlockedStatus("Configuration Error")
326+
harness.charm.on.config_changed.emit()
327+
assert isinstance(harness.model.unit.status, ActiveStatus)
328+
_update_member_ip.assert_called_once()
329+
_enable_disable_extensions.assert_called_once()
330+
331+
# Test when the unit has updated its member IP.
332+
_update_member_ip.reset_mock()
333+
_enable_disable_extensions.reset_mock()
334+
_update_member_ip.return_value = True
335+
harness.charm.on.config_changed.emit()
336+
_update_member_ip.assert_called_once()
337+
_enable_disable_extensions.assert_not_called()
338+
319339

320340
def test_check_extension_dependencies(harness):
321341
with (
@@ -1426,32 +1446,20 @@ def test_on_peer_relation_changed(harness):
14261446
mock_event.defer.reset_mock()
14271447
_reconfigure_cluster.reset_mock()
14281448
_reconfigure_cluster.return_value = True
1429-
_update_member_ip.return_value = False
14301449
_member_started.return_value = True
14311450
_primary_endpoint.return_value = "192.0.2.0"
14321451
harness.model.unit.status = WaitingStatus("awaiting for cluster to start")
14331452
harness.charm._on_peer_relation_changed(mock_event)
14341453
mock_event.defer.assert_not_called()
14351454
_reconfigure_cluster.assert_called_once_with(mock_event)
1436-
_update_member_ip.assert_called_once()
14371455
_update_config.assert_called_once()
14381456
_start_patroni.assert_called_once()
14391457
_update_new_unit_status.assert_called_once()
14401458

1441-
# Test when the cluster member updates its IP.
1442-
_update_member_ip.reset_mock()
1459+
# Test when the unit fails to update the Patroni configuration.
14431460
_update_config.reset_mock()
14441461
_start_patroni.reset_mock()
1445-
_update_member_ip.return_value = True
14461462
_update_new_unit_status.reset_mock()
1447-
harness.charm._on_peer_relation_changed(mock_event)
1448-
_update_member_ip.assert_called_once()
1449-
_update_config.assert_not_called()
1450-
_start_patroni.assert_not_called()
1451-
_update_new_unit_status.assert_not_called()
1452-
1453-
# Test when the unit fails to update the Patroni configuration.
1454-
_update_member_ip.return_value = False
14551463
_update_config.side_effect = RetryError(last_attempt=1)
14561464
harness.charm._on_peer_relation_changed(mock_event)
14571465
_update_config.assert_called_once()

0 commit comments

Comments
 (0)