
Commit b73d14a

MiaAltieri, Gu1nness, and Mehdi-Bendriss authored
re-enable remote-upgrade tests (#355)
* re-enable-tests
* remove skips in test_upgrades.py
* fix: upgrades fixes
* fix: remove debug
* fixes for network cut tests
* add fix to backup tests
* increase timeout yolo
* fix password tests
* Update test_upgrades.py
* remote apps do not need a resource
* make sure tests wait
* fixes for how app is passed through functions
* update app name in tests

---------

Co-authored-by: Neha Oudin <[email protected]>
Co-authored-by: Mehdi Bendriss <[email protected]>
1 parent e8b6f2d commit b73d14a

File tree: 6 files changed, +99 -43 lines changed

tests/integration/backup_tests/test_backups.py

Lines changed: 14 additions & 9 deletions
@@ -12,7 +12,13 @@
 import pytest_asyncio
 import yaml
 from pytest_operator.plugin import OpsTest
-from tenacity import RetryError, Retrying, stop_after_delay, wait_fixed
+from tenacity import (
+    RetryError,
+    Retrying,
+    stop_after_attempt,
+    stop_after_delay,
+    wait_fixed,
+)
 
 from ..ha_tests import helpers as ha_helpers
 from ..helpers import (
@@ -442,14 +448,13 @@ async def test_restore_new_cluster(
     ), "Backups from old cluster are listed as failed"
 
     # find most recent backup id and restore
-    action = await leader_unit.run_action(action_name="list-backups")
-    list_result = await action.wait()
-    list_result = list_result.results["backups"]
-    most_recent_backup = list_result.split("\n")[-1]
-    backup_id = most_recent_backup.split()[0]
-    action = await leader_unit.run_action(action_name="restore", **{"backup-id": backup_id})
-    restore = await action.wait()
-    assert restore.results["restore-status"] == "restore started", "restore not successful"
+    for attempt in Retrying(stop=stop_after_attempt(120), wait=wait_fixed(1), reraise=True):
+        action = await leader_unit.run_action(action_name="list-backups")
+        list_result = await action.wait()
+        list_result = list_result.results["backups"]
+        most_recent_backup = list_result.split("\n")[-1]
+        backup_id = most_recent_backup.split()[0]
+        assert "-----" not in backup_id, "list of backups are empty."
 
     # verify all writes are present
     try:
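The retry loop above relies on tenacity's iterator API. A minimal, self-contained sketch of that pattern, shown with tenacity's usual "with attempt:" guard (the parsing helper and sample output are hypothetical, not from the test):

from tenacity import Retrying, stop_after_attempt, wait_fixed

def latest_backup_id(backups_output: str) -> str:
    """Return the id column of the last row of a list-backups table."""
    return backups_output.split("\n")[-1].split()[0]

sample_output = "backup-id | status\n2024-01-01T00:00:00Z | finished"
for attempt in Retrying(stop=stop_after_attempt(120), wait=wait_fixed(1), reraise=True):
    with attempt:  # an AssertionError raised here makes tenacity retry, up to 120 times, once per second
        backup_id = latest_backup_id(sample_output)
        assert "-----" not in backup_id, "list of backups is empty"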

tests/integration/ha_tests/helpers.py

Lines changed: 11 additions & 7 deletions
@@ -387,6 +387,7 @@ async def get_direct_mongo_client(
 ) -> MongoClient:
     """Returns a direct mongodb client potentially passing over some of the units."""
     port = MONGOS_PORT if mongos else MONGOD_PORT
+    mongodb_name = app_name or await get_application_name(ops_test, APP_NAME)
     if unit:
         url = await mongodb_uri(
             ops_test,
@@ -395,11 +396,10 @@
             port=port,
             username=username,
             password=password,
+            app_name=mongodb_name,
         )
         return MongoClient(url, directConnection=True)
 
-    mongodb_name = app_name or await get_application_name(ops_test, APP_NAME)
-
     for unit in ops_test.model.applications[mongodb_name].units:
         if unit.name not in excluded and unit.workload_status == "active":
             url = await mongodb_uri(
@@ -415,10 +415,10 @@
     assert False, "No fitting unit could be found"
 
 
-async def find_unit(ops_test: OpsTest, leader: bool) -> ops.model.Unit:
+async def find_unit(ops_test: OpsTest, leader: bool, app_name: str = APP_NAME) -> ops.model.Unit:
     """Helper function identifies a unit, based on need for leader or non-leader."""
     ret_unit = None
-    app = await get_application_name(ops_test, APP_NAME)
+    app = await get_application_name(ops_test, app_name)
     for unit in ops_test.model.applications[app].units:
         if await unit.is_leader_from_status() == leader:
             ret_unit = unit
@@ -666,11 +666,15 @@ def remove_instance_isolation(ops_test: OpsTest) -> None:
     reraise=True,
 )
 async def wait_until_unit_in_status(
-    ops_test: OpsTest, unit_to_check: Unit, online_unit: Unit, status: str
+    ops_test: OpsTest,
+    unit_to_check: Unit,
+    online_unit: Unit,
+    status: str,
+    app_name: str = APP_NAME,
 ) -> None:
     """Waits until a replica is in the provided status as reported by MongoDB or timeout occurs."""
     with await get_direct_mongo_client(
-        ops_test, online_unit.name, use_subprocess_to_get_password=True
+        ops_test, online_unit.name, use_subprocess_to_get_password=True, app_name=app_name
     ) as client:
         data = client.admin.command("replSetGetStatus")
 
@@ -972,5 +976,5 @@ async def count_writes(ops_test: OpsTest, app_name: str = None) -> int:
     """New versions of pymongo no longer support the count operation, instead find is used."""
     app_name = app_name or await get_app_name(ops_test)
     unit = ops_test.model.applications[app_name].units[0]
-    with await get_direct_mongo_client(ops_test, unit.name) as client:
+    with await get_direct_mongo_client(ops_test, unit.name, app_name=app_name) as client:
         return client[TEST_DB][TEST_COLLECTION].count_documents({})
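A hedged sketch of how a test can call these helpers now that app_name is threaded through, assuming the helpers are imported from ..ha_tests.helpers and an application named "mongodb-k8s" is deployed in the model:

from ..ha_tests.helpers import count_writes, find_unit

async def example_checks(ops_test):
    # Target the Charmhub-named application rather than the default APP_NAME.
    leader = await find_unit(ops_test, leader=True, app_name="mongodb-k8s")
    writes = await count_writes(ops_test, app_name="mongodb-k8s")
    assert leader is not None and writes >= 0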

tests/integration/sharding_tests/helpers.py

Lines changed: 5 additions & 1 deletion
@@ -57,7 +57,11 @@ async def deploy_cluster_components(
     else:
         my_charm = MONGODB_CHARM_NAME
 
-    resources = {"mongodb-image": METADATA["resources"]["mongodb-image"]["upstream-source"]}
+    resources = (
+        None
+        if channel
+        else {"mongodb-image": METADATA["resources"]["mongodb-image"]["upstream-source"]}
+    )
     await ops_test.model.deploy(
         my_charm,
         resources=resources,
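The conditional reflects that a charm deployed from a Charmhub channel ships its own OCI image, while a locally built charm needs the mongodb-image resource passed in. A small sketch of the same rule as a standalone function (hypothetical helper, not part of the repo):

from typing import Optional

def charm_resources(channel: Optional[str], metadata: dict) -> Optional[dict]:
    """Return the resources argument for ops_test.model.deploy."""
    if channel:  # deploying from Charmhub: the store provides the image
        return None
    return {"mongodb-image": metadata["resources"]["mongodb-image"]["upstream-source"]}

assert charm_resources("6/edge", {}) is None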

tests/integration/upgrades/helpers.py

Lines changed: 2 additions & 1 deletion
@@ -24,7 +24,8 @@ async def assert_successful_run_upgrade_sequence(
     logger.info(f"Upgrading {app_name}")
 
     await ops_test.model.applications[app_name].refresh(path=new_charm)
-    await ops_test.model.wait_for_idle(apps=[app_name], timeout=1000, idle_period=30)
+    # TODO future work, resolve flickering status of app
+    await ops_test.model.wait_for_idle(apps=[app_name], timeout=1000, idle_period=90)
 
     # resume upgrade only needs to be ran when:
     # 1. there are more than one units in the application
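As a point of reference, idle_period in libjuju's wait_for_idle is (roughly) how many consecutive seconds every unit must report an idle agent status before the call returns, so raising it from 30 to 90 gives the flickering status noted in the TODO time to settle.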

tests/integration/upgrades/test_sharding_upgrades.py

Lines changed: 42 additions & 8 deletions
@@ -71,9 +71,14 @@ async def add_writes_to_shards(ops_test: OpsTest):
 @pytest.mark.abort_on_fail
 async def test_build_and_deploy(ops_test: OpsTest) -> None:
     """Build deploy, and integrate, a sharded cluster."""
+    num_units_cluster_config = {
+        CONFIG_SERVER_APP_NAME: 3,
+        SHARD_ONE_APP_NAME: 3,
+        SHARD_TWO_APP_NAME: 3,
+    }
     await deploy_and_scale_application(ops_test)
 
-    await deploy_cluster_components(ops_test)
+    await deploy_cluster_components(ops_test, num_units_cluster_config, channel="6/edge")
 
     await ops_test.model.wait_for_idle(
         apps=CLUSTER_COMPONENTS,
@@ -94,7 +99,6 @@ async def test_build_and_deploy(ops_test: OpsTest) -> None:
     await ops_test.model.applications[WRITE_APP].set_config({"mongos-uri": mongos_uri})
 
 
-@pytest.mark.skip()
 @pytest.mark.group(1)
 @pytest.mark.abort_on_fail
 async def test_upgrade(ops_test: OpsTest, add_writes_to_shards) -> None:
@@ -106,6 +110,14 @@ async def test_upgrade(ops_test: OpsTest, add_writes_to_shards) -> None:
             ops_test, sharding_component, new_charm=new_charm
         )
 
+    await ops_test.model.wait_for_idle(
+        apps=CLUSTER_COMPONENTS,
+        status="active",
+        timeout=1000,
+        idle_period=30,
+        raise_on_error=False,
+    )
+
     application_unit = ops_test.model.applications[WRITE_APP].units[0]
     stop_writes_action = await application_unit.run_action(
         "stop-continuous-writes",
@@ -139,44 +151,66 @@ async def test_upgrade(ops_test: OpsTest, add_writes_to_shards) -> None:
     ), "missed writes during upgrade procedure."
 
 
-@pytest.mark.skip()
 @pytest.mark.group(1)
 @pytest.mark.abort_on_fail
 async def test_pre_upgrade_check_success(ops_test: OpsTest) -> None:
     """Verify that the pre-refresh check succeeds in the happy path."""
+    await ops_test.model.wait_for_idle(
+        apps=CLUSTER_COMPONENTS,
+        status="active",
+        timeout=1000,
+        idle_period=30,
+        raise_on_error=False,
+    )
+
     for sharding_component in CLUSTER_COMPONENTS:
         leader_unit = await backup_helpers.get_leader_unit(ops_test, sharding_component)
         action = await leader_unit.run_action("pre-refresh-check")
         await action.wait()
         assert action.status == "completed", "pre-refresh-check failed, expected to succeed."
 
 
-@pytest.mark.skip()
 @pytest.mark.group(1)
 @pytest.mark.abort_on_fail
 async def test_pre_upgrade_check_failure(ops_test: OpsTest, chaos_mesh) -> None:
     """Verify that the pre-refresh check fails if there is a problem with one of the shards."""
+    await ops_test.model.wait_for_idle(
+        apps=CLUSTER_COMPONENTS,
+        status="active",
+        timeout=1000,
+        idle_period=30,
+        raise_on_error=False,
+    )
+
     leader_unit = await backup_helpers.get_leader_unit(ops_test, SHARD_TWO_APP_NAME)
 
     non_leader_unit = None
     for unit in ops_test.model.applications[SHARD_TWO_APP_NAME].units:
-        if unit != leader_unit:
+        if unit.name != leader_unit.name:
             non_leader_unit = unit
             break
 
     isolate_instance_from_cluster(ops_test, non_leader_unit.name)
     await wait_until_unit_in_status(
-        ops_test, non_leader_unit, leader_unit, "(not reachable/healthy)"
+        ops_test,
+        non_leader_unit,
+        leader_unit,
+        "(not reachable/healthy)",
+        app_name=SHARD_TWO_APP_NAME,
     )
 
     for sharding_component in CLUSTER_COMPONENTS:
         leader_unit = await backup_helpers.get_leader_unit(ops_test, sharding_component)
         action = await leader_unit.run_action("pre-refresh-check")
         await action.wait()
-        assert action.status == "completed", "pre-refresh-check failed, expected to succeed."
+        assert action.status == "failed", "pre-refresh-check succeeded, expected to fail."
 
     # restore network after test
     remove_instance_isolation(ops_test)
     await ops_test.model.wait_for_idle(
-        apps=[SHARD_TWO_APP_NAME], status="active", timeout=1000, idle_period=30
+        apps=[SHARD_TWO_APP_NAME],
+        status="active",
+        timeout=1000,
+        idle_period=30,
+        raise_on_error=False,
     )
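Two recurring changes in this hunk are worth noting: units are now compared by unit.name, since two libjuju Unit objects can refer to the same unit without comparing equal, and raise_on_error=False lets wait_for_idle tolerate units that pass through a transient error state while the cluster settles, rather than aborting the test.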

tests/integration/upgrades/test_upgrades.py

Lines changed: 25 additions & 17 deletions
@@ -9,6 +9,7 @@
 from pytest_operator.plugin import OpsTest
 
 from ..backup_tests import helpers as backup_helpers
+from ..ha_tests import helpers as ha_helpers
 from ..ha_tests.helpers import (
     count_writes,
     deploy_and_scale_application,
@@ -18,7 +19,7 @@
     remove_instance_isolation,
     wait_until_unit_in_status,
 )
-from ..helpers import check_or_scale_app, get_app_name, get_password, set_password
+from ..helpers import check_or_scale_app, get_app_name, get_password
 from .helpers import assert_successful_run_upgrade_sequence
 
 logger = logging.getLogger(__name__)
@@ -27,7 +28,15 @@
 MONGODB_CHARM_NAME = "mongodb-k8s"
 
 
-@pytest.mark.skip("skip until upgrades work has been released to charmhub")
+@pytest.fixture(scope="module")
+def chaos_mesh(ops_test: OpsTest) -> None:
+    ha_helpers.deploy_chaos_mesh(ops_test.model.info.name)
+
+    yield
+
+    ha_helpers.destroy_chaos_mesh(ops_test.model.info.name)
+
+
 @pytest.mark.group(1)
 @pytest.mark.abort_on_fail
 async def test_build_and_deploy(ops_test: OpsTest):
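The module-scoped chaos_mesh fixture deploys Chaos Mesh into the test model once, yields for the duration of the module, and tears it down afterwards; tests that need network isolation (such as test_preflight_check_failure below) opt in simply by declaring chaos_mesh as an argument.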
@@ -37,10 +46,10 @@ async def test_build_and_deploy(ops_test: OpsTest):
     db_app_name = await get_app_name(ops_test)
 
     if db_app_name:
-        await check_or_scale_app(ops_test, db_app_name, required_units=2)
+        await check_or_scale_app(ops_test, db_app_name, required_units=3)
         return
     else:
-        await ops_test.model.deploy(MONGODB_CHARM_NAME, channel="6/edge", num_units=2)
+        await ops_test.model.deploy(MONGODB_CHARM_NAME, channel="6/edge", num_units=3, trust=True)
 
     db_app_name = await get_app_name(ops_test)
     await ops_test.model.wait_for_idle(
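The new trust=True is libjuju's equivalent of juju deploy --trust, giving the charm access to the Kubernetes resources it manages, and num_units=3 matches the three-unit scale now expected by check_or_scale_app.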
@@ -50,7 +59,6 @@ async def test_build_and_deploy(ops_test: OpsTest):
     await relate_mongodb_and_application(ops_test, db_app_name, WRITE_APP)
 
 
-@pytest.mark.skip("skip until upgrades work has been released to charmhub")
 @pytest.mark.group(1)
 @pytest.mark.abort_on_fail
 async def test_successful_upgrade(ops_test: OpsTest, continuous_writes) -> None:
@@ -69,7 +77,6 @@ async def test_successful_upgrade(ops_test: OpsTest, continuous_writes) -> None:
     assert total_expected_writes == actual_writes, "missed writes during upgrade procedure."
 
 
-@pytest.mark.skip("skip until upgrades work has been released to charmhub")
 @pytest.mark.group(1)
 @pytest.mark.abort_on_fail
 async def test_preflight_check(ops_test: OpsTest) -> None:
@@ -82,7 +89,6 @@ async def test_preflight_check(ops_test: OpsTest) -> None:
     assert action.status == "completed", "pre-refresh-check failed, expected to succeed."
 
 
-@pytest.mark.skip("skip until upgrades work has been released to charmhub")
 @pytest.mark.group(1)
 @pytest.mark.abort_on_fail
 async def test_preflight_check_failure(ops_test: OpsTest, chaos_mesh) -> None:
@@ -91,7 +97,7 @@ async def test_preflight_check_failure(ops_test: OpsTest, chaos_mesh) -> None:
 
     non_leader_unit = None
     for unit in ops_test.model.applications[db_app_name].units:
-        if unit != leader_unit:
+        if unit.name != leader_unit.name:
             non_leader_unit = unit
             break
 
@@ -103,29 +109,31 @@ async def test_preflight_check_failure(ops_test: OpsTest, chaos_mesh) -> None:
     logger.info("Calling pre-refresh-check")
     action = await leader_unit.run_action("pre-refresh-check")
     await action.wait()
-    assert action.status == "completed", "pre-refresh-check failed, expected to succeed."
+    assert action.status == "failed", "pre-refresh-check succeeded, expected to fail."
 
     # restore network after test
     remove_instance_isolation(ops_test)
     await ops_test.model.wait_for_idle(
-        apps=[db_app_name], status="active", timeout=1000, idle_period=30
+        apps=[db_app_name], status="active", timeout=1000, idle_period=30, raise_on_error=False
     )
 
 
-@pytest.mark.skip("Missing upgrade code for now")
 @pytest.mark.group(1)
 @pytest.mark.abort_on_fail
 async def test_upgrade_password_change_fail(ops_test: OpsTest):
     app_name = await get_app_name(ops_test)
-    leader_id = await find_unit(ops_test, leader=True, app_name=app_name)
-
-    current_password = await get_password(ops_test, leader_id, app_name=app_name)
+    leader = await find_unit(ops_test, leader=True, app_name="mongodb-k8s")
+    leader_id = leader.name.split("/")[1]
+    current_password = await get_password(ops_test, leader_id, app_name="mongodb-k8s")
     new_charm = await ops_test.build_charm(".")
+
     await ops_test.model.applications[app_name].refresh(path=new_charm)
-    results = await set_password(ops_test, leader_id, password="0xdeadbeef", app_name=app_name)
 
-    assert results == "Cannot set passwords while an upgrade is in progress."
+    action = await ops_test.model.units.get(f"{app_name}/{leader_id}").run_action(
+        "set-password", **{"username": "username", "password": "new-password"}
+    )
+    action = await action.wait()
 
+    assert "Cannot set passwords while an upgrade is in progress." == action.message
     after_action_password = await get_password(ops_test, leader_id, app_name=app_name)
-
     assert current_password == after_action_password
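For reference, a hedged sketch of the python-libjuju action round-trip the rewritten password test relies on; the unit name, action name, and parameters here are whatever the charm under test defines, and status/message are read from the completed Action:

async def try_set_password(ops_test, unit_name: str, username: str, password: str):
    unit = ops_test.model.units.get(unit_name)
    action = await unit.run_action("set-password", username=username, password=password)
    action = await action.wait()  # resolves once the action has completed or failed
    return action.status, action.message  # e.g. ("failed", "Cannot set passwords while an upgrade is in progress.")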
