Skip to content

Commit 47f804c

Browse files
author
Andrei Neagu
committed
added tests for restart_revert_operation_step_in_error
1 parent 0f32e1d commit 47f804c

File tree

2 files changed

+178
-33
lines changed
  • services/dynamic-scheduler
    • src/simcore_service_dynamic_scheduler/services/generic_scheduler
    • tests/unit/services/generic_scheduler

2 files changed

+178
-33
lines changed

services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_core.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -762,7 +762,7 @@ async def restart_create_operation_step_in_manual_intervention(
762762
)
763763

764764

765-
async def restart_revert_operation_step_in_error( # TODO: add test for this
765+
async def restart_revert_operation_step_in_error(
766766
app: FastAPI, schedule_id: ScheduleId, step_name: StepName
767767
) -> None:
768768
"""restarts a step stuck in `revert` in an error state"""

services/dynamic-scheduler/tests/unit/services/generic_scheduler/test__core.py

Lines changed: 177 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
SingleStepGroup,
3232
cancel_operation,
3333
restart_create_operation_step_in_manual_intervention,
34+
restart_revert_operation_step_in_error,
3435
start_operation,
3536
)
3637
from simcore_service_dynamic_scheduler.services.generic_scheduler._errors import (
@@ -181,6 +182,21 @@ async def create(
181182
raise RuntimeError(msg)
182183

183184

185+
class _GlobalStepIssueTracker:
186+
has_issue: bool = True
187+
188+
@classmethod
189+
def set_issue_solved(cls) -> None:
190+
cls.has_issue = False
191+
192+
193+
@pytest.fixture
194+
def reset_step_issue_tracker() -> Iterable[None]:
195+
_GlobalStepIssueTracker.has_issue = True
196+
yield
197+
_GlobalStepIssueTracker.has_issue = True
198+
199+
184200
class _FailOnCreateAndRevertBS(_BS):
185201
@classmethod
186202
async def create(
@@ -195,8 +211,9 @@ async def revert(
195211
cls, app: FastAPI, required_context: RequiredOperationContext
196212
) -> ProvidedOperationContext | None:
197213
await super().revert(app, required_context)
198-
msg = "always fails on REVERT"
199-
raise RuntimeError(msg)
214+
if _GlobalStepIssueTracker.has_issue:
215+
msg = "sometimes fails only on REVERT"
216+
raise RuntimeError(msg)
200217

201218

202219
class _SleepsForeverBS(_BS):
@@ -208,32 +225,32 @@ async def create(
208225
await asyncio.sleep(1e10)
209226

210227

211-
class _GlobalManualInterventionTracker:
212-
raise_on_create: bool = True
213-
214-
215-
@pytest.fixture
216-
def reset_raise_on_create() -> Iterable[None]:
217-
_GlobalManualInterventionTracker.raise_on_create = True
218-
yield
219-
_GlobalManualInterventionTracker.raise_on_create = True
220-
221-
222228
class _WaitManualInerventionBS(_BS):
223229
@classmethod
224230
async def create(
225231
cls, app: FastAPI, required_context: RequiredOperationContext
226232
) -> ProvidedOperationContext | None:
227233
await super().create(app, required_context)
228-
if _GlobalManualInterventionTracker.raise_on_create:
229-
msg = "always fails only on CREATE"
234+
if _GlobalStepIssueTracker.has_issue:
235+
msg = "sometimes fails only on CREATE"
230236
raise RuntimeError(msg)
231237

232238
@classmethod
233239
def wait_for_manual_intervention(cls) -> bool:
234240
return True
235241

236242

243+
def _get_steps_matching_class(
244+
operation: Operation, *, match: type[BaseStep]
245+
) -> list[type]:
246+
return [
247+
step
248+
for group in operation
249+
for step in group.get_step_subgroup_to_run()
250+
if issubclass(step, match)
251+
]
252+
253+
237254
def _compose_key(
238255
key_nuber: int | None, *, with_revert: bool, is_creating: bool, is_providing: bool
239256
) -> str:
@@ -427,6 +444,9 @@ class _FCR1(_FailOnCreateAndRevertBS): ...
427444
class _FCR2(_FailOnCreateAndRevertBS): ...
428445

429446

447+
class _FCR3(_FailOnCreateAndRevertBS): ...
448+
449+
430450
# Below will sleep forever
431451

432452

@@ -448,17 +468,6 @@ class _WMI2(_WaitManualInerventionBS): ...
448468
class _WMI3(_WaitManualInerventionBS): ...
449469

450470

451-
def _get_wait_manaul_intervention_steps(
452-
operation: Operation,
453-
) -> list[type[_WaitManualInerventionBS]]:
454-
result: list[type[_WaitManualInerventionBS]] = []
455-
for group in operation:
456-
for step in group.get_step_subgroup_to_run():
457-
if issubclass(step, _WaitManualInerventionBS):
458-
result.append(step)
459-
return result
460-
461-
462471
# Below steps which require and provide context keys
463472

464473

@@ -1035,7 +1044,7 @@ async def test_repeating_step(
10351044
],
10361045
)
10371046
async def test_wait_for_manual_intervention(
1038-
reset_raise_on_create: None,
1047+
reset_step_issue_tracker: None,
10391048
preserve_caplog_for_async_logging: None,
10401049
steps_call_order: list[tuple[str, str]],
10411050
selected_app: FastAPI,
@@ -1064,9 +1073,11 @@ async def test_wait_for_manual_intervention(
10641073
await asyncio.sleep(0.1)
10651074
await _ensure_keys_in_store(selected_app, expected_keys=formatted_expected_keys)
10661075

1067-
# unblock all waiting for manual intervention steps and restart them
1068-
steps_to_restart = _get_wait_manaul_intervention_steps(operation)
1069-
_GlobalManualInterventionTracker.raise_on_create = False
1076+
# set step to no longer raise and restart the failed steps
1077+
steps_to_restart = _get_steps_matching_class(
1078+
operation, match=_WaitManualInerventionBS
1079+
)
1080+
_GlobalStepIssueTracker.set_issue_solved()
10701081
await limited_gather(
10711082
*(
10721083
restart_create_operation_step_in_manual_intervention(
@@ -1076,12 +1087,146 @@ async def test_wait_for_manual_intervention(
10761087
),
10771088
limit=_PARALLEL_RESTARTS,
10781089
)
1079-
# should finish normally
1090+
# should finish schedule operation
1091+
await ensure_expected_order(steps_call_order, after_restart_expected_order)
1092+
await _ensure_keys_in_store(selected_app, expected_keys=set())
1093+
1094+
1095+
@pytest.mark.parametrize("app_count", [10])
1096+
@pytest.mark.parametrize(
1097+
"operation, expected_order, expected_keys, after_restart_expected_order",
1098+
[
1099+
pytest.param(
1100+
[
1101+
SingleStepGroup(_S1),
1102+
ParallelStepGroup(_S2, _S3, _S4),
1103+
SingleStepGroup(_FCR1),
1104+
# below are not included in any expected order
1105+
ParallelStepGroup(_S5, _S6),
1106+
SingleStepGroup(_S7),
1107+
],
1108+
[
1109+
CreateSequence(_S1),
1110+
CreateRandom(_S2, _S3, _S4),
1111+
CreateSequence(_FCR1),
1112+
RevertSequence(_FCR1),
1113+
],
1114+
{
1115+
"SCH:{schedule_id}",
1116+
"SCH:{schedule_id}:GROUPS:test_op:0S:C",
1117+
"SCH:{schedule_id}:GROUPS:test_op:1P:C",
1118+
"SCH:{schedule_id}:GROUPS:test_op:2S:C",
1119+
"SCH:{schedule_id}:GROUPS:test_op:2S:R",
1120+
"SCH:{schedule_id}:STEPS:test_op:0S:C:_S1",
1121+
"SCH:{schedule_id}:STEPS:test_op:1P:C:_S2",
1122+
"SCH:{schedule_id}:STEPS:test_op:1P:C:_S3",
1123+
"SCH:{schedule_id}:STEPS:test_op:1P:C:_S4",
1124+
"SCH:{schedule_id}:STEPS:test_op:2S:C:_FCR1",
1125+
"SCH:{schedule_id}:STEPS:test_op:2S:R:_FCR1",
1126+
},
1127+
[
1128+
CreateSequence(_S1),
1129+
CreateRandom(_S2, _S3, _S4),
1130+
CreateSequence(_FCR1),
1131+
RevertSequence(_FCR1),
1132+
RevertSequence(_FCR1), # this one is retried
1133+
RevertRandom(_S2, _S3, _S4),
1134+
RevertSequence(_S1),
1135+
],
1136+
id="s1-p3-s1(1mi)",
1137+
),
1138+
pytest.param(
1139+
[
1140+
SingleStepGroup(_S1),
1141+
ParallelStepGroup(_S2, _S3, _S4),
1142+
ParallelStepGroup(_FCR1, _FCR2, _FCR3, _S5, _S6, _S7),
1143+
# below are not included in any expected order
1144+
SingleStepGroup(_S8),
1145+
ParallelStepGroup(_S9, _S10),
1146+
],
1147+
[
1148+
CreateSequence(_S1),
1149+
CreateRandom(_S2, _S3, _S4),
1150+
CreateRandom(_FCR1, _FCR2, _FCR3, _S5, _S6, _S7),
1151+
],
1152+
{
1153+
"SCH:{schedule_id}",
1154+
"SCH:{schedule_id}:GROUPS:test_op:0S:C",
1155+
"SCH:{schedule_id}:GROUPS:test_op:1P:C",
1156+
"SCH:{schedule_id}:GROUPS:test_op:2P:C",
1157+
"SCH:{schedule_id}:GROUPS:test_op:2P:R",
1158+
"SCH:{schedule_id}:STEPS:test_op:0S:C:_S1",
1159+
"SCH:{schedule_id}:STEPS:test_op:1P:C:_S2",
1160+
"SCH:{schedule_id}:STEPS:test_op:1P:C:_S3",
1161+
"SCH:{schedule_id}:STEPS:test_op:1P:C:_S4",
1162+
"SCH:{schedule_id}:STEPS:test_op:2P:C:_S5",
1163+
"SCH:{schedule_id}:STEPS:test_op:2P:C:_S6",
1164+
"SCH:{schedule_id}:STEPS:test_op:2P:C:_S7",
1165+
"SCH:{schedule_id}:STEPS:test_op:2P:C:_FCR1",
1166+
"SCH:{schedule_id}:STEPS:test_op:2P:C:_FCR2",
1167+
"SCH:{schedule_id}:STEPS:test_op:2P:C:_FCR3",
1168+
"SCH:{schedule_id}:STEPS:test_op:2P:R:_S5",
1169+
"SCH:{schedule_id}:STEPS:test_op:2P:R:_S6",
1170+
"SCH:{schedule_id}:STEPS:test_op:2P:R:_S7",
1171+
"SCH:{schedule_id}:STEPS:test_op:2P:R:_FCR1",
1172+
"SCH:{schedule_id}:STEPS:test_op:2P:R:_FCR2",
1173+
"SCH:{schedule_id}:STEPS:test_op:2P:R:_FCR3",
1174+
},
1175+
[
1176+
CreateSequence(_S1),
1177+
CreateRandom(_S2, _S3, _S4),
1178+
CreateRandom(_FCR1, _FCR2, _FCR3, _S5, _S6, _S7),
1179+
RevertRandom(_FCR1, _FCR2, _FCR3, _S5, _S6, _S7),
1180+
RevertRandom(_FCR1, _FCR2, _FCR3), # retried steps
1181+
RevertRandom(_S2, _S3, _S4),
1182+
RevertSequence(_S1),
1183+
],
1184+
id="s1-p3-p6(3mi)",
1185+
),
1186+
],
1187+
)
1188+
async def test_restart_revert_operation_step_in_error(
1189+
reset_step_issue_tracker: None,
1190+
preserve_caplog_for_async_logging: None,
1191+
steps_call_order: list[tuple[str, str]],
1192+
selected_app: FastAPI,
1193+
register_operation: Callable[[OperationName, Operation], None],
1194+
operation: Operation,
1195+
operation_name: OperationName,
1196+
expected_order: list[BaseExpectedStepOrder],
1197+
expected_keys: set[str],
1198+
after_restart_expected_order: list[BaseExpectedStepOrder],
1199+
):
1200+
register_operation(operation_name, operation)
1201+
1202+
schedule_id = await start_operation(selected_app, operation_name, {})
1203+
assert isinstance(schedule_id, ScheduleId)
1204+
1205+
formatted_expected_keys = {k.format(schedule_id=schedule_id) for k in expected_keys}
1206+
1207+
await ensure_expected_order(steps_call_order, expected_order)
1208+
await _ensure_keys_in_store(selected_app, expected_keys=formatted_expected_keys)
1209+
1210+
# set step to no longer raise and restart the failed steps
1211+
steps_to_restart = _get_steps_matching_class(
1212+
operation, match=_FailOnCreateAndRevertBS
1213+
)
1214+
_GlobalStepIssueTracker.set_issue_solved()
1215+
await limited_gather(
1216+
*(
1217+
restart_revert_operation_step_in_error(
1218+
selected_app, schedule_id, step.get_step_name()
1219+
)
1220+
for step in steps_to_restart
1221+
),
1222+
limit=_PARALLEL_RESTARTS,
1223+
)
1224+
# should finish schedule operation
10801225
await ensure_expected_order(steps_call_order, after_restart_expected_order)
10811226
await _ensure_keys_in_store(selected_app, expected_keys=set())
10821227

10831228

1084-
# TODO: restart_revert_operation_step_in_error
1229+
# TODO: add tests for all errors raised by `restart_operation_step_in_error`
10851230

10861231

10871232
@pytest.mark.parametrize("app_count", [10])

0 commit comments

Comments
 (0)