3131 SingleStepGroup ,
3232 cancel_operation ,
3333 restart_create_operation_step_in_manual_intervention ,
34+ restart_revert_operation_step_in_error ,
3435 start_operation ,
3536)
3637from simcore_service_dynamic_scheduler .services .generic_scheduler ._errors import (
@@ -181,6 +182,21 @@ async def create(
181182 raise RuntimeError (msg )
182183
183184
185+ class _GlobalStepIssueTracker :
186+ has_issue : bool = True
187+
188+ @classmethod
189+ def set_issue_solved (cls ) -> None :
190+ cls .has_issue = False
191+
192+
193+ @pytest .fixture
194+ def reset_step_issue_tracker () -> Iterable [None ]:
195+ _GlobalStepIssueTracker .has_issue = True
196+ yield
197+ _GlobalStepIssueTracker .has_issue = True
198+
199+
184200class _FailOnCreateAndRevertBS (_BS ):
185201 @classmethod
186202 async def create (
@@ -195,8 +211,9 @@ async def revert(
195211 cls , app : FastAPI , required_context : RequiredOperationContext
196212 ) -> ProvidedOperationContext | None :
197213 await super ().revert (app , required_context )
198- msg = "always fails on REVERT"
199- raise RuntimeError (msg )
214+ if _GlobalStepIssueTracker .has_issue :
215+ msg = "sometimes fails only on REVERT"
216+ raise RuntimeError (msg )
200217
201218
202219class _SleepsForeverBS (_BS ):
@@ -208,32 +225,32 @@ async def create(
208225 await asyncio .sleep (1e10 )
209226
210227
211- class _GlobalManualInterventionTracker :
212- raise_on_create : bool = True
213-
214-
215- @pytest .fixture
216- def reset_raise_on_create () -> Iterable [None ]:
217- _GlobalManualInterventionTracker .raise_on_create = True
218- yield
219- _GlobalManualInterventionTracker .raise_on_create = True
220-
221-
222228class _WaitManualInerventionBS (_BS ):
223229 @classmethod
224230 async def create (
225231 cls , app : FastAPI , required_context : RequiredOperationContext
226232 ) -> ProvidedOperationContext | None :
227233 await super ().create (app , required_context )
228- if _GlobalManualInterventionTracker . raise_on_create :
229- msg = "always fails only on CREATE"
234+ if _GlobalStepIssueTracker . has_issue :
235+ msg = "sometimes fails only on CREATE"
230236 raise RuntimeError (msg )
231237
232238 @classmethod
233239 def wait_for_manual_intervention (cls ) -> bool :
234240 return True
235241
236242
243+ def _get_steps_matching_class (
244+ operation : Operation , * , match : type [BaseStep ]
245+ ) -> list [type ]:
246+ return [
247+ step
248+ for group in operation
249+ for step in group .get_step_subgroup_to_run ()
250+ if issubclass (step , match )
251+ ]
252+
253+
237254def _compose_key (
238255 key_nuber : int | None , * , with_revert : bool , is_creating : bool , is_providing : bool
239256) -> str :
@@ -427,6 +444,9 @@ class _FCR1(_FailOnCreateAndRevertBS): ...
427444class _FCR2 (_FailOnCreateAndRevertBS ): ...
428445
429446
447+ class _FCR3 (_FailOnCreateAndRevertBS ): ...
448+
449+
430450# Below will sleep forever
431451
432452
@@ -448,17 +468,6 @@ class _WMI2(_WaitManualInerventionBS): ...
448468class _WMI3 (_WaitManualInerventionBS ): ...
449469
450470
451- def _get_wait_manaul_intervention_steps (
452- operation : Operation ,
453- ) -> list [type [_WaitManualInerventionBS ]]:
454- result : list [type [_WaitManualInerventionBS ]] = []
455- for group in operation :
456- for step in group .get_step_subgroup_to_run ():
457- if issubclass (step , _WaitManualInerventionBS ):
458- result .append (step )
459- return result
460-
461-
462471# Below steps which require and provide context keys
463472
464473
@@ -1035,7 +1044,7 @@ async def test_repeating_step(
10351044 ],
10361045)
10371046async def test_wait_for_manual_intervention (
1038- reset_raise_on_create : None ,
1047+ reset_step_issue_tracker : None ,
10391048 preserve_caplog_for_async_logging : None ,
10401049 steps_call_order : list [tuple [str , str ]],
10411050 selected_app : FastAPI ,
@@ -1064,9 +1073,11 @@ async def test_wait_for_manual_intervention(
10641073 await asyncio .sleep (0.1 )
10651074 await _ensure_keys_in_store (selected_app , expected_keys = formatted_expected_keys )
10661075
1067- # unblock all waiting for manual intervention steps and restart them
1068- steps_to_restart = _get_wait_manaul_intervention_steps (operation )
1069- _GlobalManualInterventionTracker .raise_on_create = False
1076+ # set step to no longer raise and restart the failed steps
1077+ steps_to_restart = _get_steps_matching_class (
1078+ operation , match = _WaitManualInerventionBS
1079+ )
1080+ _GlobalStepIssueTracker .set_issue_solved ()
10701081 await limited_gather (
10711082 * (
10721083 restart_create_operation_step_in_manual_intervention (
@@ -1076,12 +1087,146 @@ async def test_wait_for_manual_intervention(
10761087 ),
10771088 limit = _PARALLEL_RESTARTS ,
10781089 )
1079- # should finish normally
1090+ # should finish schedule operation
1091+ await ensure_expected_order (steps_call_order , after_restart_expected_order )
1092+ await _ensure_keys_in_store (selected_app , expected_keys = set ())
1093+
1094+
1095+ @pytest .mark .parametrize ("app_count" , [10 ])
1096+ @pytest .mark .parametrize (
1097+ "operation, expected_order, expected_keys, after_restart_expected_order" ,
1098+ [
1099+ pytest .param (
1100+ [
1101+ SingleStepGroup (_S1 ),
1102+ ParallelStepGroup (_S2 , _S3 , _S4 ),
1103+ SingleStepGroup (_FCR1 ),
1104+ # below are not included in any expected order
1105+ ParallelStepGroup (_S5 , _S6 ),
1106+ SingleStepGroup (_S7 ),
1107+ ],
1108+ [
1109+ CreateSequence (_S1 ),
1110+ CreateRandom (_S2 , _S3 , _S4 ),
1111+ CreateSequence (_FCR1 ),
1112+ RevertSequence (_FCR1 ),
1113+ ],
1114+ {
1115+ "SCH:{schedule_id}" ,
1116+ "SCH:{schedule_id}:GROUPS:test_op:0S:C" ,
1117+ "SCH:{schedule_id}:GROUPS:test_op:1P:C" ,
1118+ "SCH:{schedule_id}:GROUPS:test_op:2S:C" ,
1119+ "SCH:{schedule_id}:GROUPS:test_op:2S:R" ,
1120+ "SCH:{schedule_id}:STEPS:test_op:0S:C:_S1" ,
1121+ "SCH:{schedule_id}:STEPS:test_op:1P:C:_S2" ,
1122+ "SCH:{schedule_id}:STEPS:test_op:1P:C:_S3" ,
1123+ "SCH:{schedule_id}:STEPS:test_op:1P:C:_S4" ,
1124+ "SCH:{schedule_id}:STEPS:test_op:2S:C:_FCR1" ,
1125+ "SCH:{schedule_id}:STEPS:test_op:2S:R:_FCR1" ,
1126+ },
1127+ [
1128+ CreateSequence (_S1 ),
1129+ CreateRandom (_S2 , _S3 , _S4 ),
1130+ CreateSequence (_FCR1 ),
1131+ RevertSequence (_FCR1 ),
1132+ RevertSequence (_FCR1 ), # this one is retried
1133+ RevertRandom (_S2 , _S3 , _S4 ),
1134+ RevertSequence (_S1 ),
1135+ ],
1136+ id = "s1-p3-s1(1mi)" ,
1137+ ),
1138+ pytest .param (
1139+ [
1140+ SingleStepGroup (_S1 ),
1141+ ParallelStepGroup (_S2 , _S3 , _S4 ),
1142+ ParallelStepGroup (_FCR1 , _FCR2 , _FCR3 , _S5 , _S6 , _S7 ),
1143+ # below are not included in any expected order
1144+ SingleStepGroup (_S8 ),
1145+ ParallelStepGroup (_S9 , _S10 ),
1146+ ],
1147+ [
1148+ CreateSequence (_S1 ),
1149+ CreateRandom (_S2 , _S3 , _S4 ),
1150+ CreateRandom (_FCR1 , _FCR2 , _FCR3 , _S5 , _S6 , _S7 ),
1151+ ],
1152+ {
1153+ "SCH:{schedule_id}" ,
1154+ "SCH:{schedule_id}:GROUPS:test_op:0S:C" ,
1155+ "SCH:{schedule_id}:GROUPS:test_op:1P:C" ,
1156+ "SCH:{schedule_id}:GROUPS:test_op:2P:C" ,
1157+ "SCH:{schedule_id}:GROUPS:test_op:2P:R" ,
1158+ "SCH:{schedule_id}:STEPS:test_op:0S:C:_S1" ,
1159+ "SCH:{schedule_id}:STEPS:test_op:1P:C:_S2" ,
1160+ "SCH:{schedule_id}:STEPS:test_op:1P:C:_S3" ,
1161+ "SCH:{schedule_id}:STEPS:test_op:1P:C:_S4" ,
1162+ "SCH:{schedule_id}:STEPS:test_op:2P:C:_S5" ,
1163+ "SCH:{schedule_id}:STEPS:test_op:2P:C:_S6" ,
1164+ "SCH:{schedule_id}:STEPS:test_op:2P:C:_S7" ,
1165+ "SCH:{schedule_id}:STEPS:test_op:2P:C:_FCR1" ,
1166+ "SCH:{schedule_id}:STEPS:test_op:2P:C:_FCR2" ,
1167+ "SCH:{schedule_id}:STEPS:test_op:2P:C:_FCR3" ,
1168+ "SCH:{schedule_id}:STEPS:test_op:2P:R:_S5" ,
1169+ "SCH:{schedule_id}:STEPS:test_op:2P:R:_S6" ,
1170+ "SCH:{schedule_id}:STEPS:test_op:2P:R:_S7" ,
1171+ "SCH:{schedule_id}:STEPS:test_op:2P:R:_FCR1" ,
1172+ "SCH:{schedule_id}:STEPS:test_op:2P:R:_FCR2" ,
1173+ "SCH:{schedule_id}:STEPS:test_op:2P:R:_FCR3" ,
1174+ },
1175+ [
1176+ CreateSequence (_S1 ),
1177+ CreateRandom (_S2 , _S3 , _S4 ),
1178+ CreateRandom (_FCR1 , _FCR2 , _FCR3 , _S5 , _S6 , _S7 ),
1179+ RevertRandom (_FCR1 , _FCR2 , _FCR3 , _S5 , _S6 , _S7 ),
1180+ RevertRandom (_FCR1 , _FCR2 , _FCR3 ), # retried steps
1181+ RevertRandom (_S2 , _S3 , _S4 ),
1182+ RevertSequence (_S1 ),
1183+ ],
1184+ id = "s1-p3-p6(3mi)" ,
1185+ ),
1186+ ],
1187+ )
1188+ async def test_restart_revert_operation_step_in_error (
1189+ reset_step_issue_tracker : None ,
1190+ preserve_caplog_for_async_logging : None ,
1191+ steps_call_order : list [tuple [str , str ]],
1192+ selected_app : FastAPI ,
1193+ register_operation : Callable [[OperationName , Operation ], None ],
1194+ operation : Operation ,
1195+ operation_name : OperationName ,
1196+ expected_order : list [BaseExpectedStepOrder ],
1197+ expected_keys : set [str ],
1198+ after_restart_expected_order : list [BaseExpectedStepOrder ],
1199+ ):
1200+ register_operation (operation_name , operation )
1201+
1202+ schedule_id = await start_operation (selected_app , operation_name , {})
1203+ assert isinstance (schedule_id , ScheduleId )
1204+
1205+ formatted_expected_keys = {k .format (schedule_id = schedule_id ) for k in expected_keys }
1206+
1207+ await ensure_expected_order (steps_call_order , expected_order )
1208+ await _ensure_keys_in_store (selected_app , expected_keys = formatted_expected_keys )
1209+
1210+ # set step to no longer raise and restart the failed steps
1211+ steps_to_restart = _get_steps_matching_class (
1212+ operation , match = _FailOnCreateAndRevertBS
1213+ )
1214+ _GlobalStepIssueTracker .set_issue_solved ()
1215+ await limited_gather (
1216+ * (
1217+ restart_revert_operation_step_in_error (
1218+ selected_app , schedule_id , step .get_step_name ()
1219+ )
1220+ for step in steps_to_restart
1221+ ),
1222+ limit = _PARALLEL_RESTARTS ,
1223+ )
1224+ # should finish schedule operation
10801225 await ensure_expected_order (steps_call_order , after_restart_expected_order )
10811226 await _ensure_keys_in_store (selected_app , expected_keys = set ())
10821227
10831228
1084- # TODO: restart_revert_operation_step_in_error
1229+ # TODO: add tests for all errors raised by `restart_operation_step_in_error`
10851230
10861231
10871232@pytest .mark .parametrize ("app_count" , [10 ])
0 commit comments