diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/__init__.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/__init__.py index a294fccc2273..6f91a3e789c5 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/__init__.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/__init__.py @@ -1,7 +1,7 @@ from ._core import ( cancel_operation, - restart_operation_step_stuck_during_undo, - restart_operation_step_stuck_in_manual_intervention_during_create, + restart_operation_step_stuck_during_revert, + restart_operation_step_stuck_in_manual_intervention_during_execute, start_operation, ) from ._deferred_runner import ( @@ -9,9 +9,14 @@ get_step_group_proxy, get_step_store_proxy, ) +from ._event_after_registration import ( + register_to_start_after_on_executed_completed, + register_to_start_after_on_reverted_completed, +) from ._lifespan import generic_scheduler_lifespan from ._models import ( OperationName, + OperationToStart, ProvidedOperationContext, RequiredOperationContext, ScheduleId, @@ -36,11 +41,14 @@ "OperationContextProxy", "OperationName", "OperationRegistry", + "OperationToStart", "ParallelStepGroup", "ProvidedOperationContext", + "register_to_start_after_on_executed_completed", + "register_to_start_after_on_reverted_completed", "RequiredOperationContext", - "restart_operation_step_stuck_during_undo", - "restart_operation_step_stuck_in_manual_intervention_during_create", + "restart_operation_step_stuck_during_revert", + "restart_operation_step_stuck_in_manual_intervention_during_execute", "ScheduleId", "SingleStepGroup", "start_operation", diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_core.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_core.py index 
8ee52189eefb..17f259d22f21 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_core.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_core.py @@ -30,16 +30,27 @@ from ._errors import ( CannotCancelWhileWaitingForManualInterventionError, NoDataFoundError, + OperationNotCancellableError, StepNameNotInCurrentGroupError, StepNotInErrorStateError, StepNotWaitingForManualInterventionError, UnexpectedStepHandlingError, ) -from ._event import enqueue_schedule_event +from ._event import ( + enqueue_execute_completed_event, + enqueue_revert_completed_event, + enqueue_schedule_event, +) +from ._event_after_registration import ( + register_to_start_after_on_executed_completed, + register_to_start_after_on_reverted_completed, +) from ._models import ( + EventType, OperationContext, OperationErrorType, OperationName, + OperationToStart, ScheduleId, StepName, StepStatus, @@ -52,6 +63,7 @@ from ._store import ( DeleteStepKeys, OperationContextProxy, + OperationEventsProxy, ScheduleDataStoreProxy, StepGroupProxy, StepStoreProxy, @@ -79,7 +91,11 @@ def __init__( self._store: Store = Store.get_from_app_state(app) async def start_operation( - self, operation_name: OperationName, initial_operation_context: OperationContext + self, + operation_name: OperationName, + initial_operation_context: OperationContext, + on_execute_completed: OperationToStart | None, + on_revert_completed: OperationToStart | None, ) -> ScheduleId: """start an operation by it's given name and providing an initial context""" schedule_id: ScheduleId = f"{uuid4()}" @@ -101,7 +117,7 @@ async def start_operation( { "operation_name": operation_name, "group_index": 0, - "is_creating": True, + "is_executing": True, } ) @@ -112,14 +128,24 @@ async def start_operation( ) await operation_content_proxy.create_or_update(initial_operation_context) + if on_execute_completed: + await 
register_to_start_after_on_executed_completed( + self.app, schedule_id, to_start=on_execute_completed + ) + + if on_revert_completed: + await register_to_start_after_on_reverted_completed( + self.app, schedule_id=schedule_id, to_start=on_revert_completed + ) + await enqueue_schedule_event(self.app, schedule_id) return schedule_id async def cancel_operation(self, schedule_id: ScheduleId) -> None: """ - Sets the operation to undo form the point in which it arrived in: - - when is_creating=True: cancels all steps & moves operation to undo - - when is_creating=False: does nothing, since undo is already running + Sets the operation to revert form the point in which it arrived in: + - when is_executing=True: cancels all steps & moves operation to revert + - when is_executing=False: does nothing, since revert is already running # NOTE: SEE `_on_schedule_event` for more details """ @@ -127,11 +153,11 @@ async def cancel_operation(self, schedule_id: ScheduleId) -> None: store=self._store, schedule_id=schedule_id ) - is_creating = await schedule_data_proxy.read("is_creating") + is_executing = await schedule_data_proxy.read("is_executing") - if is_creating is False: + if is_executing is False: _logger.warning( - "Cannot cancel steps for schedule_id='%s' since UNDO is running", + "Cannot cancel steps for schedule_id='%s' since REVERT is running", schedule_id, ) return @@ -140,7 +166,11 @@ async def cancel_operation(self, schedule_id: ScheduleId) -> None: group_index = await schedule_data_proxy.read("group_index") operation = OperationRegistry.get_operation(operation_name) - group = operation[group_index] + + if operation.is_cancellable is False: + raise OperationNotCancellableError(operation_name=operation_name) + + group = operation.step_groups[group_index] group_step_proxies = get_group_step_proxies( self._store, @@ -148,19 +178,18 @@ async def cancel_operation(self, schedule_id: ScheduleId) -> None: operation_name=operation_name, group_index=group_index, step_group=group, - 
is_creating=is_creating, + is_executing=is_executing, ) # not allowed to cancel while waiting for manual intervention - if any( - await limited_gather( - *( - get_requires_manual_intervention(step) - for step in group_step_proxies.values() - ), - limit=PARALLEL_REQUESTS, - ) - ): + require_manual_intervention = await limited_gather( + *( + get_requires_manual_intervention(step) + for step in group_step_proxies.values() + ), + limit=PARALLEL_REQUESTS, + ) + if any(require_manual_intervention): raise CannotCancelWhileWaitingForManualInterventionError( schedule_id=schedule_id ) @@ -198,12 +227,12 @@ async def restart_operation_step_stuck_in_error( schedule_data_proxy = ScheduleDataStoreProxy( store=self._store, schedule_id=schedule_id ) - is_creating = await schedule_data_proxy.read("is_creating") + is_executing = await schedule_data_proxy.read("is_executing") operation_name = await schedule_data_proxy.read("operation_name") group_index = await schedule_data_proxy.read("group_index") operation = OperationRegistry.get_operation(operation_name) - step_group = operation[group_index] + step_group = operation.step_groups[group_index] step_group_name = step_group.get_step_group_name(index=group_index) if step_name not in { @@ -221,7 +250,7 @@ async def restart_operation_step_stuck_in_error( operation_name=operation_name, step_group_name=step_group_name, step_name=step_name, - is_creating=is_creating, + is_executing=is_executing, ) try: @@ -255,7 +284,7 @@ async def restart_operation_step_stuck_in_error( schedule_id=schedule_id, operation_name=operation_name, step_group_name=step_group_name, - is_creating=is_creating, + is_executing=is_executing, ) # remove previus entries for the step @@ -270,12 +299,12 @@ async def restart_operation_step_stuck_in_error( step_name, operation_name, schedule_id, - "manual intervention" if in_manual_intervention else "error in undo", + "manual intervention" if in_manual_intervention else "error during revert", ) # restart only this step await 
start_and_mark_as_started( step_proxy, - is_creating=is_creating, + is_executing=is_executing, expected_steps_count=len(step_group), ) @@ -301,26 +330,26 @@ async def _on_schedule_event(self, schedule_id: ScheduleId) -> None: From this point onwards an `operation` can be advanced in one the following modes: - `CEREATEING`: default mode when starting an operation - - runs the `create()` of each step in each group (`first` -> `last` group) + - runs the `execute()` of each step in each group (`first` -> `last` group) - when done, it removes all operation data - - `UNDOING`: undo the actions of `create()` in reverse order with respect to CREATING - - runs the `undo()` of each step in each group (`current` -> `first` group) + - `REVERTING`: revert the actions of `execute()` in reverse order with respect to CREATING + - runs the `revert()` of each step in each group (`current` -> `first` group) - when done, it removes all operation data - - `REPEATING`: repeats the `create()` of all steps in a group - - waits and runs the `create()` of all the steps in last group in the operation + - `REPEATING`: repeats the `execute()` of all steps in a group + - waits and runs the `execute()` of all the steps in last group in the operation - never completes, unless operation is cancelled NOTE: `REPEATING` is triggered by setting `BaseStepGroup(repeat_steps=True)` during definition of an `operation`. - NOTE: `UNDOING` is triggered by calling `cancel_operation()` or when a step finishes with + NOTE: `REVERTING` is triggered by calling `cancel_operation()` or when a step finishes with status `FAILED` or `CANCELLED` (except in manual intervention). 
There are 3 reasons why an operation will hang: - - MANUAL_INTERVENTION: step failed during `create()` and flagged for manual intervention + - MANUAL_INTERVENTION: step failed during `execute()` and flagged for manual intervention -> requires support intervention - - STEP_ISSUE: a step failed during `undo()` due to an error in the step's undo code + - STEP_ISSUE: a step failed during `revert()` due to an error in the step's revert code -> unexpected behviour / requires developer intervention - - FRAMEWORK_ISSUE: a step failed during `undo()` because it was cancelled + - FRAMEWORK_ISSUE: a step failed during `revert()` because it was cancelled -> unexpected behviour / requires developer intervention NOTE: only MANUAL_INTERVENTION is an allowed to happen all other failuires are to be treated @@ -331,11 +360,11 @@ async def _on_schedule_event(self, schedule_id: ScheduleId) -> None: ) operation_name = await schedule_data_proxy.read("operation_name") - is_creating = await schedule_data_proxy.read("is_creating") + is_executing = await schedule_data_proxy.read("is_executing") group_index = await schedule_data_proxy.read("group_index") operation = OperationRegistry.get_operation(operation_name) - step_group = operation[group_index] + step_group = operation.step_groups[group_index] group_step_proxies = get_group_step_proxies( self._store, @@ -343,13 +372,13 @@ async def _on_schedule_event(self, schedule_id: ScheduleId) -> None: operation_name=operation_name, group_index=group_index, step_group=step_group, - is_creating=is_creating, + is_executing=is_executing, ) # 1) ensure all operation steps in the group are started before advancing if await start_steps_which_were_not_started( group_step_proxies, - is_creating=is_creating, + is_executing=is_executing, group_step_count=len(step_group), ): return @@ -374,7 +403,7 @@ async def _on_schedule_event(self, schedule_id: ScheduleId) -> None: step_group_name = step_group.get_step_group_name(index=group_index) base_message = 
f"{step_group_name=} in {operation_name=} for {schedule_id=}" - if step_group.repeat_steps is True and is_creating: + if step_group.repeat_steps is True and is_executing: with log_context(_logger, logging.DEBUG, f"REPEATING {base_message}"): await self._advance_as_repeating( schedule_data_proxy, @@ -385,7 +414,7 @@ async def _on_schedule_event(self, schedule_id: ScheduleId) -> None: group_step_proxies, ) - elif is_creating: + elif is_executing: with log_context(_logger, logging.DEBUG, f"CREATING {base_message}"): await self._advance_as_creating( steps_statuses, @@ -398,8 +427,8 @@ async def _on_schedule_event(self, schedule_id: ScheduleId) -> None: ) else: - with log_context(_logger, logging.DEBUG, f"UNDOING {base_message}"): - await self._advance_as_undoing( + with log_context(_logger, logging.DEBUG, f"REVERTING {base_message}"): + await self._advance_as_reverting( steps_statuses, schedule_data_proxy, schedule_id, @@ -419,7 +448,7 @@ async def _advance_as_repeating( ) -> None: # REPEATING logic: # 1) sleep before repeating - # 2) if any of the repeating steps was cancelled -> move to undo + # 2) if any of the repeating steps was cancelled -> move to revert # 3) -> restart all steps in the group step_proxies: Iterable[StepStoreProxy] = group_step_proxies.values() @@ -427,14 +456,14 @@ async def _advance_as_repeating( # 1) sleep before repeating await asyncio.sleep(current_step_group.wait_before_repeat.total_seconds()) - # 2) if any of the repeating steps was cancelled -> move to undo + # 2) if any of the repeating steps was cancelled -> move to revert # since some time passed, query all steps statuses again, # a cancellation request might have been requested steps_stauses = await get_steps_statuses(step_proxies) if any(status == StepStatus.CANCELLED for status in steps_stauses.values()): # NOTE: - await schedule_data_proxy.create_or_update("is_creating", value=False) + await schedule_data_proxy.create_or_update("is_executing", value=False) await 
enqueue_schedule_event(self.app, schedule_id) return @@ -447,7 +476,7 @@ async def _advance_as_repeating( schedule_id=schedule_id, operation_name=operation_name, step_group_name=current_step_group.get_step_group_name(index=group_index), - is_creating=True, + is_executing=True, ) await group_proxy.delete() await enqueue_schedule_event(self.app, schedule_id) @@ -465,9 +494,9 @@ async def _advance_as_creating( # CREATION logic: # 1) if all steps in group in SUUCESS # - 1a) -> move to next group - # - 1b) if reached the end of the CREATE operation -> remove all created data + # - 1b) if reached the end of the EXECUTE operation -> remove all created data [EMIT execute complete event] # 2) if manual intervention is required -> do nothing else - # 3) if any step in CANCELLED or FAILED (and not in manual intervention) -> move to undo + # 3) if any step in CANCELLED or FAILED (and not in manual intervention) -> move to revert # 1) if all steps in group in SUUCESS if all(status == StepStatus.SUCCESS for status in steps_statuses.values()): @@ -476,17 +505,40 @@ async def _advance_as_creating( try: next_group_index = group_index + 1 # does a next group exist? 
- _ = operation[next_group_index] + _ = operation.step_groups[next_group_index] await schedule_data_proxy.create_or_update( "group_index", value=next_group_index ) await enqueue_schedule_event(self.app, schedule_id) except IndexError: - # 1b) if reached the end of the CREATE operation -> remove all created data + # 1b) if reached the end of the EXECUTE operation -> remove all created data [EMIT execute complete event] + on_executed_proxy = OperationEventsProxy( + self._store, schedule_id, EventType.ON_EXECUTEDD_COMPLETED + ) + on_executed_operation_name: OperationName | None = None + on_executed_initial_context: OperationContext | None = None + if await on_executed_proxy.exists(): + on_executed_operation_name = await on_executed_proxy.read( + "operation_name" + ) + on_executed_initial_context = await on_executed_proxy.read( + "initial_context" + ) + await cleanup_after_finishing( - self._store, schedule_id=schedule_id, is_creating=True + self._store, schedule_id=schedule_id, is_executing=True ) + if ( + on_executed_operation_name is not None + and on_executed_initial_context is not None + ): + await enqueue_execute_completed_event( + self.app, + schedule_id, + on_executed_operation_name, + on_executed_initial_context, + ) return @@ -504,7 +556,7 @@ async def _advance_as_creating( index=group_index ), step_name=step.get_step_name(), - is_creating=True, + is_executing=True, ) await step_proxy.create_or_update( "requires_manual_intervention", value=True @@ -522,7 +574,7 @@ async def _advance_as_creating( ) return - # 3) if any step in CANCELLED or FAILED (and not in manual intervention) -> move to undo + # 3) if any step in CANCELLED or FAILED (and not in manual intervention) -> move to revert if any( s in {StepStatus.FAILED, StepStatus.CANCELLED} for s in steps_statuses.values() @@ -530,9 +582,9 @@ async def _advance_as_creating( with log_context( _logger, logging.DEBUG, - f"{operation_name=} was not successfull: {steps_statuses=}, moving to undo", + 
f"{operation_name=} was not successfull: {steps_statuses=}, moving to revert", ): - await schedule_data_proxy.create_or_update("is_creating", value=False) + await schedule_data_proxy.create_or_update("is_executing", value=False) await enqueue_schedule_event(self.app, schedule_id) return @@ -540,7 +592,7 @@ async def _advance_as_creating( direction="creation", steps_statuses=steps_statuses, schedule_id=schedule_id ) - async def _advance_as_undoing( + async def _advance_as_reverting( self, steps_statuses: dict[StepName, StepStatus], schedule_data_proxy: ScheduleDataStoreProxy, @@ -549,9 +601,9 @@ async def _advance_as_undoing( group_index: NonNegativeInt, current_step_group: BaseStepGroup, ) -> None: - # UNDO logic: + # REVERT logic: # 1) if all steps in group in SUCCESS - # - 1a) if reached the end of the UNDO operation -> remove all created data + # - 1a) if reached the end of the REVERT operation -> remove all created data [EMIT revert complete event] # - 1b) -> move to previous group # 2) it is unexpected to have a FAILED step -> do nothing else # 3) it is unexpected to have a CANCELLED step -> do nothing else @@ -561,10 +613,33 @@ async def _advance_as_undoing( previous_group_index = group_index - 1 if previous_group_index < 0: - # 1a) if reached the end of the UNDO operation -> remove all created data + # 1a) if reached the end of the REVERT operation -> remove all created data [EMIT revert complete event] + on_reverted_proxy = OperationEventsProxy( + self._store, schedule_id, EventType.ON_REVERT_COMPLETED + ) + on_reverted_operation_name: OperationName | None = None + on_reverted_initial_context: OperationContext | None = None + if await on_reverted_proxy.exists(): + on_reverted_operation_name = await on_reverted_proxy.read( + "operation_name" + ) + on_reverted_initial_context = await on_reverted_proxy.read( + "initial_context" + ) + await cleanup_after_finishing( - self._store, schedule_id=schedule_id, is_creating=False + self._store, schedule_id=schedule_id, 
is_executing=False ) + if ( + on_reverted_operation_name is not None + and on_reverted_initial_context is not None + ): + await enqueue_revert_completed_event( + self.app, + schedule_id, + on_reverted_operation_name, + on_reverted_initial_context, + ) return # 1b) -> move to previous group @@ -598,7 +673,7 @@ async def _advance_as_undoing( for step_name, traceback in error_tracebacks ) message = ( - f"Operation 'undo' for schedule_id='{schedule_id}' failed for steps: " + f"Operation 'revert' for schedule_id='{schedule_id}' failed for steps: " f"'{failed_step_names}'. Step code should never fail during destruction, " f"please report to developers:\n{formatted_tracebacks}" ) @@ -613,7 +688,7 @@ async def _advance_as_undoing( n for n, s in steps_statuses.items() if s == StepStatus.CANCELLED ]: message = ( - f"Operation 'undo' for schedule_id='{schedule_id}' was cancelled for steps: " + f"Operation 'revert' for schedule_id='{schedule_id}' was cancelled for steps: " f"{cancelled_step_names}. This should not happen, and should be addressed." 
) _logger.error(message) @@ -626,7 +701,7 @@ async def _advance_as_undoing( return raise UnexpectedStepHandlingError( - direction="undo", steps_statuses=steps_statuses, schedule_id=schedule_id + direction="revert", steps_statuses=steps_statuses, schedule_id=schedule_id ) @@ -634,24 +709,30 @@ async def start_operation( app: FastAPI, operation_name: OperationName, initial_operation_context: OperationContext, + *, + on_execute_completed: OperationToStart | None = None, + on_revert_completed: OperationToStart | None = None, ) -> ScheduleId: return await Core.get_from_app_state(app).start_operation( - operation_name, initial_operation_context + operation_name, + initial_operation_context, + on_execute_completed, + on_revert_completed, ) async def cancel_operation(app: FastAPI, schedule_id: ScheduleId) -> None: """ - Unstruct scheduler to undo all steps completed until + Unstruct scheduler to revert all steps completed until now for the running operation. - `undoing` refers to the act of undoing the effects of a step + `reverting` refers to the act of reverting the effects of a step that has already been completed (eg: remove a created network) """ await Core.get_from_app_state(app).cancel_operation(schedule_id) -async def restart_operation_step_stuck_in_manual_intervention_during_create( +async def restart_operation_step_stuck_in_manual_intervention_during_execute( app: FastAPI, schedule_id: ScheduleId, step_name: StepName ) -> None: """ @@ -667,14 +748,14 @@ async def restart_operation_step_stuck_in_manual_intervention_during_create( ) -async def restart_operation_step_stuck_during_undo( +async def restart_operation_step_stuck_during_revert( app: FastAPI, schedule_id: ScheduleId, step_name: StepName ) -> None: """ - Restarts a `stuck step` while the operation is being undone + Restarts a `stuck step` while the operation is being reverted `stuck step` is a step that has failed and exhausted all retries - `undoing` refers to the act of undoing the effects of a step + 
`reverting` refers to the act of reverting the effects of a step that has already been completed (eg: remove a created network) """ await Core.get_from_app_state(app).restart_operation_step_stuck_in_error( diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_core_utils.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_core_utils.py index bef6ef6fa6a1..01152a78115e 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_core_utils.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_core_utils.py @@ -75,7 +75,7 @@ async def get_steps_statuses( async def start_and_mark_as_started( step_proxy: StepStoreProxy, *, - is_creating: bool, + is_executing: bool, expected_steps_count: NonNegativeInt, ) -> None: await DeferredRunner.start( @@ -83,7 +83,7 @@ async def start_and_mark_as_started( operation_name=step_proxy.operation_name, step_group_name=step_proxy.step_group_name, step_name=step_proxy.step_name, - is_creating=is_creating, + is_executing=is_executing, expected_steps_count=expected_steps_count, ) await step_proxy.create_or_update_multiple( @@ -117,7 +117,7 @@ async def get_step_error_traceback( operation_name=operation_name, step_group_name=current_step_group.get_step_group_name(index=group_index), step_name=step_name, - is_creating=False, + is_executing=False, ) return step_name, await step_proxy.read("error_traceback") @@ -129,7 +129,7 @@ def get_group_step_proxies( operation_name: OperationName, group_index: NonNegativeInt, step_group: BaseStepGroup, - is_creating: bool, + is_executing: bool, ) -> dict[StepName, StepStoreProxy]: return { step.get_step_name(): StepStoreProxy( @@ -138,7 +138,7 @@ def get_group_step_proxies( operation_name=operation_name, step_group_name=step_group.get_step_group_name(index=group_index), step_name=step.get_step_name(), - 
is_creating=is_creating, + is_executing=is_executing, ) for step in step_group.get_step_subgroup_to_run() } @@ -168,7 +168,7 @@ async def _get_steps_to_start( async def start_steps_which_were_not_started( group_step_proxies: dict[StepName, StepStoreProxy], *, - is_creating: bool, + is_executing: bool, group_step_count: NonNegativeInt, ) -> bool: """retruns True if any step was started""" @@ -186,7 +186,7 @@ async def start_steps_which_were_not_started( *( start_and_mark_as_started( step_proxy, - is_creating=is_creating, + is_executing=is_executing, expected_steps_count=group_step_count, ) for step_proxy in to_start_step_proxies @@ -198,11 +198,11 @@ async def start_steps_which_were_not_started( async def cleanup_after_finishing( - store: Store, *, schedule_id: ScheduleId, is_creating: bool + store: Store, *, schedule_id: ScheduleId, is_executing: bool ) -> None: removal_proxy = OperationRemovalProxy(store=store, schedule_id=schedule_id) await removal_proxy.delete() - verb = "COMPLETED" if is_creating else "UNDONE" + verb = "COMPLETED" if is_executing else "REVERTED" _logger.debug("Operation for schedule_id='%s' %s successfully", verb, schedule_id) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_deferred_runner.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_deferred_runner.py index f3ee304c1923..26d3f895d967 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_deferred_runner.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_deferred_runner.py @@ -34,7 +34,7 @@ def get_step_store_proxy(context: DeferredContext) -> StepStoreProxy: operation_name: OperationName = context["operation_name"] step_group_name: StepGroupName = context["step_group_name"] step_name: StepName = context["step_name"] - is_creating = context["is_creating"] + is_executing = 
context["is_executing"] return StepStoreProxy( store=Store.get_from_app_state(app), @@ -42,7 +42,7 @@ def get_step_store_proxy(context: DeferredContext) -> StepStoreProxy: operation_name=operation_name, step_group_name=step_group_name, step_name=step_name, - is_creating=is_creating, + is_executing=is_executing, ) @@ -51,14 +51,14 @@ def get_step_group_proxy(context: DeferredContext) -> StepGroupProxy: schedule_id: ScheduleId = context["schedule_id"] operation_name: OperationName = context["operation_name"] step_group_name: StepGroupName = context["step_group_name"] - is_creating = context["is_creating"] + is_executing = context["is_executing"] return StepGroupProxy( store=Store.get_from_app_state(app), schedule_id=schedule_id, operation_name=operation_name, step_group_name=step_group_name, - is_creating=is_creating, + is_executing=is_executing, ) @@ -124,7 +124,7 @@ async def start( # type:ignore[override] # pylint:disable=arguments-differ operation_name: OperationName, step_group_name: StepGroupName, step_name: StepName, - is_creating: bool, + is_executing: bool, expected_steps_count: NonNegativeInt, ) -> DeferredContext: return { @@ -132,28 +132,28 @@ async def start( # type:ignore[override] # pylint:disable=arguments-differ "operation_name": operation_name, "step_group_name": step_group_name, "step_name": step_name, - "is_creating": is_creating, + "is_executing": is_executing, "expected_steps_count": expected_steps_count, } @classmethod async def get_retries(cls, context: DeferredContext) -> int: - is_creating = context["is_creating"] + is_executing = context["is_executing"] step = _get_step(context) return ( - await step.get_create_retries(context) - if is_creating - else await step.get_undo_retries(context) + await step.get_execute_retries(context) + if is_executing + else await step.get_revert_retries(context) ) @classmethod async def get_timeout(cls, context: DeferredContext) -> timedelta: - is_creating = context["is_creating"] + is_executing = 
context["is_executing"] step = _get_step(context) return ( - await step.get_create_wait_between_attempts(context) - if is_creating - else await step.get_undo_wait_between_attempts(context) + await step.get_execute_wait_between_attempts(context) + if is_executing + else await step.get_revert_wait_between_attempts(context) ) @classmethod @@ -165,7 +165,7 @@ async def on_created(cls, task_uid: TaskUID, context: DeferredContext) -> None: @classmethod async def run(cls, context: DeferredContext) -> None: app = context["app"] - is_creating = context["is_creating"] + is_executing = context["is_executing"] await get_step_store_proxy(context).create_or_update( "status", StepStatus.RUNNING @@ -175,31 +175,31 @@ async def run(cls, context: DeferredContext) -> None: operation_context_proxy = get_operation_context_proxy(context) - if is_creating: + if is_executing: required_context = await operation_context_proxy.read( - *step.get_create_requires_context_keys() + *step.get_execute_requires_context_keys() ) _raise_if_any_context_value_is_none(required_context) - step_provided_operation_context = await step.create(app, required_context) + step_provided_operation_context = await step.execute(app, required_context) provided_operation_context = step_provided_operation_context or {} - create_provides_keys = step.get_create_provides_context_keys() + execute_provides_keys = step.get_execute_provides_context_keys() _raise_if_provided_context_keys_are_missing_or_none( - provided_operation_context, create_provides_keys + provided_operation_context, execute_provides_keys ) else: required_context = await operation_context_proxy.read( - *step.get_undo_requires_context_keys() + *step.get_revert_requires_context_keys() ) _raise_if_any_context_value_is_none(required_context) - step_provided_operation_context = await step.undo(app, required_context) + step_provided_operation_context = await step.revert(app, required_context) provided_operation_context = step_provided_operation_context or {} - 
undo_provides_keys = step.get_undo_provides_context_keys() + revert_provides_keys = step.get_revert_provides_context_keys() _raise_if_provided_context_keys_are_missing_or_none( - provided_operation_context, undo_provides_keys + provided_operation_context, revert_provides_keys ) await operation_context_proxy.create_or_update(provided_operation_context) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_dependencies.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_dependencies.py new file mode 100644 index 000000000000..49fa770ec826 --- /dev/null +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_dependencies.py @@ -0,0 +1,30 @@ +from typing import TYPE_CHECKING + +from fastapi import FastAPI + +if TYPE_CHECKING: + from ._core import Core + from ._event_after import AfterEventManager + from ._event_scheduler import EventScheduler + +# NOTE: +# Due to circular dependencies it is not possible to use the following: +# - `Core.get_from_app_state(app)` +# - `AfterEventManager.get_from_app_state(app)` +# - `EventScheduler.get_from_app_state(app)` +# This module avoids issues with circular dependencies + + +def get_core(app: FastAPI) -> "Core": + core: Core = app.state.generic_scheduler_core + return core + + +def get_after_event_manager(app: FastAPI) -> "AfterEventManager": + after_event_manager: AfterEventManager = app.state.after_event_manager + return after_event_manager + + +def get_event_scheduler(app: FastAPI) -> "EventScheduler": + event_scheduler: EventScheduler = app.state.generic_scheduler_event_scheduler + return event_scheduler diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_errors.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_errors.py index bb24cac569e2..fc3b8e7d31e6 100644 --- 
a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_errors.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_errors.py @@ -49,6 +49,10 @@ class InitialOperationContextKeyNotAllowedError(BaseGenericSchedulerError): ) +class OperationNotCancellableError(BaseGenericSchedulerError): + msg_template: str = "Operation '{operation_name}' is not cancellable" + + class CannotCancelWhileWaitingForManualInterventionError(BaseGenericSchedulerError): msg_template: str = ( "Cannot cancel schedule_id='{schedule_id}' while one or more steps are waiting for manual intervention." diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_event.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_event.py index ed0a64456fcb..5476ff6bdf08 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_event.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_event.py @@ -1,13 +1,42 @@ -from typing import TYPE_CHECKING - from fastapi import FastAPI -from ._models import ScheduleId - -if TYPE_CHECKING: - from ._event_scheduler import EventScheduler +from ._dependencies import get_event_scheduler +from ._event_base_queue import OperationToStartEvent +from ._event_queues import ExecuteCompletedQueue, RevertCompletedQueue, ScheduleQueue +from ._models import OperationContext, OperationName, ScheduleId async def enqueue_schedule_event(app: FastAPI, schedule_id: ScheduleId) -> None: - event_scheduler: EventScheduler = app.state.generic_scheduler_event_scheduler - await event_scheduler.enqueue_schedule_event(schedule_id) + await get_event_scheduler(app).enqueue_message_for(ScheduleQueue, schedule_id) + + +async def enqueue_execute_completed_event( + app: FastAPI, + schedule_id: ScheduleId, + operation_name: 
OperationName, + initial_context: OperationContext, +) -> None: + await get_event_scheduler(app).enqueue_message_for( + ExecuteCompletedQueue, + OperationToStartEvent( + schedule_id=schedule_id, + operation_name=operation_name, + initial_context=initial_context, + ), + ) + + +async def enqueue_revert_completed_event( + app: FastAPI, + schedule_id: ScheduleId, + operation_name: OperationName, + initial_context: OperationContext, +) -> None: + await get_event_scheduler(app).enqueue_message_for( + RevertCompletedQueue, + OperationToStartEvent( + schedule_id=schedule_id, + operation_name=operation_name, + initial_context=initial_context, + ), + ) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_event_after.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_event_after.py new file mode 100644 index 000000000000..ed6932c577f5 --- /dev/null +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_event_after.py @@ -0,0 +1,82 @@ +import logging + +from fastapi import FastAPI +from servicelib.fastapi.app_state import SingletonInAppStateMixin +from servicelib.logging_utils import log_context + +from ._core import start_operation +from ._models import ( + EventType, + OperationContext, + OperationName, + OperationToStart, + ScheduleId, +) +from ._operation import OperationRegistry +from ._store import OperationEventsProxy, Store + +_logger = logging.getLogger(__name__) + + +class AfterEventManager(SingletonInAppStateMixin): + """ + Allows to register an operation to be started after + another operation ends the EXECUTED or REVERTED successfully. 
+ """ + + app_state_name: str = "after_event_manager" + + def __init__(self, app: FastAPI) -> None: + self.app = app + self._store = Store.get_from_app_state(app) + + async def register_to_start_after( + self, + schedule_id: ScheduleId, + event_type: EventType, + *, + to_start: OperationToStart, + ) -> None: + # ensure operation exists + OperationRegistry.get_operation(to_start.operation_name) + + events_proxy = OperationEventsProxy(self._store, schedule_id, event_type) + await events_proxy.create_or_update_multiple( + { + "initial_context": to_start.initial_context, + "operation_name": to_start.operation_name, + } + ) + _logger.debug( + "Registered event_type='%s' to_start='%s' for schedule_id='%s'", + event_type, + to_start, + schedule_id, + ) + + async def safe_on_event_type( + self, + event_type: EventType, + schedule_id: ScheduleId, + operation_name: OperationName, + initial_context: OperationContext, + ) -> None: + with log_context( + _logger, + logging.DEBUG, + f"processing {event_type=} for {schedule_id=} {operation_name=} {initial_context=}", + log_duration=True, + ): + + new_schedule_id = await start_operation( + self.app, operation_name, initial_context + ) + _logger.debug( + "Finished execution of event_type='%s' for schedule_id='%s'. 
" + "Started new_schedule_id='%s' from operation_name='%s' with initial_context='%s'", + event_type, + schedule_id, + new_schedule_id, + operation_name, + initial_context, + ) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_event_after_registration.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_event_after_registration.py new file mode 100644 index 000000000000..5956999077ac --- /dev/null +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_event_after_registration.py @@ -0,0 +1,31 @@ +from typing import TYPE_CHECKING + +from fastapi import FastAPI + +from ._models import EventType, OperationToStart, ScheduleId + +if TYPE_CHECKING: + from ._event_after import AfterEventManager + + +def _get_after_event_manager(app: FastAPI) -> "AfterEventManager": + # NOTE: could not use AfterEventManager.get_from_app_state(app) + # due to circular dependency + after_event_manager: AfterEventManager = app.state.after_event_manager + return after_event_manager + + +async def register_to_start_after_on_executed_completed( + app: FastAPI, schedule_id: ScheduleId, *, to_start: OperationToStart +) -> None: + await _get_after_event_manager(app).register_to_start_after( + schedule_id, EventType.ON_EXECUTEDD_COMPLETED, to_start=to_start + ) + + +async def register_to_start_after_on_reverted_completed( + app: FastAPI, schedule_id: ScheduleId, *, to_start: OperationToStart +) -> None: + await _get_after_event_manager(app).register_to_start_after( + schedule_id, EventType.ON_REVERT_COMPLETED, to_start=to_start + ) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_event_base_queue.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_event_base_queue.py new file mode 100644 index 000000000000..71a0a8ed15af --- /dev/null +++ 
b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_event_base_queue.py @@ -0,0 +1,91 @@ +import functools +import logging +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import Final + +from fastapi import FastAPI +from faststream.exceptions import FastStreamException, RejectMessage +from faststream.rabbit import RabbitExchange, RabbitQueue, RabbitRouter +from faststream.rabbit.schemas.queue import ClassicQueueArgs + +from ._models import OperationContext, OperationName, ScheduleId + +_logger = logging.getLogger(__name__) + + +EXCHANGE_NAME: Final[str] = "dynamic-scheduler-events" + + +def _get_global_queue( + queue_name: str, arguments: ClassicQueueArgs | None = None +) -> RabbitQueue: + return RabbitQueue( + f"{EXCHANGE_NAME}_{queue_name}", durable=True, arguments=arguments + ) + + +def _stop_retry_for_unintended_errors(func): + """ + Stops FastStream's retry chain when an unexpected error is raised (bug or otherwise). + This is especially important when the subscribers have ``retry=True``. + + Only propagate FastStream errors that handle message acknowledgement. + """ + + @functools.wraps(func) + async def wrapper(*args, **kwargs): + try: + return await func(*args, **kwargs) + except Exception as e: + if isinstance(e, FastStreamException): + # if there are issues with RabbitMQ or FastStream (core dependencies) + # message is always retried + raise + + msg = ( + "Unexpected error. Aborting message retry. 
" + f"Please check code at: '{func.__module__}.{func.__name__}'" + ) + _logger.exception(msg) + raise RejectMessage from e + + return wrapper + + +@dataclass +class OperationToStartEvent: + schedule_id: ScheduleId + operation_name: OperationName + initial_context: OperationContext + + +@dataclass +class BaseEventQueue(ABC): + app: FastAPI + router: RabbitRouter + exchange: RabbitExchange + + _queue: RabbitQueue | None = None + + @classmethod + def get_queue_name(cls) -> str: + return cls.__name__ + + @property + def queue(self) -> RabbitQueue: + assert self._queue is not None # nosec + return self._queue + + def __post_init__(self): + self._queue = _get_global_queue(queue_name=self.get_queue_name()) + + # apply decorators + handler = _stop_retry_for_unintended_errors(self.handler) + handler = self.router.subscriber( + queue=self._queue, exchange=self.exchange, retry=True + )(handler) + + @abstractmethod + async def handler(self, **kwargs) -> None: + """implement actions to take after event is received""" diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_event_queues.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_event_queues.py new file mode 100644 index 000000000000..4fbd43ab7f9b --- /dev/null +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_event_queues.py @@ -0,0 +1,34 @@ +from ._dependencies import get_after_event_manager, get_core +from ._event_base_queue import BaseEventQueue, OperationToStartEvent +from ._models import EventType, ScheduleId + + +class ScheduleQueue(BaseEventQueue): + async def handler( # type:ignore[override] # pylint:disable=arguments-differ + self, schedule_id: ScheduleId + ) -> None: + await get_core(self.app).safe_on_schedule_event(schedule_id) + + +class ExecuteCompletedQueue(BaseEventQueue): + async def handler( # type:ignore[override] # pylint:disable=arguments-differ + self, 
event: OperationToStartEvent + ) -> None: + await get_after_event_manager(self.app).safe_on_event_type( + EventType.ON_EXECUTEDD_COMPLETED, + event.schedule_id, + event.operation_name, + event.initial_context, + ) + + +class RevertCompletedQueue(BaseEventQueue): + async def handler( # type:ignore[override] # pylint:disable=arguments-differ + self, event: OperationToStartEvent + ) -> None: + await get_after_event_manager(self.app).safe_on_event_type( + EventType.ON_REVERT_COMPLETED, + event.schedule_id, + event.operation_name, + event.initial_context, + ) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_event_scheduler.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_event_scheduler.py index 12c36d15131e..8e0f5bec2d7c 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_event_scheduler.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_event_scheduler.py @@ -1,64 +1,19 @@ -import functools import logging -from typing import Final from fastapi import FastAPI -from faststream.exceptions import FastStreamException, RejectMessage from faststream.rabbit import ( ExchangeType, RabbitBroker, RabbitExchange, - RabbitQueue, RabbitRouter, ) -from faststream.rabbit.schemas.queue import ClassicQueueArgs +from faststream.rabbit.types import AioPikaSendableMessage from servicelib.fastapi.app_state import SingletonInAppStateMixin from ...core.settings import ApplicationSettings -from ._core import Core +from ._event_base_queue import EXCHANGE_NAME, BaseEventQueue +from ._event_queues import ExecuteCompletedQueue, RevertCompletedQueue, ScheduleQueue from ._lifecycle_protocol import SupportsLifecycle -from ._models import ScheduleId - -_logger = logging.getLogger(__name__) - - -_EXCHANGE_NAME: Final[str] = __name__ - - -def _get_global_queue( - queue_name: str, arguments: 
ClassicQueueArgs | None = None -) -> RabbitQueue: - return RabbitQueue( - f"{_EXCHANGE_NAME}_{queue_name}", durable=True, arguments=arguments - ) - - -def _stop_retry_for_unintended_errors(func): - """ - Stops FastStream's retry chain when an unexpected error is raised (bug or otherwise). - This is especially important when the subscribers have ``retry=True``. - - Only propagate FastStream error that handle message acknowledgement. - """ - - @functools.wraps(func) - async def wrapper(*args, **kwargs): - try: - return await func(*args, **kwargs) - except Exception as e: - if isinstance(e, FastStreamException): - # if there are issues with Redis or FastStream (core dependencies) - # message is always retried - raise - - msg = ( - "Unexpected error. Aborting message retry. " - f"Please check code at: '{func.__module__}.{func.__name__}'" - ) - _logger.exception(msg) - raise RejectMessage from e - - return wrapper class EventScheduler(SingletonInAppStateMixin, SupportsLifecycle): @@ -76,36 +31,29 @@ def __init__(self, app: FastAPI) -> None: ) self._router: RabbitRouter = RabbitRouter() self._exchange = RabbitExchange( - _EXCHANGE_NAME, durable=True, type=ExchangeType.DIRECT + EXCHANGE_NAME, durable=True, type=ExchangeType.DIRECT ) - self._queue_schedule_event = _get_global_queue(queue_name="schedule_queue") - @_stop_retry_for_unintended_errors - async def _on_safe_on_schedule_event( # pylint:disable=method-hidden - self, schedule_id: ScheduleId - ) -> None: - await Core.get_from_app_state(self.app).safe_on_schedule_event(schedule_id) + self._queues: dict[str, BaseEventQueue] = { + queue_class.get_queue_name(): queue_class(app, self._router, self._exchange) + for queue_class in ( + ScheduleQueue, + ExecuteCompletedQueue, + RevertCompletedQueue, + ) + } - async def enqueue_schedule_event(self, schedule_id: ScheduleId) -> None: + async def enqueue_message_for( + self, queue_class: type[BaseEventQueue], message: AioPikaSendableMessage + ) -> None: await 
self._broker.publish( - schedule_id, - queue=self._queue_schedule_event, + message, + queue=self._queues[queue_class.get_queue_name()].queue, exchange=self._exchange, ) - def _register_subscribers(self) -> None: - # pylint:disable=unexpected-keyword-arg - # pylint:disable=no-value-for-parameter - self._on_safe_on_schedule_event = self._router.subscriber( - queue=self._queue_schedule_event, - exchange=self._exchange, - retry=True, - )(self._on_safe_on_schedule_event) - async def setup(self) -> None: - self._register_subscribers() self._broker.include_router(self._router) - await self._broker.start() async def shutdown(self) -> None: diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_lifespan.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_lifespan.py index 5e801014b159..268464100e80 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_lifespan.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_lifespan.py @@ -5,6 +5,7 @@ from ...core.settings import ApplicationSettings from ._core import Core +from ._event_after import AfterEventManager from ._event_scheduler import EventScheduler from ._lifecycle_protocol import SupportsLifecycle from ._store import Store @@ -19,6 +20,9 @@ async def generic_scheduler_lifespan(app: FastAPI) -> AsyncIterator[State]: # core Core(app).set_to_app_state(app) + # after event manager + AfterEventManager(app).set_to_app_state(app) + # event scheduler event_scheduler = EventScheduler(app) event_scheduler.set_to_app_state(app) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_models.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_models.py index 15a084b3ae12..b83199836bd6 100644 --- 
a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_models.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_models.py @@ -1,3 +1,4 @@ +from dataclasses import dataclass from enum import auto from typing import Annotated, Any, Final, TypeAlias @@ -15,9 +16,9 @@ # contains all inputs and outpus of each step in the operation OperationContext: TypeAlias = dict[str, Any] -# the inputs of `create` or `undo` of a step +# the inputs of `execute` or `revert` of a step RequiredOperationContext: TypeAlias = dict[str, Any] -# the outputs of `create` or `undo` of a step +# the outputs of `execute` or `revert` of a step ProvidedOperationContext: TypeAlias = dict[str, Any] @@ -39,3 +40,14 @@ class StepStatus(StrAutoEnum): class OperationErrorType(StrAutoEnum): FRAMEWORK_ISSUE = auto() STEP_ISSUE = auto() + + +class EventType(StrAutoEnum): + ON_EXECUTEDD_COMPLETED = auto() + ON_REVERT_COMPLETED = auto() + + +@dataclass(frozen=True) +class OperationToStart: + operation_name: OperationName + initial_context: OperationContext diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_operation.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_operation.py index 7f9ecf448b5f..5fd88d034790 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_operation.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_operation.py @@ -29,35 +29,35 @@ class BaseStep(ABC): def get_step_name(cls) -> StepName: return cls.__name__ - ### CREATE + ### EXECUTE @classmethod @abstractmethod - async def create( + async def execute( cls, app: FastAPI, required_context: RequiredOperationContext ) -> ProvidedOperationContext | None: """ [mandatory] handler to be implemented with the code resposible for achieving a goal NOTE: 
Ensure this is successful if: - - `create` is called multiple times and does not cause duplicate resources + - `execute` is called multiple times and does not cause duplicate resources """ @classmethod - def get_create_requires_context_keys(cls) -> set[str]: + def get_execute_requires_context_keys(cls) -> set[str]: """ - [optional] keys that must be present in the OperationContext when CREATE is called + [optional] keys that must be present in the OperationContext when EXECUTE is called """ return set() @classmethod - def get_create_provides_context_keys(cls) -> set[str]: + def get_execute_provides_context_keys(cls) -> set[str]: """ - [optional] keys that will be added to the OperationContext when CREATE is successful + [optional] keys that will be added to the OperationContext when EXECUTE is successful """ return set() @classmethod - async def get_create_retries(cls, context: DeferredContext) -> int: + async def get_execute_retries(cls, context: DeferredContext) -> int: """ [optional] amount of retires in case of creation HINT: you can use `get_operation_context_proxy()`, `get_step_group_proxy(context)` @@ -67,7 +67,7 @@ async def get_create_retries(cls, context: DeferredContext) -> int: return _DEFAULT_STEP_RETRIES @classmethod - async def get_create_wait_between_attempts( + async def get_execute_wait_between_attempts( cls, context: DeferredContext ) -> timedelta: """ @@ -85,39 +85,39 @@ def wait_for_manual_intervention(cls) -> bool: """ return _DEFAULT_WAIT_FOR_MANUAL_INTERVENTION - ### UNDO + ### REVERT @classmethod - async def undo( + async def revert( cls, app: FastAPI, required_context: RequiredOperationContext ) -> ProvidedOperationContext | None: """ - [optional] handler responsible for cleanup of resources created above. + [optional] handler responsible for cleanup of resources executed above. 
NOTE: Ensure this is successful if: - - `create` is not executed - - `create` is executed partially - - `undo` is called multiple times + - `execute` is not executed + - `execute` is executed partially + - `revert` is called multiple times """ _ = required_context _ = app return {} @classmethod - def get_undo_requires_context_keys(cls) -> set[str]: + def get_revert_requires_context_keys(cls) -> set[str]: """ - [optional] keys that must be present in the OperationContext when UNDO is called + [optional] keys that must be present in the OperationContext when REVERT is called """ return set() @classmethod - def get_undo_provides_context_keys(cls) -> set[str]: + def get_revert_provides_context_keys(cls) -> set[str]: """ - [optional] keys that will be added to the OperationContext when UNDO is successful + [optional] keys that will be added to the OperationContext when REVERT is successful """ return set() @classmethod - async def get_undo_retries(cls, context: DeferredContext) -> int: + async def get_revert_retries(cls, context: DeferredContext) -> int: """ [optional] amount of retires in case of failure HINT: you can use `get_operation_context_proxy()`, `get_step_group_proxy(context)` @@ -127,7 +127,7 @@ async def get_undo_retries(cls, context: DeferredContext) -> int: return _DEFAULT_STEP_RETRIES @classmethod - async def get_undo_wait_between_attempts( + async def get_revert_wait_between_attempts( cls, context: DeferredContext ) -> timedelta: """ @@ -229,7 +229,15 @@ def get_step_subgroup_to_run(self) -> StepsSubGroup: return TypeAdapter(StepsSubGroup).validate_python(tuple(self._steps)) -Operation: TypeAlias = Annotated[list[BaseStepGroup], Field(min_length=1)] +class Operation: + def __init__( + self, *step_groups: BaseStepGroup, is_cancellable: bool = True + ) -> None: + self.step_groups = list(step_groups) + self.is_cancellable = is_cancellable + + def __repr__(self) -> str: + return f"{self.__class__.__name__}({', '.join(repr(group) for group in 
self.step_groups)})" def _has_abstract_methods(cls: type[object]) -> bool: @@ -237,12 +245,18 @@ def _has_abstract_methods(cls: type[object]) -> bool: @validate_call(config={"arbitrary_types_allowed": True}) -def _validate_operation(operation: Operation) -> dict[StepName, type[BaseStep]]: +def _validate_operation( # noqa: C901 + operation: Operation, +) -> dict[StepName, type[BaseStep]]: + if len(operation.step_groups) == 0: + msg = f"{Operation.__name__} should have at least 1 item" + raise ValueError(msg) + detected_steps_names: dict[StepName, type[BaseStep]] = {} - create_provided_keys: set[str] = set() - undo_provided_keys: set[str] = set() + execute_provided_keys: set[str] = set() + revert_provided_keys: set[str] = set() - for k, step_group in enumerate(operation): + for k, step_group in enumerate(operation.step_groups): if ( isinstance(step_group, ParallelStepGroup) and len(step_group.steps) < _MIN_PARALLEL_STEPS @@ -253,7 +267,7 @@ def _validate_operation(operation: Operation) -> dict[StepName, type[BaseStep]]: ) raise ValueError(msg) - if k < len(operation) - 1 and step_group.repeat_steps is True: + if k < len(operation.step_groups) - 1 and step_group.repeat_steps is True: msg = f"Only the last step group can have repeat_steps=True. 
Error at index {k=}" raise ValueError(msg) @@ -270,26 +284,26 @@ def _validate_operation(operation: Operation) -> dict[StepName, type[BaseStep]]: detected_steps_names[step_name] = step - for key in step.get_create_provides_context_keys(): - if key in create_provided_keys: + for key in step.get_execute_provides_context_keys(): + if key in execute_provided_keys: msg = ( f"Step {step_name=} provides already provided {key=} in " - f"{step.get_create_provides_context_keys.__name__}()" + f"{step.get_execute_provides_context_keys.__name__}()" ) raise ValueError(msg) - create_provided_keys.add(key) - for key in step.get_undo_provides_context_keys(): - if key in undo_provided_keys: + execute_provided_keys.add(key) + for key in step.get_revert_provides_context_keys(): + if key in revert_provided_keys: msg = ( f"Step {step_name=} provides already provided {key=} in " - f"{step.get_undo_provides_context_keys.__name__}()" + f"{step.get_revert_provides_context_keys.__name__}()" ) raise ValueError(msg) - undo_provided_keys.add(key) + revert_provided_keys.add(key) if ( step_group.repeat_steps is True - and k == len(operation) - 1 + and k == len(operation.step_groups) - 1 and any( step.wait_for_manual_intervention() for step in step_group.get_step_subgroup_to_run() @@ -307,10 +321,10 @@ def _validate_operation(operation: Operation) -> dict[StepName, type[BaseStep]]: def get_operation_provided_context_keys(operation: Operation) -> set[str]: provided_keys: set[str] = set() - for step_group in operation: + for step_group in operation.step_groups: for step in step_group.get_step_subgroup_to_run(): - provided_keys.update(step.get_create_provides_context_keys()) - provided_keys.update(step.get_undo_provides_context_keys()) + provided_keys.update(step.get_execute_provides_context_keys()) + provided_keys.update(step.get_revert_provides_context_keys()) return provided_keys diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_store.py 
b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_store.py index f0392f5a4017..87c7f9517f29 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_store.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/generic_scheduler/_store.py @@ -12,6 +12,8 @@ from ._errors import NoDataFoundError from ._lifecycle_protocol import SupportsLifecycle from ._models import ( + EventType, + OperationContext, OperationErrorType, OperationName, ProvidedOperationContext, @@ -26,16 +28,17 @@ _STEPS_KEY: Final[str] = "STEPS" _GROUPS_KEY: Final[str] = "GROUPS" _OPERATION_CONTEXT_KEY: Final[str] = "OP_CTX" +_EVENTS_KEY: Final[str] = "EVENTS" -def _get_is_creating_str(*, is_creating: bool) -> str: - return "C" if is_creating else "U" +def _get_is_executing_str(*, is_executing: bool) -> str: + return "E" if is_executing else "R" def _get_scheduler_data_hash_key(*, schedule_id: ScheduleId) -> str: # SCHEDULE_NAMESPACE:SCHEDULE_ID # - SCHEDULE_NAMESPACE: namespace prefix - # - SCHEDULE_ID: the unique scheudle_id assigned + # - SCHEDULE_ID: the unique schedule_id assigned # Example: # - SCH:00000000-0000-0000-0000-000000000000 return f"{_SCHEDULE_NAMESPACE}:{schedule_id}" @@ -47,21 +50,21 @@ def _get_step_hash_key( operation_name: OperationName, group_name: StepGroupName, step_name: StepName, - is_creating: bool, + is_executing: bool, ) -> str: - # SCHEDULE_NAMESPACE:SCHEDULE_ID:STEPS:OPERATION_NAME:GROUP_SHORT_NAME:STEP_NAME:IS_CREATING + # SCHEDULE_NAMESPACE:SCHEDULE_ID:STEPS:OPERATION_NAME:GROUP_SHORT_NAME:STEP_NAME:IS_EXECUTING # - SCHEDULE_NAMESPACE: namespace prefix - # - SCHEDULE_ID: the unique scheudle_id assigned + # - SCHEDULE_ID: the unique schedule_id assigned # - CONSTANT: the constant "STEPS" # - OPERATION_NAME form the vairble's name during registration # - GROUP_SHORT_NAME # -> "{index}(S|P)[R]": S=single or P=parallel and optinally, "R" if steps 
should be repeated forever - # - IS_CREATING: "C" (create) or "U" (undo) + # - IS_EXECUTING: "E" (execute) or "R" (revert) # - STEP_NAME form it's class # Example: # - SCH:00000000-0000-0000-0000-000000000000:STEPS:START_SERVICE:0S:C:BS1 - is_creating_str = _get_is_creating_str(is_creating=is_creating) - return f"{_SCHEDULE_NAMESPACE}:{schedule_id}:{_STEPS_KEY}:{operation_name}:{group_name}:{is_creating_str}:{step_name}" + is_executing_str = _get_is_executing_str(is_executing=is_executing) + return f"{_SCHEDULE_NAMESPACE}:{schedule_id}:{_STEPS_KEY}:{operation_name}:{group_name}:{is_executing_str}:{step_name}" def _get_group_hash_key( @@ -69,20 +72,20 @@ def _get_group_hash_key( schedule_id: ScheduleId, operation_name: OperationName, group_name: StepGroupName, - is_creating: bool, + is_executing: bool, ) -> str: - # SCHEDULE_NAMESPACE:SCHEDULE_ID:GROUPS:OPERATION_NAME:GROUP_SHORT_NAME:IS_CREATING + # SCHEDULE_NAMESPACE:SCHEDULE_ID:GROUPS:OPERATION_NAME:GROUP_SHORT_NAME:IS_EXECUTING # - SCHEDULE_NAMESPACE: namespace prefix - # - SCHEDULE_ID: the unique scheudle_id assigned + # - SCHEDULE_ID: the unique schedule_id assigned # - CONSTANT: the constant "GROUPS" # - OPERATION_NAME form the vairble's name during registration # - GROUP_SHORT_NAME # -> "{index}(S|P)[R]": S=single or P=parallel and optinally, "R" if steps should be repeated forever - # - IS_CREATING: "C" (create) or "U" (undo) + # - IS_EXECUTING: "E" (execute) or "R" (revert) # Example: # - SCH:00000000-0000-0000-0000-000000000000:GROUPS:START_SERVICE:0S:C - is_creating_str = _get_is_creating_str(is_creating=is_creating) - return f"{_SCHEDULE_NAMESPACE}:{schedule_id}:{_GROUPS_KEY}:{operation_name}:{group_name}:{is_creating_str}" + is_executing_str = _get_is_executing_str(is_executing=is_executing) + return f"{_SCHEDULE_NAMESPACE}:{schedule_id}:{_GROUPS_KEY}:{operation_name}:{group_name}:{is_executing_str}" def _get_operation_context_hash_key( @@ -90,7 +93,7 @@ def _get_operation_context_hash_key( ) -> str: # 
SCHEDULE_NAMESPACE:SCHEDULE_ID:STEPS:OPERATION_NAME # - SCHEDULE_NAMESPACE: namespace prefix - # - SCHEDULE_ID: the unique scheudle_id assigned + # - SCHEDULE_ID: the unique schedule_id assigned # - CONSTANT: the constant "OP_CTX" # - OPERATION_NAME form the vairble's name during registration # Example: @@ -100,6 +103,19 @@ def _get_operation_context_hash_key( ) +def _get_schedule_events_hash_key( + *, schedule_id: ScheduleId, event_type: EventType +) -> str: + # SCHEDULE_NAMESPACE:SCHEDULE_ID:EVENTS:EVENT_TYPE + # - SCHEDULE_NAMESPACE: namespace prefix + # - SCHEDULE_ID: the unique schedule_id assigned + # - CONSTANT: the constant "EVENTS" + # - EVENT_TYPE: name of the event from EventType + # Example: + # - SCH:00000000-0000-0000-0000-000000000000:EVENTS:ON_... + return f"{_SCHEDULE_NAMESPACE}:{schedule_id}:{_EVENTS_KEY}:{event_type}" + + class Store(SingletonInAppStateMixin, SupportsLifecycle): """ Interface to Redis, shuld not use directly but use the @@ -143,7 +159,7 @@ async def set_key_in_hash(self, hash_key: str, key: str, value: Any) -> None: """saves a single key-value pair in a hash""" await self.set_keys_in_hash(hash_key, {key: value}) - async def get_key_from_hash(self, hash_key: str, *keys: str) -> tuple[Any, ...]: + async def get_keys_from_hash(self, hash_key: str, *keys: str) -> tuple[Any, ...]: """retrieves one or more keys from a hash""" result: list[str | None] = await handle_redis_returns_union_types( self.redis.hmget(hash_key, list(keys)) @@ -176,11 +192,17 @@ async def delete(self, *keys: str) -> None: """removes keys from redis""" await handle_redis_returns_union_types(self.redis.delete(*keys)) + async def exists(self, hash_key: str) -> bool: + result: bool = ( + await handle_redis_returns_union_types(self.redis.exists(hash_key)) == 1 + ) + return result + class _UpdateScheduleDataDict(TypedDict): operation_name: NotRequired[OperationName] group_index: NotRequired[NonNegativeInt] - is_creating: NotRequired[bool] + is_executing: 
NotRequired[bool] operation_error_type: NotRequired[OperationErrorType] operation_error_message: NotRequired[str] @@ -188,7 +210,7 @@ class _UpdateScheduleDataDict(TypedDict): _DeleteScheduleDataKeys = Literal[ "operation_name", "group_index", - "is_creating", + "is_executing", "operation_error_type", "operation_error_message", ] @@ -207,7 +229,7 @@ async def read(self, key: Literal["operation_name"]) -> OperationName: ... @overload async def read(self, key: Literal["group_index"]) -> NonNegativeInt: ... @overload - async def read(self, key: Literal["is_creating"]) -> bool: ... + async def read(self, key: Literal["is_executing"]) -> bool: ... @overload async def read( self, key: Literal["operation_error_type"] @@ -217,7 +239,7 @@ async def read(self, key: Literal["operation_error_message"]) -> str: ... async def read(self, key: str) -> Any: """raises NoDataFoundError if the key is not present in the hash""" hash_key = self._get_hash_key() - (result,) = await self._store.get_key_from_hash(hash_key, key) + (result,) = await self._store.get_keys_from_hash(hash_key, key) if result is None: raise NoDataFoundError(key=key, hash_key=hash_key) return result @@ -232,7 +254,7 @@ async def create_or_update( ) -> None: ... @overload async def create_or_update( - self, key: Literal["is_creating"], *, value: bool + self, key: Literal["is_executing"], *, value: bool ) -> None: ... 
@overload async def create_or_update( @@ -260,20 +282,20 @@ def __init__( schedule_id: ScheduleId, operation_name: OperationName, step_group_name: StepGroupName, - is_creating: bool, + is_executing: bool, ) -> None: self._store = store self.schedule_id = schedule_id self.operation_name = operation_name self.step_group_name = step_group_name - self.is_creating = is_creating + self.is_executing = is_executing def _get_hash_key(self) -> str: return _get_group_hash_key( schedule_id=self.schedule_id, operation_name=self.operation_name, group_name=self.step_group_name, - is_creating=self.is_creating, + is_executing=self.is_executing, ) async def increment_and_get_done_steps_count(self) -> NonNegativeInt: @@ -316,14 +338,14 @@ def __init__( operation_name: OperationName, step_group_name: StepGroupName, step_name: StepName, - is_creating: bool, + is_executing: bool, ) -> None: self._store = store self.schedule_id = schedule_id self.operation_name = operation_name self.step_group_name = step_group_name self.step_name = step_name - self.is_creating = is_creating + self.is_executing = is_executing def _get_hash_key(self) -> str: return _get_step_hash_key( @@ -331,7 +353,7 @@ def _get_hash_key(self) -> str: operation_name=self.operation_name, group_name=self.step_group_name, step_name=self.step_name, - is_creating=self.is_creating, + is_executing=self.is_executing, ) @overload @@ -347,9 +369,9 @@ async def read(self, key: Literal["deferred_created"]) -> bool: ... 
async def read(self, key: str) -> Any: """raises NoDataFoundError if the key is not present in the hash""" hash_key = self._get_hash_key() - (result,) = await self._store.get_key_from_hash(hash_key, key) + (result,) = await self._store.get_keys_from_hash(hash_key, key) if result is None: - raise NoDataFoundError(schedule_id=self.schedule_id, hash_key=hash_key) + raise NoDataFoundError(key=key, hash_key=hash_key) return result @overload @@ -413,13 +435,64 @@ async def read(self, *keys: str) -> RequiredOperationContext: return {} hash_key = self._get_hash_key() - result = await self._store.get_key_from_hash(hash_key, *keys) + result = await self._store.get_keys_from_hash(hash_key, *keys) return dict(zip(keys, result, strict=True)) async def delete(self) -> None: await self._store.delete(self._get_hash_key()) +class _EventDict(TypedDict): + operation_name: NotRequired[OperationName] + initial_context: NotRequired[OperationContext] + + +class OperationEventsProxy: + def __init__( + self, store: Store, schedule_id: ScheduleId, event_type: EventType + ) -> None: + self._store = store + self.schedule_id = schedule_id + self.event_type = event_type + + def _get_hash_key(self) -> str: + return _get_schedule_events_hash_key( + schedule_id=self.schedule_id, event_type=self.event_type + ) + + @overload + async def create_or_update( + self, key: Literal["initial_context"], value: OperationContext + ) -> None: ... + @overload + async def create_or_update( + self, key: Literal["operation_name"], value: OperationName + ) -> None: ... + async def create_or_update(self, key: str, value: Any) -> None: + await self._store.set_key_in_hash(self._get_hash_key(), key, value) + + async def create_or_update_multiple(self, updates: _EventDict) -> None: + await self._store.set_keys_in_hash(self._get_hash_key(), updates=updates) # type: ignore[arg-type] + + @overload + async def read(self, key: Literal["operation_name"]) -> OperationName: ... 
+ @overload + async def read(self, key: Literal["initial_context"]) -> OperationContext: ... + async def read(self, key: str) -> Any: + """raises NoDataFoundError if the key is not present in the hash""" + hash_key = self._get_hash_key() + (result,) = await self._store.get_keys_from_hash(hash_key, key) + if result is None: + raise NoDataFoundError(key=key, hash_key=hash_key) + return result + + async def delete(self) -> None: + await self._store.delete(self._get_hash_key()) + + async def exists(self) -> bool: + return await self._store.exists(self._get_hash_key()) + + class OperationRemovalProxy: def __init__(self, *, store: Store, schedule_id: ScheduleId) -> None: self._store = store diff --git a/services/dynamic-scheduler/tests/unit/services/generic_scheduler/test__core.py b/services/dynamic-scheduler/tests/unit/services/generic_scheduler/test__core.py index 1f008d7f9c41..b963855b1fd3 100644 --- a/services/dynamic-scheduler/tests/unit/services/generic_scheduler/test__core.py +++ b/services/dynamic-scheduler/tests/unit/services/generic_scheduler/test__core.py @@ -13,6 +13,7 @@ import pytest from asgi_lifespan import LifespanManager +from simcore_service_dynamic_scheduler.services.generic_scheduler._errors import NoDataFoundError from fastapi import FastAPI from pydantic import NonNegativeInt, TypeAdapter from pytest_simcore.helpers.typing_env import EnvVarsDict @@ -29,16 +30,24 @@ RequiredOperationContext, ScheduleId, SingleStepGroup, + StepStoreProxy, cancel_operation, - restart_operation_step_stuck_during_undo, - restart_operation_step_stuck_in_manual_intervention_during_create, + restart_operation_step_stuck_during_revert, + restart_operation_step_stuck_in_manual_intervention_during_execute, start_operation, ) -from simcore_service_dynamic_scheduler.services.generic_scheduler._core import Core +from simcore_service_dynamic_scheduler.services.generic_scheduler._core import ( + Core, + Store, +) +from simcore_service_dynamic_scheduler.services.generic_scheduler._deferred_runner import ( + StepGroupName, +) from
simcore_service_dynamic_scheduler.services.generic_scheduler._errors import ( CannotCancelWhileWaitingForManualInterventionError, InitialOperationContextKeyNotAllowedError, OperationContextValueIsNoneError, + OperationNotCancellableError, ProvidedOperationContextKeysAreMissingError, StepNameNotInCurrentGroupError, StepNotInErrorStateError, @@ -47,9 +56,6 @@ from simcore_service_dynamic_scheduler.services.generic_scheduler._models import ( OperationContext, ) -from simcore_service_dynamic_scheduler.services.generic_scheduler._store import ( - Store, -) from tenacity import ( AsyncRetrying, retry_if_exception_type, @@ -57,14 +63,15 @@ wait_fixed, ) from utils import ( - CREATED, - UNDONE, + EXECUTED, + REVERTED, BaseExpectedStepOrder, - CreateRandom, - CreateSequence, - UndoRandom, - UndoSequence, + ExecuteRandom, + ExecuteSequence, + RevertRandom, + RevertSequence, ensure_expected_order, + ensure_keys_in_store, ) pytest_simcore_core_services_selection = [ @@ -150,39 +157,39 @@ def steps_call_order() -> Iterable[list[tuple[str, str]]]: class _BS(BaseStep): @classmethod - async def create( + async def execute( cls, app: FastAPI, required_context: RequiredOperationContext ) -> ProvidedOperationContext | None: _ = app _ = required_context - _STEPS_CALL_ORDER.append((cls.__name__, CREATED)) + _STEPS_CALL_ORDER.append((cls.__name__, EXECUTED)) return { **required_context, - **{k: _CTX_VALUE for k in cls.get_create_provides_context_keys()}, + **{k: _CTX_VALUE for k in cls.get_execute_provides_context_keys()}, } @classmethod - async def undo( + async def revert( cls, app: FastAPI, required_context: RequiredOperationContext ) -> ProvidedOperationContext | None: _ = app _ = required_context - _STEPS_CALL_ORDER.append((cls.__name__, UNDONE)) + _STEPS_CALL_ORDER.append((cls.__name__, REVERTED)) return { **required_context, - **{k: _CTX_VALUE for k in cls.get_undo_provides_context_keys()}, + **{k: _CTX_VALUE for k in cls.get_revert_provides_context_keys()}, } -class 
_UndoBS(_BS): +class _RevertBS(_BS): @classmethod - async def create( + async def execute( cls, app: FastAPI, required_context: RequiredOperationContext ) -> ProvidedOperationContext | None: - await super().create(app, required_context) - msg = "always fails only on CREATE" + await super().execute(app, required_context) + msg = "always fails only on EXECUTE" raise RuntimeError(msg) @@ -201,42 +208,42 @@ def reset_step_issue_tracker() -> Iterable[None]: _GlobalStepIssueTracker.has_issue = True -class _FailOnCreateAndUndoBS(_BS): +class _FailOnExecuteAndRevertBS(_BS): @classmethod - async def create( + async def execute( cls, app: FastAPI, required_context: RequiredOperationContext ) -> ProvidedOperationContext | None: - await super().create(app, required_context) - msg = "always fails on CREATE" + await super().execute(app, required_context) + msg = "always fails on EXECUTE" raise RuntimeError(msg) @classmethod - async def undo( + async def revert( cls, app: FastAPI, required_context: RequiredOperationContext ) -> ProvidedOperationContext | None: - await super().undo(app, required_context) + await super().revert(app, required_context) if _GlobalStepIssueTracker.has_issue: - msg = "sometimes fails only on UNDO" + msg = "sometimes fails only on REVERT" raise RuntimeError(msg) class _SleepsForeverBS(_BS): @classmethod - async def create( + async def execute( cls, app: FastAPI, required_context: RequiredOperationContext ) -> ProvidedOperationContext | None: - await super().create(app, required_context) + await super().execute(app, required_context) await asyncio.sleep(1e10) class _WaitManualInerventionBS(_BS): @classmethod - async def create( + async def execute( cls, app: FastAPI, required_context: RequiredOperationContext ) -> ProvidedOperationContext | None: - await super().create(app, required_context) + await super().execute(app, required_context) if _GlobalStepIssueTracker.has_issue: - msg = "sometimes fails only on CREATE" + msg = "sometimes fails only on 
EXECUTE" raise RuntimeError(msg) @classmethod @@ -249,19 +256,19 @@ def _get_steps_matching_class( ) -> list[type]: return [ step - for group in operation + for group in operation.step_groups for step in group.get_step_subgroup_to_run() if issubclass(step, match) ] def _compose_key( - key_nuber: int | None, *, with_undo: bool, is_creating: bool, is_providing: bool + key_nuber: int | None, *, with_revert: bool, is_executing: bool, is_providing: bool ) -> str: key_parts = [ "bs", - "undo" if with_undo else "", - "c" if is_creating else "r", + "revert" if with_revert else "", + "e" if is_executing else "r", "prov" if is_providing else "req", f"{key_nuber}", ] @@ -284,90 +291,111 @@ def get_key_number(cls) -> int: class _BaseRequiresProvidesContext(_BS, _MixingGetKeNumber): @classmethod - def get_create_requires_context_keys(cls) -> set[str]: + def get_execute_requires_context_keys(cls) -> set[str]: return { _compose_key( cls.get_key_number(), - with_undo=False, - is_creating=True, + with_revert=False, + is_executing=True, is_providing=False, ) } @classmethod - def get_create_provides_context_keys(cls) -> set[str]: + def get_execute_provides_context_keys(cls) -> set[str]: return { _compose_key( cls.get_key_number(), - with_undo=False, - is_creating=True, + with_revert=False, + is_executing=True, is_providing=True, ) } -class _BaseRequiresProvidesUndoContext(_UndoBS, _MixingGetKeNumber): +class _BaseRequiresProvidesRevertContext(_RevertBS, _MixingGetKeNumber): @classmethod - def get_create_requires_context_keys(cls) -> set[str]: + def get_execute_requires_context_keys(cls) -> set[str]: return { _compose_key( cls.get_key_number(), - with_undo=True, - is_creating=True, + with_revert=True, + is_executing=True, is_providing=False, ) } @classmethod - def get_create_provides_context_keys(cls) -> set[str]: + def get_execute_provides_context_keys(cls) -> set[str]: return { _compose_key( cls.get_key_number(), - with_undo=True, - is_creating=True, + with_revert=True, + 
is_executing=True, is_providing=True, ) } @classmethod - def get_undo_requires_context_keys(cls) -> set[str]: + def get_revert_requires_context_keys(cls) -> set[str]: return { _compose_key( cls.get_key_number(), - with_undo=True, - is_creating=False, + with_revert=True, + is_executing=False, is_providing=False, ) } @classmethod - def get_undo_provides_context_keys(cls) -> set[str]: + def get_revert_provides_context_keys(cls) -> set[str]: return { _compose_key( cls.get_key_number(), - with_undo=True, - is_creating=False, + with_revert=True, + is_executing=False, is_providing=True, ) } -async def _assert_keys_in_store(app: FastAPI, *, expected_keys: set[str]) -> None: - keys = set(await Store.get_from_app_state(app).redis.keys()) - assert keys == expected_keys - - -async def _ensure_keys_in_store(app: FastAPI, *, expected_keys: set[str]) -> None: +async def _ensure_log_mesage(caplog: pytest.LogCaptureFixture, *, message: str) -> None: async for attempt in AsyncRetrying(**_RETRY_PARAMS): with attempt: - await _assert_keys_in_store(app, expected_keys=expected_keys) + await asyncio.sleep(0) # wait for event to trigger + assert message in caplog.text -async def _esnure_log_mesage(caplog: pytest.LogCaptureFixture, *, message: str) -> None: +async def _esnure_steps_have_status( + app: FastAPI, + schedule_id: ScheduleId, + operation_name: OperationName, + *, + step_group_name: StepGroupName, + steps: Iterable[type[BaseStep]], +) -> None: + store = Store.get_from_app_state(app) + + store_proxies = [ + StepStoreProxy( + store=store, + schedule_id=schedule_id, + operation_name=operation_name, + step_group_name=step_group_name, + step_name=step.get_step_name(), + is_executing=True, + ) + for step in steps + ] + async for attempt in AsyncRetrying(**_RETRY_PARAMS): with attempt: - await asyncio.sleep(0) # wait for envet to trigger - assert message in caplog.text + for step_proxy in store_proxies: + try: + await step_proxy.read("status") + except NoDataFoundError: + msg = f"Step 
{step_proxy.step_name} has no status" + raise AssertionError(msg) from None ############## TESTS ############## @@ -406,49 +434,49 @@ class _S9(_BS): ... class _S10(_BS): ... -# Below fail on create (expected) +# Below fail on execute (expected) -class _RS1(_UndoBS): ... +class _RS1(_RevertBS): ... -class _RS2(_UndoBS): ... +class _RS2(_RevertBS): ... -class _RS3(_UndoBS): ... +class _RS3(_RevertBS): ... -class _RS4(_UndoBS): ... +class _RS4(_RevertBS): ... -class _RS5(_UndoBS): ... +class _RS5(_RevertBS): ... -class _RS6(_UndoBS): ... +class _RS6(_RevertBS): ... -class _RS7(_UndoBS): ... +class _RS7(_RevertBS): ... -class _RS8(_UndoBS): ... +class _RS8(_RevertBS): ... -class _RS9(_UndoBS): ... +class _RS9(_RevertBS): ... -class _RS10(_UndoBS): ... +class _RS10(_RevertBS): ... -# Below fail both on create and undo (unexpected) +# Below fail both on execute and revert (unexpected) -class _FCR1(_FailOnCreateAndUndoBS): ... +class _FCR1(_FailOnExecuteAndRevertBS): ... -class _FCR2(_FailOnCreateAndUndoBS): ... +class _FCR2(_FailOnExecuteAndRevertBS): ... -class _FCR3(_FailOnCreateAndUndoBS): ... +class _FCR3(_FailOnExecuteAndRevertBS): ... # Below will sleep forever @@ -460,7 +488,7 @@ class _SF1(_SleepsForeverBS): ... class _SF2(_SleepsForeverBS): ... -# Below will wait for manual intervention after it fails on create +# Below will wait for manual intervention after it fails on execute class _WMI1(_WaitManualInerventionBS): ... @@ -481,10 +509,10 @@ class RPCtxS1(_BaseRequiresProvidesContext): ... class RPCtxS2(_BaseRequiresProvidesContext): ... -class RPCtxR1(_BaseRequiresProvidesUndoContext): ... +class RPCtxR1(_BaseRequiresProvidesRevertContext): ... -class RPCtxR2(_BaseRequiresProvidesUndoContext): ... +class RPCtxR2(_BaseRequiresProvidesRevertContext): ... @pytest.mark.parametrize("app_count", [10]) @@ -492,102 +520,102 @@ class RPCtxR2(_BaseRequiresProvidesUndoContext): ... 
"operation, expected_order", [ pytest.param( - [ + Operation( SingleStepGroup(_S1), - ], + ), [ - CreateSequence(_S1), + ExecuteSequence(_S1), ], id="s1", ), pytest.param( - [ + Operation( ParallelStepGroup(_S1, _S2), - ], + ), [ - CreateRandom(_S1, _S2), + ExecuteRandom(_S1, _S2), ], id="p2", ), pytest.param( - [ + Operation( ParallelStepGroup(_S1, _S2, _S3, _S4, _S5, _S6, _S7, _S8, _S9, _S10), - ], + ), [ - CreateRandom(_S1, _S2, _S3, _S4, _S5, _S6, _S7, _S8, _S9, _S10), + ExecuteRandom(_S1, _S2, _S3, _S4, _S5, _S6, _S7, _S8, _S9, _S10), ], id="p10", ), pytest.param( - [ + Operation( SingleStepGroup(_S1), SingleStepGroup(_S2), SingleStepGroup(_S3), ParallelStepGroup(_S4, _S5, _S6, _S7, _S8, _S9), SingleStepGroup(_S10), - ], + ), [ - CreateSequence(_S1, _S2, _S3), - CreateRandom(_S4, _S5, _S6, _S7, _S8, _S9), - CreateSequence(_S10), + ExecuteSequence(_S1, _S2, _S3), + ExecuteRandom(_S4, _S5, _S6, _S7, _S8, _S9), + ExecuteSequence(_S10), ], id="s1-s1-s1-p6-s1", ), pytest.param( - [ + Operation( SingleStepGroup(_RS1), - ], + ), [ - CreateSequence(_RS1), - UndoSequence(_RS1), + ExecuteSequence(_RS1), + RevertSequence(_RS1), ], id="s1(1r)", ), pytest.param( - [ + Operation( ParallelStepGroup(_RS1, _S1, _S2, _S3, _S4, _S5, _S6), - ], + ), [ - CreateRandom(_S1, _S2, _S3, _S4, _S5, _S6, _RS1), - UndoRandom(_S1, _S2, _S3, _S4, _S5, _S6, _RS1), + ExecuteRandom(_S1, _S2, _S3, _S4, _S5, _S6, _RS1), + RevertRandom(_S1, _S2, _S3, _S4, _S5, _S6, _RS1), ], id="p7(1r)", ), pytest.param( - [ + Operation( SingleStepGroup(_S1), ParallelStepGroup(_S2, _S3, _S4, _S5, _S6), SingleStepGroup(_RS1), SingleStepGroup(_S7), # will not execute ParallelStepGroup(_S8, _S9), # will not execute - ], + ), [ - CreateSequence(_S1), - CreateRandom(_S2, _S3, _S4, _S5, _S6), - CreateSequence(_RS1), - UndoSequence(_RS1), - UndoRandom(_S2, _S3, _S4, _S5, _S6), - UndoSequence(_S1), + ExecuteSequence(_S1), + ExecuteRandom(_S2, _S3, _S4, _S5, _S6), + ExecuteSequence(_RS1), + RevertSequence(_RS1), + 
RevertRandom(_S2, _S3, _S4, _S5, _S6), + RevertSequence(_S1), ], id="s1-p5-s1(1r)-s1-p2", ), pytest.param( - [ + Operation( SingleStepGroup(_S1), ParallelStepGroup(_RS1, _S2, _S3, _S4, _S5, _S6), SingleStepGroup(_S7), # will not execute ParallelStepGroup(_S8, _S9), # will not execute - ], + ), [ - CreateSequence(_S1), - CreateRandom(_S2, _S3, _S4, _S5, _S6, _RS1), - UndoRandom(_S2, _S3, _S4, _S5, _S6, _RS1), - UndoSequence(_S1), + ExecuteSequence(_S1), + ExecuteRandom(_S2, _S3, _S4, _S5, _S6, _RS1), + RevertRandom(_S2, _S3, _S4, _S5, _S6, _RS1), + RevertSequence(_S1), ], id="s1-p6(1r)-s1-p2", ), pytest.param( - [ + Operation( ParallelStepGroup( _S1, _S2, @@ -610,9 +638,9 @@ class RPCtxR2(_BaseRequiresProvidesUndoContext): ... _RS9, _RS10, ), - ], + ), [ - CreateRandom( + ExecuteRandom( _S1, _S2, _S3, @@ -634,7 +662,7 @@ class RPCtxR2(_BaseRequiresProvidesUndoContext): ... _RS9, _RS10, ), - UndoRandom( + RevertRandom( _S1, _S2, _S3, @@ -661,7 +689,7 @@ class RPCtxR2(_BaseRequiresProvidesUndoContext): ... 
), ], ) -async def test_create_undo_order( +async def test_execute_revert_order( preserve_caplog_for_async_logging: None, steps_call_order: list[tuple[str, str]], selected_app: FastAPI, @@ -677,7 +705,7 @@ async def test_create_undo_order( await ensure_expected_order(steps_call_order, expected_order) - await _ensure_keys_in_store(selected_app, expected_keys=set()) + await ensure_keys_in_store(selected_app, expected_keys=set()) @pytest.mark.parametrize("app_count", [10]) @@ -685,97 +713,97 @@ async def test_create_undo_order( "operation, expected_order, expected_keys", [ pytest.param( - [ + Operation( SingleStepGroup(_FCR1), - ], + ), [ - CreateSequence(_FCR1), - UndoSequence(_FCR1), + ExecuteSequence(_FCR1), + RevertSequence(_FCR1), ], { "SCH:{schedule_id}", - "SCH:{schedule_id}:GROUPS:test_op:0S:C", - "SCH:{schedule_id}:GROUPS:test_op:0S:U", - "SCH:{schedule_id}:STEPS:test_op:0S:C:_FCR1", - "SCH:{schedule_id}:STEPS:test_op:0S:U:_FCR1", + "SCH:{schedule_id}:GROUPS:test_op:0S:E", + "SCH:{schedule_id}:GROUPS:test_op:0S:R", + "SCH:{schedule_id}:STEPS:test_op:0S:E:_FCR1", + "SCH:{schedule_id}:STEPS:test_op:0S:R:_FCR1", }, id="s1(1rf)", ), pytest.param( - [ + Operation( SingleStepGroup(_S1), SingleStepGroup(_FCR1), - ], + ), [ - CreateSequence(_S1, _FCR1), - UndoSequence(_FCR1), + ExecuteSequence(_S1, _FCR1), + RevertSequence(_FCR1), ], { "SCH:{schedule_id}", - "SCH:{schedule_id}:GROUPS:test_op:0S:C", - "SCH:{schedule_id}:GROUPS:test_op:1S:C", - "SCH:{schedule_id}:GROUPS:test_op:1S:U", - "SCH:{schedule_id}:STEPS:test_op:0S:C:_S1", - "SCH:{schedule_id}:STEPS:test_op:1S:C:_FCR1", - "SCH:{schedule_id}:STEPS:test_op:1S:U:_FCR1", + "SCH:{schedule_id}:GROUPS:test_op:0S:E", + "SCH:{schedule_id}:GROUPS:test_op:1S:E", + "SCH:{schedule_id}:GROUPS:test_op:1S:R", + "SCH:{schedule_id}:STEPS:test_op:0S:E:_S1", + "SCH:{schedule_id}:STEPS:test_op:1S:E:_FCR1", + "SCH:{schedule_id}:STEPS:test_op:1S:R:_FCR1", }, id="s2(1rf)", ), pytest.param( - [ + Operation( SingleStepGroup(_S1), 
ParallelStepGroup(_FCR1, _S2, _S3), - ], + ), [ - CreateSequence(_S1), - CreateRandom(_S2, _S3, _FCR1), - UndoRandom(_S2, _S3, _FCR1), + ExecuteSequence(_S1), + ExecuteRandom(_S2, _S3, _FCR1), + RevertRandom(_S2, _S3, _FCR1), ], { "SCH:{schedule_id}", - "SCH:{schedule_id}:GROUPS:test_op:0S:C", - "SCH:{schedule_id}:GROUPS:test_op:1P:C", - "SCH:{schedule_id}:GROUPS:test_op:1P:U", - "SCH:{schedule_id}:STEPS:test_op:0S:C:_S1", - "SCH:{schedule_id}:STEPS:test_op:1P:C:_FCR1", - "SCH:{schedule_id}:STEPS:test_op:1P:C:_S2", - "SCH:{schedule_id}:STEPS:test_op:1P:C:_S3", - "SCH:{schedule_id}:STEPS:test_op:1P:U:_FCR1", - "SCH:{schedule_id}:STEPS:test_op:1P:U:_S2", - "SCH:{schedule_id}:STEPS:test_op:1P:U:_S3", + "SCH:{schedule_id}:GROUPS:test_op:0S:E", + "SCH:{schedule_id}:GROUPS:test_op:1P:E", + "SCH:{schedule_id}:GROUPS:test_op:1P:R", + "SCH:{schedule_id}:STEPS:test_op:0S:E:_S1", + "SCH:{schedule_id}:STEPS:test_op:1P:E:_FCR1", + "SCH:{schedule_id}:STEPS:test_op:1P:E:_S2", + "SCH:{schedule_id}:STEPS:test_op:1P:E:_S3", + "SCH:{schedule_id}:STEPS:test_op:1P:R:_FCR1", + "SCH:{schedule_id}:STEPS:test_op:1P:R:_S2", + "SCH:{schedule_id}:STEPS:test_op:1P:R:_S3", }, id="s1p3(1rf)", ), pytest.param( - [ + Operation( SingleStepGroup(_S1), ParallelStepGroup(_FCR1, _FCR2, _S2, _S3), - ], + ), [ - CreateSequence(_S1), - CreateRandom(_S2, _S3, _FCR1, _FCR2), - UndoRandom(_S2, _S3, _FCR2, _FCR1), + ExecuteSequence(_S1), + ExecuteRandom(_S2, _S3, _FCR1, _FCR2), + RevertRandom(_S2, _S3, _FCR2, _FCR1), ], { "SCH:{schedule_id}", - "SCH:{schedule_id}:GROUPS:test_op:0S:C", - "SCH:{schedule_id}:GROUPS:test_op:1P:C", - "SCH:{schedule_id}:GROUPS:test_op:1P:U", - "SCH:{schedule_id}:STEPS:test_op:0S:C:_S1", - "SCH:{schedule_id}:STEPS:test_op:1P:C:_FCR1", - "SCH:{schedule_id}:STEPS:test_op:1P:C:_FCR2", - "SCH:{schedule_id}:STEPS:test_op:1P:C:_S2", - "SCH:{schedule_id}:STEPS:test_op:1P:C:_S3", - "SCH:{schedule_id}:STEPS:test_op:1P:U:_FCR1", - "SCH:{schedule_id}:STEPS:test_op:1P:U:_FCR2", - 
"SCH:{schedule_id}:STEPS:test_op:1P:U:_S2", - "SCH:{schedule_id}:STEPS:test_op:1P:U:_S3", + "SCH:{schedule_id}:GROUPS:test_op:0S:E", + "SCH:{schedule_id}:GROUPS:test_op:1P:E", + "SCH:{schedule_id}:GROUPS:test_op:1P:R", + "SCH:{schedule_id}:STEPS:test_op:0S:E:_S1", + "SCH:{schedule_id}:STEPS:test_op:1P:E:_FCR1", + "SCH:{schedule_id}:STEPS:test_op:1P:E:_FCR2", + "SCH:{schedule_id}:STEPS:test_op:1P:E:_S2", + "SCH:{schedule_id}:STEPS:test_op:1P:E:_S3", + "SCH:{schedule_id}:STEPS:test_op:1P:R:_FCR1", + "SCH:{schedule_id}:STEPS:test_op:1P:R:_FCR2", + "SCH:{schedule_id}:STEPS:test_op:1P:R:_S2", + "SCH:{schedule_id}:STEPS:test_op:1P:R:_S3", }, id="s1p4(2rf)", ), ], ) -async def test_fails_during_undo_is_in_error_state( +async def test_fails_during_revert_is_in_error_state( preserve_caplog_for_async_logging: None, steps_call_order: list[tuple[str, str]], selected_app: FastAPI, @@ -793,7 +821,7 @@ async def test_fails_during_undo_is_in_error_state( await ensure_expected_order(steps_call_order, expected_order) formatted_expected_keys = {k.format(schedule_id=schedule_id) for k in expected_keys} - await _ensure_keys_in_store(selected_app, expected_keys=formatted_expected_keys) + await ensure_keys_in_store(selected_app, expected_keys=formatted_expected_keys) @pytest.mark.parametrize("cancel_count", [1, 10]) @@ -802,40 +830,40 @@ async def test_fails_during_undo_is_in_error_state( "operation, expected_before_cancel_order, expected_order", [ pytest.param( - [ + Operation( SingleStepGroup(_S1), ParallelStepGroup(_S2, _S3, _S4), SingleStepGroup(_SF1), - ], + ), [ - CreateSequence(_S1), - CreateRandom(_S2, _S3, _S4), - CreateSequence(_SF1), + ExecuteSequence(_S1), + ExecuteRandom(_S2, _S3, _S4), + ExecuteSequence(_SF1), ], [ - CreateSequence(_S1), - CreateRandom(_S2, _S3, _S4), - CreateSequence(_SF1), - UndoSequence(_SF1), - UndoRandom(_S2, _S3, _S4), - UndoSequence(_S1), + ExecuteSequence(_S1), + ExecuteRandom(_S2, _S3, _S4), + ExecuteSequence(_SF1), + RevertSequence(_SF1), + 
RevertRandom(_S2, _S3, _S4), + RevertSequence(_S1), ], id="s1p3s1(1s)", ), pytest.param( - [ + Operation( SingleStepGroup(_S1), ParallelStepGroup(_S2, _S3, _S4, _SF1, _SF2), - ], + ), [ - CreateSequence(_S1), - CreateRandom(_SF1, _SF2, _S2, _S3, _S4), + ExecuteSequence(_S1), + ExecuteRandom(_SF1, _SF2, _S2, _S3, _S4), ], [ - CreateSequence(_S1), - CreateRandom(_S2, _S3, _S4, _SF1, _SF2), - UndoRandom(_S2, _S3, _S4, _SF2, _SF1), - UndoSequence(_S1), + ExecuteSequence(_S1), + ExecuteRandom(_S2, _S3, _S4, _SF1, _SF2), + RevertRandom(_S2, _S3, _S4, _SF2, _SF1), + RevertSequence(_S1), ], id="s1p4(1s)", ), @@ -866,7 +894,7 @@ async def test_cancelled_finishes_nicely( await ensure_expected_order(steps_call_order, expected_order) - await _ensure_keys_in_store(selected_app, expected_keys=set()) + await ensure_keys_in_store(selected_app, expected_keys=set()) _FAST_REPEAT_INTERVAL: Final[timedelta] = timedelta(seconds=0.1) @@ -878,60 +906,60 @@ async def test_cancelled_finishes_nicely( "operation, expected_before_cancel_order, expected_order", [ pytest.param( - [ + Operation( SingleStepGroup( _S1, repeat_steps=True, wait_before_repeat=_FAST_REPEAT_INTERVAL ), - ], - [CreateSequence(_S1) for _ in range(_REPAT_COUNT)], + ), + [ExecuteSequence(_S1) for _ in range(_REPAT_COUNT)], [ - *[CreateSequence(_S1) for _ in range(_REPAT_COUNT)], - UndoSequence(_S1), + *[ExecuteSequence(_S1) for _ in range(_REPAT_COUNT)], + RevertSequence(_S1), ], id="s1(r)", ), pytest.param( - [ + Operation( ParallelStepGroup( _S1, _S2, repeat_steps=True, wait_before_repeat=_FAST_REPEAT_INTERVAL, ), - ], - [CreateRandom(_S1, _S2) for _ in range(_REPAT_COUNT)], + ), + [ExecuteRandom(_S1, _S2) for _ in range(_REPAT_COUNT)], [ - *[CreateRandom(_S1, _S2) for _ in range(_REPAT_COUNT)], - UndoRandom(_S1, _S2), + *[ExecuteRandom(_S1, _S2) for _ in range(_REPAT_COUNT)], + RevertRandom(_S1, _S2), ], id="p2(r)", ), pytest.param( - [ + Operation( SingleStepGroup( _RS1, repeat_steps=True, 
wait_before_repeat=_FAST_REPEAT_INTERVAL ), - ], - [CreateSequence(_RS1) for _ in range(_REPAT_COUNT)], + ), + [ExecuteSequence(_RS1) for _ in range(_REPAT_COUNT)], [ - *[CreateSequence(_RS1) for _ in range(_REPAT_COUNT)], - UndoSequence(_RS1), + *[ExecuteSequence(_RS1) for _ in range(_REPAT_COUNT)], + RevertSequence(_RS1), ], id="s1(rf)", ), pytest.param( - [ + Operation( ParallelStepGroup( _RS1, _RS2, repeat_steps=True, wait_before_repeat=_FAST_REPEAT_INTERVAL, ), - ], - [CreateRandom(_RS1, _RS2) for _ in range(_REPAT_COUNT)], + ), + [ExecuteRandom(_RS1, _RS2) for _ in range(_REPAT_COUNT)], [ - *[CreateRandom(_RS1, _RS2) for _ in range(_REPAT_COUNT)], - UndoRandom(_RS1, _RS2), + *[ExecuteRandom(_RS1, _RS2) for _ in range(_REPAT_COUNT)], + RevertRandom(_RS1, _RS2), ], id="p2(rf)", ), @@ -956,14 +984,14 @@ async def test_repeating_step( steps_call_order, expected_before_cancel_order, use_only_first_entries=True ) - # cancelling stops the loop and causes undo to run + # cancelling stops the loop and causes revert to run await cancel_operation(selected_app, schedule_id) await ensure_expected_order( steps_call_order, expected_order, use_only_last_entries=True ) - await _ensure_keys_in_store(selected_app, expected_keys=set()) + await ensure_keys_in_store(selected_app, expected_keys=set()) @pytest.mark.parametrize("app_count", [10]) @@ -971,77 +999,77 @@ async def test_repeating_step( "operation, expected_order, expected_keys, after_restart_expected_order", [ pytest.param( - [ + Operation( SingleStepGroup(_S1), ParallelStepGroup(_S2, _S3, _S4), SingleStepGroup(_WMI1), # below are not included when waiting for manual intervention ParallelStepGroup(_S5, _S6), SingleStepGroup(_S7), - ], + ), [ - CreateSequence(_S1), - CreateRandom(_S2, _S3, _S4), - CreateSequence(_WMI1), + ExecuteSequence(_S1), + ExecuteRandom(_S2, _S3, _S4), + ExecuteSequence(_WMI1), ], { "SCH:{schedule_id}", - "SCH:{schedule_id}:GROUPS:test_op:0S:C", - "SCH:{schedule_id}:GROUPS:test_op:1P:C", - 
"SCH:{schedule_id}:GROUPS:test_op:2S:C", - "SCH:{schedule_id}:STEPS:test_op:0S:C:_S1", - "SCH:{schedule_id}:STEPS:test_op:1P:C:_S2", - "SCH:{schedule_id}:STEPS:test_op:1P:C:_S3", - "SCH:{schedule_id}:STEPS:test_op:1P:C:_S4", - "SCH:{schedule_id}:STEPS:test_op:2S:C:_WMI1", + "SCH:{schedule_id}:GROUPS:test_op:0S:E", + "SCH:{schedule_id}:GROUPS:test_op:1P:E", + "SCH:{schedule_id}:GROUPS:test_op:2S:E", + "SCH:{schedule_id}:STEPS:test_op:0S:E:_S1", + "SCH:{schedule_id}:STEPS:test_op:1P:E:_S2", + "SCH:{schedule_id}:STEPS:test_op:1P:E:_S3", + "SCH:{schedule_id}:STEPS:test_op:1P:E:_S4", + "SCH:{schedule_id}:STEPS:test_op:2S:E:_WMI1", }, [ - CreateSequence(_S1), - CreateRandom(_S2, _S3, _S4), - CreateSequence(_WMI1), - CreateSequence(_WMI1), # retried step - CreateRandom(_S5, _S6), # it is completed now - CreateSequence(_S7), # it is completed now + ExecuteSequence(_S1), + ExecuteRandom(_S2, _S3, _S4), + ExecuteSequence(_WMI1), + ExecuteSequence(_WMI1), # retried step + ExecuteRandom(_S5, _S6), # it is completed now + ExecuteSequence(_S7), # it is completed now ], id="s1-p3-s1(1mi)", ), pytest.param( - [ + Operation( SingleStepGroup(_S1), ParallelStepGroup(_S2, _S3, _S4), ParallelStepGroup(_WMI1, _WMI2, _WMI3, _S5, _S6, _S7), # below are not included when waiting for manual intervention SingleStepGroup(_S8), ParallelStepGroup(_S9, _S10), - ], + ), [ - CreateSequence(_S1), - CreateRandom(_S2, _S3, _S4), - CreateRandom(_WMI1, _WMI2, _WMI3, _S5, _S6, _S7), + ExecuteSequence(_S1), + ExecuteRandom(_S2, _S3, _S4), + ExecuteRandom(_WMI1, _WMI2, _WMI3, _S5, _S6, _S7), ], { "SCH:{schedule_id}", - "SCH:{schedule_id}:GROUPS:test_op:0S:C", - "SCH:{schedule_id}:GROUPS:test_op:1P:C", - "SCH:{schedule_id}:GROUPS:test_op:2P:C", - "SCH:{schedule_id}:STEPS:test_op:0S:C:_S1", - "SCH:{schedule_id}:STEPS:test_op:1P:C:_S2", - "SCH:{schedule_id}:STEPS:test_op:1P:C:_S3", - "SCH:{schedule_id}:STEPS:test_op:1P:C:_S4", - "SCH:{schedule_id}:STEPS:test_op:2P:C:_S5", - 
"SCH:{schedule_id}:STEPS:test_op:2P:C:_S6", - "SCH:{schedule_id}:STEPS:test_op:2P:C:_S7", - "SCH:{schedule_id}:STEPS:test_op:2P:C:_WMI1", - "SCH:{schedule_id}:STEPS:test_op:2P:C:_WMI2", - "SCH:{schedule_id}:STEPS:test_op:2P:C:_WMI3", + "SCH:{schedule_id}:GROUPS:test_op:0S:E", + "SCH:{schedule_id}:GROUPS:test_op:1P:E", + "SCH:{schedule_id}:GROUPS:test_op:2P:E", + "SCH:{schedule_id}:STEPS:test_op:0S:E:_S1", + "SCH:{schedule_id}:STEPS:test_op:1P:E:_S2", + "SCH:{schedule_id}:STEPS:test_op:1P:E:_S3", + "SCH:{schedule_id}:STEPS:test_op:1P:E:_S4", + "SCH:{schedule_id}:STEPS:test_op:2P:E:_S5", + "SCH:{schedule_id}:STEPS:test_op:2P:E:_S6", + "SCH:{schedule_id}:STEPS:test_op:2P:E:_S7", + "SCH:{schedule_id}:STEPS:test_op:2P:E:_WMI1", + "SCH:{schedule_id}:STEPS:test_op:2P:E:_WMI2", + "SCH:{schedule_id}:STEPS:test_op:2P:E:_WMI3", }, [ - CreateSequence(_S1), - CreateRandom(_S2, _S3, _S4), - CreateRandom(_WMI1, _WMI2, _WMI3, _S5, _S6, _S7), - CreateRandom(_WMI1, _WMI2, _WMI3), # retried steps - CreateSequence(_S8), # it is completed now - CreateRandom(_S9, _S10), # it is completed now + ExecuteSequence(_S1), + ExecuteRandom(_S2, _S3, _S4), + ExecuteRandom(_WMI1, _WMI2, _WMI3, _S5, _S6, _S7), + ExecuteRandom(_WMI1, _WMI2, _WMI3), # retried steps + ExecuteSequence(_S8), # it is completed now + ExecuteRandom(_S9, _S10), # it is completed now ], id="s1-p3-p6(3mi)", ), @@ -1068,7 +1096,17 @@ async def test_wait_for_manual_intervention( await ensure_expected_order(steps_call_order, expected_order) - await _ensure_keys_in_store(selected_app, expected_keys=formatted_expected_keys) + await ensure_keys_in_store(selected_app, expected_keys=formatted_expected_keys) + + await _esnure_steps_have_status( + selected_app, + schedule_id, + operation_name, + step_group_name=operation.step_groups[ + len(expected_order) - 1 + ].get_step_group_name(index=len(expected_order) - 1), + steps=expected_order[-1].steps, + ) # even if cancelled, state of waiting for manual intervention remains the same async 
for attempt in AsyncRetrying(**_RETRY_PARAMS): @@ -1076,7 +1114,7 @@ async def test_wait_for_manual_intervention( with pytest.raises(CannotCancelWhileWaitingForManualInterventionError): await cancel_operation(selected_app, schedule_id) - await _ensure_keys_in_store(selected_app, expected_keys=formatted_expected_keys) + await ensure_keys_in_store(selected_app, expected_keys=formatted_expected_keys) # set step to no longer raise and restart the failed steps steps_to_restart = _get_steps_matching_class( @@ -1085,7 +1123,7 @@ async def test_wait_for_manual_intervention( _GlobalStepIssueTracker.set_issue_solved() await limited_gather( *( - restart_operation_step_stuck_in_manual_intervention_during_create( + restart_operation_step_stuck_in_manual_intervention_during_execute( selected_app, schedule_id, step.get_step_name() ) for step in steps_to_restart @@ -1094,7 +1132,25 @@ async def test_wait_for_manual_intervention( ) # should finish schedule operation await ensure_expected_order(steps_call_order, after_restart_expected_order) - await _ensure_keys_in_store(selected_app, expected_keys=set()) + await ensure_keys_in_store(selected_app, expected_keys=set()) + + +@pytest.mark.parametrize("app_count", [10]) +async def test_operation_is_not_cancellable( + reset_step_issue_tracker: None, + preserve_caplog_for_async_logging: None, + selected_app: FastAPI, + register_operation: Callable[[OperationName, Operation], None], + operation_name: OperationName, +): + operation = Operation(SingleStepGroup(_S1), is_cancellable=False) + register_operation(operation_name, operation) + + schedule_id = await start_operation(selected_app, operation_name, {}) + + # operations registered with is_cancellable=False must reject cancellation requests + with pytest.raises(OperationNotCancellableError): + await cancel_operation(selected_app, schedule_id) @pytest.mark.parametrize("app_count", [10]) @@ -1102,96 +1158,96 @@ async def test_wait_for_manual_intervention( "operation, expected_order,
expected_keys, after_restart_expected_order", [ pytest.param( - [ + Operation( SingleStepGroup(_S1), ParallelStepGroup(_S2, _S3, _S4), SingleStepGroup(_FCR1), # below are not included in any expected order ParallelStepGroup(_S5, _S6), SingleStepGroup(_S7), - ], + ), [ - CreateSequence(_S1), - CreateRandom(_S2, _S3, _S4), - CreateSequence(_FCR1), - UndoSequence(_FCR1), + ExecuteSequence(_S1), + ExecuteRandom(_S2, _S3, _S4), + ExecuteSequence(_FCR1), + RevertSequence(_FCR1), ], { "SCH:{schedule_id}", - "SCH:{schedule_id}:GROUPS:test_op:0S:C", - "SCH:{schedule_id}:GROUPS:test_op:1P:C", - "SCH:{schedule_id}:GROUPS:test_op:2S:C", - "SCH:{schedule_id}:GROUPS:test_op:2S:U", - "SCH:{schedule_id}:STEPS:test_op:0S:C:_S1", - "SCH:{schedule_id}:STEPS:test_op:1P:C:_S2", - "SCH:{schedule_id}:STEPS:test_op:1P:C:_S3", - "SCH:{schedule_id}:STEPS:test_op:1P:C:_S4", - "SCH:{schedule_id}:STEPS:test_op:2S:C:_FCR1", - "SCH:{schedule_id}:STEPS:test_op:2S:U:_FCR1", + "SCH:{schedule_id}:GROUPS:test_op:0S:E", + "SCH:{schedule_id}:GROUPS:test_op:1P:E", + "SCH:{schedule_id}:GROUPS:test_op:2S:E", + "SCH:{schedule_id}:GROUPS:test_op:2S:R", + "SCH:{schedule_id}:STEPS:test_op:0S:E:_S1", + "SCH:{schedule_id}:STEPS:test_op:1P:E:_S2", + "SCH:{schedule_id}:STEPS:test_op:1P:E:_S3", + "SCH:{schedule_id}:STEPS:test_op:1P:E:_S4", + "SCH:{schedule_id}:STEPS:test_op:2S:E:_FCR1", + "SCH:{schedule_id}:STEPS:test_op:2S:R:_FCR1", }, [ - CreateSequence(_S1), - CreateRandom(_S2, _S3, _S4), - CreateSequence(_FCR1), - UndoSequence(_FCR1), - UndoSequence(_FCR1), # this one is retried - UndoRandom(_S2, _S3, _S4), - UndoSequence(_S1), + ExecuteSequence(_S1), + ExecuteRandom(_S2, _S3, _S4), + ExecuteSequence(_FCR1), + RevertSequence(_FCR1), + RevertSequence(_FCR1), # this one is retried + RevertRandom(_S2, _S3, _S4), + RevertSequence(_S1), ], id="s1-p3-s1(1r)", ), pytest.param( - [ + Operation( SingleStepGroup(_S1), ParallelStepGroup(_S2, _S3, _S4), ParallelStepGroup(_FCR1, _FCR2, _FCR3, _S5, _S6, _S7), # below are 
not included in any expected order SingleStepGroup(_S8), ParallelStepGroup(_S9, _S10), - ], + ), [ - CreateSequence(_S1), - CreateRandom(_S2, _S3, _S4), - CreateRandom(_FCR1, _FCR2, _FCR3, _S5, _S6, _S7), - UndoRandom(_FCR1, _FCR2, _FCR3, _S5, _S6, _S7), + ExecuteSequence(_S1), + ExecuteRandom(_S2, _S3, _S4), + ExecuteRandom(_FCR1, _FCR2, _FCR3, _S5, _S6, _S7), + RevertRandom(_FCR1, _FCR2, _FCR3, _S5, _S6, _S7), ], { "SCH:{schedule_id}", - "SCH:{schedule_id}:GROUPS:test_op:0S:C", - "SCH:{schedule_id}:GROUPS:test_op:1P:C", - "SCH:{schedule_id}:GROUPS:test_op:2P:C", - "SCH:{schedule_id}:GROUPS:test_op:2P:U", - "SCH:{schedule_id}:STEPS:test_op:0S:C:_S1", - "SCH:{schedule_id}:STEPS:test_op:1P:C:_S2", - "SCH:{schedule_id}:STEPS:test_op:1P:C:_S3", - "SCH:{schedule_id}:STEPS:test_op:1P:C:_S4", - "SCH:{schedule_id}:STEPS:test_op:2P:C:_S5", - "SCH:{schedule_id}:STEPS:test_op:2P:C:_S6", - "SCH:{schedule_id}:STEPS:test_op:2P:C:_S7", - "SCH:{schedule_id}:STEPS:test_op:2P:C:_FCR1", - "SCH:{schedule_id}:STEPS:test_op:2P:C:_FCR2", - "SCH:{schedule_id}:STEPS:test_op:2P:C:_FCR3", - "SCH:{schedule_id}:STEPS:test_op:2P:U:_S5", - "SCH:{schedule_id}:STEPS:test_op:2P:U:_S6", - "SCH:{schedule_id}:STEPS:test_op:2P:U:_S7", - "SCH:{schedule_id}:STEPS:test_op:2P:U:_FCR1", - "SCH:{schedule_id}:STEPS:test_op:2P:U:_FCR2", - "SCH:{schedule_id}:STEPS:test_op:2P:U:_FCR3", + "SCH:{schedule_id}:GROUPS:test_op:0S:E", + "SCH:{schedule_id}:GROUPS:test_op:1P:E", + "SCH:{schedule_id}:GROUPS:test_op:2P:E", + "SCH:{schedule_id}:GROUPS:test_op:2P:R", + "SCH:{schedule_id}:STEPS:test_op:0S:E:_S1", + "SCH:{schedule_id}:STEPS:test_op:1P:E:_S2", + "SCH:{schedule_id}:STEPS:test_op:1P:E:_S3", + "SCH:{schedule_id}:STEPS:test_op:1P:E:_S4", + "SCH:{schedule_id}:STEPS:test_op:2P:E:_S5", + "SCH:{schedule_id}:STEPS:test_op:2P:E:_S6", + "SCH:{schedule_id}:STEPS:test_op:2P:E:_S7", + "SCH:{schedule_id}:STEPS:test_op:2P:E:_FCR1", + "SCH:{schedule_id}:STEPS:test_op:2P:E:_FCR2", + "SCH:{schedule_id}:STEPS:test_op:2P:E:_FCR3", 
+ "SCH:{schedule_id}:STEPS:test_op:2P:R:_S5", + "SCH:{schedule_id}:STEPS:test_op:2P:R:_S6", + "SCH:{schedule_id}:STEPS:test_op:2P:R:_S7", + "SCH:{schedule_id}:STEPS:test_op:2P:R:_FCR1", + "SCH:{schedule_id}:STEPS:test_op:2P:R:_FCR2", + "SCH:{schedule_id}:STEPS:test_op:2P:R:_FCR3", }, [ - CreateSequence(_S1), - CreateRandom(_S2, _S3, _S4), - CreateRandom(_FCR1, _FCR2, _FCR3, _S5, _S6, _S7), - UndoRandom(_FCR1, _FCR2, _FCR3, _S5, _S6, _S7), - UndoRandom(_FCR1, _FCR2, _FCR3), # retried steps - UndoRandom(_S2, _S3, _S4), - UndoSequence(_S1), + ExecuteSequence(_S1), + ExecuteRandom(_S2, _S3, _S4), + ExecuteRandom(_FCR1, _FCR2, _FCR3, _S5, _S6, _S7), + RevertRandom(_FCR1, _FCR2, _FCR3, _S5, _S6, _S7), + RevertRandom(_FCR1, _FCR2, _FCR3), # retried steps + RevertRandom(_S2, _S3, _S4), + RevertSequence(_S1), ], id="s1-p3-p6(3r)", ), ], ) -async def test_restart_undo_operation_step_in_error( +async def test_restart_revert_operation_step_in_error( reset_step_issue_tracker: None, preserve_caplog_for_async_logging: None, steps_call_order: list[tuple[str, str]], @@ -1211,20 +1267,26 @@ async def test_restart_undo_operation_step_in_error( formatted_expected_keys = {k.format(schedule_id=schedule_id) for k in expected_keys} await ensure_expected_order(steps_call_order, expected_order) - await _ensure_keys_in_store(selected_app, expected_keys=formatted_expected_keys) - - # give some time for the deferred runner to store the errors - # avoids flakiness - await asyncio.sleep(0.1) + await ensure_keys_in_store(selected_app, expected_keys=formatted_expected_keys) + + await _esnure_steps_have_status( + selected_app, + schedule_id, + operation_name, + step_group_name=operation.step_groups[ + len(expected_order) - 2 + ].get_step_group_name(index=len(expected_order) - 2), + steps=expected_order[-1].steps, + ) # set step to no longer raise and restart the failed steps steps_to_restart = _get_steps_matching_class( - operation, match=_FailOnCreateAndUndoBS + operation, 
match=_FailOnExecuteAndRevertBS ) _GlobalStepIssueTracker.set_issue_solved() await limited_gather( *( - restart_operation_step_stuck_during_undo( + restart_operation_step_stuck_during_revert( selected_app, schedule_id, step.get_step_name() ) for step in steps_to_restart @@ -1233,7 +1295,7 @@ async def test_restart_undo_operation_step_in_error( ) # should finish schedule operation await ensure_expected_order(steps_call_order, after_restart_expected_order) - await _ensure_keys_in_store(selected_app, expected_keys=set()) + await ensure_keys_in_store(selected_app, expected_keys=set()) @pytest.mark.parametrize("app_count", [10]) @@ -1246,11 +1308,11 @@ async def test_errors_with_restart_operation_step_in_error( operation_name: OperationName, in_manual_intervention: bool, ): - operation: Operation = [ + operation = Operation( SingleStepGroup(_S1), ParallelStepGroup(_S2, _S3, _S4), ParallelStepGroup(_SF1, _FCR1), # sleeps here forever - ] + ) register_operation(operation_name, operation) schedule_id = await start_operation(selected_app, operation_name, {}) @@ -1259,13 +1321,19 @@ async def test_errors_with_restart_operation_step_in_error( await ensure_expected_order( steps_call_order, [ - CreateSequence(_S1), - CreateRandom(_S2, _S3, _S4), - CreateRandom(_SF1, _FCR1), + ExecuteSequence(_S1), + ExecuteRandom(_S2, _S3, _S4), + ExecuteRandom(_SF1, _FCR1), ], ) - # give some time for the deferred runner to store the errors - await asyncio.sleep(0.1) + + await _esnure_steps_have_status( + selected_app, + schedule_id, + operation_name, + step_group_name=operation.step_groups[2].get_step_group_name(index=2), + steps=operation.step_groups[-1].steps, + ) with pytest.raises(StepNameNotInCurrentGroupError): await Core.get_from_app_state( @@ -1303,57 +1371,57 @@ async def test_errors_with_restart_operation_step_in_error( "operation, initial_context, expected_order", [ pytest.param( - [ + Operation( SingleStepGroup(RPCtxS1), - ], + ), { - "bs__c_req_1": _CTX_VALUE, # required by 
create + "bs__e_req_1": _CTX_VALUE, # required by execute }, [ - CreateSequence(RPCtxS1), + ExecuteSequence(RPCtxS1), ], id="s1", ), pytest.param( - [ + Operation( ParallelStepGroup(RPCtxS1, RPCtxS2), - ], + ), { - "bs__c_req_1": _CTX_VALUE, # required by create - "bs__c_req_2": _CTX_VALUE, # required by create + "bs__e_req_1": _CTX_VALUE, # required by execute + "bs__e_req_2": _CTX_VALUE, # required by execute }, [ - CreateRandom(RPCtxS1, RPCtxS2), + ExecuteRandom(RPCtxS1, RPCtxS2), ], id="p2", ), pytest.param( - [ + Operation( SingleStepGroup(RPCtxR1), - ], + ), { - "bs_undo_c_req_1": _CTX_VALUE, # required by create - "bs_undo_r_req_1": _CTX_VALUE, # not created automatically since crete fails + "bs_revert_e_req_1": _CTX_VALUE, # required by execute + "bs_revert_r_req_1": _CTX_VALUE, # not executed automatically since crete fails }, [ - CreateSequence(RPCtxR1), - UndoSequence(RPCtxR1), + ExecuteSequence(RPCtxR1), + RevertSequence(RPCtxR1), ], id="s1(1r)", ), pytest.param( - [ + Operation( ParallelStepGroup(RPCtxR1, RPCtxR2), - ], + ), { - "bs_undo_c_req_1": _CTX_VALUE, # required by create - "bs_undo_c_req_2": _CTX_VALUE, # required by create - "bs_undo_r_req_1": _CTX_VALUE, # not created automatically since crete fails - "bs_undo_r_req_2": _CTX_VALUE, # not created automatically since crete fails + "bs_revert_e_req_1": _CTX_VALUE, # required by execute + "bs_revert_e_req_2": _CTX_VALUE, # required by execute + "bs_revert_r_req_1": _CTX_VALUE, # not executed automatically since crete fails + "bs_revert_r_req_2": _CTX_VALUE, # not executed automatically since crete fails }, [ - CreateRandom(RPCtxR1, RPCtxR2), - UndoRandom(RPCtxR1, RPCtxR2), + ExecuteRandom(RPCtxR1, RPCtxR2), + RevertRandom(RPCtxR1, RPCtxR2), ], id="p2(2r)", ), @@ -1381,7 +1449,7 @@ async def test_operation_context_usage( # NOTE: might fail because it raised ProvidedOperationContextKeysAreMissingError check logs await ensure_expected_order(steps_call_order, expected_order) - await 
_ensure_keys_in_store(selected_app, expected_keys=set()) + await ensure_keys_in_store(selected_app, expected_keys=set()) assert f"{OperationContextValueIsNoneError.__name__}" not in caplog.text assert f"{ProvidedOperationContextKeysAreMissingError.__name__}" not in caplog.text @@ -1392,29 +1460,29 @@ async def test_operation_context_usage( "operation, initial_context", [ pytest.param( - [ + Operation( SingleStepGroup(RPCtxS1), - ], + ), { - "bs__c_prov_1": _CTX_VALUE, # already provied by step creates issue + "bs__e_prov_1": _CTX_VALUE, # already provied by step execute issue }, id="s1", ), pytest.param( - [ + Operation( SingleStepGroup(RPCtxR1), - ], + ), { - "bs_undo_c_prov_1": _CTX_VALUE, # already provied by step creates issue + "bs_revert_e_prov_1": _CTX_VALUE, # already provied by step execute issue }, id="s1", ), pytest.param( - [ + Operation( SingleStepGroup(RPCtxR1), - ], + ), { - "bs_undo_r_prov_1": _CTX_VALUE, # already provied by step creates issue + "bs_revert_r_prov_1": _CTX_VALUE, # already provied by step execute issue }, id="s1", ), @@ -1433,7 +1501,7 @@ async def test_operation_initial_context_using_key_provided_by_step( with pytest.raises(InitialOperationContextKeyNotAllowedError): await start_operation(selected_app, operation_name, initial_context) - await _ensure_keys_in_store(selected_app, expected_keys=set()) + await ensure_keys_in_store(selected_app, expected_keys=set()) @pytest.mark.parametrize("app_count", [10]) @@ -1441,26 +1509,26 @@ async def test_operation_initial_context_using_key_provided_by_step( "operation, initial_context, expected_order", [ pytest.param( - [ + Operation( SingleStepGroup(RPCtxS1), - ], + ), { # `bs__c_req_1` is missing }, [ - UndoSequence(RPCtxS1), + RevertSequence(RPCtxS1), ], id="missing_context_key", ), pytest.param( - [ + Operation( SingleStepGroup(RPCtxS1), - ], + ), { - "bs__c_req_1": None, + "bs__e_req_1": None, }, [ - UndoSequence(RPCtxS1), + RevertSequence(RPCtxS1), ], id="context_key_is_none", ), @@ 
-1485,11 +1553,11 @@ async def test_step_does_not_receive_context_key_or_is_none( schedule_id = await start_operation(selected_app, operation_name, initial_context) assert TypeAdapter(ScheduleId).validate_python(schedule_id) - await _esnure_log_mesage(caplog, message=OperationContextValueIsNoneError.__name__) + await _ensure_log_mesage(caplog, message=OperationContextValueIsNoneError.__name__) await ensure_expected_order(steps_call_order, expected_order) - await _ensure_keys_in_store(selected_app, expected_keys=set()) + await ensure_keys_in_store(selected_app, expected_keys=set()) class _BadImplementedStep(BaseStep): @@ -1505,47 +1573,47 @@ def _get_provided_context( return return_values - # CREATE + # EXECUTE @classmethod - def get_create_requires_context_keys(cls) -> set[str]: - return {"to_return", "trigger_undo"} + def get_execute_requires_context_keys(cls) -> set[str]: + return {"to_return", "trigger_revert"} @classmethod - def get_create_provides_context_keys(cls) -> set[str]: + def get_execute_provides_context_keys(cls) -> set[str]: return {"a_key"} @classmethod - async def create( + async def execute( cls, app: FastAPI, required_context: RequiredOperationContext ) -> ProvidedOperationContext | None: print("INJECTED_CONTEXT_C", required_context) _ = app - _STEPS_CALL_ORDER.append((cls.__name__, CREATED)) + _STEPS_CALL_ORDER.append((cls.__name__, EXECUTED)) - if required_context.get("trigger_undo"): - msg = "triggering undo" + if required_context.get("trigger_revert"): + msg = "triggering revert" raise RuntimeError(msg) return cls._get_provided_context(required_context) - # UNDO + # REVERT @classmethod - def get_undo_requires_context_keys(cls) -> set[str]: - return {"to_return", "trigger_undo"} + def get_revert_requires_context_keys(cls) -> set[str]: + return {"to_return", "trigger_revert"} @classmethod - def get_undo_provides_context_keys(cls) -> set[str]: + def get_revert_provides_context_keys(cls) -> set[str]: return {"a_key"} @classmethod - async def 
undo( + async def revert( cls, app: FastAPI, required_context: RequiredOperationContext ) -> ProvidedOperationContext | None: print("INJECTED_CONTEXT_R", required_context) _ = app - _STEPS_CALL_ORDER.append((cls.__name__, UNDONE)) + _STEPS_CALL_ORDER.append((cls.__name__, REVERTED)) return cls._get_provided_context(required_context) @@ -1555,11 +1623,11 @@ async def undo( "operation, initial_context, expected_error_str, expected_order, expected_keys", [ pytest.param( - [ + Operation( SingleStepGroup(_BadImplementedStep), - ], + ), { - "trigger_undo": False, + "trigger_revert": False, "to_return": { "add_to_return": True, "keys": {"a_key": None}, @@ -1567,50 +1635,50 @@ async def undo( }, f"{OperationContextValueIsNoneError.__name__}: Values of context cannot be None: {{'a_key'", [ - CreateSequence(_BadImplementedStep), - UndoSequence(_BadImplementedStep), + ExecuteSequence(_BadImplementedStep), + RevertSequence(_BadImplementedStep), ], { "SCH:{schedule_id}", - "SCH:{schedule_id}:GROUPS:test_op:0S:C", - "SCH:{schedule_id}:GROUPS:test_op:0S:U", + "SCH:{schedule_id}:GROUPS:test_op:0S:E", + "SCH:{schedule_id}:GROUPS:test_op:0S:R", "SCH:{schedule_id}:OP_CTX:test_op", - "SCH:{schedule_id}:STEPS:test_op:0S:C:_BadImplementedStep", - "SCH:{schedule_id}:STEPS:test_op:0S:U:_BadImplementedStep", + "SCH:{schedule_id}:STEPS:test_op:0S:E:_BadImplementedStep", + "SCH:{schedule_id}:STEPS:test_op:0S:R:_BadImplementedStep", }, - id="create-returns-key-set-to-None", + id="execute-returns-key-set-to-None", ), pytest.param( - [ + Operation( SingleStepGroup(_BadImplementedStep), - ], + ), { - "trigger_undo": False, + "trigger_revert": False, "to_return": { "add_to_return": False, }, }, f"{ProvidedOperationContextKeysAreMissingError.__name__}: Provided context {{}} is missing keys {{'a_key'", [ - CreateSequence(_BadImplementedStep), - UndoSequence(_BadImplementedStep), + ExecuteSequence(_BadImplementedStep), + RevertSequence(_BadImplementedStep), ], { "SCH:{schedule_id}", - 
"SCH:{schedule_id}:GROUPS:test_op:0S:C", - "SCH:{schedule_id}:GROUPS:test_op:0S:U", + "SCH:{schedule_id}:GROUPS:test_op:0S:E", + "SCH:{schedule_id}:GROUPS:test_op:0S:R", "SCH:{schedule_id}:OP_CTX:test_op", - "SCH:{schedule_id}:STEPS:test_op:0S:C:_BadImplementedStep", - "SCH:{schedule_id}:STEPS:test_op:0S:U:_BadImplementedStep", + "SCH:{schedule_id}:STEPS:test_op:0S:E:_BadImplementedStep", + "SCH:{schedule_id}:STEPS:test_op:0S:R:_BadImplementedStep", }, - id="create-does-not-set-the-key-to-return", + id="execute-does-not-set-the-key-to-return", ), pytest.param( - [ + Operation( SingleStepGroup(_BadImplementedStep), - ], + ), { - "trigger_undo": True, + "trigger_revert": True, "to_return": { "add_to_return": True, "keys": {"a_key": None}, @@ -1618,43 +1686,43 @@ async def undo( }, f"{OperationContextValueIsNoneError.__name__}: Values of context cannot be None: {{'a_key'", [ - CreateSequence(_BadImplementedStep), - UndoSequence(_BadImplementedStep), + ExecuteSequence(_BadImplementedStep), + RevertSequence(_BadImplementedStep), ], { "SCH:{schedule_id}", - "SCH:{schedule_id}:GROUPS:test_op:0S:C", - "SCH:{schedule_id}:GROUPS:test_op:0S:U", + "SCH:{schedule_id}:GROUPS:test_op:0S:E", + "SCH:{schedule_id}:GROUPS:test_op:0S:R", "SCH:{schedule_id}:OP_CTX:test_op", - "SCH:{schedule_id}:STEPS:test_op:0S:C:_BadImplementedStep", - "SCH:{schedule_id}:STEPS:test_op:0S:U:_BadImplementedStep", + "SCH:{schedule_id}:STEPS:test_op:0S:E:_BadImplementedStep", + "SCH:{schedule_id}:STEPS:test_op:0S:R:_BadImplementedStep", }, - id="undo-returns-key-set-to-None", + id="revert-returns-key-set-to-None", ), pytest.param( - [ + Operation( SingleStepGroup(_BadImplementedStep), - ], + ), { - "trigger_undo": True, + "trigger_revert": True, "to_return": { "add_to_return": False, }, }, f"{ProvidedOperationContextKeysAreMissingError.__name__}: Provided context {{}} is missing keys {{'a_key'", [ - CreateSequence(_BadImplementedStep), - UndoSequence(_BadImplementedStep), + 
ExecuteSequence(_BadImplementedStep), + RevertSequence(_BadImplementedStep), ], { "SCH:{schedule_id}", - "SCH:{schedule_id}:GROUPS:test_op:0S:C", - "SCH:{schedule_id}:GROUPS:test_op:0S:U", + "SCH:{schedule_id}:GROUPS:test_op:0S:E", + "SCH:{schedule_id}:GROUPS:test_op:0S:R", "SCH:{schedule_id}:OP_CTX:test_op", - "SCH:{schedule_id}:STEPS:test_op:0S:C:_BadImplementedStep", - "SCH:{schedule_id}:STEPS:test_op:0S:U:_BadImplementedStep", + "SCH:{schedule_id}:STEPS:test_op:0S:E:_BadImplementedStep", + "SCH:{schedule_id}:STEPS:test_op:0S:R:_BadImplementedStep", }, - id="undo-does-not-set-the-key-to-return", + id="revert-does-not-set-the-key-to-return", ), ], ) @@ -1679,9 +1747,9 @@ async def test_step_does_not_provide_declared_key_or_is_none( schedule_id = await start_operation(selected_app, operation_name, initial_context) assert TypeAdapter(ScheduleId).validate_python(schedule_id) - await _esnure_log_mesage(caplog, message=expected_error_str) + await _ensure_log_mesage(caplog, message=expected_error_str) await ensure_expected_order(steps_call_order, expected_order) formatted_expected_keys = {k.format(schedule_id=schedule_id) for k in expected_keys} - await _ensure_keys_in_store(selected_app, expected_keys=formatted_expected_keys) + await ensure_keys_in_store(selected_app, expected_keys=formatted_expected_keys) diff --git a/services/dynamic-scheduler/tests/unit/services/generic_scheduler/test__deferred_runner.py b/services/dynamic-scheduler/tests/unit/services/generic_scheduler/test__deferred_runner.py index af48fc6b1989..364e2585d96b 100644 --- a/services/dynamic-scheduler/tests/unit/services/generic_scheduler/test__deferred_runner.py +++ b/services/dynamic-scheduler/tests/unit/services/generic_scheduler/test__deferred_runner.py @@ -136,16 +136,16 @@ def clear(cls) -> None: class _StepFinisheWithSuccess(BaseStep): @classmethod - async def create( + async def execute( cls, app: FastAPI, required_context: RequiredOperationContext ) -> ProvidedOperationContext | None: _ = 
app _ = required_context - _StepResultStore.set_result(cls.__name__, "created") + _StepResultStore.set_result(cls.__name__, "executed") return {} @classmethod - async def undo( + async def revert( cls, app: FastAPI, required_context: RequiredOperationContext ) -> ProvidedOperationContext | None: _ = app @@ -156,17 +156,17 @@ async def undo( class _StepFinisheError(BaseStep): @classmethod - async def create( + async def execute( cls, app: FastAPI, required_context: RequiredOperationContext ) -> ProvidedOperationContext | None: _ = app _ = required_context - _StepResultStore.set_result(cls.__name__, "created") + _StepResultStore.set_result(cls.__name__, "executed") msg = "I failed creating" raise RuntimeError(msg) @classmethod - async def undo( + async def revert( cls, app: FastAPI, required_context: RequiredOperationContext ) -> ProvidedOperationContext | None: _ = app @@ -178,17 +178,17 @@ async def undo( class _StepLongRunningToCancel(BaseStep): @classmethod - async def create( + async def execute( cls, app: FastAPI, required_context: RequiredOperationContext ) -> ProvidedOperationContext | None: _ = app _ = required_context - _StepResultStore.set_result(cls.__name__, "created") + _StepResultStore.set_result(cls.__name__, "executed") await asyncio.sleep(10000) return {} @classmethod - async def undo( + async def revert( cls, app: FastAPI, required_context: RequiredOperationContext ) -> ProvidedOperationContext | None: _ = app @@ -211,42 +211,42 @@ def _get_step_group( operation = OperationRegistry._OPERATIONS[operation_name][ # noqa: SLF001 "operation" ] - operations_count = len(operation) + operations_count = len(operation.step_groups) assert group_index < operations_count - return operation[group_index] + return operation.step_groups[group_index] @pytest.mark.parametrize( "operation, expected_step_status, action, expected_steps_count", [ ( - [ + Operation( SingleStepGroup(_StepFinisheWithSuccess), - ], + ), StepStatus.SUCCESS, _Action.DO_NOTHING, 1, ), ( - [ + 
Operation( SingleStepGroup(_StepFinisheError), - ], + ), StepStatus.FAILED, _Action.DO_NOTHING, 1, ), ( - [ + Operation( SingleStepGroup(_StepLongRunningToCancel), - ], + ), StepStatus.CANCELLED, _Action.CANCEL, 1, ), ], ) -@pytest.mark.parametrize("is_creating", [True, False]) +@pytest.mark.parametrize("is_executing", [True, False]) async def test_something( mock_enqueue_event: AsyncMock, registed_operation: None, @@ -255,7 +255,7 @@ async def test_something( schedule_id: ScheduleId, operation_name: OperationName, expected_step_status: StepStatus, - is_creating: bool, + is_executing: bool, action: _Action, expected_steps_count: NonNegativeInt, ) -> None: @@ -263,7 +263,11 @@ async def test_something( # setup schedule_data_proxy = ScheduleDataStoreProxy(store=store, schedule_id=schedule_id) await schedule_data_proxy.create_or_update_multiple( - {"operation_name": operation_name, "group_index": 0, "is_creating": is_creating} + { + "operation_name": operation_name, + "group_index": 0, + "is_executing": is_executing, + } ) step_group = _get_step_group(operation_name, 0) @@ -282,7 +286,7 @@ async def test_something( operation_name=operation_name, step_group_name=step_group_name, step_name=step_name, - is_creating=is_creating, + is_executing=is_executing, ) ### tests starts here @@ -292,7 +296,7 @@ async def test_something( operation_name=operation_name, step_group_name=step_group_name, step_name=step_name, - is_creating=is_creating, + is_executing=is_executing, expected_steps_count=expected_steps_count, ) @@ -305,7 +309,7 @@ async def test_something( await _assert_finshed_with_status(step_proxy, expected_step_status) assert _StepResultStore.get_result(step.__name__) == ( - "created" if is_creating else "destroyed" + "executed" if is_executing else "destroyed" ) if expected_step_status == StepStatus.FAILED: diff --git a/services/dynamic-scheduler/tests/unit/services/generic_scheduler/test__event_after.py 
b/services/dynamic-scheduler/tests/unit/services/generic_scheduler/test__event_after.py new file mode 100644 index 000000000000..8fcd48a97307 --- /dev/null +++ b/services/dynamic-scheduler/tests/unit/services/generic_scheduler/test__event_after.py @@ -0,0 +1,179 @@ +# pylint:disable=redefined-outer-name +# pylint:disable=too-many-arguments +# pylint:disable=unused-argument + +from collections.abc import AsyncIterable, Callable +from unittest.mock import AsyncMock, call + +import pytest +from asgi_lifespan import LifespanManager +from faker import Faker +from fastapi import FastAPI +from pydantic import TypeAdapter +from pytest_mock import MockerFixture +from pytest_simcore.helpers.typing_env import EnvVarsDict +from settings_library.rabbit import RabbitSettings +from settings_library.redis import RedisSettings +from simcore_service_dynamic_scheduler.core.application import create_app +from simcore_service_dynamic_scheduler.services.generic_scheduler import ( + BaseStep, + Operation, + OperationName, + ProvidedOperationContext, + RequiredOperationContext, + SingleStepGroup, +) +from simcore_service_dynamic_scheduler.services.generic_scheduler._errors import ( + OperationNotFoundError, +) +from simcore_service_dynamic_scheduler.services.generic_scheduler._event_after import ( + AfterEventManager, +) +from simcore_service_dynamic_scheduler.services.generic_scheduler._models import ( + EventType, + OperationContext, + OperationToStart, + ScheduleId, +) +from simcore_service_dynamic_scheduler.services.generic_scheduler._store import ( + OperationEventsProxy, + Store, +) +from utils import ensure_keys_in_store + +pytest_simcore_core_services_selection = [ + "rabbit", + "redis", +] +pytest_simcore_ops_services_selection = [ + "redis-commander", +] + + +@pytest.fixture +def app_environment( + disable_postgres_lifespan: None, + disable_service_tracker_lifespan: None, + disable_notifier_lifespan: None, + disable_status_monitor_lifespan: None, + app_environment: EnvVarsDict, 
+ rabbit_service: RabbitSettings, + redis_service: RedisSettings, + remove_redis_data: None, +) -> EnvVarsDict: + return app_environment + + +@pytest.fixture +async def app(app_environment: EnvVarsDict) -> AsyncIterable[FastAPI]: + app = create_app() + async with LifespanManager(app): + yield app + + +@pytest.fixture +def after_event_manager(app: FastAPI) -> AfterEventManager: + return AfterEventManager.get_from_app_state(app) + + +@pytest.fixture +def store(app: FastAPI) -> Store: + return Store.get_from_app_state(app) + + +@pytest.fixture +def schedule_id(faker: Faker) -> ScheduleId: + return TypeAdapter(ScheduleId).validate_python(faker.uuid4()) + + +@pytest.fixture +def mock_start_operation(mocker: MockerFixture) -> AsyncMock: + return mocker.patch( + "simcore_service_dynamic_scheduler.services.generic_scheduler._event_after.start_operation", + autospec=True, + ) + + +@pytest.mark.parametrize("event_type", EventType) +async def test_operation_is_missing( + after_event_manager: AfterEventManager, + schedule_id: ScheduleId, + event_type: EventType, +): + await ensure_keys_in_store(after_event_manager.app, expected_keys=set()) + + with pytest.raises(OperationNotFoundError): + await after_event_manager.register_to_start_after( + schedule_id, + event_type, + to_start=OperationToStart( + operation_name="missing_operation", initial_context={} + ), + ) + await ensure_keys_in_store(after_event_manager.app, expected_keys=set()) + + +class _BS(BaseStep): + @classmethod + async def execute( + cls, app: FastAPI, required_context: RequiredOperationContext + ) -> ProvidedOperationContext | None: + _ = app + _ = required_context + + +@pytest.mark.parametrize( + "operation", + [ + Operation( + SingleStepGroup(_BS), + ), + ], +) +@pytest.mark.parametrize("event_type", EventType) +@pytest.mark.parametrize( + "initial_context", + [ + {"key": "value", "dict": {"some": "thing"}, "list": [1, 2, 3]}, + ], +) +async def test_something( + after_event_manager: AfterEventManager, + store: 
Store, + schedule_id: ScheduleId, + event_type: EventType, + register_operation: Callable[[OperationName, Operation], None], + operation: Operation, + mock_start_operation: AsyncMock, + initial_context: OperationContext, +): + operation_name = "op1" + + register_operation(operation_name, operation) + await ensure_keys_in_store(after_event_manager.app, expected_keys=set()) + + await after_event_manager.register_to_start_after( + schedule_id, + event_type, + to_start=OperationToStart( + operation_name=operation_name, + initial_context=initial_context, + ), + ) + await ensure_keys_in_store( + after_event_manager.app, + expected_keys={f"SCH:{schedule_id}:EVENTS:{event_type}"}, + ) + + # ensure is still scheduled even when the DB entry is gone + events_proxy = OperationEventsProxy(store, schedule_id, event_type) + await events_proxy.delete() + await ensure_keys_in_store(after_event_manager.app, expected_keys=set()) + + await after_event_manager.safe_on_event_type( + event_type, schedule_id, operation_name, initial_context + ) + + assert mock_start_operation.call_args_list == [ + call(after_event_manager.app, operation_name, initial_context) + ] diff --git a/services/dynamic-scheduler/tests/unit/services/generic_scheduler/test__event_scheduler.py b/services/dynamic-scheduler/tests/unit/services/generic_scheduler/test__event_scheduler.py index 33aa2d906204..3851e1877f05 100644 --- a/services/dynamic-scheduler/tests/unit/services/generic_scheduler/test__event_scheduler.py +++ b/services/dynamic-scheduler/tests/unit/services/generic_scheduler/test__event_scheduler.py @@ -3,6 +3,7 @@ import asyncio from collections.abc import Awaitable, Callable +from typing import Any, Final from unittest.mock import Mock, call from uuid import uuid4 @@ -12,10 +13,15 @@ from pytest_mock import MockerFixture from pytest_simcore.helpers.typing_env import EnvVarsDict from settings_library.rabbit import RabbitSettings -from 
simcore_service_dynamic_scheduler.services.generic_scheduler._event_scheduler import ( - EventScheduler, +from simcore_service_dynamic_scheduler.services.generic_scheduler._event import ( + enqueue_execute_completed_event, + enqueue_revert_completed_event, + enqueue_schedule_event, ) from simcore_service_dynamic_scheduler.services.generic_scheduler._models import ( + EventType, + OperationContext, + OperationName, ScheduleId, ) from tenacity import ( @@ -30,6 +36,13 @@ ] +_RETRY_PARAMS: Final[dict[str, Any]] = { + "wait": wait_fixed(0.1), + "stop": stop_after_delay(5), + "retry": retry_if_exception_type(AssertionError), +} + + @pytest.fixture def disable_other_generic_scheduler_modules(mocker: MockerFixture) -> None: # these also use redis @@ -38,6 +51,9 @@ def disable_other_generic_scheduler_modules(mocker: MockerFixture) -> None: ) mocker.patch(f"{generic_scheduler_module}._lifespan.Core", autospec=True) mocker.patch(f"{generic_scheduler_module}._lifespan.Store", autospec=True) + mocker.patch( + f"{generic_scheduler_module}._lifespan.AfterEventManager", autospec=True + ) @pytest.fixture @@ -55,11 +71,6 @@ def app_environment( return app_environment -@pytest.fixture -def event_scheduler(app: FastAPI) -> EventScheduler: - return app.state.generic_scheduler_event_scheduler - - @pytest.fixture def get_mock_safe_on_schedule_event( mocker: MockerFixture, @@ -75,7 +86,7 @@ async def _mock(schedule_id: ScheduleId) -> None: core_mock = Mock() core_mock.safe_on_schedule_event = _mock mocker.patch( - "simcore_service_dynamic_scheduler.services.generic_scheduler._event_scheduler.Core.get_from_app_state", + "simcore_service_dynamic_scheduler.services.generic_scheduler._event_queues.get_core", return_value=core_mock, ) return another_mock @@ -83,25 +94,20 @@ async def _mock(schedule_id: ScheduleId) -> None: return _ -async def _side_effect_nothing(schedule_id: ScheduleId) -> None: - pass - - -async def _side_effect_raise_error(schedule_id: ScheduleId) -> None: - msg = "always 
failing here as requesed" - raise RuntimeError(msg) - - -async def test_event_scheduling( +async def test_enqueue_schedule_event( get_mock_safe_on_schedule_event: Callable[ [Callable[[ScheduleId], Awaitable[None]]], Mock ], - event_scheduler: EventScheduler, + app: FastAPI, ) -> None: + + async def _side_effect_nothing(schedule_id: ScheduleId) -> None: + pass + mock = get_mock_safe_on_schedule_event(_side_effect_nothing) schedule_id = TypeAdapter(ScheduleId).validate_python(f"{uuid4()}") - await event_scheduler.enqueue_schedule_event(schedule_id) + await enqueue_schedule_event(app, schedule_id) async for attempt in AsyncRetrying( wait=wait_fixed(0.1), @@ -109,22 +115,27 @@ async def test_event_scheduling( retry=retry_if_exception_type(AssertionError), ): with attempt: - await asyncio.sleep(0) # wait for envet to trigger + await asyncio.sleep(0) # wait for event to trigger assert mock.call_args_list == [call(schedule_id)] -async def test_event_scheduling_raises_error( +async def test_enqueue_schedule_event_raises_error( get_mock_safe_on_schedule_event: Callable[ [Callable[[ScheduleId], Awaitable[None]]], Mock ], - event_scheduler: EventScheduler, + app: FastAPI, caplog: pytest.LogCaptureFixture, ) -> None: caplog.clear() + + async def _side_effect_raise_error(schedule_id: ScheduleId) -> None: + msg = "always failing here as requested" + raise RuntimeError(msg) + get_mock_safe_on_schedule_event(_side_effect_raise_error) schedule_id = TypeAdapter(ScheduleId).validate_python(f"{uuid4()}") - await event_scheduler.enqueue_schedule_event(schedule_id) + await enqueue_schedule_event(app, schedule_id) async for attempt in AsyncRetrying( wait=wait_fixed(0.1), @@ -132,5 +143,130 @@ async def test_event_scheduling_raises_error( retry=retry_if_exception_type(AssertionError), ): with attempt: - await asyncio.sleep(0) # wait for envet to trigger + await asyncio.sleep(0) # wait for event to trigger + assert "Unexpected error. 
Aborting message retry" in caplog.text + + +@pytest.fixture +def get_mock_safe_on_event_type( + mocker: MockerFixture, +) -> Callable[ + [ + Callable[ + [EventType, ScheduleId, OperationName, OperationContext], Awaitable[None] + ] + ], + Mock, +]: + + def _( + side_effect: Callable[ + [EventType, ScheduleId, OperationName, OperationContext], Awaitable[None] + ], + ) -> Mock: + another_mock = Mock() + + async def _mock( + event_type: EventType, + schedule_id: ScheduleId, + operation_name: OperationName, + initial_context: OperationContext, + ) -> None: + await side_effect(event_type, schedule_id, operation_name, initial_context) + another_mock(event_type, schedule_id, operation_name, initial_context) + + core_mock = Mock() + core_mock.safe_on_event_type = _mock + mocker.patch( + "simcore_service_dynamic_scheduler.services.generic_scheduler._event_queues.get_after_event_manager", + return_value=core_mock, + ) + return another_mock + + return _ + + +@pytest.mark.parametrize("expected_event_type", EventType) +async def test_enqueue_event_type( + get_mock_safe_on_event_type: Callable[ + [ + Callable[ + [EventType, ScheduleId, OperationName, OperationContext], + Awaitable[None], + ] + ], + Mock, + ], + app: FastAPI, + expected_event_type: EventType, +): + + async def _side_effect_nothing( + event_type: EventType, + schedule_id: ScheduleId, + operation_name: OperationName, + initial_context: OperationContext, + ) -> None: + pass + + mock = get_mock_safe_on_event_type(_side_effect_nothing) + + schedule_id = TypeAdapter(ScheduleId).validate_python(f"{uuid4()}") + match expected_event_type: + case EventType.ON_EXECUTEDD_COMPLETED: + await enqueue_execute_completed_event(app, schedule_id, "op1", {}) + case EventType.ON_REVERT_COMPLETED: + await enqueue_revert_completed_event(app, schedule_id, "op1", {}) + case _: + pytest.fail("unsupported case") + + async for attempt in AsyncRetrying(**_RETRY_PARAMS): + with attempt: + await asyncio.sleep(0) # wait for event to trigger + 
assert mock.call_args_list == [ + call(expected_event_type, schedule_id, "op1", {}) + ] + + +@pytest.mark.parametrize("expected_event_type", EventType) +async def test_enqueue_event_type_raises_error( + get_mock_safe_on_event_type: Callable[ + [ + Callable[ + [EventType, ScheduleId, OperationName, OperationContext], + Awaitable[None], + ] + ], + Mock, + ], + app: FastAPI, + caplog: pytest.LogCaptureFixture, + expected_event_type: EventType, +) -> None: + caplog.clear() + + async def _side_effect_raise_error( + event_type: EventType, + schedule_id: ScheduleId, + operation_name: OperationName, + initial_context: OperationContext, + ) -> None: + msg = "always failing here as requested" + raise RuntimeError(msg) + + get_mock_safe_on_event_type(_side_effect_raise_error) + + schedule_id = TypeAdapter(ScheduleId).validate_python(f"{uuid4()}") + + match expected_event_type: + case EventType.ON_EXECUTEDD_COMPLETED: + await enqueue_execute_completed_event(app, schedule_id, "op1", {}) + case EventType.ON_REVERT_COMPLETED: + await enqueue_revert_completed_event(app, schedule_id, "op1", {}) + case _: + pytest.fail("unsupported case") + + async for attempt in AsyncRetrying(**_RETRY_PARAMS): + with attempt: + await asyncio.sleep(0) # wait for event to trigger assert "Unexpected error. 
Aborting message retry" in caplog.text diff --git a/services/dynamic-scheduler/tests/unit/services/generic_scheduler/test__operation.py b/services/dynamic-scheduler/tests/unit/services/generic_scheduler/test__operation.py index e6b199c2d15c..35ddccfd80c2 100644 --- a/services/dynamic-scheduler/tests/unit/services/generic_scheduler/test__operation.py +++ b/services/dynamic-scheduler/tests/unit/services/generic_scheduler/test__operation.py @@ -23,7 +23,7 @@ class BaseBS(BaseStep): @classmethod - async def create( + async def execute( cls, app: FastAPI, required_context: RequiredOperationContext ) -> ProvidedOperationContext | None: _ = app @@ -47,63 +47,63 @@ def wait_for_manual_intervention(cls) -> bool: class WrongBS1C(BaseBS): @classmethod - def get_create_provides_context_keys(cls) -> set[str]: - return {"create_key"} + def get_execute_provides_context_keys(cls) -> set[str]: + return {"execute_key"} class WrongBS2C(BaseBS): @classmethod - def get_create_provides_context_keys(cls) -> set[str]: - return {"create_key"} + def get_execute_provides_context_keys(cls) -> set[str]: + return {"execute_key"} class WrongBS1R(BaseBS): @classmethod - def get_undo_provides_context_keys(cls) -> set[str]: - return {"undo_key"} + def get_revert_provides_context_keys(cls) -> set[str]: + return {"revert_key"} class WrongBS2R(BaseBS): @classmethod - def get_undo_provides_context_keys(cls) -> set[str]: - return {"undo_key"} + def get_revert_provides_context_keys(cls) -> set[str]: + return {"revert_key"} @pytest.mark.parametrize( "operation", [ - [ + Operation( SingleStepGroup(BS1), ParallelStepGroup(BS2, BS3), - ], - [ + ), + Operation( SingleStepGroup(BS1), - ], - [ + ), + Operation( SingleStepGroup(BS1), SingleStepGroup(BS2), - ], - [ + ), + Operation( SingleStepGroup(WrongBS1C), SingleStepGroup(WrongBS1R), - ], - [ + ), + Operation( ParallelStepGroup(WrongBS2C, WrongBS2R), - ], - [ + ), + Operation( SingleStepGroup(BS2), ParallelStepGroup(BS1, BS3, repeat_steps=True), - ], - [ + ), 
+ Operation( ParallelStepGroup(BS1, BS3), SingleStepGroup(BS2, repeat_steps=True), - ], - [ + ), + Operation( SingleStepGroup(BS1, repeat_steps=True), - ], - [ + ), + Operation( ParallelStepGroup(BS1, BS3, repeat_steps=True), - ], + ), ], ) def test_validate_operation_passes(operation: Operation): @@ -113,57 +113,57 @@ def test_validate_operation_passes(operation: Operation): @pytest.mark.parametrize( "operation, match", [ - ([], "List should have at least 1 item after validation"), + (Operation(), "Operation should have at least 1 item"), ( - [ + Operation( SingleStepGroup(BS1, repeat_steps=True), SingleStepGroup(BS2), - ], + ), "Only the last step group can have repeat_steps=True", ), ( - [ + Operation( SingleStepGroup(BS1), SingleStepGroup(BS1), - ], + ), f"step_name='{BS1.__name__}' is already used in this operation", ), ( - [ + Operation( ParallelStepGroup(BS2, BS2), - ], + ), f"step_name='{BS2.__name__}' is already used in this operation", ), ( - [ + Operation( ParallelStepGroup(BS1), - ], + ), f"{ParallelStepGroup.__name__} needs at least 2 steps", ), ( - [SingleStepGroup(WrongBS1C), SingleStepGroup(WrongBS2C)], - f"already provided key='create_key' in {BaseStep.get_create_provides_context_keys.__name__}", + Operation(SingleStepGroup(WrongBS1C), SingleStepGroup(WrongBS2C)), + f"already provided key='execute_key' in {BaseStep.get_execute_provides_context_keys.__name__}", ), ( - [ParallelStepGroup(WrongBS1C, WrongBS2C)], - f"already provided key='create_key' in {BaseStep.get_create_provides_context_keys.__name__}", + Operation(ParallelStepGroup(WrongBS1C, WrongBS2C)), + f"already provided key='execute_key' in {BaseStep.get_execute_provides_context_keys.__name__}", ), ( - [SingleStepGroup(WrongBS1R), SingleStepGroup(WrongBS2R)], - f"already provided key='undo_key' in {BaseStep.get_undo_provides_context_keys.__name__}", + Operation(SingleStepGroup(WrongBS1R), SingleStepGroup(WrongBS2R)), + f"already provided key='revert_key' in 
{BaseStep.get_revert_provides_context_keys.__name__}", ), ( - [ParallelStepGroup(WrongBS1R, WrongBS2R)], - f"already provided key='undo_key' in {BaseStep.get_undo_provides_context_keys.__name__}", + Operation(ParallelStepGroup(WrongBS1R, WrongBS2R)), + f"already provided key='revert_key' in {BaseStep.get_revert_provides_context_keys.__name__}", ), ( - [SingleStepGroup(MI1, repeat_steps=True)], + Operation(SingleStepGroup(MI1, repeat_steps=True)), "cannot have steps that require manual intervention", ), ( - [ + Operation( ParallelStepGroup(MI1, BS1, BS2, repeat_steps=True), - ], + ), "cannot have steps that require manual intervention", ), ], @@ -174,7 +174,7 @@ def test_validate_operations_fails(operation: Operation, match: str): def test_operation_registry_workflow(): - operation: Operation = [SingleStepGroup(BS1)] + operation = Operation(SingleStepGroup(BS1)) OperationRegistry.register("op1", operation) assert len(OperationRegistry._OPERATIONS) == 1 # noqa: SLF001 @@ -187,7 +187,7 @@ def test_operation_registry_workflow(): def test_operation_registry_raises_errors(): - operation: Operation = [SingleStepGroup(BS1)] + operation = Operation(SingleStepGroup(BS1)) OperationRegistry.register("op1", operation) with pytest.raises(OperationAlreadyRegisteredError): diff --git a/services/dynamic-scheduler/tests/unit/services/generic_scheduler/test__store.py b/services/dynamic-scheduler/tests/unit/services/generic_scheduler/test__store.py index fb0c08bf1b4a..c22ced8f5f5d 100644 --- a/services/dynamic-scheduler/tests/unit/services/generic_scheduler/test__store.py +++ b/services/dynamic-scheduler/tests/unit/services/generic_scheduler/test__store.py @@ -17,15 +17,27 @@ StepStatus, ) from simcore_service_dynamic_scheduler.services.generic_scheduler._store import ( + EventType, + OperationContext, OperationContextProxy, + OperationEventsProxy, OperationRemovalProxy, ScheduleDataStoreProxy, StepGroupProxy, StepStoreProxy, Store, + _get_group_hash_key, + 
_get_operation_context_hash_key, + _get_scheduler_data_hash_key, + _get_step_hash_key, ) +@pytest.fixture +def schedule_id(faker: Faker) -> ScheduleId: + return faker.uuid4() + + @pytest.fixture async def store(use_in_memory_redis: RedisSettings) -> AsyncIterable[Store]: store = Store(use_in_memory_redis) @@ -47,17 +59,47 @@ async def _assert_keys_in_hash( assert keys == expected_keys +def test_ensure_keys_have_the_same_prefix(schedule_id: ScheduleId): + key_prefix = f"SCH:{schedule_id}" + + assert key_prefix == _get_scheduler_data_hash_key(schedule_id=schedule_id) + + keys: list[str] = [ + _get_scheduler_data_hash_key(schedule_id=schedule_id), + _get_step_hash_key( + schedule_id=schedule_id, + operation_name="op1", + group_name="sg1", + step_name="step1", + is_executing=True, + ), + _get_group_hash_key( + schedule_id=schedule_id, + operation_name="op1", + group_name="sg1", + is_executing=True, + ), + _get_operation_context_hash_key( + schedule_id=schedule_id, + operation_name="op1", + ), + ] + + for key in keys: + assert key.startswith(key_prefix) + + async def test_store_workflow(store: Store): # save single value await store.set_key_in_hash("hash1", "key1", "value1") await _assert_keys(store, {"hash1"}) await _assert_keys_in_hash(store, "hash1", {"key1"}) - assert await store.get_key_from_hash("hash1", "key1") == ("value1",) - assert await store.get_key_from_hash("hash1", "key1", "key1") == ( + assert await store.get_keys_from_hash("hash1", "key1") == ("value1",) + assert await store.get_keys_from_hash("hash1", "key1", "key1") == ( "value1", "value1", ) - assert await store.get_key_from_hash("hash1", "missing1", "missing2") == ( + assert await store.get_keys_from_hash("hash1", "missing1", "missing2") == ( None, None, ) @@ -66,13 +108,13 @@ async def test_store_workflow(store: Store): await store.delete_key_from_hash("hash1", "key1") await _assert_keys(store, set()) await _assert_keys_in_hash(store, "hash1", set()) - assert await store.get_key_from_hash("hash1", 
"key1") == (None,) + assert await store.get_keys_from_hash("hash1", "key1") == (None,) # save multiple values await store.set_keys_in_hash("hash2", {"key1": "value1", "key2": 2, "key3": True}) await _assert_keys(store, {"hash2"}) await _assert_keys_in_hash(store, "hash2", {"key1", "key2", "key3"}) - assert await store.get_key_from_hash("hash2", "key1", "key2", "key3") == ( + assert await store.get_keys_from_hash("hash2", "key1", "key2", "key3") == ( "value1", 2, True, @@ -84,7 +126,7 @@ async def test_store_workflow(store: Store): ) await _assert_keys(store, {"hash2"}) await _assert_keys_in_hash(store, "hash2", {"key2"}) - assert await store.get_key_from_hash("hash2", "key1", "key2", "key3") == ( + assert await store.get_keys_from_hash("hash2", "key1", "key2", "key3") == ( None, 2, None, @@ -100,7 +142,7 @@ async def test_store_workflow(store: Store): await store.delete("hash2") await _assert_keys(store, set()) await _assert_keys_in_hash(store, "hash2", set()) - assert await store.get_key_from_hash("hash2", "key1", "key2", "key3") == ( + assert await store.get_keys_from_hash("hash2", "key1", "key2", "key3") == ( None, None, None, @@ -122,12 +164,7 @@ async def test_store_workflow(store: Store): async def test_store_supporse_multiple_python_base_types(store: Store, value: Any): # values are stored and recovered in their original type await store.set_key_in_hash("hash1", "key1", value) - assert (await store.get_key_from_hash("hash1", "key1")) == (value,) - - -@pytest.fixture -def schedule_id(faker: Faker) -> ScheduleId: - return faker.uuid4() + assert (await store.get_keys_from_hash("hash1", "key1")) == (value,) async def test_schedule_data_store_proxy(store: Store, schedule_id: ScheduleId): @@ -137,19 +174,19 @@ async def test_schedule_data_store_proxy(store: Store, schedule_id: ScheduleId): # set await proxy.create_or_update("operation_name", "op1") await proxy.create_or_update("group_index", 1) - await proxy.create_or_update("is_creating", value=True) + await 
proxy.create_or_update("is_executing", value=True) await _assert_keys(store, {hash_key}) await _assert_keys_in_hash( - store, hash_key, {"operation_name", "group_index", "is_creating"} + store, hash_key, {"operation_name", "group_index", "is_executing"} ) # get assert await proxy.read("operation_name") == "op1" assert await proxy.read("group_index") == 1 - assert await proxy.read("is_creating") is True + assert await proxy.read("is_executing") is True # remove - await proxy.delete_keys("operation_name", "is_creating", "group_index") + await proxy.delete_keys("operation_name", "is_executing", "group_index") await _assert_keys(store, set()) await _assert_keys_in_hash(store, hash_key, set()) @@ -157,7 +194,7 @@ async def test_schedule_data_store_proxy(store: Store, schedule_id: ScheduleId): await proxy.create_or_update_multiple( { "group_index": 2, - "is_creating": False, + "is_executing": False, "operation_error_type": OperationErrorType.STEP_ISSUE, "operation_error_message": "mock_error_message", } @@ -168,7 +205,7 @@ async def test_schedule_data_store_proxy(store: Store, schedule_id: ScheduleId): hash_key, { "group_index", - "is_creating", + "is_executing", "operation_error_type", "operation_error_message", }, @@ -177,7 +214,7 @@ async def test_schedule_data_store_proxy(store: Store, schedule_id: ScheduleId): # remove all keys an even missing ones await proxy.delete_keys( "operation_name", - "is_creating", + "is_executing", "group_index", "operation_error_type", "operation_error_message", @@ -186,10 +223,10 @@ async def test_schedule_data_store_proxy(store: Store, schedule_id: ScheduleId): await _assert_keys_in_hash(store, hash_key, set()) -@pytest.mark.parametrize("is_creating", [True, False]) +@pytest.mark.parametrize("is_executing", [True, False]) @pytest.mark.parametrize("use_remove", [True, False]) async def test_steps_store_proxy( - store: Store, schedule_id: ScheduleId, is_creating: bool, use_remove: bool + store: Store, schedule_id: ScheduleId, 
is_executing: bool, use_remove: bool ): proxy = StepStoreProxy( store=store, @@ -197,10 +234,10 @@ async def test_steps_store_proxy( operation_name="op1", step_group_name="sg1", step_name="step", - is_creating=is_creating, + is_executing=is_executing, ) - is_creating_str = "C" if is_creating else "U" - hash_key = f"SCH:{schedule_id}:STEPS:op1:sg1:{is_creating_str}:step" + is_executing_str = "E" if is_executing else "R" + hash_key = f"SCH:{schedule_id}:STEPS:op1:sg1:{is_executing_str}:step" # set await proxy.create_or_update("status", StepStatus.RUNNING) @@ -253,22 +290,22 @@ async def test_steps_store_proxy( await _assert_keys_in_hash(store, hash_key, set()) -@pytest.mark.parametrize("is_creating", [True, False]) +@pytest.mark.parametrize("is_executing", [True, False]) async def test_step_group_proxy( store: Store, schedule_id: ScheduleId, - is_creating: bool, + is_executing: bool, ): step_group_proxy = StepGroupProxy( store=store, schedule_id=schedule_id, operation_name="op1", step_group_name="sg1", - is_creating=is_creating, + is_executing=is_executing, ) async def _get_steps_count() -> int | None: - (response,) = await store.get_key_from_hash( + (response,) = await store.get_keys_from_hash( step_group_proxy._get_hash_key(), "done_steps" # noqa: SLF001 ) return response @@ -326,7 +363,7 @@ async def test_operation_removal_proxy(store: Store, schedule_id: ScheduleId): await proxy.create_or_update_multiple( { "group_index": 1, - "is_creating": True, + "is_executing": True, "operation_error_type": OperationErrorType.STEP_ISSUE, "operation_error_message": "mock_error_message", "operation_name": "op1", @@ -339,7 +376,7 @@ async def test_operation_removal_proxy(store: Store, schedule_id: ScheduleId): operation_name="op1", step_group_name="sg1", step_name="step", - is_creating=True, + is_executing=True, ) await proxy.create_or_update_multiple( { @@ -356,7 +393,7 @@ async def test_operation_removal_proxy(store: Store, schedule_id: ScheduleId): schedule_id=schedule_id, 
operation_name="op1", step_group_name="sg1", - is_creating=True, + is_executing=True, ) await proxy.increment_and_get_done_steps_count() @@ -369,9 +406,9 @@ async def test_operation_removal_proxy(store: Store, schedule_id: ScheduleId): store, { f"SCH:{schedule_id}", - f"SCH:{schedule_id}:GROUPS:op1:sg1:C", + f"SCH:{schedule_id}:GROUPS:op1:sg1:E", f"SCH:{schedule_id}:OP_CTX:op1", - f"SCH:{schedule_id}:STEPS:op1:sg1:C:step", + f"SCH:{schedule_id}:STEPS:op1:sg1:E:step", }, ) @@ -381,3 +418,32 @@ async def test_operation_removal_proxy(store: Store, schedule_id: ScheduleId): # try to call when empty as well await proxy.delete() + + +async def test_operation_events_proxy(store: Store, schedule_id: ScheduleId): + operation_name = "op1" + initial_context: OperationContext = {"k1": "v1", "k2": 2} + + event_type = EventType.ON_EXECUTEDD_COMPLETED + proxy = OperationEventsProxy(store, schedule_id, event_type) + hash_key = f"SCH:{schedule_id}:EVENTS:{event_type}" + + assert await proxy.exists() is False + await _assert_keys(store, set()) + await _assert_keys_in_hash(store, hash_key, set()) + + await proxy.create_or_update_multiple( + {"operation_name": operation_name, "initial_context": initial_context} + ) + assert await proxy.exists() is True + + await _assert_keys(store, {hash_key}) + await _assert_keys_in_hash(store, hash_key, {"operation_name", "initial_context"}) + + assert await proxy.read("operation_name") == operation_name + assert await proxy.read("initial_context") == initial_context + + await proxy.delete() + assert await proxy.exists() is False + await _assert_keys(store, set()) + await _assert_keys_in_hash(store, hash_key, set()) diff --git a/services/dynamic-scheduler/tests/unit/services/generic_scheduler/test_generic_scheduler.py b/services/dynamic-scheduler/tests/unit/services/generic_scheduler/test_generic_scheduler.py index 44084f36a912..b2605bfe6c42 100644 --- a/services/dynamic-scheduler/tests/unit/services/generic_scheduler/test_generic_scheduler.py +++ 
b/services/dynamic-scheduler/tests/unit/services/generic_scheduler/test_generic_scheduler.py @@ -1,4 +1,5 @@ # pylint:disable=redefined-outer-name +# pylint:disable=too-many-arguments # pylint:disable=unused-argument import asyncio @@ -30,17 +31,22 @@ BaseStep, Operation, OperationName, + OperationToStart, ParallelStepGroup, ProvidedOperationContext, RequiredOperationContext, SingleStepGroup, + register_to_start_after_on_executed_completed, + register_to_start_after_on_reverted_completed, start_operation, ) from utils import ( BaseExpectedStepOrder, - CreateRandom, - CreateSequence, + ExecuteRandom, + ExecuteSequence, + RevertSequence, ensure_expected_order, + ensure_keys_in_store, ) pytest_simcore_core_services_selection = [ @@ -55,6 +61,7 @@ _OPERATION_MIN_RUNTIME: Final[timedelta] = timedelta(seconds=2) _OPERATION_STEPS_COUNT: Final[NonNegativeInt] = 10 _STEP_SLEEP_DURATION: Final[timedelta] = _OPERATION_MIN_RUNTIME / _OPERATION_STEPS_COUNT +_RETRY_ATTEMPTS: Final[NonNegativeInt] = 10 def _get_random_interruption_duration() -> NonNegativeFloat: @@ -198,87 +205,136 @@ def process_manager( process_manager.kill() +@pytest.fixture +def operation_name() -> OperationName: + return "test-op" + + class _InterruptionType(str, Enum): REDIS = "redis" RABBIT = "rabbit" DYNAMIC_SCHEDULER = "dynamic-scheduler" -_CREATED: Final[str] = "create" -_UNDONE: Final[str] = "undo" +_EXECUTED: Final[str] = "executed" +_REVERTED: Final[str] = "reverted" _CTX_VALUE: Final[str] = "a_value" +_STEPS_CALL_ORDER: list[tuple[str, str]] = [] + + +@pytest.fixture +def steps_call_order() -> Iterable[list[tuple[str, str]]]: + _STEPS_CALL_ORDER.clear() + yield _STEPS_CALL_ORDER + _STEPS_CALL_ORDER.clear() + + class _BS(BaseStep): @classmethod - async def get_create_retries(cls, context: DeferredContext) -> int: + async def get_execute_retries(cls, context: DeferredContext) -> int: _ = context - return 10 + return _RETRY_ATTEMPTS @classmethod - async def get_create_wait_between_attempts( + async def 
get_execute_wait_between_attempts( cls, context: DeferredContext ) -> timedelta: _ = context return _STEP_SLEEP_DURATION @classmethod - async def create( + async def execute( cls, app: FastAPI, required_context: RequiredOperationContext ) -> ProvidedOperationContext | None: - multiprocessing_queue: _AsyncMultiprocessingQueue = ( - app.state.multiprocessing_queue - ) - await multiprocessing_queue.put((cls.__name__, _CREATED)) + if hasattr(app.state, "multiprocessing_queue"): + multiprocessing_queue: _AsyncMultiprocessingQueue = ( + app.state.multiprocessing_queue + ) + await multiprocessing_queue.put((cls.__name__, _EXECUTED)) + _STEPS_CALL_ORDER.append((cls.__name__, _EXECUTED)) return { **required_context, - **{k: _CTX_VALUE for k in cls.get_create_provides_context_keys()}, + **{k: _CTX_VALUE for k in cls.get_execute_provides_context_keys()}, } @classmethod - async def undo( + async def revert( cls, app: FastAPI, required_context: RequiredOperationContext ) -> ProvidedOperationContext | None: - multiprocessing_queue: _AsyncMultiprocessingQueue = ( - app.state.multiprocessing_queue - ) - await multiprocessing_queue.put((cls.__name__, _UNDONE)) + if hasattr(app.state, "multiprocessing_queue"): + multiprocessing_queue: _AsyncMultiprocessingQueue = ( + app.state.multiprocessing_queue + ) + await multiprocessing_queue.put((cls.__name__, _REVERTED)) + _STEPS_CALL_ORDER.append((cls.__name__, _REVERTED)) return { **required_context, - **{k: _CTX_VALUE for k in cls.get_undo_provides_context_keys()}, + **{k: _CTX_VALUE for k in cls.get_revert_provides_context_keys()}, } -class _BS1(_BS): ... +class _S1(_BS): ... + +class _S2(_BS): ... -class _BS2(_BS): ... +class _S3(_BS): ... -class _BS3(_BS): ... 
+ +class _ShortSleep(_BS): + @classmethod + async def execute( + cls, app: FastAPI, required_context: RequiredOperationContext + ) -> ProvidedOperationContext | None: + result = await super().execute(app, required_context) + # if sleeps more than this it will timeout + max_allowed_sleep = _STEP_SLEEP_DURATION.total_seconds() * 0.8 + await asyncio.sleep(max_allowed_sleep) + return result + + +class _ShortSleepThenRevert(_BS): + @classmethod + async def get_execute_retries(cls, context: DeferredContext) -> int: + _ = context + return 0 + + @classmethod + async def execute( + cls, app: FastAPI, required_context: RequiredOperationContext + ) -> ProvidedOperationContext | None: + await super().execute(app, required_context) + # if sleeps more than this it will timeout + max_allowed_sleep = _STEP_SLEEP_DURATION.total_seconds() * 0.8 + await asyncio.sleep(max_allowed_sleep) + msg = "Simulated error" + raise RuntimeError(msg) @pytest.mark.parametrize( "operation, expected_order", [ pytest.param( + Operation( + SingleStepGroup(_S1), + ), [ - SingleStepGroup(_BS1), - ], - [ - CreateSequence(_BS1), + ExecuteSequence(_S1), ], id="s1", ), pytest.param( + Operation( + ParallelStepGroup(_S1, _S2, _S3), + ), [ - ParallelStepGroup(_BS1, _BS2, _BS3), - ], - [ - CreateRandom(_BS1, _BS2, _BS3), + ExecuteRandom(_S1, _S2, _S3), ], id="p3", ), @@ -296,8 +352,8 @@ async def test_can_recover_from_interruption( queue_poller: _QueuePoller, process_manager: _ProcessManager, expected_order: list[BaseExpectedStepOrder], + operation_name: OperationName, ) -> None: - operation_name: OperationName = "test_op" register_operation(operation_name, operation) process_manager.start(operation_name) @@ -331,8 +387,88 @@ async def test_can_recover_from_interruption( await ensure_expected_order(queue_poller.events, expected_order) -# TODO: add a test that replaces a running operation with a new one! 
make sure nothing bad happens and that the old -# running operation manages to reach the end +@pytest.mark.parametrize("register_at_creation", [True, False]) +@pytest.mark.parametrize( + "is_executing, initial_op, after_op, expected_order", + [ + pytest.param( + True, + Operation(SingleStepGroup(_ShortSleep)), + Operation(SingleStepGroup(_S2)), + [ + ExecuteSequence(_ShortSleep), + ExecuteSequence(_S2), + ], + ), + pytest.param( + False, + Operation(SingleStepGroup(_ShortSleepThenRevert)), + Operation(SingleStepGroup(_S2)), + [ + ExecuteSequence(_ShortSleepThenRevert), + RevertSequence(_ShortSleepThenRevert), + ExecuteSequence(_S2), + ], + ), + ], +) +async def test_run_operation_after( + app: FastAPI, + preserve_caplog_for_async_logging: None, + steps_call_order: list[tuple[str, str]], + register_operation: Callable[[OperationName, Operation], None], + register_at_creation: bool, + is_executing: bool, + initial_op: Operation, + after_op: Operation, + expected_order: list[BaseExpectedStepOrder], +): + initial_op_name: OperationName = "initial" + after_op_name: OperationName = "after" + + register_operation(initial_op_name, initial_op) + register_operation(after_op_name, after_op) + + if is_executing: + on_execute_completed = ( + OperationToStart(operation_name=after_op_name, initial_context={}) + if register_at_creation + else None + ) + on_revert_completed = None + else: + on_execute_completed = None + on_revert_completed = ( + OperationToStart(operation_name=after_op_name, initial_context={}) + if register_at_creation + else None + ) + schedule_id = await start_operation( + app, + initial_op_name, + {}, + on_execute_completed=on_execute_completed, + on_revert_completed=on_revert_completed, + ) -# THe only way to do it is by cancelling the existing and waitin for it to finish before running something new. 
+ if register_at_creation is False: + if is_executing: + await register_to_start_after_on_executed_completed( + app, + schedule_id, + to_start=OperationToStart( + operation_name=after_op_name, initial_context={} + ), + ) + else: + await register_to_start_after_on_reverted_completed( + app, + schedule_id, + to_start=OperationToStart( + operation_name=after_op_name, initial_context={} + ), + ) + + await ensure_expected_order(steps_call_order, expected_order) + await ensure_keys_in_store(app, expected_keys=set()) diff --git a/services/dynamic-scheduler/tests/unit/services/generic_scheduler/utils.py b/services/dynamic-scheduler/tests/unit/services/generic_scheduler/utils.py index 075f301688ac..343720076a03 100644 --- a/services/dynamic-scheduler/tests/unit/services/generic_scheduler/utils.py +++ b/services/dynamic-scheduler/tests/unit/services/generic_scheduler/utils.py @@ -2,7 +2,9 @@ from copy import deepcopy from typing import Any, Final +from fastapi import FastAPI from simcore_service_dynamic_scheduler.services.generic_scheduler import BaseStep +from simcore_service_dynamic_scheduler.services.generic_scheduler._core import Store from tenacity import ( AsyncRetrying, retry_if_exception_type, @@ -12,12 +14,12 @@ _RETRY_PARAMS: Final[dict[str, Any]] = { "wait": wait_fixed(0.1), - "stop": stop_after_delay(5), + "stop": stop_after_delay(10), "retry": retry_if_exception_type(AssertionError), } -CREATED: Final[str] = "create" -UNDONE: Final[str] = "undo" +EXECUTED: Final[str] = "executed" +REVERTED: Final[str] = "reverted" class BaseExpectedStepOrder: @@ -31,20 +33,20 @@ def __repr__(self) -> str: return f"{self.__class__.__name__}({', '.join(step.get_step_name() for step in self.steps)})" -class CreateSequence(BaseExpectedStepOrder): - """steps appear in a sequence as CREATE""" +class ExecuteSequence(BaseExpectedStepOrder): + """steps appear in a sequence as EXECUTE""" -class CreateRandom(BaseExpectedStepOrder): - """steps appear in any given order as CREATE""" +class 
ExecuteRandom(BaseExpectedStepOrder): + """steps appear in any given order as EXECUTE""" -class UndoSequence(BaseExpectedStepOrder): - """steps appear in a sequence as UNDO""" +class RevertSequence(BaseExpectedStepOrder): + """steps appear in a sequence as REVERT""" -class UndoRandom(BaseExpectedStepOrder): - """steps appear in any given order as UNDO""" +class RevertRandom(BaseExpectedStepOrder): + """steps appear in any given order as REVERT""" def _assert_order_sequence( @@ -74,7 +76,7 @@ def _assert_order_random( def _assert_expected_order( - steps_call_order: list[tuple[str, str]], + detected_order: list[tuple[str, str]], expected_order: list[BaseExpectedStepOrder], *, use_only_first_entries: bool, @@ -85,7 +87,7 @@ def _assert_expected_order( expected_order_length = sum(len(x) for x in expected_order) # below operations are destructive make a copy - call_order = deepcopy(steps_call_order) + call_order = deepcopy(detected_order) if use_only_first_entries: call_order = call_order[:expected_order_length] @@ -95,14 +97,14 @@ def _assert_expected_order( assert len(call_order) == expected_order_length for group in expected_order: - if isinstance(group, CreateSequence): - _assert_order_sequence(call_order, group.steps, expected=CREATED) - elif isinstance(group, CreateRandom): - _assert_order_random(call_order, group.steps, expected=CREATED) - elif isinstance(group, UndoSequence): - _assert_order_sequence(call_order, group.steps, expected=UNDONE) - elif isinstance(group, UndoRandom): - _assert_order_random(call_order, group.steps, expected=UNDONE) + if isinstance(group, ExecuteSequence): + _assert_order_sequence(call_order, group.steps, expected=EXECUTED) + elif isinstance(group, ExecuteRandom): + _assert_order_random(call_order, group.steps, expected=EXECUTED) + elif isinstance(group, RevertSequence): + _assert_order_sequence(call_order, group.steps, expected=REVERTED) + elif isinstance(group, RevertRandom): + _assert_order_random(call_order, group.steps, 
expected=REVERTED) else: msg = f"Unknown {group=}" raise NotImplementedError(msg) @@ -110,7 +112,7 @@ def _assert_expected_order( async def ensure_expected_order( - detected_calls: list[tuple[str, str]], + detected_order: list[tuple[str, str]], expected_order: list[BaseExpectedStepOrder], *, use_only_first_entries: bool = False, @@ -118,10 +120,21 @@ async def ensure_expected_order( ) -> None: async for attempt in AsyncRetrying(**_RETRY_PARAMS): with attempt: - await asyncio.sleep(0) # wait for envet to trigger + await asyncio.sleep(0) # wait for event to trigger _assert_expected_order( - detected_calls, + detected_order, expected_order, use_only_first_entries=use_only_first_entries, use_only_last_entries=use_only_last_entries, ) + + +async def _get_keys_in_store(app: FastAPI) -> set[str]: + return set(await Store.get_from_app_state(app).redis.keys()) + + +async def ensure_keys_in_store(app: FastAPI, *, expected_keys: set[str]) -> None: + async for attempt in AsyncRetrying(**_RETRY_PARAMS): + with attempt: + keys_instore = await _get_keys_in_store(app) + assert keys_instore == expected_keys diff --git a/services/dynamic-scheduler/tests/unit/services/service_tracker/test__api.py b/services/dynamic-scheduler/tests/unit/services/service_tracker/test__api.py index aa8b91bce65a..f9ec59761275 100644 --- a/services/dynamic-scheduler/tests/unit/services/service_tracker/test__api.py +++ b/services/dynamic-scheduler/tests/unit/services/service_tracker/test__api.py @@ -46,7 +46,7 @@ "redis", ] pytest_simcore_ops_services_selection = [ - # "redis-commander", + "redis-commander", ]