33"""
44
55import logging
6+ from collections .abc import Mapping
67from functools import singledispatchmethod
78from typing import TYPE_CHECKING , final
89
910from core .workflow .entities import GraphRuntimeState
10- from core .workflow .enums import NodeExecutionType
11+ from core .workflow .enums import ErrorStrategy , NodeExecutionType
1112from core .workflow .graph import Graph
1213from core .workflow .graph_events import (
1314 GraphNodeEventBase ,
@@ -122,13 +123,15 @@ def _(self, event: NodeRunStartedEvent) -> None:
122123 """
123124 # Track execution in domain model
124125 node_execution = self ._graph_execution .get_or_create_node_execution (event .node_id )
126+ is_initial_attempt = node_execution .retry_count == 0
125127 node_execution .mark_started (event .id )
126128
127129 # Track in response coordinator for stream ordering
128130 self ._response_coordinator .track_node_execution (event .node_id , event .id )
129131
130- # Collect the event
131- self ._event_collector .collect (event )
132+ # Collect the event only for the first attempt; retries remain silent
133+ if is_initial_attempt :
134+ self ._event_collector .collect (event )
132135
133136 @_dispatch .register
134137 def _ (self , event : NodeRunStreamChunkEvent ) -> None :
@@ -161,7 +164,7 @@ def _(self, event: NodeRunSucceededEvent) -> None:
161164 node_execution .mark_taken ()
162165
163166 # Store outputs in variable pool
164- self ._store_node_outputs (event )
167+ self ._store_node_outputs (event . node_id , event . node_run_result . outputs )
165168
166169 # Forward to response coordinator and emit streaming events
167170 streaming_events = self ._response_coordinator .intercept_event (event )
@@ -191,7 +194,7 @@ def _(self, event: NodeRunSucceededEvent) -> None:
191194
192195 # Handle response node outputs
193196 if node .execution_type == NodeExecutionType .RESPONSE :
194- self ._update_response_outputs (event )
197+ self ._update_response_outputs (event . node_run_result . outputs )
195198
196199 # Collect the event
197200 self ._event_collector .collect (event )
@@ -207,6 +210,7 @@ def _(self, event: NodeRunFailedEvent) -> None:
207210 # Update domain model
208211 node_execution = self ._graph_execution .get_or_create_node_execution (event .node_id )
209212 node_execution .mark_failed (event .error )
213+ self ._graph_execution .record_node_failure ()
210214
211215 result = self ._error_handler .handle_node_failure (event )
212216
@@ -227,10 +231,40 @@ def _(self, event: NodeRunExceptionEvent) -> None:
227231 Args:
228232 event: The node exception event
229233 """
230- # Node continues via fail-branch, so it's technically "succeeded"
234+ # Node continues via fail-branch/default-value, treat as completion
231235 node_execution = self ._graph_execution .get_or_create_node_execution (event .node_id )
232236 node_execution .mark_taken ()
233237
238+ # Persist outputs produced by the exception strategy (e.g. default values)
239+ self ._store_node_outputs (event .node_id , event .node_run_result .outputs )
240+
241+ node = self ._graph .nodes [event .node_id ]
242+
243+ if node .error_strategy == ErrorStrategy .DEFAULT_VALUE :
244+ ready_nodes , edge_streaming_events = self ._edge_processor .process_node_success (event .node_id )
245+ elif node .error_strategy == ErrorStrategy .FAIL_BRANCH :
246+ ready_nodes , edge_streaming_events = self ._edge_processor .handle_branch_completion (
247+ event .node_id , event .node_run_result .edge_source_handle
248+ )
249+ else :
250+ raise NotImplementedError (f"Unsupported error strategy: { node .error_strategy } " )
251+
252+ for edge_event in edge_streaming_events :
253+ self ._event_collector .collect (edge_event )
254+
255+ for node_id in ready_nodes :
256+ self ._state_manager .enqueue_node (node_id )
257+ self ._state_manager .start_execution (node_id )
258+
259+ # Update response outputs if applicable
260+ if node .execution_type == NodeExecutionType .RESPONSE :
261+ self ._update_response_outputs (event .node_run_result .outputs )
262+
263+ self ._state_manager .finish_execution (event .node_id )
264+
265+ # Collect the exception event for observers
266+ self ._event_collector .collect (event )
267+
234268 @_dispatch .register
235269 def _ (self , event : NodeRunRetryEvent ) -> None :
236270 """
@@ -242,21 +276,31 @@ def _(self, event: NodeRunRetryEvent) -> None:
242276 node_execution = self ._graph_execution .get_or_create_node_execution (event .node_id )
243277 node_execution .increment_retry ()
244278
245- def _store_node_outputs (self , event : NodeRunSucceededEvent ) -> None :
279+ # Finish the previous attempt before re-queuing the node
280+ self ._state_manager .finish_execution (event .node_id )
281+
282+ # Emit retry event for observers
283+ self ._event_collector .collect (event )
284+
285+ # Re-queue node for execution
286+ self ._state_manager .enqueue_node (event .node_id )
287+ self ._state_manager .start_execution (event .node_id )
288+
289+ def _store_node_outputs (self , node_id : str , outputs : Mapping [str , object ]) -> None :
246290 """
247291 Store node outputs in the variable pool.
248292
249293 Args:
250294 node_id: ID of the node whose outputs are stored; outputs: mapping of variable name to value, each added to the pool under (node_id, variable_name)
251295 """
252- for variable_name , variable_value in event . node_run_result . outputs .items ():
253- self ._graph_runtime_state .variable_pool .add ((event . node_id , variable_name ), variable_value )
296+ for variable_name , variable_value in outputs .items ():
297+ self ._graph_runtime_state .variable_pool .add ((node_id , variable_name ), variable_value )
254298
255- def _update_response_outputs (self , event : NodeRunSucceededEvent ) -> None :
299+ def _update_response_outputs (self , outputs : Mapping [ str , object ] ) -> None :
256300 """Update response outputs for response nodes."""
257301 # TODO: Design a mechanism for nodes to notify the engine about how to update outputs
258302 # in runtime state, rather than allowing nodes to directly access runtime state.
259- for key , value in event . node_run_result . outputs .items ():
303+ for key , value in outputs .items ():
260304 if key == "answer" :
261305 existing = self ._graph_runtime_state .get_output ("answer" , "" )
262306 if existing :
0 commit comments