Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion api/core/workflow/graph_engine/graph_state_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ def enqueue_node(self, node_id: str) -> None:
node_id: The ID of the node to enqueue
"""
with self._lock:
self._graph.nodes[node_id].state = NodeState.TAKEN
self._ready_queue.put(node_id)
self._graph.nodes[node_id].state = NodeState.TAKEN
Comment on lines 52 to +54

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Action Required

2. Invalid node IDs queued 🐞 Bug

• enqueue_node() now pushes node_id into the ready queue before touching
  self._graph.nodes[node_id]; if the node ID is invalid, a KeyError is raised after the enqueue,
  leaving a poisoned queue item behind.
• Workers dereference self._graph.nodes[node_id] outside the worker’s try: block, so a poisoned
  queue item can crash the worker thread and stall execution.
• Resume flow enqueues paused_nodes without validating the node still exists, increasing the
  chance of this regression surfacing (e.g., stale snapshots or graph changes between pause/resume).
Agent Prompt
### Issue description
`GraphStateManager.enqueue_node()` currently enqueues `node_id` into the ready queue before verifying that `node_id` exists in `self._graph.nodes`. If the node ID is invalid/stale, the method raises after the enqueue, leaving a bad ID in the queue; workers then crash when dereferencing `graph.nodes[node_id]`.

### Issue Context
This is particularly risky in resume flows: `GraphEngine._start_execution(resume=True)` enqueues `paused_nodes` without validating membership in the current graph, and `GraphRuntimeState` does not validate paused IDs.

### Fix Focus Areas
- api/core/workflow/graph_engine/graph_state_manager.py[42-55]
- api/core/workflow/graph_engine/graph_engine.py[317-340]
- api/core/workflow/graph_engine/worker.py[100-112]
- api/core/workflow/runtime/graph_runtime_state.py[342-352]

### Suggested implementation sketch
- In `enqueue_node()`:
  - resolve/validate the node first (`node = self._graph.nodes.get(node_id)`; if None -> raise or no-op depending on desired behavior)
  - then `self._ready_queue.put(node_id)`
  - then set `node.state = NodeState.TAKEN`
- (Optional hardening) Add a new `schedule_node()` API that performs enqueue + execution-tracking updates atomically and use it consistently across call sites.

ⓘ Copy this prompt and use it to remediate the issue with your preferred AI generation tools


def mark_node_skipped(self, node_id: str) -> None:
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def propagate_skip_from_edge(self, edge_id: str) -> None:
# If any edge is taken, node may still execute
if edge_states["has_taken"]:
# Enqueue node
self._state_manager.start_execution(downstream_node_id)
self._state_manager.enqueue_node(downstream_node_id)
return

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Tests for graph traversal components."""
Original file line number Diff line number Diff line change
@@ -0,0 +1,308 @@
"""Unit tests for skip propagator."""

from typing import Any

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remediation Recommended

1. `typing.Any` used in tests 📘 Rule Violation

• The new unit test introduces typing.Any and uses it in annotations for
  get_incoming_edges_side_effect, weakening static typing and making refactors/type-checking less
  effective.
• This conflicts with the requirement to use strong typing and avoid overly-permissive types unless
  strictly necessary.
• It may also mask incorrect mock usage/signatures that stronger types would catch earlier.
Agent Prompt
## Issue description
The new unit tests introduce `typing.Any` and use it in annotations, which violates the strong-typing guideline and reduces the effectiveness of type checking.

## Issue Context
These helper functions are only used as mock side effects and can be typed precisely (e.g., node IDs are `str`, and the functions return lists of `Edge`-like objects).

## Fix Focus Areas
- api/tests/unit_tests/core/workflow/graph_engine/graph_traversal/test_skip_propagator.py[3-3]
- api/tests/unit_tests/core/workflow/graph_engine/graph_traversal/test_skip_propagator.py[205-223]

ⓘ Copy this prompt and use it to remediate the issue with your preferred AI generation tools

from unittest.mock import MagicMock, create_autospec

from core.workflow.graph import Edge, Graph
from core.workflow.graph_engine.graph_state_manager import GraphStateManager
from core.workflow.graph_engine.graph_traversal.skip_propagator import SkipPropagator


class TestSkipPropagator:
    """Test suite for SkipPropagator.

    Every test drives a real ``SkipPropagator`` against an autospec'd
    ``Graph`` and ``GraphStateManager`` and then inspects which state-manager
    methods were invoked. The dictionary returned by ``analyze_edge_states``
    selects the branch under test:

    * ``has_unknown`` -> propagation stops, nothing is touched
    * ``has_taken``   -> the downstream node is started and enqueued
    * ``all_skipped`` -> the skip is propagated to the downstream node
    """

    @staticmethod
    def _make_edge(edge_id: str, head: str) -> MagicMock:
        """Return an ``Edge``-spec'd mock with the given id and downstream node id."""
        edge = MagicMock(spec=Edge)
        edge.id = edge_id
        edge.head = head
        return edge

    @staticmethod
    def _edge_states(
        *,
        has_unknown: bool = False,
        has_taken: bool = False,
        all_skipped: bool = False,
    ) -> dict[str, bool]:
        """Build the mapping the tests feed back from ``analyze_edge_states``."""
        return {
            "has_unknown": has_unknown,
            "has_taken": has_taken,
            "all_skipped": all_skipped,
        }

    @staticmethod
    def _make_mocks(edges: dict[str, MagicMock]) -> tuple[MagicMock, MagicMock]:
        """Return ``(graph, state_manager)`` autospec mocks; ``graph.edges`` is set to *edges*."""
        mock_graph = create_autospec(Graph)
        mock_state_manager = create_autospec(GraphStateManager)
        mock_graph.edges = edges
        return mock_graph, mock_state_manager

    def test_propagate_skip_from_edge_with_unknown_edges_stops_processing(self) -> None:
        """When there are unknown incoming edges, propagation should stop."""
        # Arrange
        mock_graph, mock_state_manager = self._make_mocks({"edge_1": self._make_edge("edge_1", "node_2")})
        incoming_edges = [MagicMock(spec=Edge), MagicMock(spec=Edge)]
        mock_graph.get_incoming_edges.return_value = incoming_edges
        mock_state_manager.analyze_edge_states.return_value = self._edge_states(has_unknown=True)

        propagator = SkipPropagator(mock_graph, mock_state_manager)

        # Act
        propagator.propagate_skip_from_edge("edge_1")

        # Assert
        mock_graph.get_incoming_edges.assert_called_once_with("node_2")
        mock_state_manager.analyze_edge_states.assert_called_once_with(incoming_edges)
        # Should not call any other state manager methods
        mock_state_manager.enqueue_node.assert_not_called()
        mock_state_manager.start_execution.assert_not_called()
        mock_state_manager.mark_node_skipped.assert_not_called()

    def test_propagate_skip_from_edge_with_taken_edge_enqueues_node(self) -> None:
        """When there is at least one taken edge, node should be enqueued."""
        # Arrange
        mock_graph, mock_state_manager = self._make_mocks({"edge_1": self._make_edge("edge_1", "node_2")})
        mock_graph.get_incoming_edges.return_value = [MagicMock(spec=Edge)]
        mock_state_manager.analyze_edge_states.return_value = self._edge_states(has_taken=True)

        propagator = SkipPropagator(mock_graph, mock_state_manager)

        # Act
        propagator.propagate_skip_from_edge("edge_1")

        # Assert
        mock_state_manager.start_execution.assert_called_once_with("node_2")
        mock_state_manager.enqueue_node.assert_called_once_with("node_2")
        mock_state_manager.mark_node_skipped.assert_not_called()

    def test_propagate_skip_from_edge_with_all_skipped_propagates_to_node(self) -> None:
        """When all incoming edges are skipped, should propagate skip to node."""
        # Arrange
        mock_graph, mock_state_manager = self._make_mocks({"edge_1": self._make_edge("edge_1", "node_2")})
        mock_graph.get_incoming_edges.return_value = [MagicMock(spec=Edge)]
        mock_state_manager.analyze_edge_states.return_value = self._edge_states(all_skipped=True)

        propagator = SkipPropagator(mock_graph, mock_state_manager)

        # Act
        propagator.propagate_skip_from_edge("edge_1")

        # Assert
        mock_state_manager.mark_node_skipped.assert_called_once_with("node_2")
        mock_state_manager.enqueue_node.assert_not_called()
        mock_state_manager.start_execution.assert_not_called()

    def test_propagate_skip_to_node_marks_node_and_outgoing_edges_skipped(self) -> None:
        """_propagate_skip_to_node should mark node and all outgoing edges as skipped."""
        # Arrange
        edge2 = self._make_edge("edge_2", "node_downstream_1")
        edge3 = self._make_edge("edge_3", "node_downstream_2")
        mock_graph, mock_state_manager = self._make_mocks({"edge_2": edge2, "edge_3": edge3})
        mock_graph.get_outgoing_edges.return_value = [edge2, edge3]
        mock_graph.get_incoming_edges.return_value = []

        # Stop the recursion explicitly at the first downstream hop. Without
        # this, the test would only terminate because an unconfigured mock
        # result happens to be truthy for ["has_unknown"].
        mock_state_manager.analyze_edge_states.return_value = self._edge_states(has_unknown=True)

        propagator = SkipPropagator(mock_graph, mock_state_manager)

        # Act - the private method is invoked directly on the real propagator
        propagator._propagate_skip_to_node("node_1")

        # Assert
        mock_state_manager.mark_node_skipped.assert_called_once_with("node_1")
        mock_state_manager.mark_edge_skipped.assert_any_call("edge_2")
        mock_state_manager.mark_edge_skipped.assert_any_call("edge_3")
        assert mock_state_manager.mark_edge_skipped.call_count == 2
        # Propagation must have continued from each outgoing edge to its head node
        mock_graph.get_incoming_edges.assert_any_call("node_downstream_1")
        mock_graph.get_incoming_edges.assert_any_call("node_downstream_2")
        # ...and stopped there, because the downstream edge states are unknown
        mock_state_manager.enqueue_node.assert_not_called()
        mock_state_manager.start_execution.assert_not_called()

    def test_skip_branch_paths_marks_unselected_edges_and_propagates(self) -> None:
        """skip_branch_paths should mark all unselected edges as skipped and propagate."""
        # Arrange
        edge1 = self._make_edge("edge_1", "node_downstream_1")
        edge2 = self._make_edge("edge_2", "node_downstream_2")
        unselected_edges = [edge1, edge2]
        mock_graph, mock_state_manager = self._make_mocks({"edge_1": edge1, "edge_2": edge2})
        mock_graph.get_incoming_edges.return_value = []

        # Explicit stop condition for the propagation triggered per edge
        mock_state_manager.analyze_edge_states.return_value = self._edge_states(has_unknown=True)

        propagator = SkipPropagator(mock_graph, mock_state_manager)

        # Act
        propagator.skip_branch_paths(unselected_edges)

        # Assert
        mock_state_manager.mark_edge_skipped.assert_any_call("edge_1")
        mock_state_manager.mark_edge_skipped.assert_any_call("edge_2")
        assert mock_state_manager.mark_edge_skipped.call_count == 2
        # Each unselected edge must have been propagated to its head node
        mock_graph.get_incoming_edges.assert_any_call("node_downstream_1")
        mock_graph.get_incoming_edges.assert_any_call("node_downstream_2")
        # Unknown downstream states: nothing is skipped or scheduled further
        mock_state_manager.mark_node_skipped.assert_not_called()
        mock_state_manager.enqueue_node.assert_not_called()
        mock_state_manager.start_execution.assert_not_called()

    def test_propagate_skip_from_edge_recursively_propagates_through_graph(self) -> None:
        """Skip propagation should recursively propagate through the graph."""
        # Arrange
        # Edge chain: edge_1 -> node_2 -> edge_3 -> node_4
        edge1 = self._make_edge("edge_1", "node_2")
        edge3 = self._make_edge("edge_3", "node_4")
        mock_graph, mock_state_manager = self._make_mocks({"edge_1": edge1, "edge_3": edge3})

        # Per-node adjacency; nodes not listed have no edges
        incoming_by_node: dict[str, list[Edge]] = {"node_2": [edge1], "node_4": [edge3]}
        # node_4 has no outgoing edges, which is what ends the recursion
        outgoing_by_node: dict[str, list[Edge]] = {"node_2": [edge3], "node_4": []}

        def get_incoming_edges_side_effect(node_id: str) -> list[Edge]:
            return incoming_by_node.get(node_id, [])

        def get_outgoing_edges_side_effect(node_id: str) -> list[Edge]:
            return outgoing_by_node.get(node_id, [])

        mock_graph.get_incoming_edges.side_effect = get_incoming_edges_side_effect
        mock_graph.get_outgoing_edges.side_effect = get_outgoing_edges_side_effect

        # Both nodes see all of their incoming edges as skipped
        mock_state_manager.analyze_edge_states.return_value = self._edge_states(all_skipped=True)

        propagator = SkipPropagator(mock_graph, mock_state_manager)

        # Act
        propagator.propagate_skip_from_edge("edge_1")

        # Assert
        # Should mark node_2 as skipped
        mock_state_manager.mark_node_skipped.assert_any_call("node_2")
        # Should mark edge_3 as skipped
        mock_state_manager.mark_edge_skipped.assert_any_call("edge_3")
        # Should propagate to node_4
        mock_state_manager.mark_node_skipped.assert_any_call("node_4")
        assert mock_state_manager.mark_node_skipped.call_count == 2

    def test_propagate_skip_from_edge_with_mixed_edge_states_handles_correctly(self) -> None:
        """Test with mixed edge states (some unknown, some taken, some skipped)."""
        # Arrange
        mock_graph, mock_state_manager = self._make_mocks({"edge_1": self._make_edge("edge_1", "node_2")})
        mock_graph.get_incoming_edges.return_value = [
            MagicMock(spec=Edge),
            MagicMock(spec=Edge),
            MagicMock(spec=Edge),
        ]

        # Scenario 1: has_unknown=True, has_taken=False, all_skipped=False
        mock_state_manager.analyze_edge_states.return_value = self._edge_states(has_unknown=True)

        propagator = SkipPropagator(mock_graph, mock_state_manager)

        # Act
        propagator.propagate_skip_from_edge("edge_1")

        # Assert - should stop processing
        mock_state_manager.enqueue_node.assert_not_called()
        mock_state_manager.start_execution.assert_not_called()
        mock_state_manager.mark_node_skipped.assert_not_called()

        # Reset recorded calls for the next scenario (configured return values
        # on the graph mock are kept by reset_mock's defaults)
        mock_state_manager.reset_mock()
        mock_graph.reset_mock()

        # Scenario 2: has_unknown=False, has_taken=True, all_skipped=False
        mock_state_manager.analyze_edge_states.return_value = self._edge_states(has_taken=True)

        # Act
        propagator.propagate_skip_from_edge("edge_1")

        # Assert - should enqueue node
        mock_state_manager.start_execution.assert_called_once_with("node_2")
        mock_state_manager.enqueue_node.assert_called_once_with("node_2")
        mock_state_manager.mark_node_skipped.assert_not_called()

        # Reset recorded calls for the next scenario
        mock_state_manager.reset_mock()
        mock_graph.reset_mock()

        # Scenario 3: has_unknown=False, has_taken=False, all_skipped=True
        mock_state_manager.analyze_edge_states.return_value = self._edge_states(all_skipped=True)

        # Act
        propagator.propagate_skip_from_edge("edge_1")

        # Assert - should propagate skip
        mock_state_manager.mark_node_skipped.assert_called_once_with("node_2")
        mock_state_manager.enqueue_node.assert_not_called()
        mock_state_manager.start_execution.assert_not_called()