pydantic
diff --git a/‎pydantic_graph/pydantic_graph/beta/decision.py‎
Lines changed: 12 additions & 2 deletions b/‎pydantic_graph/pydantic_graph/beta/decision.py‎
Lines changed: 12 additions & 2 deletions
diff --git a/‎pydantic_graph/pydantic_graph/beta/graph.py‎
Lines changed: 86 additions & 30 deletions b/‎pydantic_graph/pydantic_graph/beta/graph.py‎
Lines changed: 86 additions & 30 deletions
diff --git a/‎pydantic_graph/pydantic_graph/beta/graph_builder.py‎
Lines changed: 6 additions & 1 deletion b/‎pydantic_graph/pydantic_graph/beta/graph_builder.py‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎pydantic_graph/pydantic_graph/beta/join.py‎
Lines changed: 37 additions & 14 deletions b/‎pydantic_graph/pydantic_graph/beta/join.py‎
Lines changed: 37 additions & 14 deletions
@@ -13,7 +13,7 @@
 
 from typing_extensions import Never, Self, TypeVar
 
-from pydantic_graph.beta.id_types import ForkId, NodeId
+from pydantic_graph.beta.id_types import ForkId, JoinId, NodeId
 from pydantic_graph.beta.paths import Path, PathBuilder
 from pydantic_graph.beta.step import StepFunction
 from pydantic_graph.beta.util import TypeOrTypeExpression
@@ -213,17 +213,27 @@ def transform(
 
     def spread(
         self: DecisionBranchBuilder[StateT, DepsT, Iterable[T], SourceT, HandledT],
+        *,
+        fork_id: ForkId | None = None,
+        downstream_join_id: JoinId | None = None,
     ) -> DecisionBranchBuilder[StateT, DepsT, T, SourceT, HandledT]:
         """Spread the branch's output.
 
         To do this, the current output must be iterable, and any subsequent steps in the path being built for this
         branch will be applied to each item of the current output in parallel.
 
+        Args:
+            fork_id: Optional ID for the fork, defaults to a generated value
+            downstream_join_id: Optional ID of a downstream join that must still run when the spread iterable is empty
+
         Returns:
             A new DecisionBranchBuilder where spreading is performed prior to generating the final output.
         """
         return DecisionBranchBuilder(
-            decision=self.decision, source=self.source, matches=self.matches, path_builder=self.path_builder.spread()
+            decision=self.decision,
+            source=self.source,
+            matches=self.matches,
+            path_builder=self.path_builder.spread(fork_id=fork_id, downstream_join_id=downstream_join_id),
         )
 
     def label(self, label: str) -> DecisionBranchBuilder[StateT, DepsT, OutputT, SourceT, HandledT]:
 
@@ -346,7 +346,7 @@ def __init__(
         self.inputs = inputs
         """The initial input data."""
 
-        self._active_reducers: dict[tuple[JoinId, NodeRunId], Reducer[Any, Any, Any, Any]] = {}
+        self._active_reducers: dict[tuple[JoinId, NodeRunId], tuple[Reducer[Any, Any, Any, Any], ForkStack]] = {}
         """Active reducers for join operations."""
 
         self._next: EndMarker[OutputT] | JoinItem | Sequence[GraphTask] | None = None
@@ -469,39 +469,82 @@ def _handle_result(result: EndMarker[OutputT] | JoinItem | Sequence[GraphTask])
 
             if isinstance(result, JoinItem):
                 parent_fork_id = self.graph.get_parent_fork(result.join_id).fork_id
-                fork_run_id = [x.node_run_id for x in result.fork_stack[::-1] if x.fork_id == parent_fork_id][0]
-                reducer = self._active_reducers.get((result.join_id, fork_run_id))
-                if reducer is None:
+                for i, x in enumerate(result.fork_stack[::-1]):
+                    if x.fork_id == parent_fork_id:
+                        downstream_fork_stack = result.fork_stack[: len(result.fork_stack) - i]
+                        fork_run_id = x.node_run_id
+                        break
+                else:
+                    raise RuntimeError('Parent fork run not found')
+
+                reducer_and_fork_stack = self._active_reducers.get((result.join_id, fork_run_id))
+                if reducer_and_fork_stack is None:
                     join_node = self.graph.nodes[result.join_id]
                     assert isinstance(join_node, Join)
-                    reducer = join_node.create_reducer(StepContext(self.state, self.deps, result.inputs))
-                    self._active_reducers[(result.join_id, fork_run_id)] = reducer
+                    reducer = join_node.create_reducer()
+                    self._active_reducers[(result.join_id, fork_run_id)] = reducer, downstream_fork_stack
                 else:
+                    reducer, _ = reducer_and_fork_stack
+
+                try:
                     reducer.reduce(StepContext(self.state, self.deps, result.inputs))
+                except StopIteration:
+                    # cancel all concurrently running tasks with the same fork_run_id of the parent fork
+                    task_ids_to_cancel = set[TaskId]()
+                    for task_id, t in tasks_by_id.items():
+                        for item in t.fork_stack:
+                            if item.fork_id == parent_fork_id and item.node_run_id == fork_run_id:
+                                task_ids_to_cancel.add(task_id)
+                                break
+                    for task in list(pending):
+                        if task.get_name() in task_ids_to_cancel:
+                            task.cancel()
+                            pending.remove(task)
             else:
                 for new_task in result:
                     _start_task(new_task)
             return False
 
-        while pending:
-            done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
-            for task in done:
-                task_result = task.result()
-                source_task = tasks_by_id.pop(TaskId(task.get_name()))
-                maybe_overridden_result = yield task_result
-                if _handle_result(maybe_overridden_result):
-                    return
-
-                for join_id, fork_run_id, fork_stack in self._get_completed_fork_runs(
-                    source_task, tasks_by_id.values()
-                ):
-                    reducer = self._active_reducers.pop((join_id, fork_run_id))
+        while pending or self._active_reducers:
+            while pending:
+                done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
+                for task in done:
+                    task_result = task.result()
+                    source_task = tasks_by_id.pop(TaskId(task.get_name()))
+                    maybe_overridden_result = yield task_result
+                    if _handle_result(maybe_overridden_result):
+                        return
 
+                    for join_id, fork_run_id in self._get_completed_fork_runs(source_task, tasks_by_id.values()):
+                        reducer, fork_stack = self._active_reducers.pop((join_id, fork_run_id))
+                        output = reducer.finalize(StepContext(self.state, self.deps, None))
+                        join_node = self.graph.nodes[join_id]
+                        assert isinstance(
+                            join_node, Join
+                        )  # We could drop this but if it fails it means there is a bug.
+                        new_tasks = self._handle_edges(join_node, output, fork_stack)
+                        maybe_overridden_result = yield new_tasks  # give an opportunity to override these
+                        if _handle_result(maybe_overridden_result):
+                            return
+
+            if self._active_reducers:
+                # In this case, there are no pending tasks. We can therefore finalize all active reducers whose
+                # downstream fork stack is not a strict "prefix" of another active reducer's. (If it were, finalizing
+                # the deeper reducer could produce new tasks in the "prefix" reducer.)
+                active_fork_stacks = [fork_stack for _, fork_stack in self._active_reducers.values()]
+                for (join_id, fork_run_id), (reducer, fork_stack) in list(self._active_reducers.items()):
+                    if any(
+                        len(afs) > len(fork_stack) and fork_stack == afs[: len(fork_stack)]
+                        for afs in active_fork_stacks
+                    ):
+                        continue  # this reducer is a strict prefix for one of the other active reducers
+
+                    self._active_reducers.pop((join_id, fork_run_id))  # we're finalizing it now
                     output = reducer.finalize(StepContext(self.state, self.deps, None))
                     join_node = self.graph.nodes[join_id]
                     assert isinstance(join_node, Join)  # We could drop this but if it fails it means there is a bug.
                     new_tasks = self._handle_edges(join_node, output, fork_stack)
-                    maybe_overridden_result = yield new_tasks  # Need to give an opportunity to override these
+                    maybe_overridden_result = yield new_tasks  # give an opportunity to override these
                     if _handle_result(maybe_overridden_result):
                         return
 
@@ -588,19 +631,18 @@ def _get_completed_fork_runs(
         self,
         t: GraphTask,
         active_tasks: Iterable[GraphTask],
-    ) -> list[tuple[JoinId, NodeRunId, ForkStack]]:
-        completed_fork_runs: list[tuple[JoinId, NodeRunId, ForkStack]] = []
+    ) -> list[tuple[JoinId, NodeRunId]]:
+        completed_fork_runs: list[tuple[JoinId, NodeRunId]] = []
 
         fork_run_indices = {fsi.node_run_id: i for i, fsi in enumerate(t.fork_stack)}
         for join_id, fork_run_id in self._active_reducers.keys():
             fork_run_index = fork_run_indices.get(fork_run_id)
             if fork_run_index is None:
                 continue  # The fork_run_id is not in the current task's fork stack, so this task didn't complete it.
 
-            new_fork_stack = t.fork_stack[:fork_run_index]
             # This reducer _may_ now be ready to finalize:
             if self._is_fork_run_completed(active_tasks, join_id, fork_run_id):
-                completed_fork_runs.append((join_id, fork_run_id, new_fork_stack))
+                completed_fork_runs.append((join_id, fork_run_id))
 
         return completed_fork_runs
 
@@ -612,13 +654,27 @@ def _handle_path(self, path: Path, inputs: Any, fork_stack: ForkStack) -> Sequen
         if isinstance(item, DestinationMarker):
             return [GraphTask(item.destination_id, inputs, fork_stack)]
         elif isinstance(item, SpreadMarker):
+            # Eagerly raise a clear error if the input value is not iterable as expected
+            try:
+                iter(inputs)
+            except TypeError:
+                raise RuntimeError(f'Cannot spread non-iterable value: {inputs!r}')
+
             node_run_id = NodeRunId(str(uuid.uuid4()))
-            return [
-                GraphTask(
-                    item.fork_id, input_item, fork_stack + (ForkStackItem(item.fork_id, node_run_id, thread_index),)
+
+            # If the spread specifies a downstream join id, eagerly create a reducer for it
+            if item.downstream_join_id is not None:
+                join_node = self.graph.nodes[item.downstream_join_id]
+                assert isinstance(join_node, Join)
+                self._active_reducers[(item.downstream_join_id, node_run_id)] = join_node.create_reducer(), fork_stack
+
+            spread_tasks: list[GraphTask] = []
+            for thread_index, input_item in enumerate(inputs):
+                item_tasks = self._handle_path(
+                    path.next_path, input_item, fork_stack + (ForkStackItem(item.fork_id, node_run_id, thread_index),)
                 )
-                for thread_index, input_item in enumerate(inputs)
-            ]
+                spread_tasks += item_tasks
+            return spread_tasks
         elif isinstance(item, BroadcastMarker):
             return [GraphTask(item.fork_id, inputs, fork_stack)]
         elif isinstance(item, TransformMarker):
@@ -644,6 +700,6 @@ def _is_fork_run_completed(self, tasks: Iterable[GraphTask], join_id: JoinId, fo
         parent_fork = self.graph.get_parent_fork(join_id)
         for t in tasks:
             if fork_run_id in {x.node_run_id for x in t.fork_stack}:
-                if t.node_id in parent_fork.intermediate_nodes:
+                if t.node_id in parent_fork.intermediate_nodes or t.node_id == join_id:
                     return False
         return True
@@ -414,6 +414,8 @@ def add_spreading_edge(
         *,
         pre_spread_label: str | None = None,
         post_spread_label: str | None = None,
+        fork_id: ForkId | None = None,
+        downstream_join_id: JoinId | None = None,
     ) -> None:
         """Add an edge that spreads iterable data across parallel paths.
 
@@ -422,11 +424,14 @@ def add_spreading_edge(
             spread_to: The destination node that receives individual items
             pre_spread_label: Optional label before the spread operation
             post_spread_label: Optional label after the spread operation
+            fork_id: Optional ID for the fork node produced for this spread operation
+            downstream_join_id: Optional ID of a join node that will always be downstream of this spread.
+                Specifying this ensures correct handling if you try to spread an empty iterable.
         """
         builder = self.edge_from(source)
         if pre_spread_label is not None:
             builder = builder.label(pre_spread_label)
-        builder = builder.spread()
+        builder = builder.spread(fork_id=fork_id, downstream_join_id=downstream_join_id)
         if post_spread_label is not None:
             builder = builder.label(post_spread_label)
         self.add(builder.to(spread_to))
 
@@ -25,7 +25,7 @@
 V = TypeVar('V', infer_variance=True)
 
 
-@dataclass(init=False)
+@dataclass(kw_only=True)
 class Reducer(ABC, Generic[StateT, DepsT, InputT, OutputT]):
     """An abstract base class for reducing data from parallel execution paths.
 
@@ -40,14 +40,6 @@ class Reducer(ABC, Generic[StateT, DepsT, InputT, OutputT]):
         OutputT: The type of the final output after reduction
     """
 
-    def __init__(self, ctx: StepContext[StateT, DepsT, InputT]) -> None:
-        """Initialize the reducer with the first input context.
-
-        Args:
-            ctx: The step context containing the initial input data
-        """
-        self.reduce(ctx)
-
     def reduce(self, ctx: StepContext[StateT, DepsT, InputT]) -> None:
         """Accumulate input data from a step context into the reducer's internal state.
 
@@ -77,7 +69,7 @@ def finalize(self, ctx: StepContext[StateT, DepsT, None]) -> OutputT:
         raise NotImplementedError('Finalize method must be implemented in subclasses.')
 
 
-@dataclass(init=False)
+@dataclass(kw_only=True)
 class NullReducer(Reducer[object, object, object, None]):
     """A reducer that discards all input data and returns None.
 
@@ -98,7 +90,7 @@ def finalize(self, ctx: StepContext[object, object, object]) -> None:
         return None
 
 
-@dataclass(init=False)
+@dataclass(kw_only=True)
 class ListReducer(Reducer[object, object, T, list[T]], Generic[T]):
     """A reducer that collects all input values into a list.
 
@@ -132,7 +124,7 @@ def finalize(self, ctx: StepContext[object, object, None]) -> list[T]:
         return self.items
 
 
-@dataclass(init=False)
+@dataclass(kw_only=True)
 class DictReducer(Reducer[object, object, dict[K, V], dict[K, V]], Generic[K, V]):
     """A reducer that merges dictionary inputs into a single dictionary.
 
@@ -167,6 +159,37 @@ def finalize(self, ctx: StepContext[object, object, None]) -> dict[K, V]:
         return self.data
 
 
+@dataclass(kw_only=True)
+class EarlyStoppingReducer(Reducer[object, object, T, T | None], Generic[T]):
+    """A reducer that returns the first encountered value and cancels all other tasks started by its parent fork.
+
+    Type Parameters:
+        T: The type of the input value that is stored as the result
+    """
+
+    result: T | None = None
+
+    def reduce(self, ctx: StepContext[object, object, T]) -> None:
+        """Store the input value as the result and raise `StopIteration` to signal early stopping.
+
+        Args:
+            ctx: The step context containing the input value to store as the result
+        """
+        self.result = ctx.inputs
+        raise StopIteration
+
+    def finalize(self, ctx: StepContext[object, object, None]) -> T | None:
+        """Return the stored result.
+
+        Args:
+            ctx: The step context for finalization
+
+        Returns:
+            The value stored by `reduce`, or `None` if `reduce` was never called
+        """
+        return self.result
+
+
 class Join(Generic[StateT, DepsT, InputT, OutputT]):
     """A join operation that synchronizes and aggregates parallel execution paths.
 
@@ -202,7 +225,7 @@ def __init__(
 
         # self._type_adapter: TypeAdapter[Any] = TypeAdapter(reducer_type)  # needs to be annotated this way for variance
 
-    def create_reducer(self, ctx: StepContext[StateT, DepsT, InputT]) -> Reducer[StateT, DepsT, InputT, OutputT]:
+    def create_reducer(self) -> Reducer[StateT, DepsT, InputT, OutputT]:
         """Create a reducer instance for this join operation.
 
         Args:
@@ -211,7 +234,7 @@ def create_reducer(self, ctx: StepContext[StateT, DepsT, InputT]) -> Reducer[Sta
         Returns:
             A new reducer instance initialized with the provided context
         """
-        return self._reducer_type(ctx)
+        return self._reducer_type()
 
     # TODO(P3): If we want the ability to snapshot graph-run state, we'll need a way to
     #  serialize/deserialize the associated reducers, something like this: