finished debugging test_specs

tclose · tclose · commit db8f7992a929 · 2025-02-26T14:26:48.000+11:00
diff --git a/pydra/engine/core.py b/pydra/engine/core.py
@@ -817,9 +817,7 @@ def node_names(self) -> list[str]:
     def execution_graph(self, submitter: "Submitter") -> DiGraph:
         from pydra.engine.submitter import NodeExecution
 
-        exec_nodes = [
-            NodeExecution(n, submitter, workflow_inputs=self.inputs) for n in self.nodes
-        ]
+        exec_nodes = [NodeExecution(n, submitter, workflow=self) for n in self.nodes]
         graph = self._create_graph(exec_nodes)
         # Set the graph attribute of the nodes so lazy fields can be resolved as tasks
         # are created
diff --git a/pydra/engine/lazy.py b/pydra/engine/lazy.py
@@ -6,7 +6,7 @@
 from . import node
 
 if ty.TYPE_CHECKING:
-    from .submitter import NodeExecution
+    from .submitter import DiGraph, NodeExecution
     from .core import Task, Workflow
     from .specs import TaskDef
     from .state import StateIndex
@@ -47,15 +47,18 @@ def _apply_cast(self, value):
 
     def _get_value(
         self,
-        node_exec: "NodeExecution",
+        workflow: "Workflow",
+        graph: "DiGraph[NodeExecution]",
         state_index: "StateIndex | None" = None,
     ) -> ty.Any:
         """Return the value of a lazy field.
 
         Parameters
         ----------
-        node_exec: NodeExecution
-            the object representing the execution state of the current node
+        workflow: Workflow
+            the workflow object
+        graph: DiGraph[NodeExecution]
+            the graph representing the execution state of the workflow
         state_index : StateIndex, optional
             the state index of the field to access
 
@@ -90,25 +93,27 @@ def _source(self):
 
     def _get_value(
         self,
-        node_exec: "NodeExecution",
+        workflow: "Workflow",
+        graph: "DiGraph[NodeExecution]",
         state_index: "StateIndex | None" = None,
     ) -> ty.Any:
         """Return the value of a lazy field.
 
         Parameters
         ----------
-        node_exec: NodeExecution
-            the object representing the execution state of the current node
+        workflow: Workflow
+            the workflow object
+        graph: DiGraph[NodeExecution]
+            the graph representing the execution state of the workflow (ignored)
         state_index : StateIndex, optional
-            the state index of the field to access (ignored, used for duck-typing with
-            LazyOutField)
+            the state index of the field to access (ignored for workflow inputs)
 
         Returns
         -------
         value : Any
             the resolved value of the lazy-field
         """
-        value = node_exec.workflow_inputs[self._field]
+        value = workflow.inputs[self._field]
         value = self._apply_cast(value)
         return value
 
@@ -127,15 +132,18 @@ def __repr__(self):
 
     def _get_value(
         self,
-        node_exec: "NodeExecution",
+        workflow: "Workflow",
+        graph: "DiGraph[NodeExecution]",
         state_index: "StateIndex | None" = None,
     ) -> ty.Any:
         """Return the value of a lazy field.
 
         Parameters
         ----------
-        node_exec: NodeExecution
-            the object representing the execution state of the current node
+        workflow: Workflow
+            the workflow object
+        graph: DiGraph[NodeExecution]
+            the graph representing the execution state of the workflow
         state_index : StateIndex, optional
             the state index of the field to access
 
@@ -152,7 +160,7 @@ def _get_value(
         if state_index is None:
             state_index = StateIndex()
 
-        task = node_exec.graph.node(self._node.name).task(state_index)
+        task = graph.node(self._node.name).task(state_index)
         _, split_depth = TypeParser.strip_splits(self._type)
 
         def get_nested(task: "Task[DefType]", depth: int):
diff --git a/pydra/engine/node.py b/pydra/engine/node.py
@@ -269,7 +269,11 @@ def _get_upstream_states(self) -> dict[str, tuple["State", list[str]]]:
         """Get the states of the upstream nodes that are connected to this node"""
         upstream_states = {}
         for inpt_name, val in self.input_values:
-            if isinstance(val, lazy.LazyOutField) and val._node.state:
+            if (
+                isinstance(val, lazy.LazyOutField)
+                and val._node.state
+                and val._node.state.depth
+            ):
                 node: Node = val._node
                 # variables that are part of inner splitters should be treated as a containers
                 if node.state and f"{node.name}.{inpt_name}" in node.state.splitter:
diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py
@@ -736,7 +736,7 @@ def _from_task(cls, task: "Task[WorkflowDef]") -> Self:
         nodes_dict = {n.name: n for n in exec_graph.nodes}
         for name, lazy_field in attrs_values(workflow.outputs).items():
             try:
-                val_out = lazy_field._get_value(exec_graph)
+                val_out = lazy_field._get_value(workflow=workflow, graph=exec_graph)
                 output_wf[name] = val_out
             except (ValueError, AttributeError):
                 output_wf[name] = None
diff --git a/pydra/engine/state.py b/pydra/engine/state.py
@@ -41,23 +41,63 @@ def __init__(self, indices: dict[str, int] | None = None):
         else:
             self.indices = OrderedDict(sorted(indices.items()))
 
-    def __repr__(self):
+    def __len__(self) -> int:
+        return len(self.indices)
+
+    def __iter__(self) -> ty.Generator[str, None, None]:
+        return iter(self.indices)
+
+    def __repr__(self) -> str:
         return (
             "StateIndex(" + ", ".join(f"{n}={v}" for n, v in self.indices.items()) + ")"
         )
 
     def __hash__(self):
         return hash(tuple(self.indices.items()))
 
-    def __eq__(self, other):
+    def __eq__(self, other) -> bool:
         return self.indices == other.indices
 
-    def __str__(self):
+    def __str__(self) -> str:
         return "__".join(f"{n}-{i}" for n, i in self.indices.items())
 
-    def __bool__(self):
+    def __bool__(self) -> bool:
         return bool(self.indices)
 
+    def subset(self, state_names: ty.Iterable[str]) -> ty.Self:
+        """Create a new StateIndex with only the specified fields
+
+        Parameters
+        ----------
+        state_names : Iterable[str]
+            the names of the states to keep in the new StateIndex
+
+        Returns
+        -------
+        StateIndex
+            a new StateIndex with only the specified fields
+        """
+        return type(self)({k: v for k, v in self.indices.items() if k in state_names})
+
+    def matches(self, other: "StateIndex") -> bool:
+        """Check if the indices that are present in the other StateIndex match
+
+        Parameters
+        ----------
+        other : StateIndex
+            the other StateIndex to compare against
+
+        Returns
+        -------
+        bool
+            True if all the indices in the other StateIndex match
+        """
+        if not set(self.indices).issuperset(other.indices):
+            raise ValueError(
+                f"StateIndex {self} does not contain all the indices in {other}"
+            )
+        return all(self.indices[k] == v for k, v in other.indices.items())
+
 
 class State:
     """
@@ -172,6 +212,9 @@ def __str__(self):
     def names(self):
         """Return the names of the states."""
         # analysing states from connected tasks if inner_inputs
+        if not hasattr(self, "keys_final"):
+            self.prepare_states()
+            self.prepare_inputs()
         previous_states_keys = {
             f"_{v.name}": v.keys_final for v in self.inner_inputs.values()
         }
diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py
@@ -32,7 +32,8 @@
 
 if ty.TYPE_CHECKING:
     from .node import Node
-    from .specs import TaskDef, TaskOutputs, WorkflowDef, TaskHooks, Result
+    from .specs import WorkflowDef, TaskDef, TaskOutputs, TaskHooks, Result
+    from .core import Workflow
     from .environments import Environment
     from .state import State
 
@@ -501,15 +502,15 @@ class NodeExecution(ty.Generic[DefType]):
 
     _tasks: dict[StateIndex | None, "Task[DefType]"] | None
 
-    workflow_inputs: "WorkflowDef"
+    workflow: "Workflow"
 
     graph: DiGraph["NodeExecution"] | None
 
     def __init__(
         self,
         node: "Node",
         submitter: Submitter,
-        workflow_inputs: "WorkflowDef",
+        workflow: "Workflow",
     ):
         self.name = node.name
         self.node = node
@@ -523,9 +524,17 @@ def __init__(
         self.running = {}  # Not used in logic, but may be useful for progress tracking
         self.unrunnable = defaultdict(list)
         self.state_names = self.node.state.names if self.node.state else []
-        self.workflow_inputs = workflow_inputs
+        self.workflow = workflow
         self.graph = None
 
+    def __repr__(self):
+        return (
+            f"NodeExecution(name={self.name!r}, blocked={list(self.blocked)}, "
+            f"queued={list(self.queued)}, running={list(self.running)}, "
+            f"successful={list(self.successful)}, errored={list(self.errored)}, "
+            f"unrunnable={list(self.unrunnable)})"
+        )
+
     @property
     def inputs(self) -> "Node.Inputs":
         return self.node.inputs
@@ -547,12 +556,16 @@ def tasks(self) -> ty.Iterable["Task[DefType]"]:
     def task(self, index: StateIndex = StateIndex()) -> "Task | list[Task[DefType]]":
         """Get a task object for a given state index."""
         self.tasks  # Ensure tasks are loaded
-        try:
-            return self._tasks[index]
-        except KeyError:
-            if not index:
-                return StateArray(self._tasks.values())
-            raise
+        task_index = next(iter(self._tasks))
+        if len(task_index) > len(index):
+            tasks = []
+            for ind, task in self._tasks.items():
+                if ind.matches(index):
+                    tasks.append(task)
+            return StateArray(tasks)
+        elif len(index) > len(task_index):
+            index = index.subset(task_index)
+        return self._tasks[index]
 
     @property
     def started(self) -> bool:
@@ -651,10 +664,12 @@ def _resolve_lazy_inputs(
             The task definition with all lazy fields resolved
         """
         resolved = {}
-        for name, value in attrs_values(self).items():
+        for name, value in attrs_values(task_def).items():
             if isinstance(value, LazyField):
-                resolved[name] = value._get_value(self, state_index)
-        return attrs.evolve(self, **resolved)
+                resolved[name] = value._get_value(
+                    workflow=self.workflow, graph=self.graph, state_index=state_index
+                )
+        return attrs.evolve(task_def, **resolved)
 
     def get_runnable_tasks(self, graph: DiGraph) -> list["Task[DefType]"]:
         """For a given node, check to see which tasks have been successfully run, are ready
@@ -676,19 +691,35 @@ def get_runnable_tasks(self, graph: DiGraph) -> list["Task[DefType]"]:
         runnable: list["Task[DefType]"] = []
         self.tasks  # Ensure tasks are loaded
         if not self.started:
+            assert self._tasks
             self.blocked = copy(self._tasks)
         # Check to see if any blocked tasks are now runnable/unrunnable
         for index, task in list(self.blocked.items()):
             pred: NodeExecution
             is_runnable = True
             for pred in graph.predecessors[self.node.name]:
-                if index not in pred.successful:
+                pred_jobs = pred.task(index)
+                if isinstance(pred_jobs, StateArray):
+                    pred_inds = [j.state_index for j in pred_jobs]
+                else:
+                    pred_inds = [pred_jobs.state_index]
+                if not all(i in pred.successful for i in pred_inds):
                     is_runnable = False
-                    if index in pred.errored:
-                        self.unrunnable[index].append(self.blocked.pop(index))
-                    if index in pred.unrunnable:
-                        self.unrunnable[index].extend(pred.unrunnable[index])
-                        self.blocked.pop(index)
+                    blocked = True
+                    if pred_errored := [i for i in pred_inds if i in pred.errored]:
+                        self.unrunnable[index].extend(
+                            [pred.errored[i] for i in pred_errored]
+                        )
+                        blocked = False
+                    if pred_unrunnable := [
+                        i for i in pred_inds if i in pred.unrunnable
+                    ]:
+                        self.unrunnable[index].extend(
+                            [pred.unrunnable[i] for i in pred_unrunnable]
+                        )
+                        blocked = False
+                    if not blocked:
+                        del self.blocked[index]
                     break
             if is_runnable:
                 runnable.append(self.blocked.pop(index))
diff --git a/pydra/engine/tests/test_specs.py b/pydra/engine/tests/test_specs.py