debugging state preparation and lazy value resolution

tclose · tclose · commit 32931d3b7377 · 2025-03-14T13:08:09.000+11:00
diff --git a/pydra/engine/lazy.py b/pydra/engine/lazy.py
@@ -176,8 +176,6 @@ def _get_value(
         value : Any
             the resolved value of the lazy-field
         """
-        state = self._node.state
-        jobs = graph.node(self._node.name).get_jobs(state_index)
 
         def retrieve_from_job(job: "Task[DefType]") -> ty.Any:
             if job.errored:
@@ -209,12 +207,42 @@ def retrieve_from_job(job: "Task[DefType]") -> ty.Any:
             val = self._apply_cast(val)
             return val
 
-        if not isinstance(jobs, StateArray):  # single job
-            return retrieve_from_job(jobs)
-        elif not state or not state.depth(before_combine=True):
-            assert len(jobs) == 1
-            return retrieve_from_job(jobs[0])
-        return [retrieve_from_job(j) for j in jobs]
+        # Get the execution node that the value is coming from
+        upstream_node = graph.node(self._node.name)
+
+        if not upstream_node._tasks:  # No jobs, return empty state array
+            return StateArray()
+        if not upstream_node.state:  # Return the singular job
+            value = retrieve_from_job(upstream_node._tasks[None])
+            if state_index is not None:
+                return value[state_index]
+            return value
+        if upstream_node.state.combiner:
+
+            # No state remains after the combination, return all values in a list
+            if not upstream_node.state.ind_l_final:
+                return [retrieve_from_job(j) for j in upstream_node.tasks]
+
+            # Group the values of the tasks into a list before returning
+            def group_values(index: int) -> list:
+                # Get a slice of the tasks that match the given index of the state array of the
+                # combined values
+                final_index = set(upstream_node.state.states_ind_final[index].items())
+                return [
+                    retrieve_from_job(upstream_node._tasks[i])
+                    for i, ind in enumerate(upstream_node.state.states_ind)
+                    if set(ind.items()).issuperset(final_index)
+                ]
+
+            if state_index is None:  # return all groups if no index is given
+                return StateArray(
+                    group_values(i) for i in range(len(upstream_node.state.ind_l_final))
+                )
+            return group_values(state_index)  # select the group that matches the index
+        if state_index is None:  # return all jobs in a state array
+            return StateArray(retrieve_from_job(j) for j in upstream_node.tasks)
+        # Select the job that matches the index
+        return retrieve_from_job(upstream_node._tasks[state_index])
 
     @property
     def _source(self):
diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py
@@ -18,7 +18,6 @@
     attrs_values,
 )
 from pydra.utils.hash import PersistentCache
-from pydra.utils.typing import StateArray
 from pydra.engine.lazy import LazyField
 from .audit import Audit
 from .core import Task
@@ -536,7 +535,6 @@ def __init__(
         self.queued = {}
         self.running = {}  # Not used in logic, but may be useful for progress tracking
         self.unrunnable = defaultdict(list)
-        self.state_names = self.node.state.names if self.node.state else []
         self.workflow = workflow
         self.graph = None
 
@@ -566,47 +564,19 @@ def tasks(self) -> ty.Generator["Task[DefType]", None, None]:
             raise RuntimeError("Tasks have not been generated")
         return self._tasks.values()
 
-    def get_jobs(self, final_index: int | None = None) -> "Task | StateArray[Task]":
-        """Get the jobs that match a given state index.
-
-        Parameters
-        ----------
-        final_index : int, optional
-            The index of the output state array (i.e. after any combinations) of the
-            job to get, by default None
-
-        Returns
-        -------
-        matching : Task | StateArray[Task]
-            The task or tasks that match the given index
-        """
-        if not self.tasks:  # No jobs, return empty state array
-            return StateArray()
-        if not self.node.state:  # Return the singular job
-            return self._tasks[None]
-        if final_index is None:  # return all jobs in a state array
-            return StateArray(self._tasks.values())
-        if not self.node.state.combiner:  # Select the job that matches the index
-            return self._tasks[final_index]
-        # Get a slice of the tasks that match the given index of the state array of the
-        # combined values
-        final_index = set(self.node.state.states_ind_final[final_index].items())
-        return StateArray(
-            self._tasks[i]
-            for i, ind in enumerate(self.node.state.states_ind)
-            if set(ind.items()).issuperset(final_index)
-        )
-
     def start(self) -> None:
         """Prepare the execution node so that it can be processed"""
         self._tasks = {}
         if self.state:
             values = {}
             for name, value in self.node.state_values.items():
-                if name in self.node.state.names and isinstance(value, LazyField):
-                    values[name] = value._get_value(
-                        workflow=self.workflow, graph=self.graph
-                    )
+                if name in self.node.state.names:
+                    if isinstance(value, LazyField):
+                        values[name] = value._get_value(
+                            workflow=self.workflow, graph=self.graph
+                        )
+                    else:
+                        values[name] = value
             self.state.prepare_states(values)
             self.state.prepare_inputs()
             # Generate the tasks
diff --git a/pydra/engine/tests/test_workflow.py b/pydra/engine/tests/test_workflow.py