fixed splitting of tasks over states where value comes from lazy field

tclose · tclose · commit 7b3aeb5058cc · 2025-03-04T12:03:18.000+11:00
diff --git a/pydra/engine/core.py b/pydra/engine/core.py
@@ -916,47 +916,6 @@ def _create_graph(
                     )
         return graph
 
-    def create_dotfile(self, type="simple", export=None, name=None, output_dir=None):
-        """creating a graph - dotfile and optionally exporting to other formats"""
-        outdir = output_dir if output_dir is not None else self.cache_dir
-        graph = self.graph
-        if not name:
-            name = f"graph_{self._node.name}"
-        if type == "simple":
-            for task in graph.nodes:
-                self.create_connections(task)
-            dotfile = graph.create_dotfile_simple(outdir=outdir, name=name)
-        elif type == "nested":
-            for task in graph.nodes:
-                self.create_connections(task)
-            dotfile = graph.create_dotfile_nested(outdir=outdir, name=name)
-        elif type == "detailed":
-            # create connections with detailed=True
-            for task in graph.nodes:
-                self.create_connections(task, detailed=True)
-            # adding wf outputs
-            for wf_out, lf in self._connections:
-                graph.add_edges_description(
-                    (self._node.name, wf_out, lf._node.name, lf.field)
-                )
-            dotfile = graph.create_dotfile_detailed(outdir=outdir, name=name)
-        else:
-            raise Exception(
-                f"type of the graph can be simple, detailed or nested, "
-                f"but {type} provided"
-            )
-        if not export:
-            return dotfile
-        else:
-            if export is True:
-                export = ["png"]
-            elif isinstance(export, str):
-                export = [export]
-            formatted_dot = []
-            for ext in export:
-                formatted_dot.append(graph.export_graph(dotfile=dotfile, ext=ext))
-            return dotfile, formatted_dot
-
 
 def is_workflow(obj):
     """Check whether an object is a :class:`Workflow` instance."""
diff --git a/pydra/engine/helpers.py b/pydra/engine/helpers.py
@@ -18,7 +18,7 @@
 from fileformats.core import FileSet
 
 if ty.TYPE_CHECKING:
-    from .specs import TaskDef, Result, WorkflowOutputs
+    from .specs import TaskDef, Result, WorkflowOutputs, WorkflowDef
     from .core import Task
     from pydra.design.base import Field
 
@@ -28,6 +28,61 @@
 DefType = ty.TypeVar("DefType", bound="TaskDef")
 
 
+def plot_workflow(
+    workflow_task: "WorkflowDef",
+    out_dir: Path,
+    type="simple",  # one of "simple", "nested" or "detailed"
+    export=None,  # falsy: dotfile only; True: ["png"]; str or iterable of extensions
+    name=None,  # falls back to f"graph_{wf._node.name}" when falsy
+    output_dir=None,  # NOTE(review): never read in this body; ``out_dir`` is used
+):
+    """Write a dotfile graph of a workflow and optionally export it to other formats."""
+    from .core import Workflow  # local import; presumably avoids a circular import
+
+    # Create the output directory (and any missing parents) if it does not exist
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    # Construct the workflow object so that its graph can be traversed
+    wf = Workflow.construct(workflow_task)
+    graph = wf.graph
+    if not name:
+        name = f"graph_{wf._node.name}"
+    if type == "simple":
+        for task in graph.nodes:
+            wf.create_connections(task)
+        dotfile = graph.create_dotfile_simple(outdir=out_dir, name=name)
+    elif type == "nested":
+        for task in graph.nodes:
+            wf.create_connections(task)
+        dotfile = graph.create_dotfile_nested(outdir=out_dir, name=name)
+    elif type == "detailed":
+        # create connections with detailed=True
+        for task in graph.nodes:
+            wf.create_connections(task, detailed=True)
+        # add an edge description for every workflow-output connection
+        for wf_out, lf in wf._connections:
+            graph.add_edges_description(
+                (wf._node.name, wf_out, lf._node.name, lf.field)
+            )
+        dotfile = graph.create_dotfile_detailed(outdir=out_dir, name=name)
+    else:
+        raise Exception(
+            f"type of the graph can be simple, detailed or nested, "
+            f"but {type} provided"
+        )
+    if not export:
+        return dotfile  # no export requested: only the dotfile is returned
+    else:
+        if export is True:
+            export = ["png"]
+        elif isinstance(export, str):
+            export = [export]  # a single extension is wrapped so the loop below works
+        formatted_dot = []
+        for ext in export:
+            formatted_dot.append(graph.export_graph(dotfile=dotfile, ext=ext))
+        return dotfile, formatted_dot  # one export_graph() result per extension
+
+
 def attrs_fields(definition, exclude_names=()) -> list[attrs.Attribute]:
     """Get the fields of a definition, excluding some names."""
     return [
diff --git a/pydra/engine/node.py b/pydra/engine/node.py
@@ -1,18 +1,15 @@
 import typing as ty
-from copy import deepcopy, copy
+from copy import deepcopy
 from enum import Enum
 import attrs
 from pydra.utils.typing import TypeParser, StateArray
 from . import lazy
 from pydra.engine.helpers import (
-    ensure_list,
     attrs_values,
     is_lazy,
-    create_checksum,
 )
-from pydra.utils.hash import hash_function
 from pydra.engine import helpers_state as hlpst
-from pydra.engine.state import State, StateIndex
+from pydra.engine.state import State
 
 if ty.TYPE_CHECKING:
     from .core import Workflow
@@ -172,51 +169,6 @@ def combiner(self):
             return ()
         return self._state.combiner
 
-    def _checksum_states(self, state_index: StateIndex = StateIndex()):
-        """
-        Calculate a checksum for the specific state or all of the states of the task.
-        Replaces state-arrays in the inputs fields with a specific values for states.
-        Used to recreate names of the task directories,
-
-        Parameters
-        ----------
-        state_index :
-            TODO
-
-        """
-        # if is_workflow(self) and self._definition._graph_checksums is attr.NOTHING:
-        #     self._definition._graph_checksums = {
-        #         nd.name: nd.checksum for nd in self.graph_sorted
-        #     }
-        from pydra.engine.specs import WorkflowDef
-
-        if state_index:
-            inputs_copy = copy(self._definition)
-            for key, ind in self.state.inputs_ind[state_index].items():
-                val = self._extract_input_el(
-                    inputs=self._definition, inp_nm=key.split(".")[1], ind=ind
-                )
-                setattr(inputs_copy, key.split(".")[1], val)
-            # setting files_hash again in case it was cleaned by setting specific element
-            # that might be important for outer splitter of input variable with big files
-            # the file can be changed with every single index even if there are only two files
-            input_hash = inputs_copy.hash
-            if isinstance(self._definition, WorkflowDef):
-                con_hash = hash_function(self._connections)
-                # TODO: hash list is not used
-                hash_list = [input_hash, con_hash]  # noqa: F841
-                checksum_ind = create_checksum(
-                    self.__class__.__name__, self._checksum_wf(input_hash)
-                )
-            else:
-                checksum_ind = create_checksum(self.__class__.__name__, input_hash)
-            return checksum_ind
-        else:
-            checksum_list = []
-            for ind in range(len(self.state.inputs_ind)):
-                checksum_list.append(self._checksum_states(state_index=ind))
-            return checksum_list
-
     def _check_if_outputs_have_been_used(self, msg):
         used = []
         if self._lzout:
@@ -287,24 +239,6 @@ def _get_upstream_states(self) -> dict[str, tuple["State", list[str]]]:
                     upstream_states[node.name][1].append(inpt_name)
         return upstream_states
 
-    def _extract_input_el(self, inputs, inp_nm, ind):
-        """
-        Extracting element of the inputs taking into account
-        container dimension of the specific element that can be set in self.state.cont_dim.
-        If input name is not in cont_dim, it is assumed that the input values has
-        a container dimension of 1, so only the most outer dim will be used for splitting.
-        If
-        """
-        if f"{self.name}.{inp_nm}" in self.state.cont_dim:
-            return list(
-                hlpst.flatten(
-                    ensure_list(getattr(inputs, inp_nm)),
-                    max_depth=self.state.cont_dim[f"{self.name}.{inp_nm}"],
-                )
-            )[ind]
-        else:
-            return getattr(inputs, inp_nm)[ind]
-
         # else:
         #     # todo it never gets here
         #     breakpoint()
diff --git a/pydra/engine/state.py b/pydra/engine/state.py
@@ -1253,3 +1253,29 @@ def _single_op_splits(self, op_single):
             val = op["*"](val_ind)
             keys = [op_single]
             return val, keys
+
+    def _get_element(self, value: ty.Any, field_name: str, ind: int):
+        """
+        Extract a single element of a field's value, taking into account the
+        container dimension registered for that field in ``self.cont_dim``. If the
+        field is not in ``cont_dim``, the value is indexed directly, so only its
+        outermost dimension is used for splitting.
+
+        Parameters
+        ----------
+        value : Any
+            value of the input field to extract the element from
+        field_name : str
+            name of the input field (looked up as ``{self.name}.{field_name}``)
+        ind : int
+            position of the element in the (flattened) value
+        """
+        if f"{self.name}.{field_name}" in self.cont_dim:
+            return list(
+                hlpst.flatten(
+                    ensure_list(value),
+                    max_depth=self.cont_dim[f"{self.name}.{field_name}"],
+                )
+            )[ind]
+        else:
+            return value[ind]
diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py
@@ -690,24 +690,23 @@ def _split_definition(self) -> dict[StateIndex, "TaskDef[OutputType]"]:
             return {None: self.node._definition}
         split_defs = {}
         for input_ind in self.node.state.inputs_ind:
-            inputs_dict = {}
+            resolved = {}
             for inp in set(self.node.input_names):
+                value = getattr(self.node._definition, inp)
+                if isinstance(value, LazyField):
+                    value = resolved[inp] = value._get_value(
+                        workflow=self.workflow,
+                        graph=self.graph,
+                        state_index=StateIndex(input_ind),
+                    )
                 if f"{self.node.name}.{inp}" in input_ind:
-                    value = getattr(self.node._definition, inp)
-                    if isinstance(value, LazyField):
-                        inputs_dict[inp] = value._get_value(
-                            workflow=self.workflow,
-                            graph=self.graph,
-                            state_index=StateIndex(input_ind),
-                        )
-                    else:
-                        inputs_dict[inp] = self.node._extract_input_el(
-                            inputs=self.node._definition,
-                            inp_nm=inp,
-                            ind=input_ind[f"{self.node.name}.{inp}"],
-                        )
+                    resolved[inp] = self.node.state._get_element(
+                        value=value,
+                        field_name=inp,
+                        ind=input_ind[f"{self.node.name}.{inp}"],
+                    )
             split_defs[StateIndex(input_ind)] = attrs.evolve(
-                self.node._definition, **inputs_dict
+                self.node._definition, **resolved
             )
         return split_defs
 
diff --git a/pydra/engine/tests/test_numpy_examples.py b/pydra/engine/tests/test_numpy_examples.py
@@ -81,8 +81,7 @@ def test_task_numpyinput_1(tmp_path: Path):
     nn = Identity().split(x=[np.array([1, 2]), np.array([3, 4])])
     # checking the results
     outputs = nn(cache_dir=tmp_path)
-    assert (outputs.out[0] == np.array([1, 2])).all()
-    assert (outputs.out[1] == np.array([3, 4])).all()
+    assert (np.array(outputs.out) == np.array([[1, 2], [3, 4]])).all()
 
 
 def test_task_numpyinput_2(tmp_path: Path):
diff --git a/pydra/engine/tests/test_workflow.py b/pydra/engine/tests/test_workflow.py
@@ -37,6 +37,7 @@
 )
 from pydra.engine.submitter import Submitter
 from pydra.design import python, workflow
+import pydra.engine.core
 from pydra.utils import exc_info_matches
 
 
@@ -959,8 +960,7 @@ def Workflow(x, y):
 
     assert not results.errored, "\n".join(results.errors["error message"])
 
-    assert results.outputs.out[0] == [13, 24, 35]
-    assert results.outputs.out[1] == [14, 26, 38]
+    assert results.outputs.out == [[13, 24, 35], [14, 26, 38]]
 
 
 def test_wf_ndst_7(plugin, tmpdir):
@@ -3735,13 +3735,14 @@ def Workflow1(x, y):
 def create_tasks():
     @workflow.define
     def Workflow(x):
-        t1 = workflow.add(Add2(x=x))
-        t2 = workflow.add(Multiply(x=t1.out, y=2))
+        t1 = workflow.add(Add2(x=x), name="t1")
+        t2 = workflow.add(Multiply(x=t1.out, y=2), name="t2")
         return t2.out
 
     wf = Workflow(x=1)
-    t1 = wf.name2obj["t1"]
-    t2 = wf.name2obj["t2"]
+    workflow_obj = pydra.engine.core.Workflow.construct(wf)
+    t1 = workflow_obj["t1"]
+    t2 = workflow_obj["t2"]
     return wf, t1, t2