Merge pull request #787 from nipype/touch-ups

tclose · web-flow · commit e7428363eeac · 2025-04-08T09:16:05.000+10:00
Touch-ups
diff --git a/pydra/compose/base/task.py b/pydra/compose/base/task.py
@@ -39,11 +39,16 @@ class Outputs:
     RESERVED_FIELD_NAMES = ("inputs",)
 
     _cache_dir: Path = attrs.field(default=None, init=False, repr=False)
+    _node = attrs.field(default=None, init=False, repr=False)
 
     @property
     def inputs(self):
         """The inputs object associated with a lazy-outputs object"""
-        return self._get_node().inputs
+        if self._node is None:
+            raise AttributeError(
+                f"{self} outputs object is not a lazy output of a workflow node"
+            )
+        return self._node.inputs
 
     @classmethod
     def _from_task(cls, job: "Job[TaskType]") -> Self:
@@ -81,14 +86,6 @@ def _results(self) -> "Result[Self]":
         with open(results_path, "rb") as f:
             return cp.load(f)
 
-    def _get_node(self):
-        try:
-            return self._node
-        except AttributeError:
-            raise AttributeError(
-                f"{self} outputs object is not a lazy output of a workflow node"
-            ) from None
-
     def __iter__(self) -> ty.Generator[str, None, None]:
         """The names of the fields in the output object"""
         return iter(sorted(f.name for f in attrs_fields(self)))
diff --git a/pydra/compose/python.py b/pydra/compose/python.py
@@ -2,7 +2,7 @@
 import inspect
 from typing import dataclass_transform
 import attrs
-from pydra.utils.general import task_fields, attrs_values
+from pydra.utils.general import task_fields, task_dict
 from pydra.compose import base
 from pydra.compose.base import (
     ensure_field_objects,
@@ -231,7 +231,7 @@ class PythonTask(base.Task[PythonOutputsType]):
 
     def _run(self, job: "Job[PythonTask]", rerun: bool = True) -> None:
         # Prepare the inputs to the function
-        inputs = attrs_values(self)
+        inputs = task_dict(self)
         del inputs["function"]
         # Run the actual function
         returned = self.function(**inputs)
diff --git a/pydra/compose/tests/test_workflow_fields.py b/pydra/compose/tests/test_workflow_fields.py
@@ -1,4 +1,5 @@
 from operator import attrgetter
+from pathlib import Path
 from copy import copy
 from unittest.mock import Mock
 import pytest
@@ -15,17 +16,17 @@
 
 
 @python.define
-def Add(a, b):
+def Add(a: int | float, b: int | float) -> int | float:
     return a + b
 
 
 @python.define
-def Mul(a, b):
+def Mul(a: int | float, b: int | float) -> int | float:
     return a * b
 
 
 @python.define(outputs=["divided"])
-def Divide(x, y):
+def Divide(x: int | float, y: int | float) -> float:
     return x / y
 
 
@@ -68,7 +69,9 @@ def MyTestWorkflow(a, b):
     wf = Workflow.construct(workflow_spec)
     assert wf.inputs.a == 1
     assert wf.inputs.b == 2.0
-    assert wf.outputs.out == LazyOutField(node=wf["Mul"], field="out", type=ty.Any)
+    assert wf.outputs.out == LazyOutField(
+        node=wf["Mul"], field="out", type=int | float, type_checked=True
+    )
 
     # Nodes are named after the specs by default
     assert list(wf.node_names) == ["Add", "Mul"]
@@ -185,7 +188,9 @@ class Outputs(workflow.Outputs):
     wf = Workflow.construct(workflow_spec)
     assert wf.inputs.a == 1
     assert wf.inputs.b == 2.0
-    assert wf.outputs.out == LazyOutField(node=wf["Mul"], field="out", type=ty.Any)
+    assert wf.outputs.out == LazyOutField(
+        node=wf["Mul"], field="out", type=int | float, type_checked=True
+    )
 
     # Nodes are named after the specs by default
     assert list(wf.node_names) == ["Add", "Mul"]
@@ -323,7 +328,7 @@ def MyTestWorkflow(a: int, b: float) -> tuple[float, float]:
         node=wf["Mul"], field="out", type=float, type_checked=True
     )
     assert wf.outputs.out2 == LazyOutField(
-        node=wf["division"], field="divided", type=ty.Any
+        node=wf["division"], field="divided", type=float, type_checked=True
     )
     assert list(wf.node_names) == ["addition", "Mul", "division"]
 
@@ -362,8 +367,12 @@ def MyTestWorkflow(a: int, b: float):
     wf = Workflow.construct(workflow_spec)
     assert wf.inputs.a == 1
     assert wf.inputs.b == 2.0
-    assert wf.outputs.out1 == LazyOutField(node=wf["Mul"], field="out", type=ty.Any)
-    assert wf.outputs.out2 == LazyOutField(node=wf["Add"], field="out", type=ty.Any)
+    assert wf.outputs.out1 == LazyOutField(
+        node=wf["Mul"], field="out", type=int | float, type_checked=True
+    )
+    assert wf.outputs.out2 == LazyOutField(
+        node=wf["Add"], field="out", type=int | float, type_checked=True
+    )
     assert list(wf.node_names) == ["Add", "Mul"]
 
 
@@ -500,3 +509,51 @@ def RecursiveNestedWorkflow(a: float, depth: int) -> float:
         type=float,
         type_checked=True,
     )
+
+
+def test_workflow_lzout_inputs1(tmp_path: Path):
+
+    @workflow.define
+    def InputAccessWorkflow(a, b, c):
+        add = workflow.add(Add(a=a, b=b))
+        add.inputs.a = c
+        mul = workflow.add(Mul(a=add.out, b=b))
+        return mul.out
+
+    input_access_workflow = InputAccessWorkflow(a=1, b=2.0, c=3.0)
+    outputs = input_access_workflow(cache_root=tmp_path)
+    assert outputs.out == 10.0
+
+
+def test_workflow_lzout_inputs2(tmp_path: Path):
+
+    @workflow.define
+    def InputAccessWorkflow2(a, b, c):
+        add = workflow.add(Add(a=a, b=b))
+        add.inputs.a = c
+        mul = workflow.add(Mul(a=add.out, b=b))
+        return mul.out
+
+    input_access_workflow = InputAccessWorkflow2(a=1, b=2.0, c=3.0)
+    outputs = input_access_workflow(cache_root=tmp_path)
+    assert outputs.out == 10.0
+
+
+def test_workflow_lzout_inputs_state_change_fail(tmp_path: Path):
+    """Set the inputs of the 'mul1' node after its outputs have been accessed
+    with an upstream lazy field that has a different state than the original.
+    This changes the type of the input and is therefore not permitted"""
+
+    @workflow.define
+    def InputAccessWorkflow3(a, b, c):
+        add1 = workflow.add(Add(a=a, b=b), name="add1")
+        add2 = workflow.add(Add(a=a).split(b=c), name="add2")
+        mul1 = workflow.add(Mul(a=add1.out, b=b), name="mul1")
+        workflow.add(Mul(a=mul1.out, b=b), name="mul2")
+        mul1.inputs.a = add2.out
+
+    input_access_workflow = InputAccessWorkflow3(a=1, b=2.0, c=[3.0, 4.0])
+    with pytest.raises(
+        RuntimeError, match="have already been accessed and therefore cannot set"
+    ):
+        input_access_workflow.construct()
diff --git a/pydra/conftest.py b/pydra/conftest.py
@@ -43,15 +43,15 @@ def pytest_generate_tests(metafunc):
 
 # For debugging in IDE's don't catch raised exceptions and let the IDE
 # break at it
-if os.getenv("_PYTEST_RAISE", "0") != "0":
+if os.getenv("_PYTEST_RAISE", "0") != "0":  # pragma: no cover
 
-    @pytest.hookimpl(tryfirst=True)
-    def pytest_exception_interact(call):
-        raise call.excinfo.value
+    @pytest.hookimpl(tryfirst=True)  # pragma: no cover
+    def pytest_exception_interact(call):  # pragma: no cover
+        raise call.excinfo.value  # pragma: no cover
 
-    @pytest.hookimpl(tryfirst=True)
-    def pytest_internalerror(excinfo):
-        raise excinfo.value
+    @pytest.hookimpl(tryfirst=True)  # pragma: no cover
+    def pytest_internalerror(excinfo):  # pragma: no cover
+        raise excinfo.value  # pragma: no cover
 
 
 # Example VSCode launch configuration for debugging unittests
diff --git a/pydra/engine/lazy.py b/pydra/engine/lazy.py
@@ -30,7 +30,7 @@ class LazyField(ty.Generic[T], metaclass=abc.ABCMeta):
 
     def __bytes_repr__(self, cache):
         yield type(self).__name__.encode() + b"("
-        yield from bytes(hash_single(self.source, cache))
+        yield b"source=" + bytes(hash_single(self._source, cache))
         yield b"field=" + self._field.encode()
         yield b"type=" + bytes(hash_single(self._type, cache))
         yield b"cast_from=" + bytes(hash_single(self._cast_from, cache))
diff --git a/pydra/engine/node.py b/pydra/engine/node.py
@@ -3,7 +3,7 @@
 from enum import Enum
 import attrs
 from pydra.engine import lazy
-from pydra.utils.general import attrs_values
+from pydra.utils.general import attrs_values, task_dict
 from pydra.utils.typing import is_lazy
 from pydra.engine.state import State, add_name_splitter, add_name_combiner
 
@@ -79,7 +79,7 @@ def __setattr__(self, name: str, value: ty.Any) -> None:
                         f"cannot set {name!r} input to {value} because it changes the "
                         f"state"
                     )
-                    self._set_state()
+                    self._node._set_state()
 
     @property
     def inputs(self) -> Inputs:
@@ -144,6 +144,7 @@ def lzout(self) -> OutputType:
             # output of an upstream node with additional state variables.
             outpt._type_checked = False
         self._lzout = outputs
+        outputs._node = self
         return outputs
 
     @property
@@ -161,10 +162,8 @@ def combiner(self):
     def _check_if_outputs_have_been_used(self, msg):
         used = []
         if self._lzout:
-            for outpt_name, outpt_val in attrs.asdict(
-                self._lzout, recurse=False
-            ).items():
-                if outpt_val.type_checked:
+            for outpt_name, outpt_val in task_dict(self._lzout).items():
+                if outpt_val._type_checked:
                     used.append(outpt_name)
         if used:
             raise RuntimeError(
diff --git a/pydra/engine/workflow.py b/pydra/engine/workflow.py
@@ -95,20 +95,20 @@ def construct(
         non_lazy_keys = frozenset(non_lazy_vals)
         hash_cache = Cache()  # share the hash cache to avoid recalculations
         non_lazy_hash = hash_function(non_lazy_vals, cache=hash_cache)
-        defn_hash = hash_function(type(task), cache=hash_cache)
+        task_hash = hash_function(type(task), cache=hash_cache)
         # Check for same non-lazy inputs
         try:
-            defn_cache = cls._constructed_cache[defn_hash]
+            cached_tasks = cls._constructed_cache[task_hash]
         except KeyError:
             pass
         else:
             if (
-                non_lazy_keys in defn_cache
-                and non_lazy_hash in defn_cache[non_lazy_keys]
+                non_lazy_keys in cached_tasks
+                and non_lazy_hash in cached_tasks[non_lazy_keys]
             ):
-                return defn_cache[non_lazy_keys][non_lazy_hash]
+                return cached_tasks[non_lazy_keys][non_lazy_hash]
             # Check for supersets of lazy inputs
-            for key_set, key_set_cache in defn_cache.items():
+            for key_set, key_set_cache in cached_tasks.items():
                 if key_set.issubset(non_lazy_keys):
                     subset_vals = {
                         k: v for k, v in non_lazy_vals.items() if k in key_set
@@ -193,7 +193,7 @@ def construct(
                     f"constructor of {workflow!r}"
                 )
         if not dont_cache:
-            cls._constructed_cache[defn_hash][non_lazy_keys][non_lazy_hash] = workflow
+            cls._constructed_cache[task_hash][non_lazy_keys][non_lazy_hash] = workflow
 
         return workflow
 
diff --git a/pydra/utils/tests/test_hash.py b/pydra/utils/tests/test_hash.py
@@ -21,6 +21,7 @@
 import random
 from fileformats.generic import Directory, File
 from pydra.utils.hash import hash_function
+from pydra.utils.tests.utils import Concatenate
 
 
 def test_hash_file(tmpdir):
@@ -558,3 +559,14 @@ def __repr__(self):
         ),
     ):
         hash_object(A())
+
+
+def test_hash_task(tmp_path):
+    """
+    Test that two independently constructed, identical tasks produce the same hash
+    """
+
+    concatenate1 = Concatenate()
+    concatenate2 = Concatenate()
+
+    assert hash_function(concatenate1) == hash_function(concatenate2)
diff --git a/pydra/utils/tests/utils.py b/pydra/utils/tests/utils.py
@@ -1,3 +1,5 @@
+import typing as ty
+from pathlib import Path
 from fileformats.generic import File, BinaryFile
 from fileformats.core.mixin import WithSeparateHeader, WithMagicNumber
 from pydra.compose import shell, python
@@ -94,3 +96,33 @@ class Outputs(shell.Outputs):
         )
 
     executable = "echo"
+
+
+@python.define(outputs=["out_file"])
+def Concatenate(
+    in_file1: File,
+    in_file2: File,
+    out_file: ty.Optional[Path] = None,
+    duplicates: int = 1,
+) -> File:
+    """Concatenates the contents of two files and writes them to a third
+
+    Parameters
+    ----------
+    in_file1 : File
+        A text file
+    in_file2 : File
+        Another text file
+    out_file : Path
+        The path to write the output file to
+
+    Returns
+    -------
+    out_file: Path
+        A text file made by concatenating the two inputs
+    """
+    if out_file is None:
+        out_file = Path("out_file.txt").absolute()
+    contents = [Path(fname).read_text() for fname in (in_file1, in_file2)]
+    out_file.write_text("\n".join(contents * duplicates))
+    return out_file