Merge branch 'master' into fix/slurm_errorfiles

djarecka · djarecka · commit 6414c217a0c6 · 2020-05-17T00:03:44.000-04:00
diff --git a/pydra/engine/core.py b/pydra/engine/core.py
@@ -219,16 +219,15 @@ def version(self):
 
     @property
     def checksum(self):
-        """Calculate a unique checksum of this task."""
-        # if checksum is called before run the _graph_checksums is not ready
-        if is_workflow(self) and self.inputs._graph_checksums is attr.NOTHING:
-            self.inputs._graph_checksums = [nd.checksum for nd in self.graph_sorted]
-
+        """ Calculates the unique checksum of the task.
+            Used to create a specific directory name for tasks that are run,
+            and to create the node checksums needed for graph checksums
+            (before the tasks have inputs etc.)
+        """
         input_hash = self.inputs.hash
         if self.state is None:
             self._checksum = create_checksum(self.__class__.__name__, input_hash)
         else:
-            # including splitter in the hash
             splitter_hash = hash_function(self.state.splitter)
             self._checksum = create_checksum(
                 self.__class__.__name__, hash_function([input_hash, splitter_hash])
@@ -237,10 +236,9 @@ def checksum(self):
 
     def checksum_states(self, state_index=None):
         """
-        Calculate a checksum for the specific state or all of the states.
-
+        Calculate a checksum for the specific state or all of the states of the task.
         Replaces lists in the inputs fields with a specific values for states.
-        Can be used only for tasks with a state.
+        Used to recreate names of the task directories.
 
         Parameters
         ----------
@@ -259,7 +257,14 @@ def checksum_states(self, state_index=None):
                     getattr(inputs_copy, key.split(".")[1])[ind],
                 )
             input_hash = inputs_copy.hash
-            checksum_ind = create_checksum(self.__class__.__name__, input_hash)
+            if is_workflow(self):
+                con_hash = hash_function(self._connections)
+                hash_list = [input_hash, con_hash]
+                checksum_ind = create_checksum(
+                    self.__class__.__name__, self._checksum_wf(input_hash)
+                )
+            else:
+                checksum_ind = create_checksum(self.__class__.__name__, input_hash)
             return checksum_ind
         else:
             checksum_list = []
@@ -753,6 +758,41 @@ def graph_sorted(self):
         """Get a sorted graph representation of the workflow."""
         return self.graph.sorted_nodes
 
+    @property
+    def checksum(self):
+        """Calculate the unique checksum of the workflow.
+
+        Used to create a specific directory name for tasks that are run,
+        and to create the node checksums needed for the graph checksums
+        (before the tasks have inputs etc.).
+
+        The connections, and the splitter when a state is set, are included
+        in the hash via ``_checksum_wf``; the result is kept in ``_checksum``.
+        """
+        # if checksum is called before run, _graph_checksums is not ready yet
+        if is_workflow(self) and self.inputs._graph_checksums is attr.NOTHING:
+            self.inputs._graph_checksums = [nd.checksum for nd in self.graph_sorted]
+
+        input_hash = self.inputs.hash
+        # the splitter takes part in the hash only when the workflow has a state
+        wf_hash = self._checksum_wf(
+            input_hash, with_splitter=bool(self.state)
+        )
+        self._checksum = create_checksum(self.__class__.__name__, wf_hash)
+        return self._checksum
+
+    def _checksum_wf(self, input_hash, with_splitter=False):
+        """Create the hash value for a workflow.
+
+        Combines ``input_hash`` with the hash of the connections and, when
+        ``with_splitter`` is True and a state is set, the splitter hash.
+        """
+        parts = [input_hash, hash_function(self._connections)]
+        if with_splitter and self.state:
+            # including splitter in the hash
+            parts.append(hash_function(self.state.splitter))
+        return hash_function(parts)
+
     def add(self, task):
         """
         Add a task to the workflow.
@@ -887,18 +927,29 @@ def set_output(self, connections):
             TODO
 
         """
+        if self._connections is None:
+            self._connections = []
         if isinstance(connections, tuple) and len(connections) == 2:
-            self._connections = [connections]
+            new_connections = [connections]
         elif isinstance(connections, list) and all(
             [len(el) == 2 for el in connections]
         ):
-            self._connections = connections
+            new_connections = connections
         elif isinstance(connections, dict):
-            self._connections = list(connections.items())
+            new_connections = list(connections.items())
         else:
             raise Exception(
                 "Connections can be a 2-elements tuple, a list of these tuples, or dictionary"
             )
+        # checking if a new output name is already in the connections
+        connection_names = [name for name, _ in self._connections]
+        new_names = [name for name, _ in new_connections]
+        if set(connection_names).intersection(new_names):
+            raise Exception(
+                f"output name {set(connection_names).intersection(new_names)} is already set"
+            )
+
+        self._connections += new_connections
         fields = [(name, ty.Any) for name, _ in self._connections]
         self.output_spec = SpecInfo(name="Output", fields=fields, bases=(BaseSpec,))
         logger.info("Added %s to %s", self.output_spec, self)
diff --git a/pydra/engine/helpers_file.py b/pydra/engine/helpers_file.py
@@ -535,11 +535,11 @@ def is_local_file(f):
     return f.type is File and "container_path" not in f.metadata
 
 
-def is_existing_file(f):
+def is_existing_file(value):
     """ checking if an object is an existing file"""
-    if not f:
+    if isinstance(value, str) and value == "":  # Path("") is ".", which exists
         return False
     try:
-        return Path(f).exists()
+        return Path(value).exists()
     except TypeError:
         return False
diff --git a/pydra/engine/tests/test_boutiques.py b/pydra/engine/tests/test_boutiques.py
@@ -21,7 +21,7 @@
 
 @no_win
 @need_bosh_docker
-@pytest.mark.flaky(reruns=2)  # need for travis
+@pytest.mark.flaky(reruns=3)  # need for travis
 @pytest.mark.parametrize(
     "maskfile", ["test_brain.nii.gz", "test_brain", "test_brain.nii"]
 )
@@ -45,6 +45,7 @@ def test_boutiques_1(maskfile, plugin, results_function):
 
 @no_win
 @need_bosh_docker
+@pytest.mark.flaky(reruns=3)
 def test_boutiques_spec_1():
     """ testing spec: providing input/output fields names"""
     btask = BoshTask(
@@ -69,6 +70,7 @@ def test_boutiques_spec_1():
 
 @no_win
 @need_bosh_docker
+@pytest.mark.flaky(reruns=3)
 def test_boutiques_spec_2():
     """ testing spec: providing partial input/output fields names"""
     btask = BoshTask(
@@ -91,6 +93,7 @@ def test_boutiques_spec_2():
 
 @no_win
 @need_bosh_docker
+@pytest.mark.flaky(reruns=3)
 @pytest.mark.parametrize(
     "maskfile", ["test_brain.nii.gz", "test_brain", "test_brain.nii"]
 )
@@ -121,6 +124,7 @@ def test_boutiques_wf_1(maskfile, plugin):
 
 @no_win
 @need_bosh_docker
+@pytest.mark.flaky(reruns=3)
 @pytest.mark.parametrize(
     "maskfile", ["test_brain.nii.gz", "test_brain", "test_brain.nii"]
 )
diff --git a/pydra/engine/tests/test_numpy_examples.py b/pydra/engine/tests/test_numpy_examples.py
@@ -0,0 +1,35 @@
+import importlib.util
+import typing as ty
+import pytest
+
+from ..submitter import Submitter
+from ..core import Workflow
+from ...mark import task, annotate
+
+if importlib.util.find_spec("numpy") is None:
+    pytest.skip("can't find numpy library", allow_module_level=True)
+import numpy as np  # noqa: E402 - must follow the skip guard above
+
+
+@task
+@annotate({"return": {"b": ty.Any}})
+def arrayout(val):
+    return np.array([val] * 2)  # same as [val, val]: the value repeated twice
+
+
+def test_multiout(plugin):
+    """Workflow around a split/combined task that returns a numpy array."""
+    wf = Workflow("wf", input_spec=["val"], val=[0, 1, 2])
+    wf.add(arrayout(name="mo", val=wf.lzin.val))
+    wf.mo.split("val").combine("val")
+    wf.set_output([("array", wf.mo.lzout.b)])
+
+    with Submitter(plugin=plugin, n_procs=2) as sub:
+        sub(runnable=wf)
+
+    results = wf.result(return_inputs=True)
+    assert results[0] == {"wf.val": [0, 1, 2]}
+
+    arrays = results[1].output.array
+    for el in range(3):
+        assert np.array_equal(arrays[el], np.array([el, el]))
diff --git a/pydra/engine/tests/test_workflow.py b/pydra/engine/tests/test_workflow.py
diff --git a/pydra/engine/tests/utils.py b/pydra/engine/tests/utils.py