refactor: Refactored using decoder 2.5.0

Alan Christie · Alan Christie · commit 6397955b44e5 · 2025-09-02T09:45:09.000Z
diff --git a/tests/job-definitions/job-definitions.yaml b/tests/job-definitions/job-definitions.yaml
@@ -136,3 +136,10 @@ jobs:
   splitsmiles:
     command: >-
       copyf.py {{ inputFile }}
+    # Simulate multiple output files...
+    variables:
+      outputs:
+        properties:
+          outputBase:
+            creates: '{{ outputBase }}_*.smi'
+            type: files
diff --git a/tests/test_workflow_engine_examples.py b/tests/test_workflow_engine_examples.py
@@ -398,7 +398,6 @@ def test_workflow_engine_simple_python_molprops_with_options(basic_engine):
     assert project_file_exists(output_file_2)
 
 
-@pytest.mark.skip(reason="WIP")
 def test_workflow_engine_simple_python_fanout(basic_engine):
     # Arrange
     md, da = basic_engine
diff --git a/workflow/decoder.py b/workflow/decoder.py
@@ -61,6 +61,16 @@ def get_steps(definition: dict[str, Any]) -> list[dict[str, Any]]:
     return response
 
 
+def get_step(definition: dict[str, Any], name: str) -> dict[str, Any]:
+    """Given a Workflow definition this function returns the step with the
+    given name, or an empty dictionary if no such step exists."""
+    steps: list[dict[str, Any]] = get_steps(definition)
+    for step in steps:
+        if step["name"] == name:
+            return step
+    return {}
+
+
 def get_name(definition: dict[str, Any]) -> str:
     """Given a Workflow definition this function returns its name."""
     return str(definition.get("name", ""))
@@ -117,8 +127,8 @@ def get_step_input_variable_names(
 
 
 def get_step_workflow_variable_mapping(*, step: dict[str, Any]) -> list[Translation]:
-    """Returns a list of workflow vaiable name to step variable name tuples
-    for the given step."""
+    """Returns a list of workflow variable name to step variable name
+    Translation objects for the given step."""
     variable_mapping: list[Translation] = []
     if "variable-mapping" in step:
         for v_map in step["variable-mapping"]:
@@ -134,8 +144,9 @@ def get_step_workflow_variable_mapping(*, step: dict[str, Any]) -> list[Translat
 def get_step_prior_step_variable_mapping(
     *, step: dict[str, Any]
 ) -> dict[str, list[Translation]]:
-    """Returns list of translate objects, indexed by prior step name,
-    that identify source step vaiable name to this step's variable name."""
+    """Returns lists of Translation objects, indexed by prior step name,
+    that map a source step (output) variable name to this step's (input)
+    variable name."""
     variable_mapping: dict[str, list[Translation]] = {}
     if "variable-mapping" in step:
         for v_map in step["variable-mapping"]:
diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py
@@ -26,7 +26,8 @@
 import sys
 from typing import Any, Optional
 
-from decoder.decoder import TextEncoding, decode
+import decoder.decoder as job_defintion_decoder
+from decoder.decoder import TextEncoding
 from google.protobuf.message import Message
 from informaticsmatters.protobuf.datamanager.pod_message_pb2 import PodMessage
 from informaticsmatters.protobuf.datamanager.workflow_message_pb2 import WorkflowMessage
@@ -40,6 +41,7 @@
 
 from .decoder import (
     Translation,
+    get_step,
     get_step_prior_step_variable_mapping,
     get_step_workflow_variable_mapping,
 )
@@ -127,7 +129,7 @@ def _handle_workflow_start_message(self, r_wfid: str) -> None:
         # Launch it.
         # If there's a launch problem the step (and running workflow) will have
         # and error, stopping it. There will be no Pod event as the launch has failed.
-        self._launch(rwf=rwf_response, step=first_step)
+        self._launch(wf=wf_response, rwf=rwf_response, step=first_step)
 
     def _handle_workflow_stop_message(self, r_wfid: str) -> None:
         """Logic to handle a STOP message."""
@@ -263,7 +265,7 @@ def _handle_pod_message(self, msg: PodMessage) -> None:
                     # There's another step!
                     # For this simple logic it is the next step.
                     next_step = wf_response["steps"][step_index + 1]
-                    self._launch(rwf=rwf_response, step=next_step)
+                    self._launch(wf=wf_response, rwf=rwf_response, step=next_step)
 
                     # Something was started (or there was a launch error and the step
                     # and running workflow error will have been set).
@@ -278,28 +280,21 @@ def _handle_pod_message(self, msg: PodMessage) -> None:
                 success=True,
             )
 
-    def _validate_step_command(
-        self,
-        *,
-        running_workflow_id: str,
-        step: dict[str, Any],
-        running_workflow_variables: dict[str, Any],
-    ) -> str | dict[str, Any]:
-        """Returns an error message if the command isn't valid.
-        Without a message we return all the variables that were (successfully)
-        applied to the command."""
-
+    def _get_step_job(self, *, step: dict[str, Any]) -> dict[str, Any]:
+        """Gets the Job definition for a given Step."""
         # We get the Job from the step specification, which must contain
         # the keys "collection", "job", and "version". Here we assume that
         # the workflow definition has passed the RUN-level validation
         # which means we can get these values.
+        assert "specification" in step
         step_spec: dict[str, Any] = step["specification"]
         job_collection: str = step_spec["collection"]
         job_job: str = step_spec["job"]
         job_version: str = step_spec["version"]
         job, _ = self._wapi_adapter.get_job(
             collection=job_collection, job=job_job, version=job_version
         )
+
         _LOGGER.debug(
             "API.get_job(%s, %s, %s) returned: -\n%s",
             job_collection,
@@ -308,6 +303,19 @@ def _validate_step_command(
             str(job),
         )
 
+        return job
+
+    def _validate_step_command(
+        self,
+        *,
+        running_workflow_id: str,
+        step: dict[str, Any],
+        running_workflow_variables: dict[str, Any],
+    ) -> str | dict[str, Any]:
+        """Returns an error message if the command isn't valid.
+        Without a message we return all the variables that were (successfully)
+        applied to the command."""
+
         # Start with any variables provided in the step's specification.
         # This will be ou t"all variables" map for this step,
         # whcih we will add to (and maybe even over-write)...
@@ -345,12 +353,15 @@ def _validate_step_command(
                 all_variables[tr.out] = prior_step["variables"][tr.in_]
 
         # Now ... can the command be compiled!?
-        message, success = decode(
+        job: dict[str, Any] = self._get_step_job(step=step)
+        message, success = job_defintion_decoder.decode(
             job["command"], all_variables, "command", TextEncoding.JINJA2_3_0
         )
         return all_variables if success else message
 
-    def _launch(self, *, rwf: dict[str, Any], step: dict[str, Any]) -> None:
+    def _launch(
+        self, *, wf: dict[str, Any], rwf: dict[str, Any], step: dict[str, Any]
+    ) -> None:
         step_name: str = step["name"]
         rwf_id: str = rwf["id"]
         project_id = rwf["project"]["id"]
@@ -380,17 +391,53 @@ def _launch(self, *, rwf: dict[str, Any], step: dict[str, Any]) -> None:
         # A step replication number,
         # used only for steps expected to run in parallel (even if just once)
         step_replication_number: int = 0
+        # Do we replicate this step (run it more than once)?
+        # We do if a variable in this step's mapping block
+        # refers to an output of a prior step whose type is 'files'.
+        # If the prior step is a 'splitter' we populate the 'replication_values' array
+        # with the list of files the prior step generated for its output.
         replication_values: list[str] = []
-        source_is_splitter: bool = False
         iter_variable: str | None = None
+        tr_map: dict[str, list[Translation]] = get_step_prior_step_variable_mapping(
+            step=step
+        )
+        for p_step_name, tr_list in tr_map.items():
+            # We need to get the Job definition for each step
+            # and then check whether the (output) variable is of type 'files'...
+            wf_step: dict[str, Any] = get_step(wf, p_step_name)
+            assert wf_step
+            job_definition: dict[str, Any] = self._get_step_job(step=wf_step)
+            jd_outputs: dict[str, Any] = job_defintion_decoder.get_outputs(
+                job_definition
+            )
+            for tr in tr_list:
+                if jd_outputs.get(tr.in_, {}).get("type") == "files":
+                    iter_variable = tr.out
+                    # Get the prior running step's output values
+                    response, _ = self._wapi_adapter.get_running_workflow_step_by_name(
+                        name=p_step_name,
+                        running_workflow_id=rwf_id,
+                    )
+                    rwfs_id = response["id"]
+                    assert rwfs_id
+                    result, _ = (
+                        self._wapi_adapter.get_running_workflow_step_output_values_for_output(
+                            running_workflow_step_id=rwfs_id,
+                            output_variable=tr.in_,
+                        )
+                    )
+                    replication_values = result["output"].copy()
+                    break
+            # Stop if we've got an iteration variable
+            if iter_variable:
+                break
 
         num_step_instances: int = max(1, len(replication_values))
         for iteration in range(num_step_instances):
 
             # If we are replicating this step then we must replace the step's variable
             # with a value expected for this iteration.
-            if source_is_splitter:
-                assert iter_variable
+            if iter_variable:
                 iter_value: str = replication_values[iteration]
                 _LOGGER.info(
                     "Replicating step: %s iteration=%s variable=%s value=%s",
@@ -427,7 +474,7 @@ def _launch(self, *, rwf: dict[str, Any], step: dict[str, Any]) -> None:
                 step_replication_number=step_replication_number,
             )
             lr: LaunchResult = self._instance_launcher.launch(launch_parameters=lp)
-            rwfs_id: str | None = lr.running_workflow_step_id
+            rwfs_id = lr.running_workflow_step_id
             assert rwfs_id
 
             if lr.error_num: