feat: First successful replicating workflow test

Alan Christie · Alan Christie · commit 7770d7f44874 · 2025-08-28T15:34:22.000Z
diff --git a/tests/test_decoder.py b/tests/test_decoder.py
@@ -142,17 +142,6 @@ def test_validate_schema_for_step_specification_variable_names():
     assert error is None
 
 
-@pytest.mark.skip(reason="We do not support combination atm")
-def test_validate_schema_for_simple_python_parallel():
-    # Arrange
-
-    # Act
-    error = decoder.validate_schema(_SIMPLE_PYTHON_PARALLEL_WORKFLOW)
-
-    # Assert
-    assert error is None
-
-
 def test_get_workflow_variables_for_smiple_python_molprops():
     # Arrange
 
diff --git a/tests/test_workflow_engine_examples.py b/tests/test_workflow_engine_examples.py
@@ -398,41 +398,22 @@ def test_workflow_engine_simple_python_molprops_with_options(basic_engine):
     assert project_file_exists(output_file_2)
 
 
-@pytest.mark.skip(reason="WIP")
 def test_workflow_engine_simple_python_fanout(basic_engine):
     # Arrange
     md, da = basic_engine
 
     da.mock_get_running_workflow_step_output_values_for_output(
         step_name="first-step",
-        output_variable="outputFile",
+        output_variable="outputBase",
         output=["chunk_1.smi", "chunk_2.smi"],
     )
 
-    # da.mock_get_running_workflow_step_output_values_for_output(
-    #     step_name="parallel-step",
-    #     output_variable="outputFile",
-    #     output=["chunk_1_proc.smi", "chunk_2_proc.smi"]
-    # )
-
-    # da.mock_get_running_workflow_step_output_values_for_output(
-    #     step_name="final-step",
-    #     output_variable="outputFile",
-    #     output=["final-step.out.smi"],
-    # )
-
     # Make sure files that should be generated by the test
     # do not exist before we run the test.
     output_file_first = "chunk_1.smi"
     output_file_second = "chunk_2.smi"
     assert not project_file_exists(output_file_first)
     assert not project_file_exists(output_file_second)
-    output_file_p_first = "chunk_1_proc.smi"
-    output_file_p_second = "chunk_2_proc.smi"
-    assert not project_file_exists(output_file_p_first)
-    assert not project_file_exists(output_file_p_second)
-    # output_file_final = "final-step.out.smi"
-    # assert not project_file_exists(output_file_final)
     # And create the test's input file.
     input_file_1 = "input1.smi"
     input_file_1_content = """O=C(CSCc1ccc(Cl)s1)N1CCC(O)CC1
@@ -458,15 +439,10 @@ def test_workflow_engine_simple_python_fanout(basic_engine):
     print("response")
     pprint(response)
 
-    assert response["count"] == 2
+    assert response["count"] == 3
     assert response["running_workflow_steps"][0]["done"]
     assert response["running_workflow_steps"][0]["success"]
     assert response["running_workflow_steps"][1]["done"]
     assert response["running_workflow_steps"][1]["success"]
-    # assert response["running_workflow_steps"][2]["done"]
-    # assert response["running_workflow_steps"][2]["success"]
-    # assert response["running_workflow_steps"][3]["done"]
-    # assert response["running_workflow_steps"][3]["success"]
-    # This test should generate a file in the simulated project directory
-    # assert project_file_exists(output_file_first)
-    # assert project_file_exists(output_file_final)
+    assert response["running_workflow_steps"][2]["done"]
+    assert response["running_workflow_steps"][2]["success"]
diff --git a/tests/test_workflow_validator_for_run_level.py b/tests/test_workflow_validator_for_run_level.py
@@ -217,28 +217,6 @@ def test_validate_simple_python_molprops_with_missing_input():
     ]
 
 
-@pytest.mark.skip("Unsupported workflow")
-def test_validate_simple_python_parallel():
-    # Arrange
-    workflow_filename: str = os.path.join(
-        os.path.dirname(__file__),
-        "workflow-definitions",
-        "simple-python-parallel.yaml",
-    )
-    with open(workflow_filename, "r", encoding="utf8") as workflow_file:
-        workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader)
-    assert workflow
-
-    # Act
-    error = WorkflowValidator.validate(
-        level=ValidationLevel.TAG,
-        workflow_definition=workflow,
-    )
-
-    # Assert
-    assert error.error_num == 0
-
-
 def test_validate_replicate_using_undeclared_input():
     # Arrange
     workflow_filename: str = os.path.join(
diff --git a/tests/test_workflow_validator_for_tag_level.py b/tests/test_workflow_validator_for_tag_level.py
@@ -109,28 +109,6 @@ def test_validate_shortcut_example_1():
     assert error.error_msg is None
 
 
-@pytest.mark.skip("Unsupported workflow")
-def test_validate_simple_python_parallel():
-    # Arrange
-    workflow_filename: str = os.path.join(
-        os.path.dirname(__file__),
-        "workflow-definitions",
-        "simple-python-parallel.yaml",
-    )
-    with open(workflow_filename, "r", encoding="utf8") as workflow_file:
-        workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader)
-    assert workflow
-
-    # Act
-    error = WorkflowValidator.validate(
-        level=ValidationLevel.TAG,
-        workflow_definition=workflow,
-    )
-
-    # Assert
-    assert error.error_num == 0
-
-
 def test_validate_simple_python_molprops():
     # Arrange
     workflow_filename: str = os.path.join(
diff --git a/tests/workflow-definitions/simple-python-fanout.yaml b/tests/workflow-definitions/simple-python-fanout.yaml
@@ -7,15 +7,15 @@ description: >-
 
 steps:
 - name: first-step
-  description: Create inputs
+  description: Split an input file
   specification:
     collection: workflow-engine-unit-test-jobs
     job: splitsmiles
     version: "1.0.0"
     variables:
       name: count
       value: "1"
-      outputFile: results.smi
+      outputBase: chunk
   variable-mapping:
   - variable: inputFile
     from-workflow:
@@ -38,6 +38,6 @@ steps:
   - variable: inputFile
     from-step:
       name: first-step
-      variable: outputFile
+      variable: outputBase
   out:
   - outputFile
diff --git a/workflow/decoder.py b/workflow/decoder.py
@@ -5,6 +5,7 @@
 
 import os
 from dataclasses import dataclass
+from enum import Enum
 from typing import Any
 
 import jsonschema
@@ -32,6 +33,26 @@ class Translation:
     out: str
 
 
+class ReplicationOrigin(Enum):
+    """Origin of a replication variable."""
+
+    STEP_VARIABLE = 1
+    WORKFLOW_VARIABLE = 2
+
+
+@dataclass
+class ReplicationDriver:
+    """A step's replication driver.
+    The 'variable' is the variable for the step-to-be-executed
+    whose value is 'driven' by the values of the 'source_variable'.
+    The source variable comes from either a step or the workflow."""
+
+    origin: ReplicationOrigin
+    variable: str
+    source_variable: str
+    source_step_name: str | None = None
+
+
 def validate_schema(workflow: dict[str, Any]) -> str | None:
     """Checks the Workflow Definition against the built-in schema.
     If there's an error the error text is returned, otherwise None.
@@ -154,11 +175,36 @@ def get_step_prior_step_variable_mapping(
     return variable_mapping
 
 
-def get_step_replicator(*, step: dict[str, Any]) -> str | Any:
-    """Return step's replication info"""
-    replicator = step.get("replicate")
-    if replicator:
+def get_step_replication_driver(*, step: dict[str, Any]) -> ReplicationDriver | None:
+    """If the step is expected to replicate, return its replication driver:
+    the step's driven variable plus the (prior) step name and (output) variable
+    that supply its values. Otherwise return None."""
+    if replicator := step.get("replicate"):
+        # We need the variable we replicate against,
+        # and the step that owns the variable.
+        #
         # 'using' is a dict but there can be only single value for now
-        replicator = list(replicator["using"].values())[0]
+        variable: str = replicator["using"]["variable"]
+        source_variable: str | None = None
+        # Is the variable from a prior step?
+        step_name: str | None = None
+        step_v_map = get_step_prior_step_variable_mapping(step=step)
+        for step_name_candidate, mappings in step_v_map.items():
+            for mapping in mappings:
+                if mapping.out == variable:
+                    step_name = step_name_candidate
+                    source_variable = mapping.in_
+                    break
+            if step_name:
+                break
+        assert step_name
+        assert source_variable
+
+        return ReplicationDriver(
+            origin=ReplicationOrigin.STEP_VARIABLE,
+            variable=variable,
+            source_step_name=step_name,
+            source_variable=source_variable,
+        )
 
-    return replicator
+    return None
diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py
@@ -39,8 +39,11 @@
 )
 
 from .decoder import (
+    ReplicationDriver,
+    ReplicationOrigin,
     Translation,
     get_step_prior_step_variable_mapping,
+    get_step_replication_driver,
     get_step_workflow_variable_mapping,
 )
 
@@ -340,8 +343,6 @@ def _validate_step_command(
                 name=prior_step_name, running_workflow_id=running_workflow_id
             )
             # Copy "in" value to "out"...
-            print(v_map)
-            print(prior_step["variables"])
             for tr in v_map:
                 assert tr.in_ in prior_step["variables"]
                 all_variables[tr.out] = prior_step["variables"][tr.in_]
@@ -378,34 +379,63 @@ def _launch(self, *, rwf: dict[str, Any], step: dict[str, Any]) -> None:
             return
 
         variables: dict[str, Any] = error_or_variables
-        num_replicas: int = 0
-        # Is this a replicating step?
-        # The number of 'replicas' is zero if the step is only launched once
-        # (i.e. there are no replicas).
-
-        #        replicator = get_step_replicator(step=step)
-        #        if replicator:
-        #            single_step_variables = []
-        #            for replicating_param in variables[replicator]:
-        #                ssv = {**variables}
-        #                ssv[replicator] = replicating_param
-        #                single_step_variables.append(ssv)
-        #        else:
-        #            single_step_variables = [variables]
-
-        assert num_replicas >= 0
-        step_replication_number: int = 1 if num_replicas else 0
-        for _ in range(1 + num_replicas):
+
+        # A replication number,
+        # used only for steps expected to replicate (even if just once)
+        step_replication_number: int = 0
+        # Does this step have a replicating driver?
+        r_driver: ReplicationDriver | None = get_step_replication_driver(step=step)
+        replication_values: list[str] = []
+        if r_driver:
+            if r_driver.origin == ReplicationOrigin.STEP_VARIABLE:
+                # We need to get the variable values from a prior step
+                # We need the prior step's running-workflow-step-id
+                assert r_driver.source_step_name
+                response, _ = self._wapi_adapter.get_running_workflow_step_by_name(
+                    name=r_driver.source_step_name,
+                    running_workflow_id=rwf_id,
+                )
+                assert "id" in response
+                o_rwfs_id: str = response["id"]
+                response, _ = (
+                    self._wapi_adapter.get_running_workflow_step_output_values_for_output(
+                        running_workflow_step_id=o_rwfs_id,
+                        output_variable=r_driver.source_variable,
+                    )
+                )
+                assert "output" in response
+                replication_values = response["output"]
+            else:
+                assert False, "Unsupported origin"
+
+        num_step_instances: int = max(1, len(replication_values))
+        for iteration in range(num_step_instances):
+
+            # If we are replicating this step then we must replace the step's variable
+            # with a value expected for this iteration.
+            if r_driver:
+                iter_variable: str = r_driver.variable
+                iter_value: str = replication_values[iteration]
+                _LOGGER.info(
+                    "Replicating step: %s iteration=%s variable=%s value=%s",
+                    step_name,
+                    iteration,
+                    iter_variable,
+                    iter_value,
+                )
+                # Over-write the replicating variable
+                # and set the replication number to a unique +ve non-zero value...
+                variables[iter_variable] = iter_value
+                step_replication_number = iteration + 1
 
             _LOGGER.info(
                 "Launching step: %s RunningWorkflow=%s (name=%s)"
-                " variables=%s project=%s (step_replication_number=%s)",
+                " variables=%s project=%s",
                 step_name,
                 rwf_id,
                 rwf["name"],
                 variables,
                 project_id,
-                step_replication_number,
             )
 
             lp: LaunchParameters = LaunchParameters(
@@ -436,10 +466,6 @@ def _launch(self, *, rwf: dict[str, Any], step: dict[str, Any]) -> None:
                     lr.command,
                 )
 
-            # Do we need to increment the replication number?
-            if num_replicas:
-                step_replication_number += 1
-
     def _set_step_error(
         self,
         step_name: str,