From a74ff9133689cc600e48f778d8262428dad6a558 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Mon, 23 Jun 2025 09:40:57 +0100 Subject: [PATCH 01/57] feat: kind-version now 2025.2 BREAKING CHANGE --- tests/workflow-definitions/duplicate-step-names.yaml | 2 +- .../workflow-definitions/duplicate-workflow-variable-names.yaml | 2 +- tests/workflow-definitions/example-nop-fail.yaml | 2 +- tests/workflow-definitions/example-smiles-to-file.yaml | 2 +- tests/workflow-definitions/example-two-step-nop.yaml | 2 +- tests/workflow-definitions/minimal.yaml | 2 +- tests/workflow-definitions/shortcut-example-1.yaml | 2 +- .../simple-python-molprops-with-options.yaml | 2 +- tests/workflow-definitions/simple-python-molprops.yaml | 2 +- tests/workflow-definitions/simple-python-parallel.yaml | 2 +- .../workflow-definitions/step-specification-variable-names.yaml | 2 +- tests/workflow-definitions/workflow-options.yaml | 2 +- workflow/workflow-schema.yaml | 2 +- 13 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/workflow-definitions/duplicate-step-names.yaml b/tests/workflow-definitions/duplicate-step-names.yaml index cd9920f..c9a6028 100644 --- a/tests/workflow-definitions/duplicate-step-names.yaml +++ b/tests/workflow-definitions/duplicate-step-names.yaml @@ -1,6 +1,6 @@ --- kind: DataManagerWorkflow -kind-version: "2025.1" +kind-version: "2025.2" name: duplicate-step-names steps: diff --git a/tests/workflow-definitions/duplicate-workflow-variable-names.yaml b/tests/workflow-definitions/duplicate-workflow-variable-names.yaml index dbaeafa..8179bd4 100644 --- a/tests/workflow-definitions/duplicate-workflow-variable-names.yaml +++ b/tests/workflow-definitions/duplicate-workflow-variable-names.yaml @@ -1,6 +1,6 @@ --- kind: DataManagerWorkflow -kind-version: "2025.1" +kind-version: "2025.2" name: duplicate-workflow-variable-names description: A workflow with a duplicate variable name in the input and output variable-mapping: diff --git a/tests/workflow-definitions/example-nop-fail.yaml b/tests/workflow-definitions/example-nop-fail.yaml index a0a7194..6e639c2 100644 --- a/tests/workflow-definitions/example-nop-fail.yaml +++ b/tests/workflow-definitions/example-nop-fail.yaml @@ -1,6 +1,6 @@ --- kind: DataManagerWorkflow -kind-version: "2025.1" +kind-version: "2025.2" name: nop-fail description: >- A workflow with one step that fails diff --git a/tests/workflow-definitions/example-smiles-to-file.yaml b/tests/workflow-definitions/example-smiles-to-file.yaml index 54b7ea8..b7dc70c 100644 --- a/tests/workflow-definitions/example-smiles-to-file.yaml +++ b/tests/workflow-definitions/example-smiles-to-file.yaml @@ -1,6 +1,6 @@ --- kind: DataManagerWorkflow -kind-version: "2025.1" +kind-version: "2025.2" name: smiles-to-file description: >- A workflow with one step that uses variables. diff --git a/tests/workflow-definitions/example-two-step-nop.yaml b/tests/workflow-definitions/example-two-step-nop.yaml index 7030e84..b52e83a 100644 --- a/tests/workflow-definitions/example-two-step-nop.yaml +++ b/tests/workflow-definitions/example-two-step-nop.yaml @@ -1,6 +1,6 @@ --- kind: DataManagerWorkflow -kind-version: "2025.1" +kind-version: "2025.2" name: two-step-nop description: >- A workflow with two steps. 
diff --git a/tests/workflow-definitions/minimal.yaml b/tests/workflow-definitions/minimal.yaml index 4c3c90a..f744057 100644 --- a/tests/workflow-definitions/minimal.yaml +++ b/tests/workflow-definitions/minimal.yaml @@ -1,6 +1,6 @@ --- kind: DataManagerWorkflow -kind-version: "2025.1" +kind-version: "2025.2" name: workflow-minimal steps: diff --git a/tests/workflow-definitions/shortcut-example-1.yaml b/tests/workflow-definitions/shortcut-example-1.yaml index 214fb24..e5b719d 100644 --- a/tests/workflow-definitions/shortcut-example-1.yaml +++ b/tests/workflow-definitions/shortcut-example-1.yaml @@ -1,6 +1,6 @@ --- kind: DataManagerWorkflow -kind-version: "2025.1" +kind-version: "2025.2" name: shortcut-example-1 description: The shortcut example 1 workflow diff --git a/tests/workflow-definitions/simple-python-molprops-with-options.yaml b/tests/workflow-definitions/simple-python-molprops-with-options.yaml index 69dfc4f..eeb6111 100644 --- a/tests/workflow-definitions/simple-python-molprops-with-options.yaml +++ b/tests/workflow-definitions/simple-python-molprops-with-options.yaml @@ -1,6 +1,6 @@ --- kind: DataManagerWorkflow -kind-version: "2025.1" +kind-version: "2025.2" name: python-workflow description: A simple python experimental workflow diff --git a/tests/workflow-definitions/simple-python-molprops.yaml b/tests/workflow-definitions/simple-python-molprops.yaml index 6290731..4d9451c 100644 --- a/tests/workflow-definitions/simple-python-molprops.yaml +++ b/tests/workflow-definitions/simple-python-molprops.yaml @@ -1,6 +1,6 @@ --- kind: DataManagerWorkflow -kind-version: "2025.1" +kind-version: "2025.2" name: python-workflow description: A simple python experimental workflow variable-mapping: diff --git a/tests/workflow-definitions/simple-python-parallel.yaml b/tests/workflow-definitions/simple-python-parallel.yaml index ece197a..9fea80a 100644 --- a/tests/workflow-definitions/simple-python-parallel.yaml +++ b/tests/workflow-definitions/simple-python-parallel.yaml @@ -1,6 +1,6 @@ --- kind: DataManagerWorkflow -kind-version: "2025.1" +kind-version: "2025.2" name: python-workflow description: A simple branching workflow variable-mapping: diff --git a/tests/workflow-definitions/step-specification-variable-names.yaml b/tests/workflow-definitions/step-specification-variable-names.yaml index 2064844..e899b7f 100644 --- a/tests/workflow-definitions/step-specification-variable-names.yaml +++ b/tests/workflow-definitions/step-specification-variable-names.yaml @@ -1,6 +1,6 @@ --- kind: DataManagerWorkflow -kind-version: "2025.1" +kind-version: "2025.2" name: step-variables description: Test a lot of variables whose format is supported diff --git a/tests/workflow-definitions/workflow-options.yaml b/tests/workflow-definitions/workflow-options.yaml index 4be5083..9e742fe 100644 --- a/tests/workflow-definitions/workflow-options.yaml +++ b/tests/workflow-definitions/workflow-options.yaml @@ -1,6 +1,6 @@ --- kind: DataManagerWorkflow -kind-version: "2025.1" +kind-version: "2025.2" name: workflow-options description: Illustrate the use of workflow options variable-mapping: diff --git a/workflow/workflow-schema.yaml b/workflow/workflow-schema.yaml index 6f2c468..792f866 100644 --- a/workflow/workflow-schema.yaml +++ b/workflow/workflow-schema.yaml @@ -17,7 +17,7 @@ properties: const: DataManagerWorkflow kind-version: enum: - - '2025.1' + - '2025.2' name: $ref: '#/definitions/rfc1035-label-name' description: From d955a19379ba6f2743b3e576caab5d2fddabcc3e Mon Sep 17 00:00:00 2001 From: Alan Christie Date: 
Mon, 23 Jun 2025 14:18:33 +0100 Subject: [PATCH 02/57] feat: Initial schema for replicate step declaration --- tests/test_decoder.py | 52 ++++++++++++++ .../test_workflow_validator_for_run_level.py | 69 +++++++++++++++++++ .../test_workflow_validator_for_tag_level.py | 69 +++++++++++++++++++ .../duplicate-step-output-variable-names.yaml | 56 +++++++++++++++ .../duplicate-workflow-variable-names.yaml | 1 - .../replicate-using-undeclared-input.yaml | 54 +++++++++++++++ .../simple-python-molprops-with-options.yaml | 1 - .../simple-python-molprops.yaml | 1 - .../simple-python-parallel.yaml | 1 - workflow/decoder.py | 30 ++++++++ workflow/workflow-schema.yaml | 23 +++++-- workflow/workflow_validator.py | 47 +++++++++++-- 12 files changed, 391 insertions(+), 13 deletions(-) create mode 100644 tests/workflow-definitions/duplicate-step-output-variable-names.yaml create mode 100644 tests/workflow-definitions/replicate-using-undeclared-input.yaml diff --git a/tests/test_decoder.py b/tests/test_decoder.py index f870f8a..8a25774 100644 --- a/tests/test_decoder.py +++ b/tests/test_decoder.py @@ -56,6 +56,15 @@ ) assert _DUPLICATE_WORKFLOW_VARIABLE_NAMES_WORKFLOW +_SIMPLE_PYTHON_PARALLEL_FILE: str = os.path.join( + os.path.dirname(__file__), + "workflow-definitions", + "simple-python-parallel.yaml", +) +with open(_SIMPLE_PYTHON_PARALLEL_FILE, "r", encoding="utf8") as workflow_file: + _SIMPLE_PYTHON_PARALLEL_WORKFLOW: Dict[str, Any] = yaml.safe_load(workflow_file) +assert _SIMPLE_PYTHON_PARALLEL_WORKFLOW + _STEP_SPECIFICATION_VARIABLE_NAMES_WORKFLOW_FILE: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", @@ -165,6 +174,16 @@ def test_validate_schema_for_workflow_options(): assert error is None +def test_validate_schema_for_simple_python_parallel(): + # Arrange + + # Act + error = decoder.validate_schema(_SIMPLE_PYTHON_PARALLEL_WORKFLOW) + + # Assert + assert error is None + + def test_get_workflow_variables_for_smiple_python_molprops(): # Arrange @@ -329,3 +348,36 @@ def test_get_workflow_outputs_for_step_with_unkown_step_name(): # Assert assert not outputs + + +def test_get_step_input_variable_names_when_duplicates(): + # Arrange + + # Act + inputs = decoder.get_step_input_variable_names( + _SIMPLE_PYTHON_PARALLEL_WORKFLOW, "final-step" + ) + + # Assert + assert len(inputs) == 2 + assert inputs[0] == "inputFile" + assert inputs[1] == "inputFile" + + +def test_get_step_output_variable_names_when_duplicates(): + # Arrange + workflow_filename: str = os.path.join( + os.path.dirname(__file__), + "workflow-definitions", + "duplicate-step-output-variable-names.yaml", + ) + with open(workflow_filename, "r", encoding="utf8") as wf_file: + definition: Dict[str, Any] = yaml.safe_load(wf_file) + + # Act + outputs = decoder.get_step_output_variable_names(definition, "step-1") + + # Assert + assert len(outputs) == 2 + assert outputs[0] == "outputFile" + assert outputs[1] == "outputFile" diff --git a/tests/test_workflow_validator_for_run_level.py b/tests/test_workflow_validator_for_run_level.py index e5fdfb6..2df1630 100644 --- a/tests/test_workflow_validator_for_run_level.py +++ b/tests/test_workflow_validator_for_run_level.py @@ -233,3 +233,72 @@ def test_validate_duplicate_workflow_variable_names(): # Assert assert error.error_num == 6 assert error.error_msg == ["Duplicate workflow variable names found: x"] + + +def test_validate_simple_python_parallel(): + # Arrange + workflow_file: str = os.path.join( + os.path.dirname(__file__), + "workflow-definitions", + "simple-python-parallel.yaml", + ) 
+ with open(workflow_file, "r", encoding="utf8") as workflow_file: + workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) + assert workflow + + # Act + error = WorkflowValidator.validate( + level=ValidationLevel.TAG, + workflow_definition=workflow, + ) + + # Assert + assert error.error_num == 0 + + +def test_validate_replicate_using_undeclared_input(): + # Arrange + workflow_file: str = os.path.join( + os.path.dirname(__file__), + "workflow-definitions", + "replicate-using-undeclared-input.yaml", + ) + with open(workflow_file, "r", encoding="utf8") as workflow_file: + workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) + assert workflow + + # Act + error = WorkflowValidator.validate( + level=ValidationLevel.TAG, + workflow_definition=workflow, + ) + + # Assert + assert error.error_num == 7 + assert error.error_msg == [ + "Replicate input variable is not declared: y (step=step-2)" + ] + + +def test_validate_duplicate_step_output_variable_names(): + # Arrange + workflow_file: str = os.path.join( + os.path.dirname(__file__), + "workflow-definitions", + "duplicate-step-output-variable-names.yaml", + ) + with open(workflow_file, "r", encoding="utf8") as workflow_file: + workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) + assert workflow + + # Act + error = WorkflowValidator.validate( + level=ValidationLevel.TAG, + workflow_definition=workflow, + ) + + # Assert + assert error.error_num == 3 + assert error.error_msg == [ + "Duplicate step output variable: outputFile (step=step-1)" + ] diff --git a/tests/test_workflow_validator_for_tag_level.py b/tests/test_workflow_validator_for_tag_level.py index 8c68400..4445502 100644 --- a/tests/test_workflow_validator_for_tag_level.py +++ b/tests/test_workflow_validator_for_tag_level.py @@ -109,6 +109,27 @@ def test_validate_shortcut_example_1(): assert error.error_msg is None +def test_validate_simple_python_parallel(): + # Arrange + workflow_file: str = os.path.join( + os.path.dirname(__file__), + "workflow-definitions", + "simple-python-parallel.yaml", + ) + with open(workflow_file, "r", encoding="utf8") as workflow_file: + workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) + assert workflow + + # Act + error = WorkflowValidator.validate( + level=ValidationLevel.TAG, + workflow_definition=workflow, + ) + + # Assert + assert error.error_num == 0 + + def test_validate_simple_python_molprops(): # Arrange workflow_file: str = os.path.join( @@ -171,3 +192,51 @@ def test_validate_duplicate_workflow_variable_names(): # Assert assert error.error_num == 6 assert error.error_msg == ["Duplicate workflow variable names found: x"] + + +def test_validate_replicate_using_undeclared_input(): + # Arrange + workflow_file: str = os.path.join( + os.path.dirname(__file__), + "workflow-definitions", + "replicate-using-undeclared-input.yaml", + ) + with open(workflow_file, "r", encoding="utf8") as workflow_file: + workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) + assert workflow + + # Act + error = WorkflowValidator.validate( + level=ValidationLevel.TAG, + workflow_definition=workflow, + ) + + # Assert + assert error.error_num == 7 + assert error.error_msg == [ + "Replicate input variable is not declared: y (step=step-2)" + ] + + +def test_validate_duplicate_step_output_variable_names(): + # Arrange + workflow_file: str = os.path.join( + os.path.dirname(__file__), + "workflow-definitions", + "duplicate-step-output-variable-names.yaml", + ) + with 
open(workflow_file, "r", encoding="utf8") as workflow_file: + workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) + assert workflow + + # Act + error = WorkflowValidator.validate( + level=ValidationLevel.TAG, + workflow_definition=workflow, + ) + + # Assert + assert error.error_num == 3 + assert error.error_msg == [ + "Duplicate step output variable: outputFile (step=step-1)" + ] diff --git a/tests/workflow-definitions/duplicate-step-output-variable-names.yaml b/tests/workflow-definitions/duplicate-step-output-variable-names.yaml new file mode 100644 index 0000000..d37c544 --- /dev/null +++ b/tests/workflow-definitions/duplicate-step-output-variable-names.yaml @@ -0,0 +1,56 @@ +--- +kind: DataManagerWorkflow +kind-version: "2025.2" +name: duplicate-step-output-variable-names +description: A workflow where step-1 has duplicate output variable names +variable-mapping: + inputs: + - name: x + outputs: + - name: y + from: + step: step-2 + output: outputFile + as: clustered-molecules.smi + +steps: + +- name: step-1 + description: Add column 1 + specification: + collection: workflow-engine-unit-test-jobs + job: rdkit-molprops + version: "1.0.0" + variables: + name: "col1" + value: 123 + inputs: + - input: inputFile + from: + workflow-input: candidateMolecules + - input: inputFile + from: + workflow-input: candidateMolecules + outputs: + - output: outputFile + as: __step1__out.smi + - output: outputFile + as: __step1__out.smi + +- name: step-2 + description: Add column 2 + specification: + collection: workflow-engine-unit-test-jobs + job: cluster-butina + version: "1.0.0" + variables: + name: "col2" + value: "999" + inputs: + - input: inputFile + from: + step: step1 + output: outputFile + outputs: + - output: outputFile + as: __step2__out.smi diff --git a/tests/workflow-definitions/duplicate-workflow-variable-names.yaml b/tests/workflow-definitions/duplicate-workflow-variable-names.yaml index 8179bd4..6c1206f 100644 --- a/tests/workflow-definitions/duplicate-workflow-variable-names.yaml +++ b/tests/workflow-definitions/duplicate-workflow-variable-names.yaml @@ -6,7 +6,6 @@ description: A workflow with a duplicate variable name in the input and output variable-mapping: inputs: - name: x - type: squonk/x-smiles outputs: - name: x from: diff --git a/tests/workflow-definitions/replicate-using-undeclared-input.yaml b/tests/workflow-definitions/replicate-using-undeclared-input.yaml new file mode 100644 index 0000000..cc454e5 --- /dev/null +++ b/tests/workflow-definitions/replicate-using-undeclared-input.yaml @@ -0,0 +1,54 @@ +--- +kind: DataManagerWorkflow +kind-version: "2025.2" +name: replicate-using-undeclared-input +description: A workflow that replicates from a variable that's not declared +variable-mapping: + inputs: + - name: x + outputs: + - name: y + from: + step: step2 + output: outputFile + as: clustered-molecules.smi + +steps: + +- name: step-1 + description: Add column 1 + specification: + collection: workflow-engine-unit-test-jobs + job: rdkit-molprops + version: "1.0.0" + variables: + name: "col1" + value: 123 + inputs: + - input: inputFile + from: + workflow-input: candidateMolecules + outputs: + - output: outputFile + as: __step-1__out.smi + +- name: step-2 + description: Add column 2 + specification: + collection: workflow-engine-unit-test-jobs + job: cluster-butina + version: "1.0.0" + variables: + name: "col2" + value: "999" + replicate: + using: + input: y + inputs: + - input: inputFile + from: + step: step-1 + output: outputFile + outputs: + - output: 
outputFile + as: __step-2__out.smi diff --git a/tests/workflow-definitions/simple-python-molprops-with-options.yaml b/tests/workflow-definitions/simple-python-molprops-with-options.yaml index eeb6111..afc1ae8 100644 --- a/tests/workflow-definitions/simple-python-molprops-with-options.yaml +++ b/tests/workflow-definitions/simple-python-molprops-with-options.yaml @@ -56,7 +56,6 @@ variables: variable-mapping: inputs: - name: candidateMolecules - type: squonk/x-smiles outputs: - name: clusteredMolecules from: diff --git a/tests/workflow-definitions/simple-python-molprops.yaml b/tests/workflow-definitions/simple-python-molprops.yaml index 4d9451c..cf28dd2 100644 --- a/tests/workflow-definitions/simple-python-molprops.yaml +++ b/tests/workflow-definitions/simple-python-molprops.yaml @@ -6,7 +6,6 @@ description: A simple python experimental workflow variable-mapping: inputs: - name: candidateMolecules - type: squonk/x-smiles outputs: - name: clusteredMolecules from: diff --git a/tests/workflow-definitions/simple-python-parallel.yaml b/tests/workflow-definitions/simple-python-parallel.yaml index 9fea80a..bfd3935 100644 --- a/tests/workflow-definitions/simple-python-parallel.yaml +++ b/tests/workflow-definitions/simple-python-parallel.yaml @@ -6,7 +6,6 @@ description: A simple branching workflow variable-mapping: inputs: - name: candidateMolecules - type: squonk/x-smiles outputs: - name: clusteredMolecules from: diff --git a/workflow/decoder.py b/workflow/decoder.py index 9e75133..19abc94 100644 --- a/workflow/decoder.py +++ b/workflow/decoder.py @@ -82,6 +82,36 @@ def get_variable_names(definition: dict[str, Any]) -> list[str]: return wf_variable_names +def get_step_output_variable_names( + definition: dict[str, Any], step_name: str +) -> list[str]: + """Given a Workflow definition and a Step name this function returns all the names + of the output variables defined at the Step level. This function DOES NOT + de-duplicate names, that is the role of the validator.""" + variable_names: list[str] = [] + steps: list[dict[str, Any]] = get_steps(definition) + for step in steps: + if step["name"] == step_name: + variable_names.extend( + output["output"] for output in step.get("outputs", []) + ) + return variable_names + + +def get_step_input_variable_names( + definition: dict[str, Any], step_name: str +) -> list[str]: + """Given a Workflow definition and a Step name (expected to exist) + this function returns all the names of the input + variables defined at the step level.""" + variable_names: list[str] = [] + steps: list[dict[str, Any]] = get_steps(definition) + for step in steps: + if step["name"] == step_name: + variable_names.extend(input["input"] for input in step.get("inputs", [])) + return variable_names + + def get_workflow_job_input_names_for_step( definition: dict[str, Any], name: str ) -> list[str]: diff --git a/workflow/workflow-schema.yaml b/workflow/workflow-schema.yaml index 792f866..70e7b10 100644 --- a/workflow/workflow-schema.yaml +++ b/workflow/workflow-schema.yaml @@ -90,12 +90,8 @@ definitions: properties: name: $ref: '#/definitions/template-variable-name' - type: - type: string - description: The MIME type of the parameter required: - name - - type # A workflow output parameter is essentially a file # taken from the output of a step with a default (as) value. 
@@ -176,6 +172,17 @@ definitions: - name - as + # A step replication control variable + # that is based on a step input variable + replicate-using-input: + type: object + additionalProperties: false + properties: + input: + $ref: '#/definitions/template-variable-name' + required: + - input + # A Step input (from an output of a prior step) step-input-from-step: type: object @@ -262,6 +269,14 @@ definitions: description: A description of the step specification: $ref: '#/definitions/step-specification' + replicate: + # Used to indicate one input variable that is used to replicate/spawn + # step instances based on the number of values generated for the variable. + type: object + additionalProperties: false + properties: + using: + $ref: '#/definitions/replicate-using-input' inputs: type: array items: diff --git a/workflow/workflow_validator.py b/workflow/workflow_validator.py index 78198b8..6324bd5 100644 --- a/workflow/workflow_validator.py +++ b/workflow/workflow_validator.py @@ -6,6 +6,8 @@ from .decoder import ( get_required_variable_names, + get_step_input_variable_names, + get_step_output_variable_names, get_steps, get_variable_names, validate_schema, @@ -83,21 +85,37 @@ def _validate_tag_level( assert workflow_definition # TAG level requires that each step name is unique, + # and all the output variable names in the step are unique. duplicate_names: set[str] = set() - step_names: set[str] = set() + all_step_names: set[str] = set() + variable_names: set[str] = set() for step in get_steps(workflow_definition): step_name: str = step["name"] - if step_name not in duplicate_names and step_name in step_names: + if step_name not in duplicate_names and step_name in all_step_names: duplicate_names.add(step_name) - step_names.add(step_name) + all_step_names.add(step_name) + # Are output variable names unique? + variable_names.clear() + step_variables: list[str] = get_step_output_variable_names( + workflow_definition, step_name + ) + for step_variable in step_variables: + if step_variable in variable_names: + return ValidationResult( + error_num=3, + error_msg=[ + f"Duplicate step output variable: {step_variable} (step={step_name})" + ], + ) + variable_names.add(step_variable) if duplicate_names: return ValidationResult( error_num=2, error_msg=[f"Duplicate step names found: {', '.join(duplicate_names)}"], ) # Workflow variables must be unique. - duplicate_names = set() - variable_names: set[str] = set() + duplicate_names.clear() + variable_names.clear() wf_variable_names: list[str] = get_variable_names(workflow_definition) for wf_variable_name in wf_variable_names: if ( @@ -113,6 +131,25 @@ def _validate_tag_level( f"Duplicate workflow variable names found: {', '.join(duplicate_names)}" ], ) + # For each 'replicating' step the replicating variable + # must be declared in the step. 
+ for step in get_steps(workflow_definition): + if ( + replicate_using_input := step.get("replicate", {}) + .get("using", {}) + .get("input") + ): + step_name = step["name"] + if replicate_using_input not in get_step_input_variable_names( + workflow_definition, step_name + ): + return ValidationResult( + error_num=7, + error_msg=[ + "Replicate input variable is not declared:" + f" {replicate_using_input} (step={step_name})" + ], + ) return _VALIDATION_SUCCESS From ab94bd7fecc31af77781cc36a50d9b414eefec91 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Mon, 23 Jun 2025 14:34:41 +0100 Subject: [PATCH 03/57] feat: Add get_generated_outputs_for_step_output() to API adapter --- tests/wapi_adapter.py | 7 +++++++ workflow/workflow_abc.py | 13 +++++++++++++ 2 files changed, 20 insertions(+) diff --git a/tests/wapi_adapter.py b/tests/wapi_adapter.py index 0f9915f..7b7d8d3 100644 --- a/tests/wapi_adapter.py +++ b/tests/wapi_adapter.py @@ -381,6 +381,13 @@ def get_running_workflow_steps(self, *, running_workflow_id: str) -> dict[str, A steps.append(item) return {"count": len(steps), "running_workflow_steps": steps} + def get_generated_outputs_for_step_output( + self, *, running_workflow_step_id: str, output: str + ) -> tuple[dict[str, Any], int]: + del running_workflow_step_id + del output + return {"outputs": []}, HTTPStatus.OK + def realise_outputs( self, *, running_workflow_step_id: str, outputs: list[str, str] ) -> tuple[dict[str, Any], int]: diff --git a/workflow/workflow_abc.py b/workflow/workflow_abc.py index 12e3251..65b1b3d 100644 --- a/workflow/workflow_abc.py +++ b/workflow/workflow_abc.py @@ -341,6 +341,19 @@ def get_job( # } # If not present an empty dictionary should be returned. + @abstractmethod + def get_generated_outputs_for_step_output( + self, *, running_workflow_step_id: str, output: str + ) -> tuple[dict[str, Any], int]: + """Gets the set of outputs for the output variable of a given step. + The step must have stopped for this to return any meaningful value. + Returned files might also include paths that are relative to the + Step's instance directory.""" + # Should return a (possibly empty) list of paths and filenames: + # { + # "outputs": ["file1.sdf", "file2.sdf"] + # } + @abstractmethod def realise_outputs( self, *, running_workflow_step_id: str, outputs: list[str] From b4be3117e5d2574884e40003bd4dc6ea4cc43224 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Mon, 23 Jun 2025 14:52:19 +0100 Subject: [PATCH 04/57] feat: Rename new API method --- tests/wapi_adapter.py | 2 +- workflow/workflow_abc.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/wapi_adapter.py b/tests/wapi_adapter.py index 7b7d8d3..11e2798 100644 --- a/tests/wapi_adapter.py +++ b/tests/wapi_adapter.py @@ -381,7 +381,7 @@ def get_running_workflow_steps(self, *, running_workflow_id: str) -> dict[str, A steps.append(item) return {"count": len(steps), "running_workflow_steps": steps} - def get_generated_outputs_for_step_output( + def get_running_workflow_step_outputs( self, *, running_workflow_step_id: str, output: str ) -> tuple[dict[str, Any], int]: del running_workflow_step_id diff --git a/workflow/workflow_abc.py b/workflow/workflow_abc.py index 65b1b3d..251f6c5 100644 --- a/workflow/workflow_abc.py +++ b/workflow/workflow_abc.py @@ -342,7 +342,7 @@ def get_job( # If not present an empty dictionary should be returned. 
@abstractmethod - def get_generated_outputs_for_step_output( + def get_running_workflow_step_outputs( self, *, running_workflow_step_id: str, output: str ) -> tuple[dict[str, Any], int]: """Gets the set of outputs for the output variable of a given step. From 27c5a83dd1d0561e4e62c976899713f16b1d02c2 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Mon, 23 Jun 2025 15:24:24 +0100 Subject: [PATCH 05/57] feat: Add replica to step creation (and step-by-name query) --- tests/wapi_adapter.py | 17 +++++++++++++---- workflow/workflow_abc.py | 13 ++++++++++--- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/tests/wapi_adapter.py b/tests/wapi_adapter.py index 11e2798..9150b90 100644 --- a/tests/wapi_adapter.py +++ b/tests/wapi_adapter.py @@ -136,8 +136,12 @@ def create_running_workflow_step( *, running_workflow_id: str, step: str, + replica: int = 0, prior_running_workflow_step_id: str | None = None, ) -> dict[str, Any]: + if replica: + assert replica > 0 + UnitTestWorkflowAPIAdapter.lock.acquire() with open(_RUNNING_WORKFLOW_STEP_PICKLE_FILE, "rb") as pickle_file: running_workflow_step = Unpickler(pickle_file).load() @@ -150,6 +154,7 @@ def create_running_workflow_step( "name": step, "done": False, "success": False, + "replica": replica, "variables": {}, "running_workflow": {"id": running_workflow_id}, } @@ -177,24 +182,28 @@ def get_running_workflow_step( return {}, 0 response = running_workflow_step[running_workflow_step_id] response["id"] = running_workflow_step_id + if response["replica"] == 0: + _ = response.pop("replica") return response, 0 def get_running_workflow_step_by_name( - self, *, name: str, running_workflow_id: str + self, *, name: str, running_workflow_id: str, replica: int = 0 ) -> dict[str, Any]: + if replica: + assert replica > 0 UnitTestWorkflowAPIAdapter.lock.acquire() with open(_RUNNING_WORKFLOW_STEP_PICKLE_FILE, "rb") as pickle_file: running_workflow_step = Unpickler(pickle_file).load() UnitTestWorkflowAPIAdapter.lock.release() - print(f"name={name} running_workflow_id={running_workflow_id}") for rwfs_id, record in running_workflow_step.items(): - print(f"rwfs_id={rwfs_id} record={record}") if record["running_workflow"]["id"] != running_workflow_id: continue - if record["name"] == name: + if record["name"] == name and record["replica"] == replica: response = record response["id"] = rwfs_id + if record["replica"] == 0: + _ = response.pop("replica") return response, 0 return {}, 0 diff --git a/workflow/workflow_abc.py b/workflow/workflow_abc.py index 251f6c5..f65a5a1 100644 --- a/workflow/workflow_abc.py +++ b/workflow/workflow_abc.py @@ -205,9 +205,14 @@ def create_running_workflow_step( *, running_workflow_id: str, step: str, + replica: int = 0, prior_running_workflow_step_id: str | None = None, ) -> tuple[dict[str, Any], int]: - """Create a RunningWorkflowStep Record (from a RunningWorkflow)""" + """Create a RunningWorkflowStep Record (from a RunningWorkflow). + If this is a replica (concurrent execution) of a step the replica + value must be set to a value greater than 0. 
The replica is unique + for a given step and is used to distinguish between running workflow steps + generated from the same step name.""" # Should return: # { # "id": "r-workflow-step-00000000-0000-0000-0000-000000000001", @@ -244,10 +249,12 @@ def get_running_workflow_step( @abstractmethod def get_running_workflow_step_by_name( - self, *, name: str, running_workflow_id: str + self, *, name: str, running_workflow_id: str, replica: int = 0 ) -> tuple[dict[str, Any], int]: """Get a RunningWorkflowStep Record given a step name - (and its RUnningWorkflow ID)""" + (and its RunningWorkflow ID). For steps that may be replicated + the replica, a value of 1 or higher, is used to identify the specific replica. + """ # Should return: # { # "id": "r-workflow-step-00000000-0000-0000-0000-000000000001", From 1d22716eea8c5d16460726e0b839b8c5cd8b93c5 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Mon, 23 Jun 2025 16:00:04 +0100 Subject: [PATCH 06/57] feat: Removed 'as' from workflow mapping output declaration --- tests/test_decoder.py | 13 ++++++------- tests/test_test_api_adapter.py | 1 - tests/wapi_adapter.py | 3 +-- .../duplicate-step-output-variable-names.yaml | 1 - .../duplicate-workflow-variable-names.yaml | 1 - .../replicate-using-undeclared-input.yaml | 1 - .../simple-python-molprops-with-options.yaml | 1 - .../simple-python-molprops.yaml | 1 - .../simple-python-parallel.yaml | 1 - workflow/decoder.py | 18 +++++------------- workflow/workflow-schema.yaml | 3 --- workflow/workflow_abc.py | 6 ++---- workflow/workflow_engine.py | 7 +++---- 13 files changed, 17 insertions(+), 40 deletions(-) diff --git a/tests/test_decoder.py b/tests/test_decoder.py index 8a25774..f9ead02 100644 --- a/tests/test_decoder.py +++ b/tests/test_decoder.py @@ -317,37 +317,36 @@ def test_get_workflow_outputs_for_step_with_name_step1(): # Arrange # Act - outputs = decoder.get_workflow_output_values_for_step( + has_outputs = decoder.workflow_step_has_outputs( _SIMPLE_PYTHON_MOLPROPS_WITH_OPTIONS_WORKFLOW, "step1" ) # Assert - assert not outputs + assert not has_outputs def test_get_workflow_outputs_for_step_with_name_step2(): # Arrange # Act - outputs = decoder.get_workflow_output_values_for_step( + has_outputs = decoder.workflow_step_has_outputs( _SIMPLE_PYTHON_MOLPROPS_WITH_OPTIONS_WORKFLOW, "step2" ) # Assert - assert len(outputs) == 1 - assert "clustered-molecules.smi" in outputs + assert has_outputs def test_get_workflow_outputs_for_step_with_unkown_step_name(): # Arrange # Act - outputs = decoder.get_workflow_output_values_for_step( + has_outputs = decoder.workflow_step_has_outputs( _SIMPLE_PYTHON_MOLPROPS_WITH_OPTIONS_WORKFLOW, "unknown" ) # Assert - assert not outputs + assert not has_outputs def test_get_step_input_variable_names_when_duplicates(): diff --git a/tests/test_test_api_adapter.py b/tests/test_test_api_adapter.py index 673e725..f54d4a4 100644 --- a/tests/test_test_api_adapter.py +++ b/tests/test_test_api_adapter.py @@ -450,7 +450,6 @@ def test_basic_realise(): # Act response, _ = utaa.realise_outputs( running_workflow_step_id="r-workflow-step-00000000-0000-0000-0000-000000000001", - outputs=["a.txt"], ) # Assert diff --git a/tests/wapi_adapter.py b/tests/wapi_adapter.py index 9150b90..13e0e97 100644 --- a/tests/wapi_adapter.py +++ b/tests/wapi_adapter.py @@ -398,8 +398,7 @@ def get_running_workflow_step_outputs( return {"outputs": []}, HTTPStatus.OK def realise_outputs( - self, *, running_workflow_step_id: str, outputs: list[str, str] + self, *, running_workflow_step_id: str ) -> tuple[dict[str, Any], 
int]: del running_workflow_step_id - del outputs return {}, HTTPStatus.OK diff --git a/tests/workflow-definitions/duplicate-step-output-variable-names.yaml b/tests/workflow-definitions/duplicate-step-output-variable-names.yaml index d37c544..5a371a2 100644 --- a/tests/workflow-definitions/duplicate-step-output-variable-names.yaml +++ b/tests/workflow-definitions/duplicate-step-output-variable-names.yaml @@ -11,7 +11,6 @@ variable-mapping: from: step: step-2 output: outputFile - as: clustered-molecules.smi steps: diff --git a/tests/workflow-definitions/duplicate-workflow-variable-names.yaml b/tests/workflow-definitions/duplicate-workflow-variable-names.yaml index 6c1206f..f524c44 100644 --- a/tests/workflow-definitions/duplicate-workflow-variable-names.yaml +++ b/tests/workflow-definitions/duplicate-workflow-variable-names.yaml @@ -11,7 +11,6 @@ variable-mapping: from: step: step2 output: outputFile - as: clustered-molecules.smi steps: diff --git a/tests/workflow-definitions/replicate-using-undeclared-input.yaml b/tests/workflow-definitions/replicate-using-undeclared-input.yaml index cc454e5..883ec62 100644 --- a/tests/workflow-definitions/replicate-using-undeclared-input.yaml +++ b/tests/workflow-definitions/replicate-using-undeclared-input.yaml @@ -11,7 +11,6 @@ variable-mapping: from: step: step2 output: outputFile - as: clustered-molecules.smi steps: diff --git a/tests/workflow-definitions/simple-python-molprops-with-options.yaml b/tests/workflow-definitions/simple-python-molprops-with-options.yaml index afc1ae8..2fc1155 100644 --- a/tests/workflow-definitions/simple-python-molprops-with-options.yaml +++ b/tests/workflow-definitions/simple-python-molprops-with-options.yaml @@ -61,7 +61,6 @@ variable-mapping: from: step: step2 output: outputFile - as: clustered-molecules.smi options: - name: rdkitPropertyName default: name diff --git a/tests/workflow-definitions/simple-python-molprops.yaml b/tests/workflow-definitions/simple-python-molprops.yaml index cf28dd2..dddb080 100644 --- a/tests/workflow-definitions/simple-python-molprops.yaml +++ b/tests/workflow-definitions/simple-python-molprops.yaml @@ -11,7 +11,6 @@ variable-mapping: from: step: step2 output: outputFile - as: clustered-molecules.smi steps: diff --git a/tests/workflow-definitions/simple-python-parallel.yaml b/tests/workflow-definitions/simple-python-parallel.yaml index bfd3935..e620cda 100644 --- a/tests/workflow-definitions/simple-python-parallel.yaml +++ b/tests/workflow-definitions/simple-python-parallel.yaml @@ -11,7 +11,6 @@ variable-mapping: from: step: final-step output: outputFile - as: clustered-molecules.smi steps: diff --git a/workflow/decoder.py b/workflow/decoder.py index 19abc94..6970caf 100644 --- a/workflow/decoder.py +++ b/workflow/decoder.py @@ -132,22 +132,14 @@ def get_workflow_job_input_names_for_step( return inputs -def get_workflow_output_values_for_step( - definition: dict[str, Any], name: str -) -> list[str]: - """Given a Workflow definition and a step name we return a list of workflow - out variable names the step creates. 
To do this we iterate through the workflows's - outputs to find those that are declared 'from' our step.""" +def workflow_step_has_outputs(definition: dict[str, Any], name: str) -> bool: + """Given a Workflow definition and a step name we return a boolean + that is true if the step produces outputs.""" wf_outputs = definition.get("variable-mapping", {}).get("outputs", {}) - outputs: list[str] = [] - outputs.extend( - output["as"] + return any( + "from" in output and "step" in output["from"] and output["from"]["step"] == name for output in wf_outputs - if "from" in output - and "step" in output["from"] - and output["from"]["step"] == name ) - return outputs def set_variables_from_options_for_step( diff --git a/workflow/workflow-schema.yaml b/workflow/workflow-schema.yaml index 70e7b10..70a6ad3 100644 --- a/workflow/workflow-schema.yaml +++ b/workflow/workflow-schema.yaml @@ -103,11 +103,8 @@ definitions: $ref: '#/definitions/template-variable-name' from: $ref: '#/definitions/from-step-output' - as: - $ref: '#/definitions/file-name' required: - name - - as # Declaration of a step option value from a workflow option (variable) as-step-option: diff --git a/workflow/workflow_abc.py b/workflow/workflow_abc.py index f65a5a1..680cb3b 100644 --- a/workflow/workflow_abc.py +++ b/workflow/workflow_abc.py @@ -363,11 +363,9 @@ def get_running_workflow_step_outputs( @abstractmethod def realise_outputs( - self, *, running_workflow_step_id: str, outputs: list[str] + self, *, running_workflow_step_id: str ) -> tuple[dict[str, Any], int]: - """Copy (link) the step's files as outputs into the Project directory. - A step ID is provided, along with a list of outputs - (files that will be in the step's instance directory).""" + """Copy (link) the step's files as outputs into the Project directory.""" # Should return an empty map or: # { # "error": "", diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index 7d6bc8f..e11d17a 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -41,8 +41,8 @@ from .decoder import ( get_workflow_job_input_names_for_step, - get_workflow_output_values_for_step, set_step_variables, + workflow_step_has_outputs, ) _LOGGER: logging.Logger = logging.getLogger(__name__) @@ -260,12 +260,11 @@ def _handle_pod_message(self, msg: PodMessage) -> None: error_num: int | None = None error_msg: str | None = None - if output_values := get_workflow_output_values_for_step(wf_response, step_name): - # Got some output values + if workflow_step_has_outputs(wf_response, step_name): + # The step produces at least one output. 
# Inform the DM so it can link them to the Project directory response, status_code = self._wapi_adapter.realise_outputs( running_workflow_step_id=r_wfsid, - outputs=output_values, ) if status_code != HTTPStatus.OK: error_num = status_code From 0c60799747f290afe827d1a49444debfc5929705 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Mon, 23 Jun 2025 16:38:05 +0100 Subject: [PATCH 07/57] feat: Add get_running_workflow_step_output_values_for_output() to API --- tests/test_test_api_adapter.py | 15 +++++++++++++++ tests/wapi_adapter.py | 2 +- workflow/workflow_abc.py | 6 +++--- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/tests/test_test_api_adapter.py b/tests/test_test_api_adapter.py index f54d4a4..0cf4032 100644 --- a/tests/test_test_api_adapter.py +++ b/tests/test_test_api_adapter.py @@ -443,6 +443,21 @@ def test_get_running_workflow_step_by_name(): assert response["id"] == rwfs_id +def test_basic_get_running_workflow_step_output_values_for_output_when_step_unknown(): + # Arrange + utaa = UnitTestWorkflowAPIAdapter() + + # Act + response, _ = utaa.get_running_workflow_step_output_values_for_output( + running_workflow_step_id="r-workflow-step-00000000-0000-0000-0000-000000000001", + output="outputFile", + ) + + # Assert + assert "outputs" in response + assert len(response["outputs"]) == 0 + + def test_basic_realise(): # Arrange utaa = UnitTestWorkflowAPIAdapter() diff --git a/tests/wapi_adapter.py b/tests/wapi_adapter.py index 13e0e97..d215301 100644 --- a/tests/wapi_adapter.py +++ b/tests/wapi_adapter.py @@ -390,7 +390,7 @@ def get_running_workflow_steps(self, *, running_workflow_id: str) -> dict[str, A steps.append(item) return {"count": len(steps), "running_workflow_steps": steps} - def get_running_workflow_step_outputs( + def get_running_workflow_step_output_values_for_output( self, *, running_workflow_step_id: str, output: str ) -> tuple[dict[str, Any], int]: del running_workflow_step_id diff --git a/workflow/workflow_abc.py b/workflow/workflow_abc.py index 680cb3b..e3ae8dd 100644 --- a/workflow/workflow_abc.py +++ b/workflow/workflow_abc.py @@ -349,16 +349,16 @@ def get_job( # If not present an empty dictionary should be returned. @abstractmethod - def get_running_workflow_step_outputs( + def get_running_workflow_step_output_values_for_output( self, *, running_workflow_step_id: str, output: str ) -> tuple[dict[str, Any], int]: - """Gets the set of outputs for the output variable of a given step. + """Gets the set of outputs generated for the output variable of a given step. The step must have stopped for this to return any meaningful value. 
Returned files might also include paths that are relative to the Step's instance directory.""" # Should return a (possibly empty) list of paths and filenames: # { - # "outputs": ["file1.sdf", "file2.sdf"] + # "outputs": ["dir/file1.sdf", "dir/file2.sdf"] # } @abstractmethod From 12b3602148712a52d5aca457c88ea5965eb8886d Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Mon, 23 Jun 2025 16:55:06 +0100 Subject: [PATCH 08/57] fix: Removed rogue 'print' statement --- workflow/decoder.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/workflow/decoder.py b/workflow/decoder.py index 6970caf..55512e6 100644 --- a/workflow/decoder.py +++ b/workflow/decoder.py @@ -260,7 +260,4 @@ def set_step_variables( ) result |= options - - print("final step vars", result) - return result From da1286929de3b7ff1b14fc84b5cb252903f0f0bf Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Thu, 26 Jun 2025 10:05:07 +0100 Subject: [PATCH 09/57] test: Add mock of step outputs --- tests/test_test_api_adapter.py | 64 +++++++++++++++++++++-- tests/test_workflow_engine_examples.py | 16 +++--- tests/wapi_adapter.py | 70 ++++++++++++++++++++++---- workflow/workflow_abc.py | 4 +- 4 files changed, 131 insertions(+), 23 deletions(-) diff --git a/tests/test_test_api_adapter.py b/tests/test_test_api_adapter.py index 0cf4032..88a5f61 100644 --- a/tests/test_test_api_adapter.py +++ b/tests/test_test_api_adapter.py @@ -443,19 +443,75 @@ def test_get_running_workflow_step_by_name(): assert response["id"] == rwfs_id -def test_basic_get_running_workflow_step_output_values_for_output_when_step_unknown(): +def test_mock_get_running_workflow_step_output_values_for_output(): # Arrange utaa = UnitTestWorkflowAPIAdapter() + response = utaa.create_workflow(workflow_definition={"name": "blah"}) + response = utaa.create_running_workflow( + user_id="dlister", + workflow_id=response["id"], + project_id=TEST_PROJECT_ID, + variables={}, + ) + response, _ = utaa.create_running_workflow_step( + running_workflow_id=response["id"], step="step-1" + ) # Act + utaa.mock_get_running_workflow_step_output_values_for_output( + step_name="step-1", output_variable="results", output=["a", "b"] + ) + + # Assert response, _ = utaa.get_running_workflow_step_output_values_for_output( running_workflow_step_id="r-workflow-step-00000000-0000-0000-0000-000000000001", - output="outputFile", + output_variable="results", + ) + assert "output" in response + assert len(response["output"]) == 2 + assert "a" in response["output"] + assert "b" in response["output"] + + +def test_basic_get_running_workflow_step_output_values_for_output_when_step_variable_name_unknown(): + # Arrange + utaa = UnitTestWorkflowAPIAdapter() + response = utaa.create_workflow(workflow_definition={"name": "blah"}) + response = utaa.create_running_workflow( + user_id="dlister", + workflow_id=response["id"], + project_id=TEST_PROJECT_ID, + variables={}, + ) + response, _ = utaa.create_running_workflow_step( + running_workflow_id=response["id"], step="step-1" + ) + + # Act + utaa.mock_get_running_workflow_step_output_values_for_output( + step_name="step-1", output_variable="results", output=["a", "b"] ) # Assert - assert "outputs" in response - assert len(response["outputs"]) == 0 + with pytest.raises(AssertionError): + _, _ = utaa.get_running_workflow_step_output_values_for_output( + running_workflow_step_id="r-workflow-step-00000000-0000-0000-0000-000000000001", + output_variable="unknownVariable", + ) + + +def test_basic_get_running_workflow_step_output_values_for_output_when_step_unknown(): + # 
Arrange + utaa = UnitTestWorkflowAPIAdapter() + + # Act + with pytest.raises(AssertionError): + _, _ = utaa.get_running_workflow_step_output_values_for_output( + running_workflow_step_id="r-workflow-step-00000000-0000-0000-0000-000000000001", + output_variable="outputFile", + ) + + # Assert def test_basic_realise(): diff --git a/tests/test_workflow_engine_examples.py b/tests/test_workflow_engine_examples.py index 51b1053..8fa25b9 100644 --- a/tests/test_workflow_engine_examples.py +++ b/tests/test_workflow_engine_examples.py @@ -42,7 +42,7 @@ def basic_engine(): print("Starting message queue...") message_queue.start() - yield [wapi_adapter, message_dispatcher] + yield [message_dispatcher, wapi_adapter] print("Stopping message queue...") message_queue.stop() @@ -145,7 +145,7 @@ def wait_for_workflow( def test_workflow_engine_example_two_step_nop(basic_engine): # Arrange - da, md = basic_engine + md, da = basic_engine # Act r_wfid = start_workflow(md, da, "example-two-step-nop", {}) @@ -164,7 +164,7 @@ def test_workflow_engine_example_two_step_nop(basic_engine): def test_workflow_engine_example_nop_fail(basic_engine): # Arrange - da, md = basic_engine + md, da = basic_engine # Act r_wfid = start_workflow(md, da, "example-nop-fail", {}) @@ -181,7 +181,7 @@ def test_workflow_engine_example_nop_fail(basic_engine): def test_workflow_engine_example_smiles_to_file(basic_engine): # Arrange - da, md = basic_engine + md, da = basic_engine # Make sure a file that should be generated by the test # does not exist before we run the test. output_file = "ethanol.smi" @@ -206,7 +206,7 @@ def test_workflow_engine_example_smiles_to_file(basic_engine): def test_workflow_engine_shortcut_example_1(basic_engine): # Arrange - da, md = basic_engine + md, da = basic_engine # Make sure files that should be generated by the test # do not exist before we run the test. output_file_a = "a.sdf" @@ -234,7 +234,7 @@ def test_workflow_engine_shortcut_example_1(basic_engine): def test_workflow_engine_simple_python_molprops(basic_engine): # Arrange - da, md = basic_engine + md, da = basic_engine # Make sure files that should be generated by the test # do not exist before we run the test. output_file_1 = "step1.out.smi" @@ -315,7 +315,7 @@ def test_workflow_engine_simple_python_molprops(basic_engine): def test_workflow_engine_simple_python_molprops_with_options(basic_engine): # Arrange - da, md = basic_engine + md, da = basic_engine # Make sure files that should be generated by the test # do not exist before we run the test. output_file_1 = "step1.out.smi" @@ -403,7 +403,7 @@ def test_workflow_engine_simple_python_molprops_with_options(basic_engine): def test_workflow_engine_simple_python_parallel(basic_engine): # Arrange - da, md = basic_engine + md, da = basic_engine # Make sure files that should be generated by the test # do not exist before we run the test. output_file_first = "first-step.out.smi" diff --git a/tests/wapi_adapter.py b/tests/wapi_adapter.py index d215301..f604689 100644 --- a/tests/wapi_adapter.py +++ b/tests/wapi_adapter.py @@ -15,6 +15,7 @@ method. 
""" +import copy import os from http import HTTPStatus from multiprocessing import Lock @@ -49,6 +50,7 @@ f"{_PICKLE_DIRECTORY}/running-workflow-step.pickle" ) _INSTANCE_PICKLE_FILE: str = f"{_PICKLE_DIRECTORY}/instance.pickle" +_MOCK_STEP_OUTPUT_FILE: str = f"{_PICKLE_DIRECTORY}/mock-output.pickle" class UnitTestWorkflowAPIAdapter(WorkflowAPIAdapter): @@ -73,12 +75,13 @@ def __init__(self): _RUNNING_WORKFLOW_PICKLE_FILE, _RUNNING_WORKFLOW_STEP_PICKLE_FILE, _INSTANCE_PICKLE_FILE, + _MOCK_STEP_OUTPUT_FILE, ]: with open(file, "wb") as pickle_file: Pickler(pickle_file).dump({}) UnitTestWorkflowAPIAdapter.lock.release() - def get_workflow(self, *, workflow_id: str) -> dict[str, Any]: + def get_workflow(self, *, workflow_id: str) -> tuple[dict[str, Any], int]: UnitTestWorkflowAPIAdapter.lock.acquire() with open(_WORKFLOW_PICKLE_FILE, "rb") as pickle_file: workflow = Unpickler(pickle_file).load() @@ -138,7 +141,7 @@ def create_running_workflow_step( step: str, replica: int = 0, prior_running_workflow_step_id: str | None = None, - ) -> dict[str, Any]: + ) -> tuple[dict[str, Any], int]: if replica: assert replica > 0 @@ -172,7 +175,7 @@ def create_running_workflow_step( def get_running_workflow_step( self, *, running_workflow_step_id: str - ) -> dict[str, Any]: + ) -> tuple[dict[str, Any], int]: UnitTestWorkflowAPIAdapter.lock.acquire() with open(_RUNNING_WORKFLOW_STEP_PICKLE_FILE, "rb") as pickle_file: running_workflow_step = Unpickler(pickle_file).load() @@ -188,7 +191,7 @@ def get_running_workflow_step( def get_running_workflow_step_by_name( self, *, name: str, running_workflow_id: str, replica: int = 0 - ) -> dict[str, Any]: + ) -> tuple[dict[str, Any], int]: if replica: assert replica > 0 UnitTestWorkflowAPIAdapter.lock.acquire() @@ -293,7 +296,9 @@ def get_instance(self, *, instance_id: str) -> dict[str, Any]: response = {} if instance_id not in instances else instances[instance_id] return response, 0 - def get_job(self, *, collection: str, job: str, version: str) -> dict[str, Any]: + def get_job( + self, *, collection: str, job: str, version: str + ) -> tuple[dict[str, Any], int]: assert collection == _JOB_DEFINITIONS["collection"] assert job in _JOB_DEFINITIONS["jobs"] assert version @@ -391,14 +396,61 @@ def get_running_workflow_steps(self, *, running_workflow_id: str) -> dict[str, A return {"count": len(steps), "running_workflow_steps": steps} def get_running_workflow_step_output_values_for_output( - self, *, running_workflow_step_id: str, output: str + self, *, running_workflow_step_id: str, output_variable: str ) -> tuple[dict[str, Any], int]: - del running_workflow_step_id - del output - return {"outputs": []}, HTTPStatus.OK + """We use the 'mock' data to return output values, otherwise + we return an empty list. And we need to get the step in order to get its name. + """ + # The RunningWorkflowStep must exist... + step, _ = self.get_running_workflow_step( + running_workflow_step_id=running_workflow_step_id + ) + assert step + step_name: str = step["name"] + # Now we can inspect the 'mock' data... + UnitTestWorkflowAPIAdapter.lock.acquire() + with open(_MOCK_STEP_OUTPUT_FILE, "rb") as pickle_file: + mock_output = Unpickler(pickle_file).load() + UnitTestWorkflowAPIAdapter.lock.release() + + if step_name not in mock_output: + return {"output": []}, 0 + # The record's output variable must match (there's only one record per step atm) + assert mock_output[step_name]["output_variable"] == output_variable + # Now return what was provided to the mock method... 
+        response = {"output": copy.copy(mock_output[step_name]["output"])}
+        return response, 0

     def realise_outputs(
         self, *, running_workflow_step_id: str
     ) -> tuple[dict[str, Any], int]:
         del running_workflow_step_id
         return {}, HTTPStatus.OK
+
+    # Custom (test) methods
+    # Methods not declared in the ABC
+
+    def mock_get_running_workflow_step_output_values_for_output(
+        self, *, step_name: str, output_variable: str, output: list[str]
+    ) -> None:
+        """Sets the output response for a step.
+        Limitation is that there can only be one record for each step name
+        so, for now, the output_variable is superfluous and only used
+        to check the output variable name matches."""
+        assert isinstance(step_name, str)
+        assert isinstance(output_variable, str)
+        assert isinstance(output, list)
+
+        UnitTestWorkflowAPIAdapter.lock.acquire()
+        with open(_MOCK_STEP_OUTPUT_FILE, "rb") as pickle_file:
+            mock_output = Unpickler(pickle_file).load()
+
+        record = {
+            "output_variable": output_variable,
+            "output": output,
+        }
+        mock_output[step_name] = record
+
+        with open(_MOCK_STEP_OUTPUT_FILE, "wb") as pickle_file:
+            Pickler(pickle_file).dump(mock_output)
+        UnitTestWorkflowAPIAdapter.lock.release()
diff --git a/workflow/workflow_abc.py b/workflow/workflow_abc.py
index e3ae8dd..aa01199 100644
--- a/workflow/workflow_abc.py
+++ b/workflow/workflow_abc.py
@@ -350,7 +350,7 @@ def get_job(
     @abstractmethod
     def get_running_workflow_step_output_values_for_output(
-        self, *, running_workflow_step_id: str, output: str
+        self, *, running_workflow_step_id: str, output_variable: str
     ) -> tuple[dict[str, Any], int]:
         """Gets the set of outputs generated for the output variable of a given step.
         The step must have stopped for this to return any meaningful value.
@@ -358,7 +358,7 @@ def get_running_workflow_step_output_values_for_output(
         Step's instance directory."""
         # Should return a (possibly empty) list of paths and filenames:
         # {
-        #   "outputs": ["dir/file1.sdf", "dir/file2.sdf"]
+        #   "output": ["dir/file1.sdf", "dir/file2.sdf"]
         # }

From cc5d8cc71dc5fad7f9b025e0e8b24f9bc45b8626 Mon Sep 17 00:00:00 2001
From: Kalev Takkis
Date: Fri, 18 Jul 2025 16:56:11 +0100
Subject: [PATCH 10/57] fix: it's a mess

---
 tests/job-definitions/job-definitions.yaml | 4 +
 tests/jobs/copyf.py | 30 ++++++
 tests/jobs/copyf.sh | 4 +
 tests/jobs/split-smi.sh | 72 +++++++++++++
 tests/test_workflow_engine_examples.py | 58 +++++++---
 tests/wapi_adapter.py | 6 ++
 .../simple-python-fanout.yaml | 70 ++++++++++++
 workflow/decoder.py | 23 +++-
 workflow/workflow-schema.yaml | 28 ++---
 workflow/workflow_engine.py | 101 ++++++++++++++----
 10 files changed, 344 insertions(+), 52 deletions(-)
 create mode 100644 tests/jobs/copyf.py
 create mode 100755 tests/jobs/copyf.sh
 create mode 100755 tests/jobs/split-smi.sh
 create mode 100644 tests/workflow-definitions/simple-python-fanout.yaml

diff --git a/tests/job-definitions/job-definitions.yaml b/tests/job-definitions/job-definitions.yaml
index 66afcd3..7e3e1b7 100644
--- a/tests/job-definitions/job-definitions.yaml
+++ b/tests/job-definitions/job-definitions.yaml
@@ -132,3 +132,7 @@ jobs:
   concatenate:
     command: >-
       concatenate.py {% for ifile in inputFile %}{{ ifile }} {% endfor %} --outputFile {{ outputFile }}
+
+  splitsmiles:
+    command: >-
+      copyf.py {{ inputFile }}
diff --git a/tests/jobs/copyf.py b/tests/jobs/copyf.py
new file mode 100644
index 0000000..23dc38b
--- /dev/null
+++ b/tests/jobs/copyf.py
@@ -0,0 +1,30 @@
+import shutil
+import sys
+from pathlib import Path
+
+
+def main():
+    print("copyf job running")
+    if len(sys.argv) != 2:
+        print("Usage: python copy_file.py <file>")
+        sys.exit(1)
+
+    original_path = Path(sys.argv[1])
+
+    if not original_path.exists() or not original_path.is_file():
+        print(f"Error: '{original_path}' does not exist or is not a file.")
+        sys.exit(1)
+
+    # Create a new filename like 'example_copy.txt'
+    new_name = original_path.absolute().parent.joinpath("chunk_1.smi")
+    new_path = original_path.with_name(new_name.name)
+    shutil.copyfile(original_path, new_path)
+
+    new_name = original_path.absolute().parent.joinpath("chunk_2.smi")
+    new_path = original_path.with_name(new_name.name)
+
+    shutil.copyfile(original_path, new_path)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/jobs/copyf.sh b/tests/jobs/copyf.sh
new file mode 100755
index 0000000..8994a2b
--- /dev/null
+++ b/tests/jobs/copyf.sh
@@ -0,0 +1,4 @@
+#! /bin/bash
+
+cp "$1" chunk_1.smi
+cp "$1" chunk_2.smi
diff --git a/tests/jobs/split-smi.sh b/tests/jobs/split-smi.sh
new file mode 100755
index 0000000..48a2fb3
--- /dev/null
+++ b/tests/jobs/split-smi.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+set -euo pipefail
+
+if [[ $# -lt 3 || $# -gt 4 ]]; then
+    echo "Usage: $0 <input_file> <lines_per_file> <base_name> [has_header: yes]"
+    exit 1
+fi
+
+input_file="$1"
+lines_per_file="$2"
+base_name="$3"
+has_header="${4:-no}"
+
+# Determine how to read the file (plain text or gzipped)
+if [[ "$input_file" == *.gz ]]; then
+    reader="zcat"
+else
+    reader="cat"
+fi
+
+if ! [[ -f "$input_file" ]]; then
+    echo "Error: File '$input_file' not found"
+    exit 1
+fi
+
+# Extract header if present
+if [[ "$has_header" == "yes" ]]; then
+    header="$($reader "$input_file" | head -n1)"
+    data_start=2
+else
+    header=""
+    data_start=1
+fi
+
+# Count number of data lines (excluding header if present)
+data_lines="$($reader "$input_file" | tail -n +"$data_start" | wc -l)"
+if [[ "$data_lines" -eq 0 ]]; then
+    echo "No data lines to process."
+    exit 0
+fi
+
+# Calculate number of output files and required zero padding
+num_files=$(( (data_lines + lines_per_file - 1) / lines_per_file ))
+pad_width=0
+if [[ "$num_files" -gt 1 ]]; then
+    pad_width=${#num_files}
+fi
+
+# Split logic
+$reader "$input_file" | tail -n +"$data_start" | awk -v header="$header" -v lines="$lines_per_file" -v base="$base_name" -v pad="$pad_width" '
+function new_file() {
+    suffix = (pad > 0) ? sprintf("%0*d", pad, file_index) : file_index
+    file = base "_" suffix ".smi"
+    if (header != "") {
+        print header > file
+    }
+    file_index++
+    line_count = 0
+}
+{
+    if (line_count == 0) {
+        new_file()
+    }
+    print >> file
+    line_count++
+    if (line_count == lines) {
+        close(file)
+        print file " created"
+        line_count = 0
+    }
+}
+' file_index=1
diff --git a/tests/test_workflow_engine_examples.py b/tests/test_workflow_engine_examples.py
index 8fa25b9..8e4208a 100644
--- a/tests/test_workflow_engine_examples.py
+++ b/tests/test_workflow_engine_examples.py
@@ -83,6 +83,7 @@ def start_workflow(
         variables=variables,
         level=ValidationLevel.RUN,
     )
+    print("vr_result", vr_result)
     assert vr_result.error_num == 0

     # 3.
response = da.create_running_workflow( @@ -401,22 +402,44 @@ def test_workflow_engine_simple_python_molprops_with_options(basic_engine): assert project_file_exists(output_file_2) -def test_workflow_engine_simple_python_parallel(basic_engine): +def test_workflow_engine_simple_python_fanout(basic_engine): # Arrange md, da = basic_engine + + da.mock_get_running_workflow_step_output_values_for_output( + step_name="first-step", + output_variable="outputFile", + output=["chunk_1.smi", "chunk_2.smi"], + ) + + # da.mock_get_running_workflow_step_output_values_for_output( + # step_name="parallel-step", + # output_variable="outputFile", + # output=["chunk_1_proc.smi", "chunk_2_proc.smi"] + # ) + + # da.mock_get_running_workflow_step_output_values_for_output( + # step_name="final-step", + # output_variable="outputFile", + # output=["final-step.out.smi"], + # ) + # Make sure files that should be generated by the test # do not exist before we run the test. - output_file_first = "first-step.out.smi" + output_file_first = "chunk_1.smi" + output_file_second = "chunk_2.smi" assert not project_file_exists(output_file_first) - output_file_pa = "parallel-step-a.out.smi" - assert not project_file_exists(output_file_pa) - output_file_pb = "parallel-step-b.out.smi" - assert not project_file_exists(output_file_pb) - output_file_final = "final-step.out.smi" - assert not project_file_exists(output_file_final) + assert not project_file_exists(output_file_second) + output_file_p_first = "chunk_1_proc.smi" + output_file_p_second = "chunk_2_proc.smi" + assert not project_file_exists(output_file_p_first) + assert not project_file_exists(output_file_p_second) + # output_file_final = "final-step.out.smi" + # assert not project_file_exists(output_file_final) # And create the test's input file. input_file_1 = "input1.smi" - input_file_1_content = "O=C(CSCc1ccc(Cl)s1)N1CCC(O)CC1" + input_file_1_content = """O=C(CSCc1ccc(Cl)s1)N1CCC(O)CC1 + COCN1C(=O)NC(C)(C)C1=O""" with open( f"{EXECUTION_DIRECTORY}/{input_file_1}", mode="wt", encoding="utf8" ) as input_file: @@ -426,7 +449,7 @@ def test_workflow_engine_simple_python_parallel(basic_engine): r_wfid = start_workflow( md, da, - "simple-python-parallel", + "simple-python-fanout", {"candidateMolecules": input_file_1}, ) @@ -435,16 +458,17 @@ def test_workflow_engine_simple_python_parallel(basic_engine): # Additional, detailed checks... 
# Check we only have one RunningWorkflowStep, and it succeeded response = da.get_running_workflow_steps(running_workflow_id=r_wfid) + print("response", response) - assert response["count"] == 4 + assert response["count"] == 2 assert response["running_workflow_steps"][0]["done"] assert response["running_workflow_steps"][0]["success"] assert response["running_workflow_steps"][1]["done"] assert response["running_workflow_steps"][1]["success"] - assert response["running_workflow_steps"][2]["done"] - assert response["running_workflow_steps"][2]["success"] - assert response["running_workflow_steps"][3]["done"] - assert response["running_workflow_steps"][3]["success"] + # assert response["running_workflow_steps"][2]["done"] + # assert response["running_workflow_steps"][2]["success"] + # assert response["running_workflow_steps"][3]["done"] + # assert response["running_workflow_steps"][3]["success"] # This test should generate a file in the simulated project directory - assert project_file_exists(output_file_first) - assert project_file_exists(output_file_final) + # assert project_file_exists(output_file_first) + # assert project_file_exists(output_file_final) diff --git a/tests/wapi_adapter.py b/tests/wapi_adapter.py index f604689..e91db77 100644 --- a/tests/wapi_adapter.py +++ b/tests/wapi_adapter.py @@ -202,6 +202,7 @@ def get_running_workflow_step_by_name( for rwfs_id, record in running_workflow_step.items(): if record["running_workflow"]["id"] != running_workflow_id: continue + print("running wf step by name, record:", record) if record["name"] == name and record["replica"] == replica: response = record response["id"] = rwfs_id @@ -413,6 +414,11 @@ def get_running_workflow_step_output_values_for_output( mock_output = Unpickler(pickle_file).load() UnitTestWorkflowAPIAdapter.lock.release() + print("mock output", mock_output) + print("step", step) + print("step_name", step_name) + # mock output {'first-step': {'output_variable': 'results', 'output': ['chunk_1.smi', 'chunk_2.smi']}} + if step_name not in mock_output: return {"output": []}, 0 # The record's output variable must match (there's only one record per step atm) diff --git a/tests/workflow-definitions/simple-python-fanout.yaml b/tests/workflow-definitions/simple-python-fanout.yaml new file mode 100644 index 0000000..01f576f --- /dev/null +++ b/tests/workflow-definitions/simple-python-fanout.yaml @@ -0,0 +1,70 @@ +--- +kind: DataManagerWorkflow +kind-version: "2025.2" +name: python-workflow +description: >- + A simple parallel workflow. 
+  Input is split into N chunks and N instances of the same job are started
+variable-mapping:
+  inputs:
+  - name: candidateMolecules
+  outputs:
+  - name: clusteredMolecules
+    from:
+      step: final-step
+      output: outputFile
+
+
+steps:
+
+- name: first-step
+  description: Create inputs
+  specification:
+    collection: workflow-engine-unit-test-jobs
+    job: splitsmiles
+    version: "1.0.0"
+    variables:
+      name: "count"
+      value: "1"
+  inputs:
+  - input: inputFile
+    from:
+      workflow-input: candidateMolecules
+  outputs:
+  - output: outputFile
+    # as: chunk_*.smi
+
+- name: parallel-step
+  description: Add some params
+  specification:
+    collection: workflow-engine-unit-test-jobs
+    job: append-col
+    version: "1.0.0"
+    variables:
+      name: "desc1"
+      value: "777"
+  replicate:
+    using:
+      input: inputFile
+  inputs:
+  - input: inputFile
+    from:
+      step: first-step
+      output: outputFile
+  outputs:
+  - output: outputFile
+    # as: parallel-step.out.smi
+
+# - name: final-step
+#   description: Collate results
+#   specification:
+#     collection: workflow-engine-unit-test-jobs
+#     job: concatenate
+#     version: "1.0.0"
+#   inputs:
+#   - input: inputFile
+#     from:
+#       step: parallel-step
+#       output: outputFile
+#   outputs:
+#   - output: outputFile
+#     # as: final-step.out.smi
diff --git a/workflow/decoder.py b/workflow/decoder.py
index 55512e6..3e12ecb 100644
--- a/workflow/decoder.py
+++ b/workflow/decoder.py
@@ -213,6 +213,7 @@ def set_step_variables(
     workflow: dict[str, Any],
     inputs: list[dict[str, Any]],
     outputs: list[dict[str, Any]],
+    step_outputs: dict[str, Any],
     previous_step_outputs: list[dict[str, Any]],
     workflow_variables: dict[str, Any],
     step_name: str,
@@ -224,6 +225,13 @@ def set_step_variables(
     """
     result = {}

+    print("ssv: wf vars", workflow_variables)
+    print("ssv: inputs", inputs)
+    print("ssv: outputs", outputs)
+    print("ssv: step_outputs", step_outputs)
+    print("ssv: prev step outputs", previous_step_outputs)
+    print("ssv: step_name", step_name)
+
     for item in inputs:
         p_key = item["input"]
         p_val = ""
@@ -234,7 +242,16 @@
         elif "step" in val.keys():
             for out in previous_step_outputs:
                 if out["output"] == val["output"]:
-                    p_val = out["as"]
+                    # p_val = out["as"]
+                    if step_outputs["output"]:
+                        p_val = step_outputs["output"]
+                        print("\n!!!!!!!!!!!!!if clause!!!!!!!!!!!!!!!!!!!!!\n")
+                        print(p_val)
+                    else:
+                        # what do I need to do here??
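+                        # (Reached when the previous step has recorded no
+                        # output values for this variable.)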
+ print("\n!!!!!!!!!!!!!else clause!!!!!!!!!!!!!!!!!!!!!\n") + print(out) + print(val) # this bit handles multiple inputs: if a step # requires input from multiple steps, add them to @@ -250,7 +267,9 @@ def set_step_variables( for item in outputs: p_key = item["output"] - p_val = item["as"] + # p_val = item["as"] + # p_val = step_outputs["output"] + p_val = "somefile.smi" result[p_key] = p_val options = set_variables_from_options_for_step( diff --git a/workflow/workflow-schema.yaml b/workflow/workflow-schema.yaml index 70a6ad3..d904e45 100644 --- a/workflow/workflow-schema.yaml +++ b/workflow/workflow-schema.yaml @@ -206,17 +206,17 @@ definitions: - from # A Step output (with an 'as' - a declared value) - step-output-as: - type: object - additionalProperties: false - properties: - output: - $ref: '#/definitions/template-variable-name' - as: - $ref: '#/definitions/file-name' - required: - - output - - as + # step-output-as: + # type: object + # additionalProperties: false + # properties: + # output: + # $ref: '#/definitions/template-variable-name' + # as: + # $ref: '#/definitions/file-name' + # required: + # - output + # - as # A step specification variable @@ -282,9 +282,9 @@ definitions: - $ref: "#/definitions/step-input-from-workflow" outputs: type: array - items: - anyOf: - - $ref: "#/definitions/step-output-as" + # items: + # anyOf: + # - $ref: "#/definitions/step-output-as" required: - name - specification diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index e11d17a..672e9bb 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -424,9 +424,10 @@ def _validate_step_command( running_workflow_step_id, ) + # resolve all previous steps + previous_step_names = set() if our_step_index > 0: - # resolve all previous steps - previous_step_names = set() + print("prev step inputs", inputs) for inp in inputs: if step_name := inp["from"].get("step", None): previous_step_names.add(step_name) @@ -465,14 +466,54 @@ def _validate_step_command( running_wf, _ = self._wapi_adapter.get_running_workflow( running_workflow_id=running_wf_id ) + print("running wf", running_wf) workflow_id = running_wf["workflow"]["id"] workflow, _ = self._wapi_adapter.get_workflow(workflow_id=workflow_id) + print("workflow", workflow) + + # for step in workflow["steps"]: + # if step["name"] in previous_step_names: + + previous_step_id = None + for name in previous_step_names: + result, _ = self._wapi_adapter.get_running_workflow_step_by_name( + name=name, running_workflow_id=running_wf_id, replica=0 + ) + print("by name results", result) + print("by name results, vars", result["variables"]) + print("by name results, od", result["id"]) + previous_step_id = result["id"] + # if name == 'first-step': + # previous_step_id = result["id"] + + # don't understand how this is structured + print("prev steps", previous_step_names) + print("outputs", outputs) + print() + step_outputs: dict[str, Any] = {"output": []} + if previous_step_id: + for output in outputs: + for k, v in output.items(): + print("sending params to output mock", k, v) + try: + step_outputs, _ = ( + self._wapi_adapter.get_running_workflow_step_output_values_for_output( + running_workflow_step_id=previous_step_id, + output_variable=v, # foraeach outputs key + ) + ) + + print("mockputs", running_workflow_step_id, step_outputs) + except AssertionError: + print("no output for step", running_workflow_step_id, k, v) + step_vars = set_step_variables( workflow=workflow, workflow_variables=all_variables, inputs=inputs, 
outputs=outputs, + step_outputs=step_outputs, previous_step_outputs=previous_step_outputs, step_name=running_wf_step["name"], ) @@ -515,6 +556,7 @@ def _launch( wf_step_data, _ = self._wapi_adapter.get_workflow_steps_driving_this_step( running_workflow_step_id=rwfs_id, ) + print("wf_step_data", wf_step_data) assert wf_step_data["caller_step_index"] >= 0 our_step_index: int = wf_step_data["caller_step_index"] @@ -541,6 +583,13 @@ def _launch( project_id = rwf["project"]["id"] variables: dict[str, Any] = error_or_variables + print("variables", variables) + # find out if and by which parameter this step should be replicated + replicator = "" + if replicate := step.get("replicate", {}): + if using := replicate.get("using", {}): + # using is a dict but there can be only single value for now + replicator = list(using.values())[0] _LOGGER.info( "Launching step: RunningWorkflow=%s RunningWorkflowStep=%s step=%s" @@ -587,24 +636,38 @@ def _launch( # A list of Job input variable names inputs: list[str] = [] inputs.extend(iter(get_workflow_job_input_names_for_step(wf, step_name))) - lp: LaunchParameters = LaunchParameters( - project_id=project_id, - name=step_name, - debug=rwf.get("debug"), - launching_user_name=rwf["running_user"], - launching_user_api_token=rwf["running_user_api_token"], - specification=step["specification"], - specification_variables=variables, - running_workflow_id=rwf_id, - running_workflow_step_id=rwfs_id, - running_workflow_step_prior_steps=prior_steps, - running_workflow_step_inputs=inputs, - ) - lr: LaunchResult = self._instance_launcher.launch(launch_parameters=lp) - if lr.error_num: - self._set_step_error(step_name, rwf_id, rwfs_id, lr.error_num, lr.error_msg) + if replicator: + single_step_variables = [] + for replicating_param in variables[replicator]: + ssv = {**variables} + ssv[replicator] = replicating_param + single_step_variables.append(ssv) else: - _LOGGER.info("Launched step '%s' (command=%s)", step_name, lr.command) + single_step_variables = [variables] + + print("single step variables", single_step_variables) + + for params in single_step_variables: + lp: LaunchParameters = LaunchParameters( + project_id=project_id, + name=step_name, + debug=rwf.get("debug"), + launching_user_name=rwf["running_user"], + launching_user_api_token=rwf["running_user_api_token"], + specification=step["specification"], + specification_variables=params, + running_workflow_id=rwf_id, + running_workflow_step_id=rwfs_id, + running_workflow_step_prior_steps=prior_steps, + running_workflow_step_inputs=inputs, + ) + lr: LaunchResult = self._instance_launcher.launch(launch_parameters=lp) + if lr.error_num: + self._set_step_error( + step_name, rwf_id, rwfs_id, lr.error_num, lr.error_msg + ) + else: + _LOGGER.info("Launched step '%s' (command=%s)", step_name, lr.command) def _set_step_error( self, From ad0020579a96daa7f706af5cf020a7e572a58827 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Fri, 8 Aug 2025 10:16:25 +0100 Subject: [PATCH 11/57] docs: Add instance-directory comment --- workflow/workflow_abc.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/workflow/workflow_abc.py b/workflow/workflow_abc.py index aa01199..0d7b1c8 100644 --- a/workflow/workflow_abc.py +++ b/workflow/workflow_abc.py @@ -240,6 +240,12 @@ def get_running_workflow_step( # } # If not present an empty dictionary should be returned. 
# + # Additionally, if the step has started (an instance has been created) + # the response will contain a "instance_directory" top-level property + # that is the directory within the Project that's the step's working directory. + # + # "instance_directory": ".instance-00000000-0000-0000-0000-00000000000a", + # # For steps that are not the first in a workflow the following field # can be expected in the response: - # @@ -273,6 +279,12 @@ def get_running_workflow_step_by_name( # } # If not present an empty dictionary should be returned. # + # Additionally, if the step has started (an instance has been created) + # the response will contain a "instance_directory" top-level property + # that is the directory within the Project that's the step's working directory. + # + # "instance_directory": ".instance-00000000-0000-0000-0000-00000000000a", + # # For steps that are not the first in a workflow the following field # can be expected in the response: - # @@ -322,6 +334,12 @@ def get_workflow_steps_driving_this_step( # } # ] # } + # + # Additionally, if the step has started (an instance has been created) + # each entry on the array of steps will contain a "instance_directory" property + # that is the directory within the Project that's the step's working directory. + # + # "instance_directory": ".instance-00000000-0000-0000-0000-00000000000a", @abstractmethod def get_instance(self, *, instance_id: str) -> tuple[dict[str, Any], int]: From 95eafcbcddfa7c2f6a65b0324742233051740fa9 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Fri, 8 Aug 2025 10:48:02 +0100 Subject: [PATCH 12/57] feat: Creating instances now adds instance-directory property to step record --- tests/wapi_adapter.py | 11 +++++++++++ workflow/workflow_abc.py | 6 ------ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/tests/wapi_adapter.py b/tests/wapi_adapter.py index f604689..9d87e38 100644 --- a/tests/wapi_adapter.py +++ b/tests/wapi_adapter.py @@ -377,6 +377,17 @@ def create_instance(self, *, running_workflow_step_id: str) -> dict[str, Any]: with open(_INSTANCE_PICKLE_FILE, "wb") as pickle_file: Pickler(pickle_file).dump(instances) + + # Use the instance ID as the step's instance-directory (prefixing with '.') + with open(_RUNNING_WORKFLOW_STEP_PICKLE_FILE, "rb") as pickle_file: + running_workflow_step = Unpickler(pickle_file).load() + assert running_workflow_step_id in running_workflow_step + running_workflow_step[running_workflow_step_id][ + "instance_directory" + ] = f".{instance_id}" + with open(_RUNNING_WORKFLOW_STEP_PICKLE_FILE, "wb") as pickle_file: + Pickler(pickle_file).dump(running_workflow_step) + UnitTestWorkflowAPIAdapter.lock.release() return {"id": instance_id} diff --git a/workflow/workflow_abc.py b/workflow/workflow_abc.py index 0d7b1c8..2024fba 100644 --- a/workflow/workflow_abc.py +++ b/workflow/workflow_abc.py @@ -334,12 +334,6 @@ def get_workflow_steps_driving_this_step( # } # ] # } - # - # Additionally, if the step has started (an instance has been created) - # each entry on the array of steps will contain a "instance_directory" property - # that is the directory within the Project that's the step's working directory. 
- # - # "instance_directory": ".instance-00000000-0000-0000-0000-00000000000a", @abstractmethod def get_instance(self, *, instance_id: str) -> tuple[dict[str, Any], int]: From f37ea7417b4920324bbcffc759a27e1ff753d06a Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Fri, 8 Aug 2025 11:21:30 +0100 Subject: [PATCH 13/57] fix: get_instance() response type --- tests/wapi_adapter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/wapi_adapter.py b/tests/wapi_adapter.py index 9d87e38..322507c 100644 --- a/tests/wapi_adapter.py +++ b/tests/wapi_adapter.py @@ -287,7 +287,7 @@ def get_workflow_steps_driving_this_step( "steps": wf_response["steps"].copy(), }, 0 - def get_instance(self, *, instance_id: str) -> dict[str, Any]: + def get_instance(self, *, instance_id: str) -> tuple[dict[str, Any], int]: UnitTestWorkflowAPIAdapter.lock.acquire() with open(_INSTANCE_PICKLE_FILE, "rb") as pickle_file: instances = Unpickler(pickle_file).load() From 13e44705e370919f6917cc2e10203c349959f085 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Fri, 8 Aug 2025 11:22:09 +0100 Subject: [PATCH 14/57] test: Add tests for instance_directory --- tests/test_test_api_adapter.py | 82 ++++++++++++++++++++++++++++++++-- 1 file changed, 79 insertions(+), 3 deletions(-) diff --git a/tests/test_test_api_adapter.py b/tests/test_test_api_adapter.py index 88a5f61..a8ebc33 100644 --- a/tests/test_test_api_adapter.py +++ b/tests/test_test_api_adapter.py @@ -324,9 +324,20 @@ def test_get_running_workflow_step_with_prior_step(): def test_create_instance(): # Arrange utaa = UnitTestWorkflowAPIAdapter() + response = utaa.create_workflow(workflow_definition={"name": "blah"}) + response = utaa.create_running_workflow( + user_id="dlister", + workflow_id=response["id"], + project_id=TEST_PROJECT_ID, + variables={}, + ) + response, _ = utaa.create_running_workflow_step( + running_workflow_id=response["id"], step="step-1" + ) + rwfs_id = response["id"] # Act - response = utaa.create_instance(running_workflow_step_id="r-workflow-step-000") + response = utaa.create_instance(running_workflow_step_id=rwfs_id) # Assert assert "id" in response @@ -335,14 +346,79 @@ def test_create_instance(): def test_create_and_get_instance(): # Arrange utaa = UnitTestWorkflowAPIAdapter() - response = utaa.create_instance(running_workflow_step_id="r-workflow-step-000") + response = utaa.create_workflow(workflow_definition={"name": "blah"}) + response = utaa.create_running_workflow( + user_id="dlister", + workflow_id=response["id"], + project_id=TEST_PROJECT_ID, + variables={}, + ) + response, _ = utaa.create_running_workflow_step( + running_workflow_id=response["id"], step="step-1" + ) + rwfs_id = response["id"] + response = utaa.create_instance(running_workflow_step_id=rwfs_id) instance_id = response["id"] # Act response, _ = utaa.get_instance(instance_id=instance_id) # Assert - assert response["running_workflow_step_id"] == "r-workflow-step-000" + assert response["running_workflow_step_id"] == rwfs_id + + +def test_create_instance_and_get_step_instance_directory(): + # Arrange + utaa = UnitTestWorkflowAPIAdapter() + response = utaa.create_workflow(workflow_definition={"name": "blah"}) + response = utaa.create_running_workflow( + user_id="dlister", + workflow_id=response["id"], + project_id=TEST_PROJECT_ID, + variables={}, + ) + response, _ = utaa.create_running_workflow_step( + running_workflow_id=response["id"], step="step-1" + ) + rwfs_id = response["id"] + response = utaa.create_instance(running_workflow_step_id=rwfs_id) + i_id = 
response["id"] + + # Act + response, _ = utaa.get_running_workflow_step(running_workflow_step_id=rwfs_id) + + # Assert + assert "instance_directory" in response + assert response["instance_directory"] == f".{i_id}" + + +def test_create_instance_and_get_step_instance_directory_by_name(): + # Arrange + utaa = UnitTestWorkflowAPIAdapter() + response = utaa.create_workflow(workflow_definition={"name": "blah"}) + wf_id = response["id"] + response = utaa.create_running_workflow( + user_id="dlister", + workflow_id=wf_id, + project_id=TEST_PROJECT_ID, + variables={}, + ) + rwf_id = response["id"] + response, _ = utaa.create_running_workflow_step( + running_workflow_id=rwf_id, step="step-1" + ) + rwfs_id = response["id"] + response = utaa.create_instance(running_workflow_step_id=rwfs_id) + i_id = response["id"] + + # Act + response, _ = utaa.get_running_workflow_step_by_name( + running_workflow_id=rwf_id, name="step-1" + ) + + # Assert + assert "instance_directory" in response + assert response["instance_directory"] == f".{i_id}" def test_get_workflow_steps_driving_this_step_when_1st_step(): From 84866d0235b51777ae44fdb997d82e1f820410d1 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Mon, 11 Aug 2025 17:06:24 +0100 Subject: [PATCH 15/57] build: Add experimental json copy of the schema --- workflow/workflow-schema.json | 344 ++++++++++++++++++++++++++++++++++ 1 file changed, 344 insertions(+) create mode 100644 workflow/workflow-schema.json diff --git a/workflow/workflow-schema.json b/workflow/workflow-schema.json new file mode 100644 index 0000000..b70a0b4 --- /dev/null +++ b/workflow/workflow-schema.json @@ -0,0 +1,344 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Data Manager Workflow Schema", + "description": "The Schema for Data Manager Workflows", + "type": "object", + "properties": { + "kind": { + "const": "DataManagerWorkflow" + }, + "kind-version": { + "enum": [ + "2025.2" + ] + }, + "name": { + "$ref": "#/definitions/rfc1035-label-name" + }, + "description": { + "type": "string", + "description": "A description of the workflow" + }, + "steps": { + "type": "array", + "items": { + "$ref": "#/definitions/step" + } + }, + "variables": { + "type": "object", + "additionalProperties": true + }, + "variable-mapping": { + "type": "object", + "additionalProperties": false, + "properties": { + "inputs": { + "type": "array", + "items": { + "$ref": "#/definitions/workflow-input-parameter" + } + }, + "outputs": { + "type": "array", + "items": { + "$ref": "#/definitions/workflow-output-parameter" + } + }, + "options": { + "type": "array", + "items": { + "$ref": "#/definitions/workflow-option-parameter" + } + } + } + } + }, + "required": [ + "kind", + "kind-version", + "name", + "steps" + ], + "definitions": { + "rfc1035-label-name": { + "type": "string", + "pattern": "^[a-z][a-z0-9-]{,63}(? 
Date: Mon, 11 Aug 2025 17:16:18 +0100 Subject: [PATCH 16/57] style: Removed unnecessary json schema file --- workflow/workflow-schema.json | 344 ---------------------------------- 1 file changed, 344 deletions(-) delete mode 100644 workflow/workflow-schema.json diff --git a/workflow/workflow-schema.json b/workflow/workflow-schema.json deleted file mode 100644 index b70a0b4..0000000 --- a/workflow/workflow-schema.json +++ /dev/null @@ -1,344 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "title": "Data Manager Workflow Schema", - "description": "The Schema for Data Manager Workflows", - "type": "object", - "properties": { - "kind": { - "const": "DataManagerWorkflow" - }, - "kind-version": { - "enum": [ - "2025.2" - ] - }, - "name": { - "$ref": "#/definitions/rfc1035-label-name" - }, - "description": { - "type": "string", - "description": "A description of the workflow" - }, - "steps": { - "type": "array", - "items": { - "$ref": "#/definitions/step" - } - }, - "variables": { - "type": "object", - "additionalProperties": true - }, - "variable-mapping": { - "type": "object", - "additionalProperties": false, - "properties": { - "inputs": { - "type": "array", - "items": { - "$ref": "#/definitions/workflow-input-parameter" - } - }, - "outputs": { - "type": "array", - "items": { - "$ref": "#/definitions/workflow-output-parameter" - } - }, - "options": { - "type": "array", - "items": { - "$ref": "#/definitions/workflow-option-parameter" - } - } - } - } - }, - "required": [ - "kind", - "kind-version", - "name", - "steps" - ], - "definitions": { - "rfc1035-label-name": { - "type": "string", - "pattern": "^[a-z][a-z0-9-]{,63}(? Date: Mon, 11 Aug 2025 18:58:04 +0100 Subject: [PATCH 17/57] fix: Better rfc1035-label-name pattern --- workflow/workflow-schema.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/workflow-schema.yaml b/workflow/workflow-schema.yaml index 70a6ad3..475d3a1 100644 --- a/workflow/workflow-schema.yaml +++ b/workflow/workflow-schema.yaml @@ -65,7 +65,7 @@ definitions: # See https://kubernetes.io/docs/concepts/overview/working-with-objects/names/ rfc1035-label-name: type: string - pattern: ^[a-z][a-z0-9-]{,63}(?- A value compatible with Kubernetes variables to allow it to be used ins Pod Label From de34339a16e5b7cdc732ab8eb8a2a0d4773bac4b Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Mon, 11 Aug 2025 19:21:14 +0100 Subject: [PATCH 18/57] fix: Better rfc1035-label-name regex --- workflow/workflow-schema.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/workflow-schema.yaml b/workflow/workflow-schema.yaml index 475d3a1..651308e 100644 --- a/workflow/workflow-schema.yaml +++ b/workflow/workflow-schema.yaml @@ -65,7 +65,7 @@ definitions: # See https://kubernetes.io/docs/concepts/overview/working-with-objects/names/ rfc1035-label-name: type: string - pattern: ^[a-z][a-z0-9-]{,63}$(?- A value compatible with Kubernetes variables to allow it to be used ins Pod Label From e7adc1b536982dc5d3b9b0b8526da5ce4bfda090 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Mon, 11 Aug 2025 19:25:38 +0100 Subject: [PATCH 19/57] test: Fix decoder tests --- tests/test_decoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_decoder.py b/tests/test_decoder.py index f9ead02..ddac876 100644 --- a/tests/test_decoder.py +++ b/tests/test_decoder.py @@ -130,7 +130,7 @@ def test_workflow_name_with_spaces(): # Assert assert ( - error == "'workflow with spaces' does not match '^[a-z][a-z0-9-]{,63}(? 
Date: Fri, 15 Aug 2025 16:06:21 +0100
Subject: [PATCH 20/57] test: Fix test module name (for consistency)

---
 tests/{test_test_api_adapter.py => test_test_wapi_adapter.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tests/{test_test_api_adapter.py => test_test_wapi_adapter.py} (100%)

diff --git a/tests/test_test_api_adapter.py b/tests/test_test_wapi_adapter.py
similarity index 100%
rename from tests/test_test_api_adapter.py
rename to tests/test_test_wapi_adapter.py

From 39bb840888eee2a4305495dda689d72ad5379316 Mon Sep 17 00:00:00 2001
From: "Alan B. Christie" <29806285+alanbchristie@users.noreply.github.com>
Date: Fri, 15 Aug 2025 22:25:22 +0100
Subject: [PATCH 21/57] Remove unnecessary logic

---
 tests/instance_launcher.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/tests/instance_launcher.py b/tests/instance_launcher.py
index ab7cab2..3256c0b 100644
--- a/tests/instance_launcher.py
+++ b/tests/instance_launcher.py
@@ -76,19 +76,15 @@ def launch(self, launch_parameters: LaunchParameters) -> LaunchResult:

         os.makedirs(EXECUTION_DIRECTORY, exist_ok=True)

-        # We're passed a RunningWorkflowStep ID but a record is expected to have been
-        # created bt the caller, we simply create instance records.
-        response, _ = self._api_adapter.get_running_workflow_step(
-            running_workflow_step_id=launch_parameters.running_workflow_step_id
-        )
-        # Now simulate the creation of a Task and Instance record
+        # Create an Instance record (and dummy Task ID)
         response = self._api_adapter.create_instance(
             running_workflow_step_id=launch_parameters.running_workflow_step_id
         )
         instance_id = response["id"]
         task_id = "task-00000000-0000-0000-0000-000000000001"

-        # Apply variables to the step's Job command.
+        # Get the job definition.
+        # This is expected to exist in the tests/job-definitions directory.
         job, _ = self._api_adapter.get_job(
             collection=launch_parameters.specification["collection"],
             job=launch_parameters.specification["job"],
             version=launch_parameters.specification["version"],
         )
         assert job

-        # Now apply the variables to the command
+        # Now apply the provided variables to the command.
+        # The command may not need any, but we do the decoding anyway.
         decoded_command, status = job_decoder.decode(
             job["command"],
             launch_parameters.specification_variables,

From 2a6b708afd98c89b4093988857f6c331f82755aa Mon Sep 17 00:00:00 2001
From: Kalev Takkis
Date: Mon, 18 Aug 2025 12:51:34 +0100
Subject: [PATCH 22/57] fix: stashing

---
 tests/test_workflow_engine_examples.py | 4 +++-
 workflow/decoder.py                    | 10 ++++++++++
 workflow/workflow_engine.py            | 27 +++++++++++++++++---------
 3 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/tests/test_workflow_engine_examples.py b/tests/test_workflow_engine_examples.py
index 8e4208a..e2d23ae 100644
--- a/tests/test_workflow_engine_examples.py
+++ b/tests/test_workflow_engine_examples.py
@@ -1,6 +1,7 @@
 import os
 import time
 from datetime import datetime, timezone
+from pprint import pprint
 from typing import Any

 import pytest
@@ -458,7 +459,8 @@ def test_workflow_engine_simple_python_fanout(basic_engine):

     # Additional, detailed checks...
# Check we only have one RunningWorkflowStep, and it succeeded response = da.get_running_workflow_steps(running_workflow_id=r_wfid) - print("response", response) + print("response") + pprint(response) assert response["count"] == 2 assert response["running_workflow_steps"][0]["done"] diff --git a/workflow/decoder.py b/workflow/decoder.py index 3e12ecb..6947036 100644 --- a/workflow/decoder.py +++ b/workflow/decoder.py @@ -280,3 +280,13 @@ def set_step_variables( result |= options return result + + +def get_step_replication_param(*, step: dict[str, Any]) -> str | Any: + """Return step's replication info""" + replicator = step.get("replicate", None) + if replicator: + # 'using' is a dict but there can be only single value for now + replicator = list(replicator["using"].values())[0] + + return replicator diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index 672e9bb..a502893 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -25,6 +25,7 @@ import logging import sys from http import HTTPStatus +from pprint import pprint from typing import Any, Dict, Optional from decoder.decoder import TextEncoding, decode @@ -40,6 +41,7 @@ ) from .decoder import ( + get_step_replication_param, get_workflow_job_input_names_for_step, set_step_variables, workflow_step_has_outputs, @@ -466,11 +468,13 @@ def _validate_step_command( running_wf, _ = self._wapi_adapter.get_running_workflow( running_workflow_id=running_wf_id ) - print("running wf", running_wf) + print("running wf") + pprint(running_wf) workflow_id = running_wf["workflow"]["id"] workflow, _ = self._wapi_adapter.get_workflow(workflow_id=workflow_id) - print("workflow", workflow) + print("workflow") + pprint(workflow) # for step in workflow["steps"]: # if step["name"] in previous_step_names: @@ -556,10 +560,14 @@ def _launch( wf_step_data, _ = self._wapi_adapter.get_workflow_steps_driving_this_step( running_workflow_step_id=rwfs_id, ) - print("wf_step_data", wf_step_data) + print("wf_step_data") + pprint(wf_step_data) assert wf_step_data["caller_step_index"] >= 0 our_step_index: int = wf_step_data["caller_step_index"] + print("step in _launch:", step_name) + pprint(step) + # Now check the step command can be executed # (by trying to decoding the Job command). 
# @@ -585,11 +593,7 @@ def _launch( variables: dict[str, Any] = error_or_variables print("variables", variables) # find out if and by which parameter this step should be replicated - replicator = "" - if replicate := step.get("replicate", {}): - if using := replicate.get("using", {}): - # using is a dict but there can be only single value for now - replicator = list(using.values())[0] + replicator = get_step_replication_param(step=step) _LOGGER.info( "Launching step: RunningWorkflow=%s RunningWorkflowStep=%s step=%s" @@ -634,6 +638,10 @@ def _launch( # # 'running_workflow_step_inputs' # A list of Job input variable names + + print("variables") + pprint(variables) + inputs: list[str] = [] inputs.extend(iter(get_workflow_job_input_names_for_step(wf, step_name))) if replicator: @@ -645,7 +653,8 @@ def _launch( else: single_step_variables = [variables] - print("single step variables", single_step_variables) + print("single step variables") + pprint(single_step_variables) for params in single_step_variables: lp: LaunchParameters = LaunchParameters( From 340670e795c7e5d6329564930c627845591b814a Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Tue, 19 Aug 2025 12:10:09 +0100 Subject: [PATCH 23/57] fix: stashing --- tests/test_workflow_engine_examples.py | 18 ++++ tests/wapi_adapter.py | 3 +- .../shortcut-example-1.yaml | 4 +- .../simple-python-fanout.yaml | 17 --- .../simple-python-molprops-with-options.yaml | 100 +++++++++++++----- .../simple-python-molprops.yaml | 2 - workflow/decoder.py | 60 ++++++----- workflow/workflow_engine.py | 2 + 8 files changed, 128 insertions(+), 78 deletions(-) diff --git a/tests/test_workflow_engine_examples.py b/tests/test_workflow_engine_examples.py index e2d23ae..d4c1a6f 100644 --- a/tests/test_workflow_engine_examples.py +++ b/tests/test_workflow_engine_examples.py @@ -206,9 +206,11 @@ def test_workflow_engine_example_smiles_to_file(basic_engine): assert project_file_exists(output_file) +@pytest.mark.skip("Unrealistic test") def test_workflow_engine_shortcut_example_1(basic_engine): # Arrange md, da = basic_engine + # Make sure files that should be generated by the test # do not exist before we run the test. output_file_a = "a.sdf" @@ -234,9 +236,17 @@ def test_workflow_engine_shortcut_example_1(basic_engine): assert project_file_exists(output_file_b) +@pytest.mark.skip("temporary skip") def test_workflow_engine_simple_python_molprops(basic_engine): # Arrange md, da = basic_engine + + da.mock_get_running_workflow_step_output_values_for_output( + step_name="step2", + output_variable="outputFile", + output="step1.out.smi", + ) + # Make sure files that should be generated by the test # do not exist before we run the test. output_file_1 = "step1.out.smi" @@ -318,6 +328,13 @@ def test_workflow_engine_simple_python_molprops(basic_engine): def test_workflow_engine_simple_python_molprops_with_options(basic_engine): # Arrange md, da = basic_engine + + da.mock_get_running_workflow_step_output_values_for_output( + step_name="step1", + output_variable="outputFile", + output="step1.out.smi", + ) + # Make sure files that should be generated by the test # do not exist before we run the test. 
output_file_1 = "step1.out.smi" @@ -383,6 +400,7 @@ def test_workflow_engine_simple_python_molprops_with_options(basic_engine): "simple-python-molprops-with-options", { "candidateMolecules": input_file_1, + "outputFile": output_file_1, "rdkitPropertyName": "prop", "rdkitPropertyValue": 1.2, }, diff --git a/tests/wapi_adapter.py b/tests/wapi_adapter.py index a770f3b..1f1f6d0 100644 --- a/tests/wapi_adapter.py +++ b/tests/wapi_adapter.py @@ -448,7 +448,7 @@ def realise_outputs( # Methods not declared in the ABC def mock_get_running_workflow_step_output_values_for_output( - self, *, step_name: str, output_variable: str, output: list[str] + self, *, step_name: str, output_variable: str, output: list[str] | str ) -> None: """Sets the output response for a step. Limitation is that there can only be one record for each step name @@ -456,7 +456,6 @@ def mock_get_running_workflow_step_output_values_for_output( to check the output variable name matches.""" assert isinstance(step_name, str) assert isinstance(output_variable, str) - assert isinstance(output, list) UnitTestWorkflowAPIAdapter.lock.acquire() with open(_MOCK_STEP_OUTPUT_FILE, "rb") as pickle_file: diff --git a/tests/workflow-definitions/shortcut-example-1.yaml b/tests/workflow-definitions/shortcut-example-1.yaml index e5b719d..6a0ef31 100644 --- a/tests/workflow-definitions/shortcut-example-1.yaml +++ b/tests/workflow-definitions/shortcut-example-1.yaml @@ -14,7 +14,7 @@ steps: version: "1.0.0" outputs: - output: 'outputFile' - as: 'a.sdf' + # as: 'a.sdf' - name: example-1-step-2 description: The first step @@ -29,4 +29,4 @@ steps: output: 'outputFile' outputs: - output: 'outputFile' - as: 'b.sdf' + # as: 'b.sdf' diff --git a/tests/workflow-definitions/simple-python-fanout.yaml b/tests/workflow-definitions/simple-python-fanout.yaml index 01f576f..c0fd343 100644 --- a/tests/workflow-definitions/simple-python-fanout.yaml +++ b/tests/workflow-definitions/simple-python-fanout.yaml @@ -31,7 +31,6 @@ steps: workflow-input: candidateMolecules outputs: - output: outputFile - # as: chunk_*.smi - name: parallel-step description: Add some params @@ -52,19 +51,3 @@ steps: output: outputFile outputs: - output: outputFile - # as: parallel-step.out.smi - -# - name: final-step -# description: Collate results -# specification: -# collection: workflow-engine-unit-test-jobs -# job: concatenate -# version: "1.0.0" -# inputs: -# - input: inputFile -# from: -# step: parallel-step -# output: outputFile -# outputs: -# - output: outputFile -# # as: final-step.out.smi diff --git a/tests/workflow-definitions/simple-python-molprops-with-options.yaml b/tests/workflow-definitions/simple-python-molprops-with-options.yaml index 2fc1155..0e8874d 100644 --- a/tests/workflow-definitions/simple-python-molprops-with-options.yaml +++ b/tests/workflow-definitions/simple-python-molprops-with-options.yaml @@ -53,40 +53,82 @@ variables: minimum: 0 maximum: 1 -variable-mapping: - inputs: - - name: candidateMolecules - outputs: - - name: clusteredMolecules - from: - step: step2 - output: outputFile - options: - - name: rdkitPropertyName - default: name - as: - - option: name - step: step1 - - name: rdkitPropertyValue - as: - - option: value - step: step1 +# variable-mapping: +# inputs: +# - name: candidateMolecules +# outputs: +# - name: outputFile +# from: +# step: step1 +# output: outputFile +# - name: clusteredMolecules +# from: +# step: step1 +# output: outputFile +# options: +# - name: rdkitPropertyName +# default: name +# as: +# - option: name +# step: step1 +# - name: 
rdkitPropertyValue +# as: +# - option: value +# step: step1 steps: +# - name: step1 +# description: Add column 1 +# specification: +# collection: workflow-engine-unit-test-jobs +# job: rdkit-molprops +# version: "1.0.0" +# inputs: +# - input: inputFile +# from: +# workflow-input: candidateMolecules +# # outputs: +# # - output: outputFile + +# - name: step2 +# description: Add column 2 +# specification: +# collection: workflow-engine-unit-test-jobs +# job: cluster-butina +# version: "1.0.0" +# variables: +# name: "col2" +# value: "999" +# inputs: +# - input: inputFile +# from: +# step: step1 +# output: outputFile +# outputs: +# - output: outputFile + + - name: step1 description: Add column 1 specification: collection: workflow-engine-unit-test-jobs job: rdkit-molprops version: "1.0.0" - inputs: - - input: inputFile + variable-mapping: + - name: inputFile from: - workflow-input: candidateMolecules - outputs: - - output: outputFile - as: step1.out.smi + workflow-variable: candidateMolecules + - name: name + from: + workflow-variable: rdkitPropertyName + - name: value + from: + workflow-variable: rdkitPropertyValue + - name: outputFile + from: + workflow-variable: clusteredMolecules + - name: step2 description: Add column 2 @@ -97,11 +139,11 @@ steps: variables: name: "col2" value: "999" - inputs: - - input: inputFile + variable-mapping: + - name: inputFile from: step: step1 output: outputFile - outputs: - - output: outputFile - as: step2.out.smi + - name: outputFile + from: + workflow-variable: outputFile diff --git a/tests/workflow-definitions/simple-python-molprops.yaml b/tests/workflow-definitions/simple-python-molprops.yaml index dddb080..8abd637 100644 --- a/tests/workflow-definitions/simple-python-molprops.yaml +++ b/tests/workflow-definitions/simple-python-molprops.yaml @@ -29,7 +29,6 @@ steps: workflow-input: candidateMolecules outputs: - output: outputFile - as: step1.out.smi - name: step2 description: Add column 2 @@ -47,4 +46,3 @@ steps: output: outputFile outputs: - output: outputFile - as: step2.out.smi diff --git a/workflow/decoder.py b/workflow/decoder.py index 6947036..d03b314 100644 --- a/workflow/decoder.py +++ b/workflow/decoder.py @@ -4,6 +4,7 @@ """ import os +from pprint import pprint from typing import Any import jsonschema @@ -225,8 +226,10 @@ def set_step_variables( """ result = {} - print("ssv: wf vars", workflow_variables) - print("ssv: inputs", inputs) + print("ssv: wf vars:") + pprint(workflow_variables) + print("ssv: inputs:") + pprint(inputs) print("ssv: outputs", outputs) print("ssv: step_outputs", step_outputs) print("ssv: prev step outputs", previous_step_outputs) @@ -240,30 +243,35 @@ def set_step_variables( p_val = workflow_variables[val["workflow-input"]] result[p_key] = p_val elif "step" in val.keys(): - for out in previous_step_outputs: - if out["output"] == val["output"]: - # p_val = out["as"] - if step_outputs["output"]: - p_val = step_outputs["output"] - print("\n!!!!!!!!!!!!!if clause!!!!!!!!!!!!!!!!!!!!!\n") - print(p_val) - else: - # what do I need to do here?? - print("\n!!!!!!!!!!!!!else clause!!!!!!!!!!!!!!!!!!!!!\n") - print(out) - print(val) - - # this bit handles multiple inputs: if a step - # requires input from multiple steps, add them to - # the list in result dict. 
this is the reason for - # mypy ignore statements, mypy doesn't understand - # redefinition - if p_key in result: - if not isinstance(result[p_key], set): - result[p_key] = {result[p_key]} # type: ignore [assignment] - result[p_key].add(p_val) # type: ignore [attr-defined] - else: - result[p_key] = p_val + # this links the variable to previous step output + if previous_step_outputs: + for out in previous_step_outputs: + if out["output"] == val["output"]: + # p_val = out["as"] + if step_outputs["output"]: + p_val = step_outputs["output"] + print("\n!!!!!!!!!!!!!if clause!!!!!!!!!!!!!!!!!!!!!\n") + print(p_val) + else: + # what do I need to do here?? + print("\n!!!!!!!!!!!!!else clause!!!!!!!!!!!!!!!!!!!!!\n") + print(out) + print(val) + + # this bit handles multiple inputs: if a step + # requires input from multiple steps, add them to + # the list in result dict. this is the reason for + # mypy ignore statements, mypy doesn't understand + # redefinition + if p_key in result: + if not isinstance(result[p_key], set): + result[p_key] = {result[p_key]} # type: ignore [assignment] + result[p_key].add(p_val) # type: ignore [attr-defined] + else: + result[p_key] = p_val + else: + if val["output"] in workflow_variables: + result[p_key] = workflow_variables[val["output"]] for item in outputs: p_key = item["output"] diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index a502893..4736851 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -512,6 +512,8 @@ def _validate_step_command( except AssertionError: print("no output for step", running_workflow_step_id, k, v) + print("final prev step outputs", previous_step_outputs) + step_vars = set_step_variables( workflow=workflow, workflow_variables=all_variables, From e3ece79dedc54a59a6e29d4e33525bee5c6c7a83 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Tue, 19 Aug 2025 15:53:23 +0100 Subject: [PATCH 24/57] refactor: Major refactor (new variable-mapping schema) --- tests/test_decoder.py | 137 ++--------- ...est_workflow_validator_for_create_level.py | 24 +- .../test_workflow_validator_for_run_level.py | 78 +++---- .../test_workflow_validator_for_tag_level.py | 67 ++---- ...ate-step-input-output-variable-names.yaml} | 47 ++-- .../duplicate-workflow-variable-names.yaml | 50 ---- .../replicate-using-undeclared-input.yaml | 37 ++- .../shortcut-example-1.yaml | 18 +- .../simple-python-fanout.yaml | 34 +-- .../simple-python-molprops-with-options.yaml | 96 ++------ .../simple-python-molprops.yaml | 36 ++- .../simple-python-parallel.yaml | 52 ++--- .../step-specification-variable-names.yaml | 1 - .../workflow-options.yaml | 54 ----- workflow/decoder.py | 156 +++---------- workflow/workflow-schema.yaml | 217 ++++++------------ workflow/workflow_engine.py | 4 +- workflow/workflow_validator.py | 23 +- 18 files changed, 283 insertions(+), 848 deletions(-) rename tests/workflow-definitions/{duplicate-step-output-variable-names.yaml => duplicate-step-input-output-variable-names.yaml} (52%) delete mode 100644 tests/workflow-definitions/duplicate-workflow-variable-names.yaml delete mode 100644 tests/workflow-definitions/workflow-options.yaml diff --git a/tests/test_decoder.py b/tests/test_decoder.py index ddac876..5f5da9f 100644 --- a/tests/test_decoder.py +++ b/tests/test_decoder.py @@ -43,19 +43,6 @@ ) assert _SIMPLE_PYTHON_MOLPROPS_WITH_OPTIONS_WORKFLOW -_DUPLICATE_WORKFLOW_VARIABLE_NAMES_WORKFLOW_FILE: str = os.path.join( - os.path.dirname(__file__), - "workflow-definitions", - 
"duplicate-workflow-variable-names.yaml", -) -with open( - _DUPLICATE_WORKFLOW_VARIABLE_NAMES_WORKFLOW_FILE, "r", encoding="utf8" -) as workflow_file: - _DUPLICATE_WORKFLOW_VARIABLE_NAMES_WORKFLOW: Dict[str, Any] = yaml.safe_load( - workflow_file - ) -assert _DUPLICATE_WORKFLOW_VARIABLE_NAMES_WORKFLOW - _SIMPLE_PYTHON_PARALLEL_FILE: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", @@ -78,15 +65,6 @@ ) assert _STEP_SPECIFICATION_VARIABLE_NAMES_WORKFLOW -_WORKFLOW_OPTIONS_WORKFLOW_FILE: str = os.path.join( - os.path.dirname(__file__), - "workflow-definitions", - "workflow-options.yaml", -) -with open(_WORKFLOW_OPTIONS_WORKFLOW_FILE, "r", encoding="utf8") as workflow_file: - _WORKFLOW_OPTIONS: Dict[str, Any] = yaml.safe_load(workflow_file) -assert _WORKFLOW_OPTIONS - def test_validate_schema_for_minimal(): # Arrange @@ -144,7 +122,7 @@ def test_validate_schema_for_shortcut_example_1(): assert error is None -def test_validate_schema_for_python_simple_molprops(): +def test_validate_schema_for_simple_python_molprops(): # Arrange # Act @@ -164,16 +142,7 @@ def test_validate_schema_for_step_specification_variable_names(): assert error is None -def test_validate_schema_for_workflow_options(): - # Arrange - - # Act - error = decoder.validate_schema(_WORKFLOW_OPTIONS) - - # Assert - assert error is None - - +@pytest.mark.skip(reason="DO not support combination atm") def test_validate_schema_for_simple_python_parallel(): # Arrange @@ -188,7 +157,7 @@ def test_get_workflow_variables_for_smiple_python_molprops(): # Arrange # Act - wf_variables = decoder.get_variable_names(_SIMPLE_PYTHON_MOLPROPS_WORKFLOW) + wf_variables = decoder.get_workflow_variable_names(_SIMPLE_PYTHON_MOLPROPS_WORKFLOW) # Assert assert len(wf_variables) == 2 @@ -228,91 +197,6 @@ def test_get_workflow_steps(): assert steps[1]["name"] == "step2" -def test_get_workflow_variables_for_duplicate_variables(): - # Arrange - - # Act - names = decoder.get_variable_names(_DUPLICATE_WORKFLOW_VARIABLE_NAMES_WORKFLOW) - - # Assert - assert len(names) == 2 - assert names[0] == "x" - assert names[1] == "x" - - -def test_get_required_variable_names_for_simnple_python_molprops_with_options(): - # Arrange - - # Act - rqd_variables = decoder.get_required_variable_names( - _SIMPLE_PYTHON_MOLPROPS_WITH_OPTIONS_WORKFLOW - ) - - # Assert - assert len(rqd_variables) == 2 - assert "candidateMolecules" in rqd_variables - assert "rdkitPropertyValue" in rqd_variables - - -def test_set_variables_from_options_for_step_for_simnple_python_molprops_with_options(): - # Arrange - variables = { - "rdkitPropertyName": "propertyName", - "rdkitPropertyValue": "propertyValue", - } - - # Act - new_variables = decoder.set_variables_from_options_for_step( - _SIMPLE_PYTHON_MOLPROPS_WITH_OPTIONS_WORKFLOW, - variables, - "step1", - ) - - # Assert - assert len(new_variables) == 2 - assert "name" in new_variables - assert "value" in new_variables - assert new_variables["name"] == "propertyName" - assert new_variables["value"] == "propertyValue" - - -def test_get_workflow_inputs_for_step_with_name_step1(): - # Arrange - - # Act - inputs = decoder.get_workflow_job_input_names_for_step( - _SIMPLE_PYTHON_MOLPROPS_WITH_OPTIONS_WORKFLOW, "step1" - ) - - # Assert - assert len(inputs) == 1 - assert "inputFile" in inputs - - -def test_get_workflow_inputs_for_step_with_name_step2(): - # Arrange - - # Act - inputs = decoder.get_workflow_job_input_names_for_step( - _SIMPLE_PYTHON_MOLPROPS_WITH_OPTIONS_WORKFLOW, "step2" - ) - - # Assert - assert not inputs - - -def 
test_get_workflow_inputs_for_step_with_unkown_step_name(): - # Arrange - - # Act - inputs = decoder.get_workflow_job_input_names_for_step( - _SIMPLE_PYTHON_MOLPROPS_WITH_OPTIONS_WORKFLOW, "unknown" - ) - - # Assert - assert not inputs - - def test_get_workflow_outputs_for_step_with_name_step1(): # Arrange @@ -351,11 +235,16 @@ def test_get_workflow_outputs_for_step_with_unkown_step_name(): def test_get_step_input_variable_names_when_duplicates(): # Arrange + workflow_filename: str = os.path.join( + os.path.dirname(__file__), + "workflow-definitions", + "duplicate-step-input-output-variable-names.yaml", + ) + with open(workflow_filename, "r", encoding="utf8") as wf_file: + definition: Dict[str, Any] = yaml.safe_load(wf_file) # Act - inputs = decoder.get_step_input_variable_names( - _SIMPLE_PYTHON_PARALLEL_WORKFLOW, "final-step" - ) + inputs = decoder.get_step_input_variable_names(definition, "step-1") # Assert assert len(inputs) == 2 @@ -368,13 +257,13 @@ def test_get_step_output_variable_names_when_duplicates(): workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", - "duplicate-step-output-variable-names.yaml", + "duplicate-step-input-output-variable-names.yaml", ) with open(workflow_filename, "r", encoding="utf8") as wf_file: definition: Dict[str, Any] = yaml.safe_load(wf_file) # Act - outputs = decoder.get_step_output_variable_names(definition, "step-1") + outputs = decoder.get_step_output_variable_names(definition, "step-2") # Assert assert len(outputs) == 2 diff --git a/tests/test_workflow_validator_for_create_level.py b/tests/test_workflow_validator_for_create_level.py index ad429c9..18f621f 100644 --- a/tests/test_workflow_validator_for_create_level.py +++ b/tests/test_workflow_validator_for_create_level.py @@ -26,10 +26,10 @@ def test_validate_minimal(): def test_validate_example_nop_file(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "example-nop-fail.yaml" ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -46,10 +46,10 @@ def test_validate_example_nop_file(): def test_validate_example_smiles_to_file(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "example-smiles-to-file.yaml" ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -66,10 +66,10 @@ def test_validate_example_smiles_to_file(): def test_validate_example_two_step_nop(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "example-two-step-nop.yaml" ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -86,10 +86,10 @@ def test_validate_example_two_step_nop(): def test_validate_shortcut_example_1(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "shortcut-example-1.yaml" ) - with 
open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -106,10 +106,10 @@ def test_validate_shortcut_example_1(): def test_validate_simple_python_molprops(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "simple-python-molprops.yaml" ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -126,12 +126,12 @@ def test_validate_simple_python_molprops(): def test_validate_simple_python_molprops_with_options(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "simple-python-molprops-with-options.yaml", ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow diff --git a/tests/test_workflow_validator_for_run_level.py b/tests/test_workflow_validator_for_run_level.py index 2df1630..b1f6118 100644 --- a/tests/test_workflow_validator_for_run_level.py +++ b/tests/test_workflow_validator_for_run_level.py @@ -11,10 +11,10 @@ def test_validate_example_nop_file(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "example-nop-fail.yaml" ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -31,10 +31,10 @@ def test_validate_example_nop_file(): def test_validate_duplicate_step_names(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "duplicate-step-names.yaml" ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -51,10 +51,10 @@ def test_validate_duplicate_step_names(): def test_validate_example_smiles_to_file(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "example-smiles-to-file.yaml" ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -71,10 +71,10 @@ def test_validate_example_smiles_to_file(): def test_validate_example_two_step_nop(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "example-two-step-nop.yaml" ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -91,10 +91,10 @@ def test_validate_example_two_step_nop(): def 
test_validate_shortcut_example_1(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "shortcut-example-1.yaml" ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -111,10 +111,10 @@ def test_validate_shortcut_example_1(): def test_validate_simple_python_molprops(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "simple-python-molprops.yaml" ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow variables = {"candidateMolecules": "input.sdf", "clusteredMolecules": "output.sdf"} @@ -133,17 +133,19 @@ def test_validate_simple_python_molprops(): def test_validate_simple_python_molprops_with_options_when_missing_required(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "simple-python-molprops-with-options.yaml", ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow variables = { "candidateMolecules": "input.sdf", "clusteredMolecules": "output.sdf", + "outputFile": "results.sdf", + "rdkitPropertyName": "name", } # Act @@ -162,12 +164,12 @@ def test_validate_simple_python_molprops_with_options_when_missing_required(): def test_validate_simple_python_molprops_with_options(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "simple-python-molprops-with-options.yaml", ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow variables = { @@ -175,6 +177,7 @@ def test_validate_simple_python_molprops_with_options(): "clusteredMolecules": "output.sdf", "rdkitPropertyName": "col1", "rdkitPropertyValue": 123, + "outputFile": "results.sdf", } # Act @@ -191,10 +194,10 @@ def test_validate_simple_python_molprops_with_options(): def test_validate_simple_python_molprops_with_missing_input(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "simple-python-molprops.yaml" ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow variables = {"clusteredMolecules": "output.sdf"} @@ -213,36 +216,15 @@ def test_validate_simple_python_molprops_with_missing_input(): ] -def test_validate_duplicate_workflow_variable_names(): - # Arrange - workflow_file: str = os.path.join( - os.path.dirname(__file__), - "workflow-definitions", - "duplicate-workflow-variable-names.yaml", - ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: - workflow: dict[str, Any] = yaml.load(workflow_file, 
Loader=yaml.FullLoader) - assert workflow - - # Act - error = WorkflowValidator.validate( - level=ValidationLevel.TAG, - workflow_definition=workflow, - ) - - # Assert - assert error.error_num == 6 - assert error.error_msg == ["Duplicate workflow variable names found: x"] - - +@pytest.mark.skip("Unsupported workflow") def test_validate_simple_python_parallel(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "simple-python-parallel.yaml", ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -258,12 +240,12 @@ def test_validate_simple_python_parallel(): def test_validate_replicate_using_undeclared_input(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "replicate-using-undeclared-input.yaml", ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -282,12 +264,12 @@ def test_validate_replicate_using_undeclared_input(): def test_validate_duplicate_step_output_variable_names(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", - "duplicate-step-output-variable-names.yaml", + "duplicate-step-input-output-variable-names.yaml", ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -300,5 +282,5 @@ def test_validate_duplicate_step_output_variable_names(): # Assert assert error.error_num == 3 assert error.error_msg == [ - "Duplicate step output variable: outputFile (step=step-1)" + "Duplicate step output variable: outputFile (step=step-2)" ] diff --git a/tests/test_workflow_validator_for_tag_level.py b/tests/test_workflow_validator_for_tag_level.py index 4445502..8a5d264 100644 --- a/tests/test_workflow_validator_for_tag_level.py +++ b/tests/test_workflow_validator_for_tag_level.py @@ -11,10 +11,10 @@ def test_validate_example_nop_file(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "example-nop-fail.yaml" ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -31,10 +31,10 @@ def test_validate_example_nop_file(): def test_validate_duplicate_step_names(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "duplicate-step-names.yaml" ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -51,10 +51,10 @@ def test_validate_duplicate_step_names(): def test_validate_example_smiles_to_file(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = 
os.path.join( os.path.dirname(__file__), "workflow-definitions", "example-smiles-to-file.yaml" ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -71,10 +71,10 @@ def test_validate_example_smiles_to_file(): def test_validate_example_two_step_nop(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "example-two-step-nop.yaml" ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -91,10 +91,10 @@ def test_validate_example_two_step_nop(): def test_validate_shortcut_example_1(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "shortcut-example-1.yaml" ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -109,14 +109,15 @@ def test_validate_shortcut_example_1(): assert error.error_msg is None +@pytest.mark.skip("Unsupported workflow") def test_validate_simple_python_parallel(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "simple-python-parallel.yaml", ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -132,10 +133,10 @@ def test_validate_simple_python_parallel(): def test_validate_simple_python_molprops(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "simple-python-molprops.yaml" ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -152,12 +153,12 @@ def test_validate_simple_python_molprops(): def test_validate_simple_python_molprops_with_options(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "simple-python-molprops-with-options.yaml", ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -172,36 +173,14 @@ def test_validate_simple_python_molprops_with_options(): assert error.error_msg is None -def test_validate_duplicate_workflow_variable_names(): - # Arrange - workflow_file: str = os.path.join( - os.path.dirname(__file__), - "workflow-definitions", - "duplicate-workflow-variable-names.yaml", - ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: - workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) - assert workflow - - # Act - error = WorkflowValidator.validate( - level=ValidationLevel.TAG, - workflow_definition=workflow, - 
) - - # Assert - assert error.error_num == 6 - assert error.error_msg == ["Duplicate workflow variable names found: x"] - - def test_validate_replicate_using_undeclared_input(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "replicate-using-undeclared-input.yaml", ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -220,12 +199,12 @@ def test_validate_replicate_using_undeclared_input(): def test_validate_duplicate_step_output_variable_names(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", - "duplicate-step-output-variable-names.yaml", + "duplicate-step-input-output-variable-names.yaml", ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -238,5 +217,5 @@ def test_validate_duplicate_step_output_variable_names(): # Assert assert error.error_num == 3 assert error.error_msg == [ - "Duplicate step output variable: outputFile (step=step-1)" + "Duplicate step output variable: outputFile (step=step-2)" ] diff --git a/tests/workflow-definitions/duplicate-step-output-variable-names.yaml b/tests/workflow-definitions/duplicate-step-input-output-variable-names.yaml similarity index 52% rename from tests/workflow-definitions/duplicate-step-output-variable-names.yaml rename to tests/workflow-definitions/duplicate-step-input-output-variable-names.yaml index 5a371a2..bc0b761 100644 --- a/tests/workflow-definitions/duplicate-step-output-variable-names.yaml +++ b/tests/workflow-definitions/duplicate-step-input-output-variable-names.yaml @@ -3,17 +3,8 @@ kind: DataManagerWorkflow kind-version: "2025.2" name: duplicate-step-output-variable-names description: A workflow where step-1 has duplicate output variable names -variable-mapping: - inputs: - - name: x - outputs: - - name: y - from: - step: step-2 - output: outputFile steps: - - name: step-1 description: Add column 1 specification: @@ -23,18 +14,16 @@ steps: variables: name: "col1" value: 123 - inputs: - - input: inputFile - from: - workflow-input: candidateMolecules - - input: inputFile - from: - workflow-input: candidateMolecules - outputs: - - output: outputFile - as: __step1__out.smi - - output: outputFile - as: __step1__out.smi + variable-mapping: + - variable: inputFile + from-workflow-variable: + variable: candidateMolecules + - variable: inputFile + from-workflow-variable: + variable: candidateMolecules + in: + - inputFile + - inputFile - name: step-2 description: Add column 2 @@ -45,11 +34,11 @@ steps: variables: name: "col2" value: "999" - inputs: - - input: inputFile - from: - step: step1 - output: outputFile - outputs: - - output: outputFile - as: __step2__out.smi + variable-mapping: + - variable: inputFile + from-step: + name: step1 + variable: outputFile + out: + - outputFile + - outputFile diff --git a/tests/workflow-definitions/duplicate-workflow-variable-names.yaml b/tests/workflow-definitions/duplicate-workflow-variable-names.yaml deleted file mode 100644 index f524c44..0000000 --- a/tests/workflow-definitions/duplicate-workflow-variable-names.yaml +++ /dev/null @@ -1,50 +0,0 @@ ---- -kind: 
DataManagerWorkflow -kind-version: "2025.2" -name: duplicate-workflow-variable-names -description: A workflow with a duplicate variable name in the input and output -variable-mapping: - inputs: - - name: x - outputs: - - name: x - from: - step: step2 - output: outputFile - -steps: - -- name: step1 - description: Add column 1 - specification: - collection: workflow-engine-unit-test-jobs - job: rdkit-molprops - version: "1.0.0" - variables: - name: "col1" - value: 123 - inputs: - - input: inputFile - from: - workflow-input: candidateMolecules - outputs: - - output: outputFile - as: __step1__out.smi - -- name: step2 - description: Add column 2 - specification: - collection: workflow-engine-unit-test-jobs - job: cluster-butina - version: "1.0.0" - variables: - name: "col2" - value: "999" - inputs: - - input: inputFile - from: - step: step1 - output: outputFile - outputs: - - output: outputFile - as: __step2__out.smi diff --git a/tests/workflow-definitions/replicate-using-undeclared-input.yaml b/tests/workflow-definitions/replicate-using-undeclared-input.yaml index 883ec62..488ddba 100644 --- a/tests/workflow-definitions/replicate-using-undeclared-input.yaml +++ b/tests/workflow-definitions/replicate-using-undeclared-input.yaml @@ -3,17 +3,8 @@ kind: DataManagerWorkflow kind-version: "2025.2" name: replicate-using-undeclared-input description: A workflow that replicates from a variable that's not declared -variable-mapping: - inputs: - - name: x - outputs: - - name: y - from: - step: step2 - output: outputFile steps: - - name: step-1 description: Add column 1 specification: @@ -23,13 +14,12 @@ steps: variables: name: "col1" value: 123 - inputs: - - input: inputFile - from: - workflow-input: candidateMolecules - outputs: - - output: outputFile - as: __step-1__out.smi + variable-mapping: + - variable: inputFile + from-workflow-variable: + variable: candidateMolecules + in: + - outputFile - name: step-2 description: Add column 2 @@ -43,11 +33,10 @@ steps: replicate: using: input: y - inputs: - - input: inputFile - from: - step: step-1 - output: outputFile - outputs: - - output: outputFile - as: __step-2__out.smi + variable-mapping: + - variable: inputFile + from-step: + name: step-1 + variable: outputFile + out: + - outputFile diff --git a/tests/workflow-definitions/shortcut-example-1.yaml b/tests/workflow-definitions/shortcut-example-1.yaml index 6a0ef31..0b6c2c3 100644 --- a/tests/workflow-definitions/shortcut-example-1.yaml +++ b/tests/workflow-definitions/shortcut-example-1.yaml @@ -12,9 +12,6 @@ steps: collection: workflow-engine-unit-test-jobs job: shortcut-example-1-process-a version: "1.0.0" - outputs: - - output: 'outputFile' - # as: 'a.sdf' - name: example-1-step-2 description: The first step @@ -22,11 +19,10 @@ steps: collection: workflow-engine-unit-test-jobs job: shortcut-example-1-process-b version: "1.0.0" - inputs: - - input: 'inputFile' - from: - step: example-1-step-1 - output: 'outputFile' - outputs: - - output: 'outputFile' - # as: 'b.sdf' + variable-mapping: + - variable: inputFile + from-step: + name: example-1-step-1 + variable: outputFile + out: + - outputFile diff --git a/tests/workflow-definitions/simple-python-fanout.yaml b/tests/workflow-definitions/simple-python-fanout.yaml index c0fd343..9e2042f 100644 --- a/tests/workflow-definitions/simple-python-fanout.yaml +++ b/tests/workflow-definitions/simple-python-fanout.yaml @@ -4,18 +4,8 @@ kind-version: "2025.2" name: python-workflow description: >- A simple parallel workflow. 
Input is split into N chunks and N processes of the same job is started -variable-mapping: - inputs: - - name: candidateMolecules - outputs: - - name: clusteredMolecules - from: - step: final-step - output: outputFile - steps: - - name: first-step description: Create inputs specification: @@ -25,12 +15,10 @@ steps: variables: name: "count" value: "1" - inputs: - - input: inputFile - from: - workflow-input: candidateMolecules - outputs: - - output: outputFile + variable-mapping: + - variable: inputFile + from-workflow-variable: + variable: candidateMolecules - name: parallel-step description: Add some params @@ -44,10 +32,10 @@ steps: replicate: using: input: inputFile - inputs: - - input: inputFile - from: - step: first-step - output: outputFile - outputs: - - output: outputFile + variable-mapping: + - variable: inputFile + from-step: + name: first-step + variable: outputFile + out: + - outputFile diff --git a/tests/workflow-definitions/simple-python-molprops-with-options.yaml b/tests/workflow-definitions/simple-python-molprops-with-options.yaml index 0e8874d..696855d 100644 --- a/tests/workflow-definitions/simple-python-molprops-with-options.yaml +++ b/tests/workflow-definitions/simple-python-molprops-with-options.yaml @@ -53,62 +53,7 @@ variables: minimum: 0 maximum: 1 -# variable-mapping: -# inputs: -# - name: candidateMolecules -# outputs: -# - name: outputFile -# from: -# step: step1 -# output: outputFile -# - name: clusteredMolecules -# from: -# step: step1 -# output: outputFile -# options: -# - name: rdkitPropertyName -# default: name -# as: -# - option: name -# step: step1 -# - name: rdkitPropertyValue -# as: -# - option: value -# step: step1 - steps: - -# - name: step1 -# description: Add column 1 -# specification: -# collection: workflow-engine-unit-test-jobs -# job: rdkit-molprops -# version: "1.0.0" -# inputs: -# - input: inputFile -# from: -# workflow-input: candidateMolecules -# # outputs: -# # - output: outputFile - -# - name: step2 -# description: Add column 2 -# specification: -# collection: workflow-engine-unit-test-jobs -# job: cluster-butina -# version: "1.0.0" -# variables: -# name: "col2" -# value: "999" -# inputs: -# - input: inputFile -# from: -# step: step1 -# output: outputFile -# outputs: -# - output: outputFile - - - name: step1 description: Add column 1 specification: @@ -116,19 +61,18 @@ steps: job: rdkit-molprops version: "1.0.0" variable-mapping: - - name: inputFile - from: - workflow-variable: candidateMolecules - - name: name - from: - workflow-variable: rdkitPropertyName - - name: value - from: - workflow-variable: rdkitPropertyValue - - name: outputFile - from: - workflow-variable: clusteredMolecules - + - variable: inputFile + from-workflow-variable: + variable: candidateMolecules + - variable: name + from-workflow-variable: + variable: rdkitPropertyName + - variable: value + from-workflow-variable: + variable: rdkitPropertyValue + - variable: outputFile + from-workflow-variable: + variable: clusteredMolecules - name: step2 description: Add column 2 @@ -140,10 +84,12 @@ steps: name: "col2" value: "999" variable-mapping: - - name: inputFile - from: - step: step1 - output: outputFile - - name: outputFile - from: - workflow-variable: outputFile + - variable: inputFile + from-step: + name: step1 + variable: outputFile + - variable: outputFile + from-workflow-variable: + variable: outputFile + out: + - outputFile diff --git a/tests/workflow-definitions/simple-python-molprops.yaml b/tests/workflow-definitions/simple-python-molprops.yaml index 8abd637..5a0797e 
100644 --- a/tests/workflow-definitions/simple-python-molprops.yaml +++ b/tests/workflow-definitions/simple-python-molprops.yaml @@ -3,17 +3,8 @@ kind: DataManagerWorkflow kind-version: "2025.2" name: python-workflow description: A simple python experimental workflow -variable-mapping: - inputs: - - name: candidateMolecules - outputs: - - name: clusteredMolecules - from: - step: step2 - output: outputFile steps: - - name: step1 description: Add column 1 specification: @@ -23,12 +14,10 @@ steps: variables: name: "col1" value: 123 - inputs: - - input: inputFile - from: - workflow-input: candidateMolecules - outputs: - - output: outputFile + variable-mapping: + - variable: inputFile + from-workflow-variable: + variable: candidateMolecules - name: step2 description: Add column 2 @@ -39,10 +28,13 @@ steps: variables: name: "col2" value: "999" - inputs: - - input: inputFile - from: - step: step1 - output: outputFile - outputs: - - output: outputFile + variable-mapping: + - variable: inputFile + from-step: + name: step1 + variable: outputFile + - variable: outputFile + from-workflow-variable: + variable: clusteredMolecules + out: + - outputFile diff --git a/tests/workflow-definitions/simple-python-parallel.yaml b/tests/workflow-definitions/simple-python-parallel.yaml index e620cda..1137413 100644 --- a/tests/workflow-definitions/simple-python-parallel.yaml +++ b/tests/workflow-definitions/simple-python-parallel.yaml @@ -3,18 +3,8 @@ kind: DataManagerWorkflow kind-version: "2025.2" name: python-workflow description: A simple branching workflow -variable-mapping: - inputs: - - name: candidateMolecules - outputs: - - name: clusteredMolecules - from: - step: final-step - output: outputFile - steps: - - name: first-step description: Create inputs specification: @@ -24,13 +14,10 @@ steps: variables: name: "unnecessary" value: "0" - inputs: - - input: inputFile - from: - workflow-input: candidateMolecules - outputs: - - output: outputFile - as: first-step.out.smi + variable-mapping: + - variable: inputFile + from-workflow-variable: + variable: candidateMolecules - name: parallel-step-a description: Add some params @@ -41,14 +28,11 @@ steps: variables: name: "desc1" value: "777" - inputs: - - input: inputFile - from: - step: first-step - output: outputFile - outputs: - - output: outputFile - as: parallel-step-a.out.smi + variable-mapping: + - variable: inputFile + from-step: + name: first-step + variable: outputFile - name: parallel-step-b description: Add some other params @@ -59,14 +43,11 @@ steps: variables: name: "desc2" value: "999" - inputs: - - input: inputFile - from: - step: first-step - output: outputFile - outputs: - - output: outputFile - as: parallel-step-b.out.smi + variable-mapping: + - variable: inputFile + from-step: + name: first-step + variable: outputFile - name: final-step description: Collate results @@ -83,6 +64,5 @@ steps: from: step: parallel-step-b output: outputFile - outputs: - - output: outputFile - as: final-step.out.smi + out: + - outputFile diff --git a/tests/workflow-definitions/step-specification-variable-names.yaml b/tests/workflow-definitions/step-specification-variable-names.yaml index e899b7f..99ae052 100644 --- a/tests/workflow-definitions/step-specification-variable-names.yaml +++ b/tests/workflow-definitions/step-specification-variable-names.yaml @@ -5,7 +5,6 @@ name: step-variables description: Test a lot of variables whose format is supported steps: - - name: step-1 specification: collection: a diff --git a/tests/workflow-definitions/workflow-options.yaml 
b/tests/workflow-definitions/workflow-options.yaml deleted file mode 100644 index 9e742fe..0000000 --- a/tests/workflow-definitions/workflow-options.yaml +++ /dev/null @@ -1,54 +0,0 @@ ---- -kind: DataManagerWorkflow -kind-version: "2025.2" -name: workflow-options -description: Illustrate the use of workflow options -variable-mapping: - options: - - name: variableWithoutDefault - as: - - option: variable1 - step: step-1 - - option: variable2 - step: step-2 - - name: variableWithIntegerDefault - default: 7 - as: - - option: variable3 - step: step-1 - - name: variableWithIntegerDefaultAndRange - default: 7 - minimum: 1 - maximum: 8 - as: - - option: variable4 - step: step-1 - - name: variableWithFloatDefault - default: 1.0 - as: - - option: variable5 - step: step-1 - - name: variableWithBooleanDefault - default: true - as: - - option: variable6 - step: step-1 - - name: variableWithStringDefault - default: Hello, World! - as: - - option: variable7 - step: step-1 - -steps: - -- name: step-1 - specification: - collection: a - job: b - version: '1.0.0' - -- name: step-2 - specification: - collection: a - job: b - version: '1.0.0' diff --git a/workflow/decoder.py b/workflow/decoder.py index d03b314..0fa00a5 100644 --- a/workflow/decoder.py +++ b/workflow/decoder.py @@ -63,23 +63,18 @@ def get_description(definition: dict[str, Any]) -> str | None: return definition.get("description") -def get_variable_names(definition: dict[str, Any]) -> list[str]: +def get_workflow_variable_names(definition: dict[str, Any]) -> set[str]: """Given a Workflow definition this function returns all the names of the - variables defined at the workflow level. These are the 'names' for inputs, - outputs and options. This function DOES NOT de-duplicate names, - that is the role of the validator.""" - wf_variable_names: list[str] = [] - variables: dict[str, Any] | None = definition.get("variable-mapping") - if variables: - wf_variable_names.extend( - input_variable["name"] for input_variable in variables.get("inputs", []) - ) - wf_variable_names.extend( - output_variable["name"] for output_variable in variables.get("outputs", []) - ) - wf_variable_names.extend( - option_variable["name"] for option_variable in variables.get("options", []) - ) + variables that need to be defined at the workflow level. These are the 'variables' + used in every step's variable-mapping block. + """ + wf_variable_names: set[str] = set() + steps: list[dict[str, Any]] = get_steps(definition) + for step in steps: + if v_map := step.get("variable-mapping"): + for v in v_map: + if "from-workflow-variable" in v: + wf_variable_names.add(v["from-workflow-variable"]["variable"]) return wf_variable_names @@ -87,128 +82,41 @@ def get_step_output_variable_names( definition: dict[str, Any], step_name: str ) -> list[str]: """Given a Workflow definition and a Step name this function returns all the names - of the output variables defined at the Step level. This function DOES NOT - de-duplicate names, that is the role of the validator.""" + of the output variables defined at the Step level. 
These are the names + of variables that have files associated with them that need copying to + the Project directory (from the Instance).""" variable_names: list[str] = [] steps: list[dict[str, Any]] = get_steps(definition) for step in steps: if step["name"] == step_name: - variable_names.extend( - output["output"] for output in step.get("outputs", []) - ) + variable_names.extend(step.get("out", [])) return variable_names def get_step_input_variable_names( definition: dict[str, Any], step_name: str ) -> list[str]: - """Given a Workflow definition and a Step name (expected to exist) - this function returns all the names of the input - variables defined at the step level.""" + """Given a Workflow definition and a Step name this function returns all the names + of the input variables defined at the Step level. These are the names + of variables that have files associated with them that need copying to + the Instance directory (from the Project).""" variable_names: list[str] = [] steps: list[dict[str, Any]] = get_steps(definition) for step in steps: if step["name"] == step_name: - variable_names.extend(input["input"] for input in step.get("inputs", [])) + variable_names.extend(step.get("in", [])) return variable_names -def get_workflow_job_input_names_for_step( - definition: dict[str, Any], name: str -) -> list[str]: - """Given a Workflow definition and a step name we return a list of step Job input - variable names the step expects. To do this we iterate through the step's - inputs to find those that are declared 'from->workflow-input'.""" - inputs: list[str] = [] - for step in definition.get("steps", {}): - if step["name"] == name and "inputs" in step: - # Find all the workflow inputs. - # This gives us the name of the workflow input variable - # and the name of the step input (Job) variable. - inputs.extend( - step_input["input"] - for step_input in step["inputs"] - if "from" in step_input and "workflow-input" in step_input["from"] - ) - return inputs - - def workflow_step_has_outputs(definition: dict[str, Any], name: str) -> bool: """Given a Workflow definition and a step name we return a boolean - that is true if the step produces outputs.""" - wf_outputs = definition.get("variable-mapping", {}).get("outputs", {}) - return any( - "from" in output and "step" in output["from"] and output["from"]["step"] == name - for output in wf_outputs + that is true if the step produces outputs. This requires inspection + of the 'as-yet-unused' variables block.""" + return ( + len(get_step_output_variable_names(definition=definition, step_name=name)) > 0 ) -def set_variables_from_options_for_step( - definition: dict[str, Any], variables: dict[str, Any], step_name: str -) -> dict[str, Any]: - """Given a Workflow definition, an existing map of variables and values, - and a step name this function returns a new set of variables by adding - variables and values that are required for the step that have been defined in the - workflow's variables->options block. - - As an example, the following option, which is used if the step name is 'step1', - expects 'rdkitPropertyName' to exist in the current set of variables, - and should be copied into the new set of variables using the key 'propertyName' - and value that is the same as the one provided in the original 'rdkitPropertyName': - - - name: rdkitPropertyName - default: propertyName - as: - - option: propertyName - step: step1 - - And ... in the above example ... 
if the input variables map - is {"rdkitPropertyName": "rings"} then the output map would be - {"rdkitPropertyName": "rings", "propertyName": "rings"} - - The function returns a new variable map, with and an optional error string on error. - """ - - assert isinstance(definition, dict) - assert step_name - - result = {} - options = definition.get("variable-mapping", {}).get("options", []) - - for opt in options: - for step_alias in opt["as"]: - if step_alias["step"] == step_name: - result[step_alias["option"]] = variables[opt["name"]] - # can break the loop because a variable can be a step - # variable only once - break - - # Success... - return result - - -def get_required_variable_names(definition: dict[str, Any]) -> list[str]: - """Given a Workflow definition this function returns all the names of the - variables that are required to be defined when it is RUN - i.e. - all those the user needs to provide.""" - required_variables: list[str] = [] - variables: dict[str, Any] | None = definition.get("variable-mapping") - if variables: - # All inputs are required (no defaults atm)... - required_variables.extend( - input_variable["name"] for input_variable in variables.get("inputs", []) - ) - # Options without defaults are required... - # It is the role of the engine to provide the actual default for those - # that have defaults but no user-defined value. - required_variables.extend( - option_variable["name"] - for option_variable in variables.get("options", []) - if "default" not in option_variable - ) - return required_variables - - def set_step_variables( *, workflow: dict[str, Any], @@ -224,6 +132,8 @@ def set_step_variables( Inputs are defined in step definition but their values may come from previous step outputs. """ + assert workflow + result = {} print("ssv: wf vars:") @@ -280,13 +190,13 @@ def set_step_variables( p_val = "somefile.smi" result[p_key] = p_val - options = set_variables_from_options_for_step( - definition=workflow, - variables=workflow_variables, - step_name=step_name, - ) - - result |= options + # options = set_variables_from_options_for_step( + # definition=workflow, + # variables=workflow_variables, + # step_name=step_name, + # ) + # + # result |= options return result diff --git a/workflow/workflow-schema.yaml b/workflow/workflow-schema.yaml index c839158..1896a76 100644 --- a/workflow/workflow-schema.yaml +++ b/workflow/workflow-schema.yaml @@ -1,5 +1,5 @@ --- -# The JSONSchema for 'Workflow' YAML files. +# The schema for 'Workflow' YAML files. # # See https://json-schema.org/understanding-json-schema/index.html @@ -33,24 +33,6 @@ properties: # and, like Jobs, has no current schema so we permit anything here. type: object additionalProperties: true - variable-mapping: - # Workflow-specific variable declarations, - # used (at the moment) to map workflow variables to steps. - type: object - additionalProperties: false - properties: - inputs: - type: array - items: - $ref: "#/definitions/workflow-input-parameter" - outputs: - type: array - items: - $ref: "#/definitions/workflow-output-parameter" - options: - type: array - items: - $ref: "#/definitions/workflow-option-parameter" required: - kind - kind-version @@ -73,102 +55,10 @@ definitions: # What does a Job specification template variable look like? # The values found in Jinja variables like '{{ x }}'. # Stuff like 'candidateMolecules' or 'clustered_molecules' - template-variable-name: + variable-name: type: string pattern: ^[a-zA-Z_][a-zA-Z0-9_]*$ - # What does a filename look like? 
- # We do not (at the moment) permit spaces! - file-name: - type: string - pattern: ^[a-zA-Z0-9._-]+$ - - # An input parameter has a name and a (MIME) type. - workflow-input-parameter: - type: object - additionalProperties: false - properties: - name: - $ref: '#/definitions/template-variable-name' - required: - - name - - # A workflow output parameter is essentially a file - # taken from the output of a step with a default (as) value. - workflow-output-parameter: - type: object - additionalProperties: false - properties: - name: - $ref: '#/definitions/template-variable-name' - from: - $ref: '#/definitions/from-step-output' - required: - - name - - # Declaration of a step option value from a workflow option (variable) - as-step-option: - type: object - additionalProperties: false - properties: - option: - $ref: '#/definitions/template-variable-name' - step: - $ref: '#/definitions/rfc1035-label-name' - required: - - option - - step - - - # Declaration of a value from a workflow input (variable) - from-workflow-input: - type: object - additionalProperties: false - properties: - workflow-input: - $ref: '#/definitions/template-variable-name' - required: - - workflow-input - - # Declaration of a value from another step - from-step-output: - type: object - additionalProperties: false - properties: - step: - $ref: '#/definitions/rfc1035-label-name' - output: - $ref: '#/definitions/template-variable-name' - required: - - step - - output - - # A workflow option used as a step option - workflow-option-parameter: - type: object - additionalProperties: false - properties: - name: - $ref: '#/definitions/template-variable-name' - description: - type: string - default: - oneOf: - - type: string - - type: number - - type: boolean - minimum: - type: number - maximum: - type: number - as: - type: array - items: - $ref: '#/definitions/as-step-option' - required: - - name - - as - # A step replication control variable # that is based on a step input variable replicate-using-input: @@ -176,48 +66,52 @@ definitions: additionalProperties: false properties: input: - $ref: '#/definitions/template-variable-name' + $ref: '#/definitions/variable-name' required: - input - # A Step input (from an output of a prior step) - step-input-from-step: + # A Step variable + # (whose value is derived from a variable used in a prior step) + step-variable-from-step: type: object additionalProperties: false properties: - input: - $ref: '#/definitions/template-variable-name' - from: - $ref: '#/definitions/from-step-output' + variable: + $ref: '#/definitions/variable-name' + from-step: + type: object + additionalProperties: false + properties: + name: + $ref: '#/definitions/rfc1035-label-name' + variable: + $ref: '#/definitions/variable-name' + required: + - name + - variable required: - - input + - variable + - from-step - # A Step input (from a workflow input) - step-input-from-workflow: + # A Step variable + # (whose value is derived from a workflow variable) + step-variable-from-workflow: type: object additionalProperties: false properties: - input: - $ref: '#/definitions/template-variable-name' - from: - $ref: '#/definitions/from-workflow-input' + variable: + $ref: '#/definitions/variable-name' + from-workflow-variable: + type: object + additionalProperties: false + properties: + variable: + $ref: '#/definitions/variable-name' + required: + - variable required: - - input - - from - - # A Step output (with an 'as' - a declared value) - # step-output-as: - # type: object - # additionalProperties: false - # properties: - # output: - 
# $ref: '#/definitions/template-variable-name' - # as: - # $ref: '#/definitions/file-name' - # required: - # - output - # - as - + - variable + - from-workflow-variable # A step specification variable # (there must be at least one if a variables block is defined). @@ -260,11 +154,18 @@ definitions: additionalProperties: false properties: name: + # A unique name for the step $ref: '#/definitions/rfc1035-label-name' description: + # An optional description type: string description: A description of the step specification: + # The step Job specification. + # This MUST define a 'collection', a 'job', and a 'version'. + # 'variables' (a map of name and value) can also be provided. + # The format of this is essentially identical to the specification + # used when a Job is launched via the DM API. $ref: '#/definitions/step-specification' replicate: # Used to indicate one input variable that is used to replicate/spawn @@ -274,17 +175,35 @@ properties: using: $ref: '#/definitions/replicate-using-input' - inputs: + variable-mapping: + # The map of the source of the step's variables. + # All variables the step needs (that aren't already in the specification) + # need to be declared here. They either come "from" a prior step + # or are expected in the running workflow variables. Here we simply + # associate every required variable with a source. type: array items: anyOf: - - $ref: "#/definitions/step-input-from-step" - - $ref: "#/definitions/step-input-from-workflow" - outputs: + - $ref: "#/definitions/step-variable-from-step" + - $ref: "#/definitions/step-variable-from-workflow" + minItems: 1 + in: + # An optional list of the step variables that are inputs. + # These are typically files, expected to be present in the Project directory, + # that need to be copied (by the DM) into the step's instance directory. + type: array + items: + $ref: '#/definitions/variable-name' + minItems: 1 + out: + # An optional list of the step variables that are outputs. 
+ # These are typically files, expected to be present in the Step Instance directory, + # when it has finished (successfully), that need to be copied (by the DM) + # into the Project directory via "realise_outputs()". type: array - # items: - # anyOf: - # - $ref: "#/definitions/step-output-as" + items: + $ref: '#/definitions/variable-name' + minItems: 1 required: - name - specification diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index 4736851..e1f5c1b 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -41,8 +41,8 @@ ) from .decoder import ( + get_step_input_variable_names, get_step_replication_param, - get_workflow_job_input_names_for_step, set_step_variables, workflow_step_has_outputs, ) @@ -645,7 +645,7 @@ def _launch( pprint(variables) inputs: list[str] = [] - inputs.extend(iter(get_workflow_job_input_names_for_step(wf, step_name))) + inputs.extend(iter(get_step_input_variable_names(wf, step_name))) if replicator: single_step_variables = [] for replicating_param in variables[replicator]: diff --git a/workflow/workflow_validator.py b/workflow/workflow_validator.py index 6324bd5..1efd18c 100644 --- a/workflow/workflow_validator.py +++ b/workflow/workflow_validator.py @@ -5,11 +5,10 @@ from typing import Any from .decoder import ( - get_required_variable_names, get_step_input_variable_names, get_step_output_variable_names, get_steps, - get_variable_names, + get_workflow_variable_names, validate_schema, ) @@ -113,24 +112,6 @@ def _validate_tag_level( error_num=2, error_msg=[f"Duplicate step names found: {', '.join(duplicate_names)}"], ) - # Workflow variables must be unique. - duplicate_names.clear() - variable_names.clear() - wf_variable_names: list[str] = get_variable_names(workflow_definition) - for wf_variable_name in wf_variable_names: - if ( - wf_variable_name not in duplicate_names - and wf_variable_name in variable_names - ): - duplicate_names.add(wf_variable_name) - variable_names.add(wf_variable_name) - if duplicate_names: - return ValidationResult( - error_num=6, - error_msg=[ - f"Duplicate workflow variable names found: {', '.join(duplicate_names)}" - ], - ) # For each 'replicating' step the replicating variable # must be declared in the step. for step in get_steps(workflow_definition): @@ -163,7 +144,7 @@ def _validate_run_level( assert workflow_definition # We must have values for all the variables defined in the workflow. - wf_variables: list[str] = get_required_variable_names(workflow_definition) + wf_variables: set[str] = get_workflow_variable_names(workflow_definition) missing_values: list[str] = [] missing_values.extend( wf_variable From f9d4aca320a7cd8b7029010914e1db761f3f4506 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Tue, 19 Aug 2025 16:27:27 +0100 Subject: [PATCH 25/57] refactor: from-workflow-variable becomes from-workflow --- workflow/workflow-schema.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflow/workflow-schema.yaml b/workflow/workflow-schema.yaml index 1896a76..910dc66 100644 --- a/workflow/workflow-schema.yaml +++ b/workflow/workflow-schema.yaml @@ -101,7 +101,7 @@ definitions: properties: variable: $ref: '#/definitions/variable-name' - from-workflow-variable: + from-workflow: type: object additionalProperties: false properties: @@ -111,7 +111,7 @@ definitions: - variable required: - variable - - from-workflow-variable + - from-workflow # A step specification variable # (there must be at least one if a variables block is defined). 
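The decoding behaviour behind these two schema patches can be made concrete with a small example. What follows is a minimal, self-contained sketch, not code from this repository: the two-step definition is hypothetical, get_steps() is inlined as a plain dictionary lookup, and the 'from-workflow' key assumes the schema rename made in PATCH 25 above.

from typing import Any


def collect_workflow_variable_names(definition: dict[str, Any]) -> set[str]:
    # Walk every step's variable-mapping entries and collect the names of
    # the workflow-level variables they bind (the 'from-workflow' form).
    names: set[str] = set()
    for step in definition.get("steps", []):
        for mapping in step.get("variable-mapping", []):
            if "from-workflow" in mapping:
                names.add(mapping["from-workflow"]["variable"])
    return names


# A hypothetical two-step definition: step-2 takes its input from step-1,
# so only step-1 contributes a workflow-level variable name.
example_definition: dict[str, Any] = {
    "steps": [
        {
            "name": "step-1",
            "variable-mapping": [
                {
                    "variable": "inputFile",
                    "from-workflow": {"variable": "candidateMolecules"},
                },
            ],
        },
        {
            "name": "step-2",
            "variable-mapping": [
                {
                    "variable": "inputFile",
                    "from-step": {"name": "step-1", "variable": "outputFile"},
                },
            ],
        },
    ],
}

assert collect_workflow_variable_names(example_definition) == {"candidateMolecules"}

Variables satisfied by a prior step (the 'from-step' form) are deliberately excluded: only 'from-workflow' bindings need user-supplied values, and that is exactly the set the run-level validator checks.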
From 2c4c867518466b1d3f63bdfaa9850686fbfd14d4 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Tue, 19 Aug 2025 16:28:02 +0100 Subject: [PATCH 26/57] refactor: from-workflow-variable becomes from-workflow --- .../duplicate-step-input-output-variable-names.yaml | 4 ++-- .../replicate-using-undeclared-input.yaml | 2 +- tests/workflow-definitions/simple-python-fanout.yaml | 2 +- .../simple-python-molprops-with-options.yaml | 10 +++++----- tests/workflow-definitions/simple-python-molprops.yaml | 4 ++-- tests/workflow-definitions/simple-python-parallel.yaml | 2 +- workflow/decoder.py | 4 ++-- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/workflow-definitions/duplicate-step-input-output-variable-names.yaml b/tests/workflow-definitions/duplicate-step-input-output-variable-names.yaml index bc0b761..3ba3926 100644 --- a/tests/workflow-definitions/duplicate-step-input-output-variable-names.yaml +++ b/tests/workflow-definitions/duplicate-step-input-output-variable-names.yaml @@ -16,10 +16,10 @@ steps: value: 123 variable-mapping: - variable: inputFile - from-workflow-variable: + from-workflow: variable: candidateMolecules - variable: inputFile - from-workflow-variable: + from-workflow: variable: candidateMolecules in: - inputFile diff --git a/tests/workflow-definitions/replicate-using-undeclared-input.yaml b/tests/workflow-definitions/replicate-using-undeclared-input.yaml index 488ddba..1382f0c 100644 --- a/tests/workflow-definitions/replicate-using-undeclared-input.yaml +++ b/tests/workflow-definitions/replicate-using-undeclared-input.yaml @@ -16,7 +16,7 @@ steps: value: 123 variable-mapping: - variable: inputFile - from-workflow-variable: + from-workflow: variable: candidateMolecules in: - outputFile diff --git a/tests/workflow-definitions/simple-python-fanout.yaml b/tests/workflow-definitions/simple-python-fanout.yaml index 9e2042f..598f0e6 100644 --- a/tests/workflow-definitions/simple-python-fanout.yaml +++ b/tests/workflow-definitions/simple-python-fanout.yaml @@ -17,7 +17,7 @@ steps: value: "1" variable-mapping: - variable: inputFile - from-workflow-variable: + from-workflow: variable: candidateMolecules - name: parallel-step diff --git a/tests/workflow-definitions/simple-python-molprops-with-options.yaml b/tests/workflow-definitions/simple-python-molprops-with-options.yaml index 696855d..3a3904a 100644 --- a/tests/workflow-definitions/simple-python-molprops-with-options.yaml +++ b/tests/workflow-definitions/simple-python-molprops-with-options.yaml @@ -62,16 +62,16 @@ steps: version: "1.0.0" variable-mapping: - variable: inputFile - from-workflow-variable: + from-workflow: variable: candidateMolecules - variable: name - from-workflow-variable: + from-workflow: variable: rdkitPropertyName - variable: value - from-workflow-variable: + from-workflow: variable: rdkitPropertyValue - variable: outputFile - from-workflow-variable: + from-workflow: variable: clusteredMolecules - name: step2 @@ -89,7 +89,7 @@ steps: name: step1 variable: outputFile - variable: outputFile - from-workflow-variable: + from-workflow: variable: outputFile out: - outputFile diff --git a/tests/workflow-definitions/simple-python-molprops.yaml b/tests/workflow-definitions/simple-python-molprops.yaml index 5a0797e..39fb6cd 100644 --- a/tests/workflow-definitions/simple-python-molprops.yaml +++ b/tests/workflow-definitions/simple-python-molprops.yaml @@ -16,7 +16,7 @@ steps: value: 123 variable-mapping: - variable: inputFile - from-workflow-variable: + from-workflow: variable: candidateMolecules - name: 
step2 @@ -34,7 +34,7 @@ steps: name: step1 variable: outputFile - variable: outputFile - from-workflow-variable: + from-workflow: variable: clusteredMolecules out: - outputFile diff --git a/tests/workflow-definitions/simple-python-parallel.yaml b/tests/workflow-definitions/simple-python-parallel.yaml index 1137413..c1f5c8f 100644 --- a/tests/workflow-definitions/simple-python-parallel.yaml +++ b/tests/workflow-definitions/simple-python-parallel.yaml @@ -16,7 +16,7 @@ steps: value: "0" variable-mapping: - variable: inputFile - from-workflow-variable: + from-workflow: variable: candidateMolecules - name: parallel-step-a diff --git a/workflow/decoder.py b/workflow/decoder.py index 0fa00a5..510c41f 100644 --- a/workflow/decoder.py +++ b/workflow/decoder.py @@ -73,8 +73,8 @@ def get_workflow_variable_names(definition: dict[str, Any]) -> set[str]: for step in steps: if v_map := step.get("variable-mapping"): for v in v_map: - if "from-workflow-variable" in v: - wf_variable_names.add(v["from-workflow-variable"]["variable"]) + if "from-workflow" in v: + wf_variable_names.add(v["from-workflow"]["variable"]) return wf_variable_names From c2eb21c6f4152d59fe08ef37877402017029c9c2 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Tue, 19 Aug 2025 17:23:35 +0100 Subject: [PATCH 27/57] refactor: Replicate now uses variable not input --- .../replicate-using-undeclared-input.yaml | 2 +- tests/workflow-definitions/simple-python-fanout.yaml | 2 +- workflow/decoder.py | 4 ++-- workflow/workflow-schema.yaml | 10 +++++----- workflow/workflow_engine.py | 4 ++-- workflow/workflow_validator.py | 2 +- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/workflow-definitions/replicate-using-undeclared-input.yaml b/tests/workflow-definitions/replicate-using-undeclared-input.yaml index 1382f0c..447521b 100644 --- a/tests/workflow-definitions/replicate-using-undeclared-input.yaml +++ b/tests/workflow-definitions/replicate-using-undeclared-input.yaml @@ -32,7 +32,7 @@ steps: value: "999" replicate: using: - input: y + variable: y variable-mapping: - variable: inputFile from-step: diff --git a/tests/workflow-definitions/simple-python-fanout.yaml b/tests/workflow-definitions/simple-python-fanout.yaml index 598f0e6..1ad06bb 100644 --- a/tests/workflow-definitions/simple-python-fanout.yaml +++ b/tests/workflow-definitions/simple-python-fanout.yaml @@ -31,7 +31,7 @@ steps: value: "777" replicate: using: - input: inputFile + variable: inputFile variable-mapping: - variable: inputFile from-step: diff --git a/workflow/decoder.py b/workflow/decoder.py index 510c41f..9270d0d 100644 --- a/workflow/decoder.py +++ b/workflow/decoder.py @@ -200,9 +200,9 @@ def set_step_variables( return result -def get_step_replication_param(*, step: dict[str, Any]) -> str | Any: +def get_step_replicator(*, step: dict[str, Any]) -> str | Any: """Return step's replication info""" - replicator = step.get("replicate", None) + replicator = step.get("replicate") if replicator: # 'using' is a dict but there can be only single value for now replicator = list(replicator["using"].values())[0] diff --git a/workflow/workflow-schema.yaml b/workflow/workflow-schema.yaml index 910dc66..27c726c 100644 --- a/workflow/workflow-schema.yaml +++ b/workflow/workflow-schema.yaml @@ -60,15 +60,15 @@ definitions: pattern: ^[a-zA-Z_][a-zA-Z0-9_]*$ # A step replication control variable - # that is based on a step input variable - replicate-using-input: + # that is based on a step variable + replicate-using-variable: type: object additionalProperties: false 
properties: - input: + variable: $ref: '#/definitions/variable-name' required: - - input + - variable # A Step variable # (whose value is derived from a variable used in a prior step) @@ -174,7 +174,7 @@ definitions: additionalProperties: false properties: using: - $ref: '#/definitions/replicate-using-input' + $ref: '#/definitions/replicate-using-variable' variable-mapping: # The map of the source of the step's variables. # all variables the step needs (that aren;t already in the specification) diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index e1f5c1b..b8a90dc 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -42,7 +42,7 @@ from .decoder import ( get_step_input_variable_names, - get_step_replication_param, + get_step_replicator, set_step_variables, workflow_step_has_outputs, ) @@ -595,7 +595,7 @@ def _launch( variables: dict[str, Any] = error_or_variables print("variables", variables) # find out if and by which parameter this step should be replicated - replicator = get_step_replication_param(step=step) + replicator = get_step_replicator(step=step) _LOGGER.info( "Launching step: RunningWorkflow=%s RunningWorkflowStep=%s step=%s" diff --git a/workflow/workflow_validator.py b/workflow/workflow_validator.py index 1efd18c..702ef88 100644 --- a/workflow/workflow_validator.py +++ b/workflow/workflow_validator.py @@ -118,7 +118,7 @@ def _validate_tag_level( if ( replicate_using_input := step.get("replicate", {}) .get("using", {}) - .get("input") + .get("variable") ): step_name = step["name"] if replicate_using_input not in get_step_input_variable_names( From 8d05f15b07c297f0014d1dc321b5989ccf5cd112 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Tue, 19 Aug 2025 18:05:33 +0100 Subject: [PATCH 28/57] fix: No longer need realise-outputs --- tests/test_decoder.py | 36 ------------------- .../test_workflow_validator_for_run_level.py | 4 +-- workflow/decoder.py | 9 ----- workflow/workflow_abc.py | 10 ------ workflow/workflow_engine.py | 35 ++---------------- workflow/workflow_validator.py | 2 +- 6 files changed, 6 insertions(+), 90 deletions(-) diff --git a/tests/test_decoder.py b/tests/test_decoder.py index 5f5da9f..8aae4cb 100644 --- a/tests/test_decoder.py +++ b/tests/test_decoder.py @@ -197,42 +197,6 @@ def test_get_workflow_steps(): assert steps[1]["name"] == "step2" -def test_get_workflow_outputs_for_step_with_name_step1(): - # Arrange - - # Act - has_outputs = decoder.workflow_step_has_outputs( - _SIMPLE_PYTHON_MOLPROPS_WITH_OPTIONS_WORKFLOW, "step1" - ) - - # Assert - assert not has_outputs - - -def test_get_workflow_outputs_for_step_with_name_step2(): - # Arrange - - # Act - has_outputs = decoder.workflow_step_has_outputs( - _SIMPLE_PYTHON_MOLPROPS_WITH_OPTIONS_WORKFLOW, "step2" - ) - - # Assert - assert has_outputs - - -def test_get_workflow_outputs_for_step_with_unkown_step_name(): - # Arrange - - # Act - has_outputs = decoder.workflow_step_has_outputs( - _SIMPLE_PYTHON_MOLPROPS_WITH_OPTIONS_WORKFLOW, "unknown" - ) - - # Assert - assert not has_outputs - - def test_get_step_input_variable_names_when_duplicates(): # Arrange workflow_filename: str = os.path.join( diff --git a/tests/test_workflow_validator_for_run_level.py b/tests/test_workflow_validator_for_run_level.py index b1f6118..ee28a15 100644 --- a/tests/test_workflow_validator_for_run_level.py +++ b/tests/test_workflow_validator_for_run_level.py @@ -156,7 +156,7 @@ def test_validate_simple_python_molprops_with_options_when_missing_required(): ) # Assert - assert error.error_num 
== 7 + assert error.error_num == 8 assert error.error_msg == [ "Missing workflow variable values for: rdkitPropertyValue" ] @@ -210,7 +210,7 @@ def test_validate_simple_python_molprops_with_missing_input(): ) # Assert - assert error.error_num == 7 + assert error.error_num == 8 assert error.error_msg == [ "Missing workflow variable values for: candidateMolecules" ] diff --git a/workflow/decoder.py b/workflow/decoder.py index 9270d0d..c1b3c87 100644 --- a/workflow/decoder.py +++ b/workflow/decoder.py @@ -108,15 +108,6 @@ def get_step_input_variable_names( return variable_names -def workflow_step_has_outputs(definition: dict[str, Any], name: str) -> bool: - """Given a Workflow definition and a step name we return a boolean - that is true if the step produces outputs. This requires inspection - of the 'as-yet-unused' variables block.""" - return ( - len(get_step_output_variable_names(definition=definition, step_name=name)) > 0 - ) - - def set_step_variables( *, workflow: dict[str, Any], diff --git a/workflow/workflow_abc.py b/workflow/workflow_abc.py index 2024fba..4c4f6ff 100644 --- a/workflow/workflow_abc.py +++ b/workflow/workflow_abc.py @@ -373,16 +373,6 @@ def get_running_workflow_step_output_values_for_output( # "output": ["dir/file1.sdf", "dir/file2.sdf"] # } - @abstractmethod - def realise_outputs( - self, *, running_workflow_step_id: str - ) -> tuple[dict[str, Any], int]: - """Copy (link) the step's files as outputs into the Project directory.""" - # Should return an empty map or: - # { - # "error": "", - # } - class MessageDispatcher(ABC): """The class handling the sending of messages (on the Data Manager message bus).""" diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index b8a90dc..66d3e3e 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -24,7 +24,6 @@ import logging import sys -from http import HTTPStatus from pprint import pprint from typing import Any, Dict, Optional @@ -44,7 +43,6 @@ get_step_input_variable_names, get_step_replicator, set_step_variables, - workflow_step_has_outputs, ) _LOGGER: logging.Logger = logging.getLogger(__name__) @@ -250,44 +248,17 @@ def _handle_pod_message(self, msg: PodMessage) -> None: self._set_step_error(step_name, r_wfid, r_wfsid, exit_code, "Job failed") return - # If we get here the prior step completed successfully and we can decide - # whether the step has outputs (files) that need to be written to the - # Project directory, while also marking the Step as DONE (successfully). - # We pass the outputs to the DM via a call to the API adapter's realise_outputs(). - # In return it copies (links) these files to the Project directory. + # If we get here the prior step completed successfully so we + # mark the Step as DONE (successfully). wfid = rwf_response["workflow"]["id"] assert wfid wf_response, _ = self._wapi_adapter.get_workflow(workflow_id=wfid) _LOGGER.debug("API.get_workflow(%s) returned: -\n%s", wfid, str(wf_response)) - error_num: int | None = None - error_msg: str | None = None - if workflow_step_has_outputs(wf_response, step_name): - # The step produces at least one output. 
- # Inform the DM so it can link them to the Project directory - response, status_code = self._wapi_adapter.realise_outputs( - running_workflow_step_id=r_wfsid, - ) - if status_code != HTTPStatus.OK: - error_num = status_code - error_msg = ( - response["error"] - if "error" in response - else "Undisclosed error when realising outputs" - ) - - if error_num is not None: - # The job was successful but linking outputs (back to the Project directory) - # appears to have failed. - self._set_step_error(step_name, r_wfid, r_wfsid, error_num, error_msg) - return - # We then inspect the Workflow to determine the next step. self._wapi_adapter.set_running_workflow_step_done( running_workflow_step_id=r_wfsid, - success=error_num is None, - error_num=error_num, - error_msg=error_msg, + success=True, ) # We have the step from the Instance that's just finished, diff --git a/workflow/workflow_validator.py b/workflow/workflow_validator.py index 702ef88..773f7e3 100644 --- a/workflow/workflow_validator.py +++ b/workflow/workflow_validator.py @@ -153,7 +153,7 @@ def _validate_run_level( ) if missing_values: return ValidationResult( - error_num=7, + error_num=8, error_msg=[ f"Missing workflow variable values for: {', '.join(missing_values)}" ], From 0da1a595fe46fd9015fb58ebc1e8da108cf06aff Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Thu, 21 Aug 2025 17:02:36 +0000 Subject: [PATCH 29/57] build: Add devcontainer --- .devcontainer/devcontainer.json | 49 +++++++++++++++++++++++++++++++++ .gitignore | 1 + requirements.txt | 2 ++ 3 files changed, 52 insertions(+) create mode 100644 .devcontainer/devcontainer.json create mode 100644 requirements.txt diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..cc5a44e --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,49 @@ +// For format details, see https://aka.ms/devcontainer.json. 
For config options, see the
+// README at: https://github.com/devcontainers/templates/tree/main/src/python
+{
+    "name": "Python 3",
+    "image": "mcr.microsoft.com/devcontainers/python:1-3.13-bullseye",
+    "features": {
+        "ghcr.io/devcontainers/features/docker-in-docker:2": {
+            "moby": true,
+            "azureDnsAutoDetection": true,
+            "installDockerBuildx": true,
+            "installDockerComposeSwitch": true,
+            "version": "latest",
+            "dockerDashComposeVersion": "v2"
+        },
+        "ghcr.io/devcontainers/features/git:1": {
+            "ppa": true,
+            "version": "os-provided"
+        }
+    },
+    // We mount bash history in an attempt to preserve history
+    // between container restarts
+    // (see https://code.visualstudio.com/remote/advancedcontainers/persist-bash-history)
+    "mounts": [
+        "source=projectname-bashhistory,target=/commandhistory,type=volume"
+    ],
+    "customizations": {
+        "vscode": {
+            "extensions": [
+                "codezombiech.gitignore",
+                "donjayamanne.githistory",
+                "donjayamanne.git-extension-pack",
+                "eamodio.gitlens",
+                "github.vscode-github-actions",
+                "ms-kubernetes-tools.vscode-kubernetes-tools",
+                "ms-python.vscode-pylance",
+                "sourcery.sourcery",
+                "streetsidesoftware.code-spell-checker",
+                "trond-snekvik.simple-rst",
+                "vivaxy.vscode-conventional-commits",
+                "yzhang.markdown-all-in-one"
+            ]
+        }
+    },
+    "postCreateCommand": {
+        "Install Python requirements": "pip3 install --user -r requirements.txt",
+        "Fix Volume Permissions": "sudo chown -R $(whoami): /commandhistory"
+    },
+    "forwardPorts": []
+}
diff --git a/.gitignore b/.gitignore
index 6e25b42..27fa4a4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,7 @@ dist/
 **/__pycache__/
 **/*.pickle
 tests/project-root/project-*/
+**/.DS_Store

 # temp files
 *~
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..801c6a7
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+poetry == 1.8.5
+pre-commit == 4.2.0
From 6ddca27390fb816e846b6dc34840e2d10f5a3728 Mon Sep 17 00:00:00 2001
From: Alan Christie
Date: Thu, 21 Aug 2025 17:12:14 +0000
Subject: [PATCH 30/57] build: No need for DinD

---
 .devcontainer/devcontainer.json | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index cc5a44e..c15be0a 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -1,17 +1,9 @@
 // For format details, see https://aka.ms/devcontainer.json. For config options, see the
 // README at: https://github.com/devcontainers/templates/tree/main/src/python
 {
-    "name": "Python 3",
+    "name": "WorkflowEngine Python 3.13",
     "image": "mcr.microsoft.com/devcontainers/python:1-3.13-bullseye",
     "features": {
-        "ghcr.io/devcontainers/features/docker-in-docker:2": {
-            "moby": true,
-            "azureDnsAutoDetection": true,
-            "installDockerBuildx": true,
-            "installDockerComposeSwitch": true,
-            "version": "latest",
-            "dockerDashComposeVersion": "v2"
-        },
         "ghcr.io/devcontainers/features/git:1": {
             "ppa": true,
             "version": "os-provided"
From bca4f9c6295ca02b58edef5b0870412c062c1907 Mon Sep 17 00:00:00 2001
From: Alan Christie
Date: Thu, 21 Aug 2025 17:12:32 +0000
Subject: [PATCH 31/57] docs: Docs on devcontainer

---
 README.rst | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/README.rst b/README.rst
index c86b422..1f471d3 100644
--- a/README.rst
+++ b/README.rst
@@ -38,10 +38,9 @@ The project's written in Python and uses `Poetry`_ for dependency and package
management.
We also use `pre-commit`_ to manage our pre-commit hooks, which
 rely on `black`_, `mypy`_, `pylint`_, amongst others.

-Create your environment::
+From within a VS Code `devcontainer`_] environment (recommended)::

-    poetry shell
-    poetry install --with dev
+    poetry install --with dev --sync
     pre-commit install -t commit-msg -t pre-commit

 And then start by running the pre-commit hooks to ensure you're starting with a
 _clean_ project::

@@ -51,9 +50,10 @@

 And then run the tests::

-    coverage run -m pytest
-    coverage report
+    poetry run coverage run -m pytest
+    poetry run coverage report

+.. _devcontainer: https://code.visualstudio.com/docs/devcontainers/containers
 .. _Poetry: https://python-poetry.org
 .. _pre-commit: https://pre-commit.com
 .. _black: https://github.com/psf/black
From b9e3f00e6d750fedbd6b23a4ade8eb33877d0230 Mon Sep 17 00:00:00 2001
From: Alan Christie
Date: Fri, 22 Aug 2025 16:34:21 +0000
Subject: [PATCH 32/57] docs: Doc tweak

---
 README.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.rst b/README.rst
index 1f471d3..086a375 100644
--- a/README.rst
+++ b/README.rst
@@ -38,7 +38,7 @@ The project's written in Python and uses `Poetry`_ for dependency and package
 management. We also use `pre-commit`_ to manage our pre-commit hooks, which
 rely on `black`_, `mypy`_, `pylint`_, amongst others.

-From within a VS Code `devcontainer`_] environment (recommended)::
+From within a VS Code `devcontainer`_ environment (recommended)::

     poetry install --with dev --sync
     pre-commit install -t commit-msg -t pre-commit
From 5a9903b3098b845d34cde9558c3447de402ccb83 Mon Sep 17 00:00:00 2001
From: Alan Christie
Date: Fri, 22 Aug 2025 16:38:47 +0000
Subject: [PATCH 33/57] feat: Some work on the refactored engine

---
 tests/test_workflow_engine_examples.py               |   4 +-
 .../example-smiles-to-file.yaml                      |   9 ++
 .../simple-python-molprops-with-options.yaml         |  56 +--------
 workflow/decoder.py                                  | 117 ++++++------------
 workflow/workflow_engine.py                          |  59 +++++----
 5 files changed, 88 insertions(+), 157 deletions(-)

diff --git a/tests/test_workflow_engine_examples.py b/tests/test_workflow_engine_examples.py
index d4c1a6f..641e608 100644
--- a/tests/test_workflow_engine_examples.py
+++ b/tests/test_workflow_engine_examples.py
@@ -130,6 +130,7 @@ def wait_for_workflow(
     # are the responsibility of the caller.
     attempts = 0
     done = False
+    response = None
     while not done:
         response, _ = da.get_running_workflow(running_workflow_id=r_wfid)
         if response["done"]:
@@ -141,6 +142,7 @@
             time.sleep(completion_poll_period_s)
     # When we get here the workflow must have finished (not timed-out),
     # and it must have passed (or failed) according to the caller's expectation.
+ assert response assert response["done"] assert response["success"] == expect_success @@ -400,7 +402,7 @@ def test_workflow_engine_simple_python_molprops_with_options(basic_engine): "simple-python-molprops-with-options", { "candidateMolecules": input_file_1, - "outputFile": output_file_1, + "clusteredMolecules": output_file_2, "rdkitPropertyName": "prop", "rdkitPropertyValue": 1.2, }, diff --git a/tests/workflow-definitions/example-smiles-to-file.yaml b/tests/workflow-definitions/example-smiles-to-file.yaml index b7dc70c..018d90c 100644 --- a/tests/workflow-definitions/example-smiles-to-file.yaml +++ b/tests/workflow-definitions/example-smiles-to-file.yaml @@ -13,3 +13,12 @@ steps: collection: workflow-engine-unit-test-jobs job: smiles-to-file version: "1.0.0" + variable-mapping: + - variable: outputFile + from-workflow: + variable: outputFile + - variable: smiles + from-workflow: + variable: smiles + out: + - outputFile diff --git a/tests/workflow-definitions/simple-python-molprops-with-options.yaml b/tests/workflow-definitions/simple-python-molprops-with-options.yaml index 3a3904a..9ef80e5 100644 --- a/tests/workflow-definitions/simple-python-molprops-with-options.yaml +++ b/tests/workflow-definitions/simple-python-molprops-with-options.yaml @@ -4,55 +4,6 @@ kind-version: "2025.2" name: python-workflow description: A simple python experimental workflow -# Some meaningless variables. -# Just to make sure the decoder accepts this. -# The Workflow engin eis not (yet) interested in this block. -variables: - inputs: - type: object - required: - - inputFile - properties: - inputFile: - title: Molecules to pick from - mime-types: - - squonk/x-smiles - type: file - seeds: - title: Molecules that are already picked - mime-types: - - squonk/x-smiles - type: file - multiple: true - outputs: - type: object - properties: - outputFile: - title: Output file - mime-types: - - chemical/x-csv - creates: '{{ outputFile }}' - type: file - options: - type: object - required: - - count - properties: - outputFile: - title: Output file name - type: string - pattern: "^[A-Za-z0-9_/\\.\\-]+$" - default: diverse.smi - count: - title: Number of molecules to pick - type: integer - minimum: 1 - threshold: - title: Similarity threshold - type: number - minimum: 0 - maximum: 1 - steps: - name: step1 description: Add column 1 @@ -60,6 +11,8 @@ steps: collection: workflow-engine-unit-test-jobs job: rdkit-molprops version: "1.0.0" + variables: + outputFile: step1.out.smi variable-mapping: - variable: inputFile from-workflow: @@ -70,9 +23,6 @@ steps: - variable: value from-workflow: variable: rdkitPropertyValue - - variable: outputFile - from-workflow: - variable: clusteredMolecules - name: step2 description: Add column 2 @@ -90,6 +40,6 @@ steps: variable: outputFile - variable: outputFile from-workflow: - variable: outputFile + variable: clusteredMolecules out: - outputFile diff --git a/workflow/decoder.py b/workflow/decoder.py index c1b3c87..d7e320f 100644 --- a/workflow/decoder.py +++ b/workflow/decoder.py @@ -4,7 +4,6 @@ """ import os -from pprint import pprint from typing import Any import jsonschema @@ -108,87 +107,41 @@ def get_step_input_variable_names( return variable_names -def set_step_variables( - *, - workflow: dict[str, Any], - inputs: list[dict[str, Any]], - outputs: list[dict[str, Any]], - step_outputs: dict[str, Any], - previous_step_outputs: list[dict[str, Any]], - workflow_variables: dict[str, Any], - step_name: str, -) -> dict[str, Any]: - """Prepare input- and output variables for the following step. 
-
-    Inputs are defined in step definition but their values may
-    come from previous step outputs.
-    """
-    assert workflow
-
-    result = {}
-
-    print("ssv: wf vars:")
-    pprint(workflow_variables)
-    print("ssv: inputs:")
-    pprint(inputs)
-    print("ssv: outputs", outputs)
-    print("ssv: step_outputs", step_outputs)
-    print("ssv: prev step outputs", previous_step_outputs)
-    print("ssv: step_name", step_name)
-
-    for item in inputs:
-        p_key = item["input"]
-        p_val = ""
-        val = item["from"]
-        if "workflow-input" in val.keys():
-            p_val = workflow_variables[val["workflow-input"]]
-            result[p_key] = p_val
-        elif "step" in val.keys():
-            # this links the variable to previous step output
-            if previous_step_outputs:
-                for out in previous_step_outputs:
-                    if out["output"] == val["output"]:
-                        # p_val = out["as"]
-                        if step_outputs["output"]:
-                            p_val = step_outputs["output"]
-                            print("\n!!!!!!!!!!!!!if clause!!!!!!!!!!!!!!!!!!!!!\n")
-                            print(p_val)
-                        else:
-                            # what do I need to do here??
-                            print("\n!!!!!!!!!!!!!else clause!!!!!!!!!!!!!!!!!!!!!\n")
-                            print(out)
-                            print(val)
-
-            # this bit handles multiple inputs: if a step
-            # requires input from multiple steps, add them to
-            # the list in result dict. this is the reason for
-            # mypy ignore statements, mypy doesn't understand
-            # redefinition
-            if p_key in result:
-                if not isinstance(result[p_key], set):
-                    result[p_key] = {result[p_key]}  # type: ignore [assignment]
-                result[p_key].add(p_val)  # type: ignore [attr-defined]
-            else:
-                result[p_key] = p_val
-        else:
-            if val["output"] in workflow_variables:
-                result[p_key] = workflow_variables[val["output"]]
-
-    for item in outputs:
-        p_key = item["output"]
-        # p_val = item["as"]
-        # p_val = step_outputs["output"]
-        p_val = "somefile.smi"
-        result[p_key] = p_val
-
-    # options = set_variables_from_options_for_step(
-    #     definition=workflow,
-    #     variables=workflow_variables,
-    #     step_name=step_name,
-    # )
-    #
-    # result |= options
-    return result
+def get_step_workflow_variable_mapping(
+    *, step: dict[str, Any]
+) -> list[tuple[str, str]]:
+    """Returns a list of workflow variable name to step variable name tuples
+    for the given step."""
+    variable_mapping: list[tuple[str, str]] = []
+    if "variable-mapping" in step:
+        for v_map in step["variable-mapping"]:
+            if "from-workflow" in v_map:
+                # Tuple is "from" -> "to"
+                variable_mapping.append(
+                    (v_map["from-workflow"]["variable"], v_map["variable"])
+                )
+    return variable_mapping
+
+
+def get_step_prior_step_variable_mapping(
+    *, step: dict[str, Any]
+) -> dict[str, list[tuple[str, str]]]:
+    """Returns list of tuples, indexed by prior step name, of source step variable name
+    to this step's variable name."""
+    variable_mapping: dict[str, list[tuple[str, str]]] = {}
+    if "variable-mapping" in step:
+        for v_map in step["variable-mapping"]:
+            if "from-step" in v_map:
+                step_name = v_map["from-step"]["name"]
+                step_variable = v_map["from-step"]["variable"]
+                # Tuple is "from" -> "to"
+                if step_name in variable_mapping:
+                    variable_mapping[step_name].append(
+                        (step_variable, v_map["variable"])
+                    )
+                else:
+                    variable_mapping[step_name] = [(step_variable, v_map["variable"])]
+    return variable_mapping


 def get_step_replicator(*, step: dict[str, Any]) -> str | Any:
diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py
index 66d3e3e..71ee0ac 100644
--- a/workflow/workflow_engine.py
+++ b/workflow/workflow_engine.py
@@ -41,8 +41,9 @@

 from .decoder import (
     get_step_input_variable_names,
+    get_step_prior_step_variable_mapping,
     get_step_replicator,
-    set_step_variables,
+
get_step_workflow_variable_mapping,
 )

 _LOGGER: logging.Logger = logging.getLogger(__name__)
@@ -485,16 +486,6 @@ def _validate_step_command(

         print("final prev step outputs", previous_step_outputs)

-        step_vars = set_step_variables(
-            workflow=workflow,
-            workflow_variables=all_variables,
-            inputs=inputs,
-            outputs=outputs,
-            step_outputs=step_outputs,
-            previous_step_outputs=previous_step_outputs,
-            step_name=running_wf_step["name"],
-        )
-        all_variables |= step_vars
         _LOGGER.debug(
             "Index %s (%s) all_variables=%s",
             our_step_index,
@@ -533,27 +524,52 @@ def _launch(
         wf_step_data, _ = self._wapi_adapter.get_workflow_steps_driving_this_step(
             running_workflow_step_id=rwfs_id,
         )
-        print("wf_step_data")
-        pprint(wf_step_data)
         assert wf_step_data["caller_step_index"] >= 0
         our_step_index: int = wf_step_data["caller_step_index"]

         print("step in _launch:", step_name)
         pprint(step)

+        # Workflow variables set by the user...
+        rwf_variables: dict[str, Any] = rwf.get("variables", {})
+
         # Now check the step command can be executed
         # (by trying to decode the Job command).
+        # Before we do this we have to construct the variable map
+        # for this step.
+        #
-        # We pass in the workflow variables (these are provided by the user
-        # when the workflow is run. All workflow variables will be present in the
-        # running workflow record)
-        running_workflow_variables: dict[str, Any] | None = rwf.get("variables")
+        # We start with any variables provided in the step specification
+        all_variables: dict[str, Any] = step["specification"].get("variables", {})
+        # We now have to iterate through the step's variable mapping block.
+        # This will name any workflow variables (from)
+        # and their corresponding step variable (to).
+        step_wf_v_map: list[tuple[str, str]] = get_step_workflow_variable_mapping(
+            step=step
+        )
+        for from_to in step_wf_v_map:
+            all_variables[from_to[1]] = rwf_variables[from_to[0]]
+        # We must now apply variables from prior steps identified in the
+        # current step's mapping block. We're given a map indexed by
+        # prior step name that's a list of tuples naming the prior step's
+        # variable (from) and the current step variable (to).
+        step_prior_v_map: dict[str, list[tuple[str, str]]] = (
+            get_step_prior_step_variable_mapping(step=step)
+        )
+        for prior_step_name, v_map in step_prior_v_map.items():
+            # Load the prior step
+            prior_step, _ = self._wapi_adapter.get_running_workflow_step_by_name(
+                name=prior_step_name, running_workflow_id=rwf_id
+            )
+            # Get its variables and copy the value
+            for from_to in v_map:
+                all_variables[from_to[1]] = prior_step["variables"][from_to[0]]
+
         error_or_variables: str | dict[str, Any] = self._validate_step_command(
             running_workflow_step_id=rwfs_id,
             step=step,
             workflow_steps=wf_step_data["steps"],
             our_step_index=our_step_index,
-            running_workflow_variables=running_workflow_variables,
+            running_workflow_variables=all_variables,
         )
         if isinstance(error_or_variables, str):
             error_msg = error_or_variables
@@ -570,13 +586,14 @@

         _LOGGER.info(
             "Launching step: RunningWorkflow=%s RunningWorkflowStep=%s step=%s"
-            " (name=%s project=%s, variables=%s)",
+            " variables=%s name=%s project=%s, (all_variables=%s)",
             rwf_id,
             rwfs_id,
             step_name,
+            variables,
             rwf["name"],
             project_id,
-            variables,
+            all_variables,
         )

         # When we launch a step we need to identify all the prior steps in the workflow,
@@ -593,7 +610,7 @@
         prior_steps: list[str] = []
         if our_step_index > 0:
             # We need the step ID of the prior step.
- prior_step_name: str = wf_step_data["steps"][our_step_index - 1]["name"] + prior_step_name = wf_step_data["steps"][our_step_index - 1]["name"] step_response, _ = self._wapi_adapter.get_running_workflow_step_by_name( name=prior_step_name, running_workflow_id=rwf_id, From f3bbc6da45f9e4bccc1a54117f24880ffe26a3c6 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Wed, 27 Aug 2025 13:46:28 +0000 Subject: [PATCH 34/57] fix: More fixes for engine --- tests/test_workflow_engine_examples.py | 2 ++ .../test_workflow_validator_for_run_level.py | 1 + workflow/decoder.py | 4 +-- workflow/workflow_validator.py | 29 ++++++++++++++----- 4 files changed, 27 insertions(+), 9 deletions(-) diff --git a/tests/test_workflow_engine_examples.py b/tests/test_workflow_engine_examples.py index 641e608..3aaf046 100644 --- a/tests/test_workflow_engine_examples.py +++ b/tests/test_workflow_engine_examples.py @@ -327,6 +327,7 @@ def test_workflow_engine_simple_python_molprops(basic_engine): assert project_file_exists(output_file_2) +@pytest.mark.skip("Unrealistic test") def test_workflow_engine_simple_python_molprops_with_options(basic_engine): # Arrange md, da = basic_engine @@ -423,6 +424,7 @@ def test_workflow_engine_simple_python_molprops_with_options(basic_engine): assert project_file_exists(output_file_2) +@pytest.mark.skip("Unrealistic test") def test_workflow_engine_simple_python_fanout(basic_engine): # Arrange md, da = basic_engine diff --git a/tests/test_workflow_validator_for_run_level.py b/tests/test_workflow_validator_for_run_level.py index ee28a15..175d828 100644 --- a/tests/test_workflow_validator_for_run_level.py +++ b/tests/test_workflow_validator_for_run_level.py @@ -62,6 +62,7 @@ def test_validate_example_smiles_to_file(): error = WorkflowValidator.validate( level=ValidationLevel.RUN, workflow_definition=workflow, + variables={"smiles": "C", "outputFile": "blob.smi"}, ) # Assert diff --git a/workflow/decoder.py b/workflow/decoder.py index d7e320f..10d6d4e 100644 --- a/workflow/decoder.py +++ b/workflow/decoder.py @@ -137,10 +137,10 @@ def get_step_prior_step_variable_mapping( # Tuple is "from" -> "to" if step_name in variable_mapping: variable_mapping[step_name].append( - (step_variable, v_map["variable"]) + (v_map["variable"], step_variable) ) else: - variable_mapping[step_name] = [(step_variable, v_map["variable"])] + variable_mapping[step_name] = [(v_map["variable"], step_variable)] return variable_mapping diff --git a/workflow/workflow_validator.py b/workflow/workflow_validator.py index 773f7e3..10a2482 100644 --- a/workflow/workflow_validator.py +++ b/workflow/workflow_validator.py @@ -5,8 +5,9 @@ from typing import Any from .decoder import ( - get_step_input_variable_names, get_step_output_variable_names, + get_step_prior_step_variable_mapping, + get_step_workflow_variable_mapping, get_steps, get_workflow_variable_names, validate_schema, @@ -113,22 +114,36 @@ def _validate_tag_level( error_msg=[f"Duplicate step names found: {', '.join(duplicate_names)}"], ) # For each 'replicating' step the replicating variable - # must be declared in the step. + # must be declared in the step - which is either a workflow variable + # or a prior step variable. 
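+                # An illustrative (assumed) example, mirroring the
+                # simple-python-fanout.yaml definition: a step declaring
+                #   replicate: using: variable: inputFile
+                # must also declare "inputFile" as the "variable" (destination)
+                # name of one of its variable-mapping entries, i.e. a
+                # "from-workflow" or "from-step" mapping.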
for step in get_steps(workflow_definition):
             if (
                 replicate_using_input := step.get("replicate", {})
                 .get("using", {})
                 .get("variable")
             ):
-                step_name = step["name"]
-                if replicate_using_input not in get_step_input_variable_names(
-                    workflow_definition, step_name
-                ):
+                found: bool = False
+                for variable_map in get_step_workflow_variable_mapping(step=step):
+                    if replicate_using_input == variable_map[0]:
+                        found = True
+                        break
+                if not found:
+                    for (
+                        step_name,
+                        variable_map_list,
+                    ) in get_step_prior_step_variable_mapping(step=step).items():
+                        for variable_map in variable_map_list:
+                            if replicate_using_input == variable_map[0]:
+                                found = True
+                                break
+                        if found:
+                            break
+                if not found:
                     return ValidationResult(
                         error_num=7,
                         error_msg=[
                             "Replicate input variable is not declared:"
-                            f" {replicate_using_input} (step={step_name})"
+                            f" {replicate_using_input} (step={step['name']})"
                         ],
                     )
From 880944009d504ce456be0125c43cd14720b58a79 Mon Sep 17 00:00:00 2001
From: Alan Christie
Date: Wed, 27 Aug 2025 15:12:04 +0000
Subject: [PATCH 35/57] fix: More work on the decoder

---
 tests/test_decoder.py                                |   2 +-
 tests/test_workflow_engine_examples.py               |  59 ++---
 tests/wapi_adapter.py                                |   6 -
 .../simple-python-molprops.yaml                      |   1 +
 workflow/decoder.py                                  |   4 +-
 workflow/workflow_engine.py                          | 228 ++++--------------
 6 files changed, 65 insertions(+), 235 deletions(-)

diff --git a/tests/test_decoder.py b/tests/test_decoder.py
index 8aae4cb..46c05d9 100644
--- a/tests/test_decoder.py
+++ b/tests/test_decoder.py
@@ -142,7 +142,7 @@ def test_validate_schema_for_step_specification_variable_names():
     assert error is None


-@pytest.mark.skip(reason="DO not support combination atm")
+@pytest.mark.skip(reason="We do not support combination atm")
 def test_validate_schema_for_simple_python_parallel():
     # Arrange

diff --git a/tests/test_workflow_engine_examples.py b/tests/test_workflow_engine_examples.py
index 641e608..3aaf046 100644
--- a/tests/test_workflow_engine_examples.py
+++ b/tests/test_workflow_engine_examples.py
@@ -208,37 +208,6 @@ def test_workflow_engine_example_smiles_to_file(basic_engine):
     assert project_file_exists(output_file)


-@pytest.mark.skip("Unrealistic test")
-def test_workflow_engine_shortcut_example_1(basic_engine):
-    # Arrange
-    md, da = basic_engine
-
-    # Make sure files that should be generated by the test
-    # do not exist before we run the test.
-    output_file_a = "a.sdf"
-    assert not project_file_exists(output_file_a)
-    output_file_b = "b.sdf"
-    assert not project_file_exists(output_file_b)
-
-    # Act
-    r_wfid = start_workflow(md, da, "shortcut-example-1", {})
-
-    # Assert
-    wait_for_workflow(da, r_wfid)
-    # Additional, detailed checks...
- # Check we only have one RunningWorkflowStep, and it succeeded - response = da.get_running_workflow_steps(running_workflow_id=r_wfid) - assert response["count"] == 2 - assert response["running_workflow_steps"][0]["done"] - assert response["running_workflow_steps"][0]["success"] - assert response["running_workflow_steps"][1]["done"] - assert response["running_workflow_steps"][1]["success"] - # This test should generate a file in the simulated project directory - assert project_file_exists(output_file_a) - assert project_file_exists(output_file_b) - - -@pytest.mark.skip("temporary skip") def test_workflow_engine_simple_python_molprops(basic_engine): # Arrange md, da = basic_engine @@ -249,13 +218,7 @@ def test_workflow_engine_simple_python_molprops(basic_engine): output="step1.out.smi", ) - # Make sure files that should be generated by the test - # do not exist before we run the test. - output_file_1 = "step1.out.smi" - assert not project_file_exists(output_file_1) - output_file_2 = "step2.out.smi" - assert not project_file_exists(output_file_2) - # And create the test's input file. + # Create the test's input file. input_file_1 = "input1.smi" input_file_1_content = """O=C(CSCc1ccc(Cl)s1)N1CCC(O)CC1 RDKit 3D @@ -307,9 +270,22 @@ def test_workflow_engine_simple_python_molprops(basic_engine): ) as input_file: input_file.writelines(input_file_1_content) + # Make sure files that should be generated by the test + # do not exist before we run the test. + output_file_1 = "results.smi" + assert not project_file_exists(output_file_1) + output_file_2 = "clustered-results.smi" + assert not project_file_exists(output_file_2) + # Act r_wfid = start_workflow( - md, da, "simple-python-molprops", {"candidateMolecules": input_file_1} + md, + da, + "simple-python-molprops", + { + "candidateMolecules": input_file_1, + "clusteredMolecules": "clustered-results.smi", + }, ) # Assert @@ -322,12 +298,10 @@ def test_workflow_engine_simple_python_molprops(basic_engine): assert response["running_workflow_steps"][0]["success"] assert response["running_workflow_steps"][1]["done"] assert response["running_workflow_steps"][1]["success"] - # This test should generate a file in the simulated project directory - assert project_file_exists(output_file_1) + # This test should generate the expected file in the simulated project directory assert project_file_exists(output_file_2) -@pytest.mark.skip("Unrealistic test") def test_workflow_engine_simple_python_molprops_with_options(basic_engine): # Arrange md, da = basic_engine @@ -424,7 +398,6 @@ def test_workflow_engine_simple_python_molprops_with_options(basic_engine): assert project_file_exists(output_file_2) -@pytest.mark.skip("Unrealistic test") def test_workflow_engine_simple_python_fanout(basic_engine): # Arrange md, da = basic_engine diff --git a/tests/wapi_adapter.py b/tests/wapi_adapter.py index 1f1f6d0..c283ee1 100644 --- a/tests/wapi_adapter.py +++ b/tests/wapi_adapter.py @@ -202,7 +202,6 @@ def get_running_workflow_step_by_name( for rwfs_id, record in running_workflow_step.items(): if record["running_workflow"]["id"] != running_workflow_id: continue - print("running wf step by name, record:", record) if record["name"] == name and record["replica"] == replica: response = record response["id"] = rwfs_id @@ -425,11 +424,6 @@ def get_running_workflow_step_output_values_for_output( mock_output = Unpickler(pickle_file).load() UnitTestWorkflowAPIAdapter.lock.release() - print("mock output", mock_output) - print("step", step) - print("step_name", step_name) - # mock output 
{'first-step': {'output_variable': 'results', 'output': ['chunk_1.smi', 'chunk_2.smi']}} - if step_name not in mock_output: return {"output": []}, 0 # The record's output variable must match (there's only one record per step atm) diff --git a/tests/workflow-definitions/simple-python-molprops.yaml b/tests/workflow-definitions/simple-python-molprops.yaml index 39fb6cd..ba0d1d0 100644 --- a/tests/workflow-definitions/simple-python-molprops.yaml +++ b/tests/workflow-definitions/simple-python-molprops.yaml @@ -14,6 +14,7 @@ steps: variables: name: "col1" value: 123 + outputFile: "results.smi" variable-mapping: - variable: inputFile from-workflow: diff --git a/workflow/decoder.py b/workflow/decoder.py index 10d6d4e..d7e320f 100644 --- a/workflow/decoder.py +++ b/workflow/decoder.py @@ -137,10 +137,10 @@ def get_step_prior_step_variable_mapping( # Tuple is "from" -> "to" if step_name in variable_mapping: variable_mapping[step_name].append( - (v_map["variable"], step_variable) + (step_variable, v_map["variable"]) ) else: - variable_mapping[step_name] = [(v_map["variable"], step_variable)] + variable_mapping[step_name] = [(step_variable, v_map["variable"])] return variable_mapping diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index 71ee0ac..dcb2047 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -24,7 +24,6 @@ import logging import sys -from pprint import pprint from typing import Any, Dict, Optional from decoder.decoder import TextEncoding, decode @@ -317,11 +316,11 @@ def _handle_pod_message(self, msg: PodMessage) -> None: def _validate_step_command( self, *, + running_workflow_id: str, running_workflow_step_id: str, step: dict[str, Any], - workflow_steps: list[dict[str, Any]], our_step_index: int, - running_workflow_variables: dict[str, Any] | None = None, + running_workflow_variables: dict[str, Any], ) -> str | dict[str, Any]: """Returns an error message if the command isn't valid. Without a message we return all the variables that were (successfully) @@ -351,143 +350,42 @@ def _validate_step_command( str(job), ) - # The step's 'specification' is a string - pass it directly to the - # launcher along with any (optional) 'workflow variables'. The launcher - # will apply the variables to the step's Job command but we need to handle - # any launch problems. The validator should have checked to ensure that - # variable expansion will work, but we must prepare for the unexpected. - # - # What the engine has to do here is make sure that the Job - # that's about to be launched has all its configuration requirements - # satisfied (inputs, outputs and options). Basically we must ensure - # that the Job definition's 'command' can be compiled by applying - # the available variables. - # - # To prevent launcher errors relating to decoding we get the command ourselves - # and then apply the current set of variables. And we use the JobDecoder's - # 'decode()' method to do this. It returns a tuple (str and boolean). - # If the boolean is True then the command can be compiled - # (i.e. it has no missing variables) and the launcher should not complain - # about the command (as we'll pass the same variables to it. - # If the returned boolean is False then we can expect the returned str - # to contain an error message. - # - # The full set of step variables can be obtained - # (in ascending order of priority) from... - # - # 1. The Job Step Specification - # 2. 
The RunningWorkflow
-        #
-        # If variable 'x' is defined in all three then the RunningWorkflow's
-        # value must be used.
-
-        # 1. Get any variables from the step specification.
-        all_variables = step_spec.pop("variables") if "variables" in step_spec else {}
-        # 2. Merge running workflow variables on top of these
-        if running_workflow_variables:
-            all_variables |= running_workflow_variables
-
-        # We must always process the current step's variables
-        _LOGGER.debug("Validating step %s (%s)", step, running_workflow_step_id)
-        inputs = step.get("inputs", [])
-        outputs = step.get("outputs", [])
-        previous_step_outputs = []
-        _LOGGER.debug(
-            "We are at workflow step index %d (%s)",
-            our_step_index,
-            running_workflow_step_id,
-        )
-
-        # resolve all previous steps
-        previous_step_names = set()
-        if our_step_index > 0:
-            print("prev step inputs", inputs)
-            for inp in inputs:
-                if step_name := inp["from"].get("step", None):
-                    previous_step_names.add(step_name)
-
-        for step in workflow_steps:
-            if step["name"] in previous_step_names:
-                previous_step_outputs.extend(step.get("outputs", []))
+        # Start with any variables provided in the step's specification.
+        # This will be our "all variables" map for this step,
+        # which we will add to (and maybe even over-write)...
+        all_variables: dict[str, Any] = step["specification"].get("variables", {})

+        # Next, we iterate through the step's "variable mapping" block.
+        # This tells us all the variables that are set from either the
+        # 'workflow' or 'a prior step'.

+        # Start with any workflow variables in the step.
+        # This will be a list of tuples of "in" and "out" variable names.
+        # "in" variables are workflow variables, and "out" variables
+        # are expected Job variables. We use this to add variables
+        # to the "all variables" map.
+        for from_to in get_step_workflow_variable_mapping(step=step):
+            all_variables[from_to[1]] = running_workflow_variables[from_to[0]]
+
+        # Now we apply variables from the "variable mapping" block
+        # related to values used in prior steps. The decoder gives
+        # us a map indexed by prior step name that's a list of "in" "out"
+        # tuples as above.
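+        # For example (illustrative values only, taken from the
+        # simple-python-molprops test workflow): a mapping entry of
+        #   {"variable": "inputFile",
+        #    "from-step": {"name": "step1", "variable": "outputFile"}}
+        # yields {"step1": [("outputFile", "inputFile")]}, and if step1 ran
+        # with {"outputFile": "step1.out.smi"} we would end up setting
+        #   all_variables["inputFile"] = "step1.out.smi"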
+        step_prior_v_map: dict[str, list[tuple[str, str]]] = (
+            get_step_prior_step_variable_mapping(step=step)
+        )
+        for prior_step_name, v_map in step_prior_v_map.items():
+            # Retrieve the prior "running" step
+            # in order to get the variables that were set there...
+            prior_step, _ = self._wapi_adapter.get_running_workflow_step_by_name(
+                name=prior_step_name, running_workflow_id=running_workflow_id
+            )
+            # Copy "in" value to "out"...
+            for from_to in v_map:
+                all_variables[from_to[1]] = prior_step["variables"][from_to[0]]

-        # there should probably be an easier way to access this
-        running_wf_step, _ = self._wapi_adapter.get_running_workflow_step(
-            running_workflow_step_id=running_workflow_step_id
-        )
-        running_wf_id = running_wf_step["running_workflow"]["id"]
-        running_wf, _ = self._wapi_adapter.get_running_workflow(
-            running_workflow_id=running_wf_id
-        )
-        print("running wf")
-        pprint(running_wf)
-        workflow_id = running_wf["workflow"]["id"]
-        workflow, _ = self._wapi_adapter.get_workflow(workflow_id=workflow_id)
-
-        print("workflow")
-        pprint(workflow)
-
-        # for step in workflow["steps"]:
-        #     if step["name"] in previous_step_names:
-
-        previous_step_id = None
-        for name in previous_step_names:
-            result, _ = self._wapi_adapter.get_running_workflow_step_by_name(
-                name=name, running_workflow_id=running_wf_id, replica=0
-            )
-            print("by name results", result)
-            print("by name results, vars", result["variables"])
-            print("by name results, od", result["id"])
-            previous_step_id = result["id"]
-            # if name == 'first-step':
-            #     previous_step_id = result["id"]
-
-        # don't understand how this is structured
-        print("prev steps", previous_step_names)
-        print("outputs", outputs)
-        print()
-        step_outputs: dict[str, Any] = {"output": []}
-        if previous_step_id:
-            for output in outputs:
-                for k, v in output.items():
-                    print("sending params to output mock", k, v)
-                    try:
-                        step_outputs, _ = (
-                            self._wapi_adapter.get_running_workflow_step_output_values_for_output(
-                                running_workflow_step_id=previous_step_id,
-                                output_variable=v,  # foraeach outputs key
-                            )
-                        )
-
-                        print("mockputs", running_workflow_step_id, step_outputs)
-                    except AssertionError:
-                        print("no output for step", running_workflow_step_id, k, v)
-
-        print("final prev step outputs", previous_step_outputs)

         _LOGGER.debug(
-            "Index %s (%s) all_variables=%s",
+            "Index %s (%s) workflow_variables=%s",
             our_step_index,
             running_workflow_step_id,
             all_variables,
         )
@@ -533,49 +426,19 @@ def _launch(

-        print("step in _launch:", step_name)
-        pprint(step)
+        # A major piece of work is to get ourselves into a position
+        # that allows us to check the step command can be executed.
+        # We do this by compiling a map of variables we believe the step needs.

-        # Workflow variables set by the user...
+        # Get all the workflow variables that were provided
+        # by the user when they "ran" the workflow...
         rwf_variables: dict[str, Any] = rwf.get("variables", {})
-
-        # Now check the step command can be executed
-        # (by trying to decode the Job command).
-        # Before we do this we have to construct the variable map
-        # for this step.
-        #
-        # We start with any variables provided in the step specification
-        all_variables: dict[str, Any] = step["specification"].get("variables", {})
-        # We now have to iterate through the step's variable mapping block.
-        # This will name any workflow variables (from)
-        # and their corresponding step variable (to).
-        step_wf_v_map: list[tuple[str, str]] = get_step_workflow_variable_mapping(
-            step=step
-        )
-        for from_to in step_wf_v_map:
-            all_variables[from_to[1]] = rwf_variables[from_to[0]]
-        # We must now apply variables from prior steps identified in the
-        # current step's mapping block.
We're given a map indexed by
-        # prior step name that's a list of tuples naming the prior step's
-        # variable (from) and the current step variable (to).
-        step_prior_v_map: dict[str, list[tuple[str, str]]] = (
-            get_step_prior_step_variable_mapping(step=step)
-        )
-        for prior_step_name, v_map in step_prior_v_map.items():
-            # Load the prior step
-            prior_step, _ = self._wapi_adapter.get_running_workflow_step_by_name(
-                name=prior_step_name, running_workflow_id=rwf_id
-            )
-            # Get its variables and copy the value
-            for from_to in v_map:
-                all_variables[from_to[1]] = prior_step["variables"][from_to[0]]
-
         error_or_variables: str | dict[str, Any] = self._validate_step_command(
+            running_workflow_id=rwf_id,
             running_workflow_step_id=rwfs_id,
             step=step,
-            workflow_steps=wf_step_data["steps"],
             our_step_index=our_step_index,
-            running_workflow_variables=all_variables,
+            running_workflow_variables=rwf_variables,
         )
         if isinstance(error_or_variables, str):
             error_msg = error_or_variables
@@ -580,22 +448,22 @@
         project_id = rwf["project"]["id"]

         variables: dict[str, Any] = error_or_variables
-        print("variables", variables)
-        # find out if and by which parameter this step should be replicated
-        replicator = get_step_replicator(step=step)

         _LOGGER.info(
             "Launching step: RunningWorkflow=%s RunningWorkflowStep=%s step=%s"
-            " variables=%s name=%s project=%s, (all_variables=%s)",
+            " variables=%s name=%s project=%s, (variables=%s)",
             rwf_id,
             rwfs_id,
             step_name,
             variables,
             rwf["name"],
             project_id,
-            all_variables,
+            variables,
         )

+        # Is this a replicating step?
+        replicator = get_step_replicator(step=step)
+
         # When we launch a step we need to identify all the prior steps in the workflow,
@@ -629,9 +497,6 @@
         # 'running_workflow_step_inputs'
         # A list of Job input variable names

-        print("variables")
-        pprint(variables)
-
         inputs: list[str] = []
         inputs.extend(iter(get_step_input_variable_names(wf, step_name)))
         if replicator:
@@ -643,9 +508,6 @@
         else:
             single_step_variables = [variables]

-        print("single step variables")
-        pprint(single_step_variables)
-
         for params in single_step_variables:
             lp: LaunchParameters = LaunchParameters(
                 project_id=project_id,
From 77e3cff78fcaa318c7cbbbca20a83fa5fd0de2b8 Mon Sep 17 00:00:00 2001
From: Alan Christie
Date: Wed, 27 Aug 2025 16:00:30 +0000
Subject: [PATCH 36/57] fix: Variable mapping now exposed as a Translation
 dataclass

---
 workflow/decoder.py            | 34 ++++++++++++++++++++++------------
 workflow/workflow_engine.py    | 11 ++++++-----
 workflow/workflow_validator.py | 10 +++++-----
 3 files changed, 33 insertions(+), 22 deletions(-)

diff --git a/workflow/decoder.py b/workflow/decoder.py
index d7e320f..78fb211 100644
--- a/workflow/decoder.py
+++ b/workflow/decoder.py
@@ -4,6 +4,7 @@
 """

 import os
+from dataclasses import dataclass
 from typing import Any

 import jsonschema
@@ -23,6 +24,14 @@
 assert _WORKFLOW_SCHEMA


+@dataclass
+class Translation:
+    """A source ("in_") to destination ("out") variable map."""
+
+    in_: str
+    out: str
+
+
 def validate_schema(workflow: dict[str, Any]) -> str | None:
     """Checks the Workflow Definition against the built-in schema.
     If there's an error the error text is returned, otherwise None.
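+# A usage sketch (illustrative only): given a "from-workflow" mapping entry
+# such as {"variable": "inputFile",
+#          "from-workflow": {"variable": "candidateMolecules"}}
+# get_step_workflow_variable_mapping() now yields
+#   [Translation(in_="candidateMolecules", out="inputFile")]
+# rather than the earlier ("candidateMolecules", "inputFile") tuple.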
@@ -107,28 +116,27 @@ def get_step_input_variable_names(
     return variable_names


-def get_step_workflow_variable_mapping(
-    *, step: dict[str, Any]
-) -> list[tuple[str, str]]:
+def get_step_workflow_variable_mapping(*, step: dict[str, Any]) -> list[Translation]:
     """Returns a list of workflow variable name to step variable name tuples
     for the given step."""
-    variable_mapping: list[tuple[str, str]] = []
+    variable_mapping: list[Translation] = []
     if "variable-mapping" in step:
         for v_map in step["variable-mapping"]:
             if "from-workflow" in v_map:
-                # Tuple is "from" -> "to"
                 variable_mapping.append(
-                    (v_map["from-workflow"]["variable"], v_map["variable"])
+                    Translation(
+                        in_=v_map["from-workflow"]["variable"], out=v_map["variable"]
+                    )
                 )
     return variable_mapping


 def get_step_prior_step_variable_mapping(
     *, step: dict[str, Any]
-) -> dict[str, list[tuple[str, str]]]:
-    """Returns list of tuples, indexed by prior step name,
-    of source step variable name to this step's variable name."""
-    variable_mapping: dict[str, list[tuple[str, str]]] = {}
+) -> dict[str, list[Translation]]:
+    """Returns list of translate objects, indexed by prior step name,
+    that identify source step variable name to this step's variable name."""
+    variable_mapping: dict[str, list[Translation]] = {}
     if "variable-mapping" in step:
         for v_map in step["variable-mapping"]:
             if "from-step" in v_map:
@@ -137,10 +145,12 @@
                 # Tuple is "from" -> "to"
                 if step_name in variable_mapping:
                     variable_mapping[step_name].append(
-                        (step_variable, v_map["variable"])
+                        Translation(in_=step_variable, out=v_map["variable"])
                     )
                 else:
-                    variable_mapping[step_name] = [(step_variable, v_map["variable"])]
+                    variable_mapping[step_name] = [
+                        Translation(in_=step_variable, out=v_map["variable"])
+                    ]
     return variable_mapping
diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py
index dcb2047..fbf200f 100644
--- a/workflow/workflow_engine.py
+++ b/workflow/workflow_engine.py
@@ -39,6 +39,7 @@
 )

 from .decoder import (
+    Translation,
     get_step_input_variable_names,
     get_step_prior_step_variable_mapping,
     get_step_replicator,
@@ -364,14 +365,14 @@
         # "in" variables are workflow variables, and "out" variables
         # are expected Job variables. We use this to add variables
         # to the "all variables" map.
-        for from_to in get_step_workflow_variable_mapping(step=step):
-            all_variables[from_to[1]] = running_workflow_variables[from_to[0]]
+        for tr in get_step_workflow_variable_mapping(step=step):
+            all_variables[tr.out] = running_workflow_variables[tr.in_]

         # Now we apply variables from the "variable mapping" block
         # related to values used in prior steps. The decoder gives
         # us a map indexed by prior step name that's a list of "in" "out"
         # tuples as above.
-        step_prior_v_map: dict[str, list[tuple[str, str]]] = (
+        step_prior_v_map: dict[str, list[Translation]] = (
             get_step_prior_step_variable_mapping(step=step)
         )
         for prior_step_name, v_map in step_prior_v_map.items():
@@ -381,8 +382,8 @@
             name=prior_step_name, running_workflow_id=running_workflow_id
         )
             # Copy "in" value to "out"...
- for from_to in v_map: - all_variables[from_to[1]] = prior_step["variables"][from_to[0]] + for tr in v_map: + all_variables[tr.out] = prior_step["variables"][tr.in_] _LOGGER.debug( "Index %s (%s) workflow_variables=%s", diff --git a/workflow/workflow_validator.py b/workflow/workflow_validator.py index 10a2482..1d94973 100644 --- a/workflow/workflow_validator.py +++ b/workflow/workflow_validator.py @@ -123,17 +123,17 @@ def _validate_tag_level( .get("variable") ): found: bool = False - for variable_map in get_step_workflow_variable_mapping(step=step): - if replicate_using_input == variable_map[0]: + for translation in get_step_workflow_variable_mapping(step=step): + if replicate_using_input == translation.out: found = True break if not found: for ( step_name, - variable_map_list, + translations, ) in get_step_prior_step_variable_mapping(step=step).items(): - for variable_map in variable_map_list: - if replicate_using_input == variable_map[0]: + for translation in translations: + if replicate_using_input == translation.out: found = True break if found: From 8f1c09865d15d6bf74cb6f43742fa5b094223ea7 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Thu, 28 Aug 2025 13:04:31 +0000 Subject: [PATCH 37/57] fix: Major refactoring of logic (for new launch/workflow API) --- tests/instance_launcher.py | 27 +- tests/test_test_instance_launcher.py | 28 +- tests/test_workflow_engine_examples.py | 1 + .../simple-python-fanout.yaml | 6 +- workflow/workflow_abc.py | 65 +---- workflow/workflow_engine.py | 241 ++++++------------ 6 files changed, 121 insertions(+), 247 deletions(-) diff --git a/tests/instance_launcher.py b/tests/instance_launcher.py index 3256c0b..c938e9e 100644 --- a/tests/instance_launcher.py +++ b/tests/instance_launcher.py @@ -68,7 +68,7 @@ def __init__( elif os.path.isdir(file_path): shutil.rmtree(file_path) - def launch(self, launch_parameters: LaunchParameters) -> LaunchResult: + def launch(self, *, launch_parameters: LaunchParameters) -> LaunchResult: assert launch_parameters assert launch_parameters.project_id == TEST_PROJECT_ID assert launch_parameters.specification @@ -76,10 +76,24 @@ def launch(self, launch_parameters: LaunchParameters) -> LaunchResult: os.makedirs(EXECUTION_DIRECTORY, exist_ok=True) - # Create an Instance record (and dummy Task ID) - response = self._api_adapter.create_instance( - running_workflow_step_id=launch_parameters.running_workflow_step_id + # Create a running workflow step + assert launch_parameters.running_workflow_id + assert launch_parameters.step_name + response, _ = self._api_adapter.create_running_workflow_step( + running_workflow_id=launch_parameters.running_workflow_id, + step=launch_parameters.step_name, + replica=launch_parameters.step_replication_number, ) + assert "id" in response + rwfs_id: str = response["id"] + # And add the variables we've been provided with + if launch_parameters.variables: + _ = self._api_adapter.set_running_workflow_step_variables( + running_workflow_step_id=rwfs_id, variables=launch_parameters.variables + ) + + # Create an Instance record (and dummy Task ID) + response = self._api_adapter.create_instance(running_workflow_step_id=rwfs_id) instance_id = response["id"] task_id = "task-00000000-0000-0000-0000-000000000001" @@ -96,8 +110,8 @@ def launch(self, launch_parameters: LaunchParameters) -> LaunchResult: # The command may not need any, but we do the decoding anyway. 
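        # (A note on the contract assumed here: decode() returns a 2-tuple of
        # rendered text and a success flag; when the flag is False the text is
        # an error message rather than a usable command.)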
decoded_command, status = job_decoder.decode( job["command"], - launch_parameters.specification_variables, - launch_parameters.running_workflow_step_id, + launch_parameters.variables, + rwfs_id, TextEncoding.JINJA2_3_0, ) print(f"Decoded command: {decoded_command}") @@ -129,6 +143,7 @@ def launch(self, launch_parameters: LaunchParameters) -> LaunchResult: self._msg_dispatcher.send(pod_message) return LaunchResult( + running_workflow_step_id=rwfs_id, instance_id=instance_id, task_id=task_id, command=" ".join(subprocess_cmd), diff --git a/tests/test_test_instance_launcher.py b/tests/test_test_instance_launcher.py index 38b8e06..33a34b1 100644 --- a/tests/test_test_instance_launcher.py +++ b/tests/test_test_instance_launcher.py @@ -33,23 +33,18 @@ def test_launch_nop(basic_launcher): project_id=TEST_PROJECT_ID, variables={}, ) - response, _ = utaa.create_running_workflow_step( - running_workflow_id=response["id"], step="step-1" - ) - rwfsid = response["id"] lp: LaunchParameters = LaunchParameters( project_id=TEST_PROJECT_ID, name="Test Instance", launching_user_name="dlister", launching_user_api_token="1234567890", running_workflow_id=rwfid, - running_workflow_step_id=rwfsid, + step_name="step-1", specification={"collection": "workflow-engine-unit-test-jobs", "job": "nop"}, - specification_variables={}, ) # Act - result = launcher.launch(lp) + result = launcher.launch(launch_parameters=lp) # Assert assert result.error_num == 0 @@ -69,26 +64,21 @@ def test_launch_nop_fail(basic_launcher): variables={}, ) rwfid = response["id"] - response, _ = utaa.create_running_workflow_step( - running_workflow_id=response["id"], step="step-1" - ) - rwfsid = response["id"] lp: LaunchParameters = LaunchParameters( project_id=TEST_PROJECT_ID, name="Test Instance", launching_user_name="dlister", launching_user_api_token="1234567890", running_workflow_id=rwfid, - running_workflow_step_id=rwfsid, + step_name="step-1", specification={ "collection": "workflow-engine-unit-test-jobs", "job": "nop-fail", }, - specification_variables={}, ) # Act - result = launcher.launch(lp) + result = launcher.launch(launch_parameters=lp) # Assert assert result.error_num == 0 @@ -108,26 +98,22 @@ def test_launch_smiles_to_file(basic_launcher): variables={}, ) rwfid = response["id"] - response, _ = utaa.create_running_workflow_step( - running_workflow_id=response["id"], step="step-1" - ) - rwfsid = response["id"] lp: LaunchParameters = LaunchParameters( project_id=TEST_PROJECT_ID, name="Test Instance", launching_user_name="dlister", launching_user_api_token="1234567890", running_workflow_id=rwfid, - running_workflow_step_id=rwfsid, + step_name="step-1", specification={ "collection": "workflow-engine-unit-test-jobs", "job": "smiles-to-file", }, - specification_variables={"smiles": "C1=CC=CC=C1", "outputFile": "output.smi"}, + variables={"smiles": "C1=CC=CC=C1", "outputFile": "output.smi"}, ) # Act - result = launcher.launch(lp) + result = launcher.launch(launch_parameters=lp) # Assert assert result.error_num == 0 diff --git a/tests/test_workflow_engine_examples.py b/tests/test_workflow_engine_examples.py index bcd2498..20985bf 100644 --- a/tests/test_workflow_engine_examples.py +++ b/tests/test_workflow_engine_examples.py @@ -398,6 +398,7 @@ def test_workflow_engine_simple_python_molprops_with_options(basic_engine): assert project_file_exists(output_file_2) +@pytest.mark.skip(reason="WIP") def test_workflow_engine_simple_python_fanout(basic_engine): # Arrange md, da = basic_engine diff --git 
a/tests/workflow-definitions/simple-python-fanout.yaml b/tests/workflow-definitions/simple-python-fanout.yaml index 1ad06bb..1adb7a6 100644 --- a/tests/workflow-definitions/simple-python-fanout.yaml +++ b/tests/workflow-definitions/simple-python-fanout.yaml @@ -13,8 +13,9 @@ steps: job: splitsmiles version: "1.0.0" variables: - name: "count" + name: count value: "1" + outputFile: results.smi variable-mapping: - variable: inputFile from-workflow: @@ -27,8 +28,9 @@ steps: job: append-col version: "1.0.0" variables: - name: "desc1" + name: desc1 value: "777" + outputFile: results.smi replicate: using: variable: inputFile diff --git a/workflow/workflow_abc.py b/workflow/workflow_abc.py index 4c4f6ff..0a0acc0 100644 --- a/workflow/workflow_abc.py +++ b/workflow/workflow_abc.py @@ -27,7 +27,7 @@ class LaunchParameters: specification: dict[str, Any] # An alternative way to pass variables to the specification. # If used it will replace any 'variables' already present in the specification. - specification_variables: dict[str, Any] | None = None + variables: dict[str, Any] | None = None # A string. In DM v4 converted to a boolean and set in the # instance Pod as a label. Setting this means the Instances # that are created will not be automatically removed by the Job operator. @@ -35,28 +35,15 @@ class LaunchParameters: # The RunningWorkflow UUID. # Required if the Instance is part of a Workflow step. running_workflow_id: str | None = None - # The RunningWorkflowStep UUID. + # The RunningWorkflow's step name. # Required if the Instance is part of a Workflow step. - running_workflow_step_id: str | None = None - # A list of prior workflow steps that this step depends upon. - # - # This list gives the InstanceLauncher an opportunity to take the outputs - # of a prior instance and link them to the instance directory for the - # instance to be launched. We need to do this for Workflows because Instances - # run as apart of a Workflow do not automatically have their outputs copied (linked) - # to the Project directory when they complete. As an example, a step that relies - # on the output files from two prior steps will provide the following list: - - # - # ["r-workflow-step-a04d", "r-workflow-step-d904"] - running_workflow_step_prior_steps: list[str] | None = None - # Workflow step Job inputs (for this step Instance). These Workflow Inputs (files) - # are a list of Job input variable names for file variables where the - # file is expected to be present in the Project directory. It is simply a list of - # Job variable names. The launcher is expected to find the 'value' of these - # variables and then move the file to the instance directory. - # - # ["inputFile"] - running_workflow_step_inputs: list[str] | None = None + step_name: str | None = None + # The step replication number. + # If only one instance of the step is expected to run + # this value can be left at 0 (zero). If this step's launch + # is expected to be executed more than once the value should be + # non-zero (and unique for this workflow run). + step_replication_number: int = 0 # The application ID (a custom resource name) # used to identify the 'type' of Instance to create. # For DM Jobs this will be 'datamanagerjobs.squonk.it' @@ -75,6 +62,9 @@ class LaunchResult: # The following optional properties # may not be present if there's a launch error. # + # A running workflow step UUID + # (if the step is part of a running workflow) + running_workflow_step_id: str | None = None # The Instance UUID that was created for you. 
instance_id: str | None = None
     # The Task UUID that is handling the Instance launch
@@ -94,7 +84,6 @@
     def launch(
         self,
         *,
         launch_parameters: LaunchParameters,
-        **kwargs: str,
     ) -> LaunchResult:
         """Launch a (Job) Instance"""

@@ -199,25 +188,6 @@
         """Set the success value for a RunningWorkflow Record.
         If not successful an error code and message should be provided."""

-    @abstractmethod
-    def create_running_workflow_step(
-        self,
-        *,
-        running_workflow_id: str,
-        step: str,
-        replica: int = 0,
-        prior_running_workflow_step_id: str | None = None,
-    ) -> tuple[dict[str, Any], int]:
-        """Create a RunningWorkflowStep Record (from a RunningWorkflow).
-        If this is a replica (concurrent execution) of a step the replica
-        value must be set to a value greater than 0. The replica is unique
-        for a given step and is used to distinguish between running workflow steps
-        generated from the same step name."""
-        # Should return:
-        # {
-        #   "id": "r-workflow-step-00000000-0000-0000-0000-000000000001",
-        # }
-
     @abstractmethod
     def get_running_workflow_step(
         self, *, running_workflow_step_id: str
@@ -292,17 +262,6 @@ def get_running_workflow_step_by_name(
     #   "id": "r-workflow-step-00000000-0000-0000-0000-000000000001",
     # },

-    @abstractmethod
-    def set_running_workflow_step_variables(
-        self,
-        *,
-        running_workflow_step_id: str,
-        variables: dict[str, Any],
-    ) -> None:
-        """Set the variables used prior to decoding the step command for each step.
-        This can be used to understand step failures but will also be vital
-        when adding variables values to subsequent steps from prior step values."""
-
     @abstractmethod
     def set_running_workflow_step_done(
         self,
diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py
index fbf200f..d4e8cca 100644
--- a/workflow/workflow_engine.py
+++ b/workflow/workflow_engine.py
@@ -24,7 +24,7 @@
 import logging
 import sys
-from typing import Any, Dict, Optional
+from typing import Any, Optional

 from decoder.decoder import TextEncoding, decode
 from google.protobuf.message import Message
@@ -40,9 +40,7 @@

 from .decoder import (
     Translation,
-    get_step_input_variable_names,
     get_step_prior_step_variable_mapping,
-    get_step_replicator,
     get_step_workflow_variable_mapping,
 )

@@ -123,28 +121,13 @@ def _handle_workflow_start_message(self, r_wfid: str) -> None:
         wf_response, _ = self._wapi_adapter.get_workflow(workflow_id=wfid)
         _LOGGER.debug("API.get_workflow(%s) returned: -\n%s", wfid, str(wf_response))

-        # Now find the first step,
-        # and create a corresponding RunningWorkflowStep record...
-        first_step: Dict[str, Any] = wf_response["steps"][0]
-        first_step_name: str = first_step["name"]
-        # We need this even if the following goes wrong.
-        response, _ = self._wapi_adapter.create_running_workflow_step(
-            running_workflow_id=r_wfid,
-            step=first_step_name,
-        )
-        _LOGGER.debug(
-            "API.create_running_workflow_step(%s, %s) returned: -\n%s",
-            r_wfid,
-            first_step_name,
-            str(response),
-        )
-        assert "id" in response
-        r_wfsid: str = response["id"]
+        # Now find the first step (index 0)...
+        first_step: dict[str, Any] = wf_response["steps"][0]

-        # Launch the first step.
+        # Launch it.
         # If there's a launch problem the step (and running workflow) will have
         # an error, stopping it. There will be no Pod event as the launch has failed.
- self._launch(wf=wf_response, rwf=rwf_response, rwfs_id=r_wfsid, step=first_step) + self._launch(rwf=rwf_response, step=first_step) def _handle_workflow_stop_message(self, r_wfid: str) -> None: """Logic to handle a STOP message.""" @@ -273,33 +256,14 @@ def _handle_pod_message(self, msg: PodMessage) -> None: launch_attempted: bool = False for step in wf_response["steps"]: if step["name"] == step_name: + step_index = wf_response["steps"].index(step) if step_index + 1 < len(wf_response["steps"]): - # There's another step - for this simple logic it is the next step. - + # There's another step! + # For this simple logic it is the next step. next_step = wf_response["steps"][step_index + 1] - next_step_name = next_step["name"] - rwfs_response, _ = self._wapi_adapter.create_running_workflow_step( - running_workflow_id=r_wfid, - step=next_step_name, - ) - assert "id" in rwfs_response - r_wfsid = rwfs_response["id"] - assert r_wfsid - _LOGGER.debug( - "API.create_running_workflow_step(%s, %s) returned: -\n%s", - r_wfid, - next_step_name, - str(response), - ) - - self._launch( - wf=wf_response, - rwf=rwf_response, - rwfs_id=r_wfsid, - step=next_step, - ) + self._launch(rwf=rwf_response, step=next_step) # Something was started (or there was a launch error and the step # and running workflow error will have been set). @@ -318,18 +282,12 @@ def _validate_step_command( self, *, running_workflow_id: str, - running_workflow_step_id: str, step: dict[str, Any], - our_step_index: int, running_workflow_variables: dict[str, Any], ) -> str | dict[str, Any]: """Returns an error message if the command isn't valid. Without a message we return all the variables that were (successfully) - applied to the command. - - We are also given a list of steps in workflow_steps and out position in - the list with our_step_index.""" - assert our_step_index >= 0 + applied to the command.""" # We get the Job from the step specification, which must contain # the keys "collection", "job", and "version". Here we assume that @@ -343,11 +301,10 @@ def _validate_step_command( collection=job_collection, job=job_job, version=job_version ) _LOGGER.debug( - "API.get_job(%s, %s, %s) for %s returned: -\n%s", + "API.get_job(%s, %s, %s) returned: -\n%s", job_collection, job_job, job_version, - running_workflow_step_id, str(job), ) @@ -361,11 +318,12 @@ def _validate_step_command( # 'workflow' or 'a prior step'. # Start with any workflow variables in the step. - # This will be a list of tuples of "in" and "out" variable names. + # This will be a list of Translations of "in" and "out" variable names. # "in" variables are worklfow variables, and "out" variables # are expected Job variables. We use this to add variables # to the "all variables" map. for tr in get_step_workflow_variable_mapping(step=step): + assert tr.in_ in running_workflow_variables all_variables[tr.out] = running_workflow_variables[tr.in_] # Now we apply variables from the "variable mapping" block @@ -382,134 +340,74 @@ def _validate_step_command( name=prior_step_name, running_workflow_id=running_workflow_id ) # Copy "in" value to "out"... 
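            # For example, with a mapping such as
            # Translation(in_="outputFile", out="inputFile") and a prior step
            # that recorded {"outputFile": "results.smi"}, this copy yields
            # all_variables["inputFile"] == "results.smi" (values here are
            # illustrative only).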
+            print(v_map)
+            print(prior_step["variables"])
             for tr in v_map:
+                assert tr.in_ in prior_step["variables"]
                 all_variables[tr.out] = prior_step["variables"][tr.in_]

-        _LOGGER.debug(
-            "Index %s (%s) workflow_variables=%s",
-            our_step_index,
-            running_workflow_step_id,
-            all_variables,
-        )
-
-        # Set the variables for this step (so they can be inspected on error)
-        self._wapi_adapter.set_running_workflow_step_variables(
-            running_workflow_step_id=running_workflow_step_id,
-            variables=all_variables,
-        )
-
         # Now ... can the command be compiled!?
         message, success = decode(
             job["command"], all_variables, "command", TextEncoding.JINJA2_3_0
         )
         return all_variables if success else message

-    def _launch(
-        self,
-        *,
-        wf: dict[str, Any],
-        rwf: dict[str, Any],
-        rwfs_id: str,
-        step: dict[str, Any],
-    ) -> None:
+    def _launch(self, *, rwf: dict[str, Any], step: dict[str, Any]) -> None:
         step_name: str = step["name"]
         rwf_id: str = rwf["id"]
+        project_id = rwf["project"]["id"]

-        _LOGGER.info("Validating step command: %s (step=%s)...", rwf_id, step_name)
-
-        # Get step data - importantly, giving us the sequence of steps in the response.
-        # Steps will be in wf_step_data["steps"] and our position in the list
-        # is wf_step_data["caller_step_index"]
-        wf_step_data, _ = self._wapi_adapter.get_workflow_steps_driving_this_step(
-            running_workflow_step_id=rwfs_id,
-        )
-        assert wf_step_data["caller_step_index"] >= 0
-        our_step_index: int = wf_step_data["caller_step_index"]
-
-        # A mojor pievce of work is to get ourselves into a position
+        # A major piece of work to accomplish is to get ourselves into a position
         # that allows us to check the step command can be executed.
-        # We do this by compiling a map of varibales we belive the step needs.
+        # We do this by compiling a map of variables we believe the step needs.

-        # Get all the workflow variables that were provided
-        # by the user when they "ran" the workflow...
+        # We start with all the workflow variables that were provided
+        # by the user when they "ran" the workflow. We're given a full set of
+        # variables in response (on success) or an error string (on failure).
         rwf_variables: dict[str, Any] = rwf.get("variables", {})
         error_or_variables: str | dict[str, Any] = self._validate_step_command(
             running_workflow_id=rwf_id,
-            running_workflow_step_id=rwfs_id,
             step=step,
-            our_step_index=our_step_index,
             running_workflow_variables=rwf_variables,
         )
         if isinstance(error_or_variables, str):
             error_msg = error_or_variables
             msg = f"Failed command validation error_msg={error_msg}"
             _LOGGER.warning(msg)
-            self._set_step_error(step_name, rwf_id, rwfs_id, 1, msg)
+            self._set_step_error(step_name, rwf_id, None, 1, msg)
             return

-        project_id = rwf["project"]["id"]
         variables: dict[str, Any] = error_or_variables
-
-        _LOGGER.info(
-            "Launching step: RunningWorkflow=%s RunningWorkflowStep=%s step=%s"
-            " variables=%s name=%s project=%s, (variables=%s)",
-            rwf_id,
-            rwfs_id,
-            step_name,
-            variables,
-            rwf["name"],
-            project_id,
-            variables,
-        )
-
+        num_replicas: int = 0
         # Is this a replicating step?
-        replicator = get_step_replicator(step=step)
-
-        # When we launch a step we need to identify all the prior steps in the workflow,
-        # those we depend on. The DataManager will then link their outputs to
-        # out instance directory. For simple workflows there is only one prior step,
-        # and it's the one immediately prior to this one.
-        #
-        # We put all the prior step IDs in:
-
-        # 'running_workflow_step_prior_steps'
-        # A list of step UUID strings.
- # - # In this 'simple' linear implementation that is simply the immediately - # preceding step. - prior_steps: list[str] = [] - if our_step_index > 0: - # We need the step ID of the prior step. - prior_step_name = wf_step_data["steps"][our_step_index - 1]["name"] - step_response, _ = self._wapi_adapter.get_running_workflow_step_by_name( - name=prior_step_name, - running_workflow_id=rwf_id, + # The number of 'replicas' is zero if the step is only launched once + # (i.e. there are no replicas). + + # replicator = get_step_replicator(step=step) + # if replicator: + # single_step_variables = [] + # for replicating_param in variables[replicator]: + # ssv = {**variables} + # ssv[replicator] = replicating_param + # single_step_variables.append(ssv) + # else: + # single_step_variables = [variables] + + assert num_replicas >= 0 + step_replication_number: int = 1 if num_replicas else 0 + for _ in range(1 + num_replicas): + + _LOGGER.info( + "Launching step: %s RunningWorkflow=%s (name=%s)" + " variables=%s project=%s (step_replication_number=%s)", + step_name, + rwf_id, + rwf["name"], + variables, + project_id, + step_replication_number, ) - assert "id" in step_response - prior_steps.append(step_response["id"]) - - # We must also identify workflow inputs that are required by the step we are - # about to launch and pass those using a launch parameter. The launcher - # will ensure these are copied into out instance directory before we are run. - # We cannot provide the variable values (even though we have them) because - # the DM passes input through 'InputHandlers', which may translate the value. - # So we have to pass the name and let the DM move the files after - # the InputHandler has run. - # - # 'running_workflow_step_inputs' - # A list of Job input variable names - - inputs: list[str] = [] - inputs.extend(iter(get_step_input_variable_names(wf, step_name))) - if replicator: - single_step_variables = [] - for replicating_param in variables[replicator]: - ssv = {**variables} - ssv[replicator] = replicating_param - single_step_variables.append(ssv) - else: - single_step_variables = [variables] - for params in single_step_variables: lp: LaunchParameters = LaunchParameters( project_id=project_id, name=step_name, @@ -517,25 +415,36 @@ def _launch( launching_user_name=rwf["running_user"], launching_user_api_token=rwf["running_user_api_token"], specification=step["specification"], - specification_variables=params, + variables=variables, running_workflow_id=rwf_id, - running_workflow_step_id=rwfs_id, - running_workflow_step_prior_steps=prior_steps, - running_workflow_step_inputs=inputs, + step_name=step_name, + step_replication_number=step_replication_number, ) lr: LaunchResult = self._instance_launcher.launch(launch_parameters=lp) + rwfs_id: str | None = lr.running_workflow_step_id + assert rwfs_id + if lr.error_num: self._set_step_error( step_name, rwf_id, rwfs_id, lr.error_num, lr.error_msg ) else: - _LOGGER.info("Launched step '%s' (command=%s)", step_name, lr.command) + _LOGGER.info( + "Launched step '%s' step_id=%s (command=%s)", + step_name, + rwfs_id, + lr.command, + ) + + # Do we need to increment the replication number? 
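+            # (A step launched exactly once keeps the number at 0; replicas
+            # are numbered 1..N, one unique value per launch, matching the
+            # LaunchParameters.step_replication_number contract above.)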
+ if num_replicas: + step_replication_number += 1 def _set_step_error( self, step_name: str, r_wfid: str, - r_wfsid: str, + r_wfsid: str | None, error_num: Optional[int], error_msg: Optional[str], ) -> None: @@ -548,12 +457,14 @@ def _set_step_error( error_msg, ) r_wf_error: str = f"Step '{step_name}' ERROR({error_num}): {error_msg}" - self._wapi_adapter.set_running_workflow_step_done( - running_workflow_step_id=r_wfsid, - success=False, - error_num=error_num, - error_msg=r_wf_error, - ) + # There may be a pre-step error (so assume the ID can also be None) + if r_wfsid: + self._wapi_adapter.set_running_workflow_step_done( + running_workflow_step_id=r_wfsid, + success=False, + error_num=error_num, + error_msg=r_wf_error, + ) # We must also set the running workflow as done (failed) self._wapi_adapter.set_running_workflow_done( running_workflow_id=r_wfid, From 7770d7f448742ecbaea87c2109ce203849aaa1fc Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Thu, 28 Aug 2025 15:34:22 +0000 Subject: [PATCH 38/57] feat: First successful replicating workflow test --- tests/test_decoder.py | 11 --- tests/test_workflow_engine_examples.py | 32 +------- .../test_workflow_validator_for_run_level.py | 22 ------ .../test_workflow_validator_for_tag_level.py | 22 ------ .../simple-python-fanout.yaml | 6 +- workflow/decoder.py | 58 ++++++++++++-- workflow/workflow_engine.py | 78 ++++++++++++------- 7 files changed, 111 insertions(+), 118 deletions(-) diff --git a/tests/test_decoder.py b/tests/test_decoder.py index 46c05d9..4958731 100644 --- a/tests/test_decoder.py +++ b/tests/test_decoder.py @@ -142,17 +142,6 @@ def test_validate_schema_for_step_specification_variable_names(): assert error is None -@pytest.mark.skip(reason="We do not support combination atm") -def test_validate_schema_for_simple_python_parallel(): - # Arrange - - # Act - error = decoder.validate_schema(_SIMPLE_PYTHON_PARALLEL_WORKFLOW) - - # Assert - assert error is None - - def test_get_workflow_variables_for_smiple_python_molprops(): # Arrange diff --git a/tests/test_workflow_engine_examples.py b/tests/test_workflow_engine_examples.py index 20985bf..9d07f13 100644 --- a/tests/test_workflow_engine_examples.py +++ b/tests/test_workflow_engine_examples.py @@ -398,41 +398,22 @@ def test_workflow_engine_simple_python_molprops_with_options(basic_engine): assert project_file_exists(output_file_2) -@pytest.mark.skip(reason="WIP") def test_workflow_engine_simple_python_fanout(basic_engine): # Arrange md, da = basic_engine da.mock_get_running_workflow_step_output_values_for_output( step_name="first-step", - output_variable="outputFile", + output_variable="outputBase", output=["chunk_1.smi", "chunk_2.smi"], ) - # da.mock_get_running_workflow_step_output_values_for_output( - # step_name="parallel-step", - # output_variable="outputFile", - # output=["chunk_1_proc.smi", "chunk_2_proc.smi"] - # ) - - # da.mock_get_running_workflow_step_output_values_for_output( - # step_name="final-step", - # output_variable="outputFile", - # output=["final-step.out.smi"], - # ) - # Make sure files that should be generated by the test # do not exist before we run the test. 
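    # (first-step is mocked above to report two 'outputBase' output values,
    # so the fan-out should produce exactly the two chunk files named below.)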
output_file_first = "chunk_1.smi" output_file_second = "chunk_2.smi" assert not project_file_exists(output_file_first) assert not project_file_exists(output_file_second) - output_file_p_first = "chunk_1_proc.smi" - output_file_p_second = "chunk_2_proc.smi" - assert not project_file_exists(output_file_p_first) - assert not project_file_exists(output_file_p_second) - # output_file_final = "final-step.out.smi" - # assert not project_file_exists(output_file_final) # And create the test's input file. input_file_1 = "input1.smi" input_file_1_content = """O=C(CSCc1ccc(Cl)s1)N1CCC(O)CC1 @@ -458,15 +439,10 @@ def test_workflow_engine_simple_python_fanout(basic_engine): print("response") pprint(response) - assert response["count"] == 2 + assert response["count"] == 3 assert response["running_workflow_steps"][0]["done"] assert response["running_workflow_steps"][0]["success"] assert response["running_workflow_steps"][1]["done"] assert response["running_workflow_steps"][1]["success"] - # assert response["running_workflow_steps"][2]["done"] - # assert response["running_workflow_steps"][2]["success"] - # assert response["running_workflow_steps"][3]["done"] - # assert response["running_workflow_steps"][3]["success"] - # This test should generate a file in the simulated project directory - # assert project_file_exists(output_file_first) - # assert project_file_exists(output_file_final) + assert response["running_workflow_steps"][2]["done"] + assert response["running_workflow_steps"][2]["success"] diff --git a/tests/test_workflow_validator_for_run_level.py b/tests/test_workflow_validator_for_run_level.py index 175d828..1be6694 100644 --- a/tests/test_workflow_validator_for_run_level.py +++ b/tests/test_workflow_validator_for_run_level.py @@ -217,28 +217,6 @@ def test_validate_simple_python_molprops_with_missing_input(): ] -@pytest.mark.skip("Unsupported workflow") -def test_validate_simple_python_parallel(): - # Arrange - workflow_filename: str = os.path.join( - os.path.dirname(__file__), - "workflow-definitions", - "simple-python-parallel.yaml", - ) - with open(workflow_filename, "r", encoding="utf8") as workflow_file: - workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) - assert workflow - - # Act - error = WorkflowValidator.validate( - level=ValidationLevel.TAG, - workflow_definition=workflow, - ) - - # Assert - assert error.error_num == 0 - - def test_validate_replicate_using_undeclared_input(): # Arrange workflow_filename: str = os.path.join( diff --git a/tests/test_workflow_validator_for_tag_level.py b/tests/test_workflow_validator_for_tag_level.py index 8a5d264..96e8e74 100644 --- a/tests/test_workflow_validator_for_tag_level.py +++ b/tests/test_workflow_validator_for_tag_level.py @@ -109,28 +109,6 @@ def test_validate_shortcut_example_1(): assert error.error_msg is None -@pytest.mark.skip("Unsupported workflow") -def test_validate_simple_python_parallel(): - # Arrange - workflow_filename: str = os.path.join( - os.path.dirname(__file__), - "workflow-definitions", - "simple-python-parallel.yaml", - ) - with open(workflow_filename, "r", encoding="utf8") as workflow_file: - workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) - assert workflow - - # Act - error = WorkflowValidator.validate( - level=ValidationLevel.TAG, - workflow_definition=workflow, - ) - - # Assert - assert error.error_num == 0 - - def test_validate_simple_python_molprops(): # Arrange workflow_filename: str = os.path.join( diff --git a/tests/workflow-definitions/simple-python-fanout.yaml 
b/tests/workflow-definitions/simple-python-fanout.yaml
index 1adb7a6..97ab108 100644
--- a/tests/workflow-definitions/simple-python-fanout.yaml
+++ b/tests/workflow-definitions/simple-python-fanout.yaml
@@ -7,7 +7,7 @@ description: >-
 steps:

 - name: first-step
-  description: Create inputs
+  description: Split an input file
   specification:
     collection: workflow-engine-unit-test-jobs
     job: splitsmiles
@@ -15,7 +15,7 @@ steps:
     variables:
       name: count
       value: "1"
-      outputFile: results.smi
+      outputBase: chunk
   variable-mapping:
   - variable: inputFile
     from-workflow:
@@ -38,6 +38,6 @@ steps:
   - variable: inputFile
     from-step:
       name: first-step
-      variable: outputFile
+      variable: outputBase
   out:
   - outputFile
diff --git a/workflow/decoder.py b/workflow/decoder.py
index 78fb211..91c9464 100644
--- a/workflow/decoder.py
+++ b/workflow/decoder.py
@@ -5,6 +5,7 @@
 import os

 from dataclasses import dataclass
+from enum import Enum
 from typing import Any

 import jsonschema
@@ -32,6 +33,26 @@ class Translation:
     out: str


+class ReplicationOrigin(Enum):
+    """Origin of a replication variable."""
+
+    STEP_VARIABLE = 1
+    WORKFLOW_VARIABLE = 2
+
+
+@dataclass
+class ReplicationDriver:
+    """A step's replication driver.
+    The 'variable' is the variable for the step-to-be-executed
+    whose value is 'driven' by the values of the 'source_variable'.
+    The source variable comes either from a step or from the workflow."""
+
+    origin: ReplicationOrigin
+    variable: str
+    source_variable: str
+    source_step_name: str | None = None
+
+
 def validate_schema(workflow: dict[str, Any]) -> str | None:
     """Checks the Workflow Definition against the built-in schema.
     If there's an error the error text is returned, otherwise None.
@@ -154,11 +175,36 @@ def get_step_prior_step_variable_mapping(
     return variable_mapping


-def get_step_replicator(*, step: dict[str, Any]) -> str | Any:
-    """Return step's replication info"""
-    replicator = step.get("replicate")
-    if replicator:
+def get_step_replication_driver(*, step: dict[str, Any]) -> ReplicationDriver | None:
+    """If the step is expected to replicate we return its replication driver,
+    which consists of a (prior) step name and an (output) variable name.
+    Otherwise None is returned."""
+    if replicator := step.get("replicate"):
+        # We need the variable we replicate against,
+        # and the step that owns the variable.
+        #
         # 'using' is a dict but there can be only single value for now
-        replicator = list(replicator["using"].values())[0]
+        variable: str = replicator["using"]["variable"]
+        source_variable: str | None = None
+        # Is the variable from a prior step?
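+        # For example, the fan-out definition above replicates using
+        # 'inputFile', which is mapped from first-step's 'outputBase',
+        # so the driver we return would be:
+        #
+        #   ReplicationDriver(
+        #       origin=ReplicationOrigin.STEP_VARIABLE,
+        #       variable="inputFile",
+        #       source_step_name="first-step",
+        #       source_variable="outputBase",
+        #   )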
+        step_name: str | None = None
+        step_v_map = get_step_prior_step_variable_mapping(step=step)
+        for step_name_candidate, mappings in step_v_map.items():
+            for mapping in mappings:
+                if mapping.out == variable:
+                    step_name = step_name_candidate
+                    source_variable = mapping.in_
+                    break
+            if step_name:
+                break
+        assert step_name
+        assert source_variable
+
+        return ReplicationDriver(
+            origin=ReplicationOrigin.STEP_VARIABLE,
+            variable=variable,
+            source_step_name=step_name,
+            source_variable=source_variable,
+        )

-    return replicator
+    return None
diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py
index d4e8cca..988829a 100644
--- a/workflow/workflow_engine.py
+++ b/workflow/workflow_engine.py
@@ -39,8 +39,11 @@
 )

 from .decoder import (
+    ReplicationDriver,
+    ReplicationOrigin,
     Translation,
     get_step_prior_step_variable_mapping,
+    get_step_replication_driver,
     get_step_workflow_variable_mapping,
 )

@@ -340,8 +343,6 @@ def _validate_step_command(
                 name=prior_step_name, running_workflow_id=running_workflow_id
             )
             # Copy "in" value to "out"...
-            print(v_map)
-            print(prior_step["variables"])
             for tr in v_map:
                 assert tr.in_ in prior_step["variables"]
                 all_variables[tr.out] = prior_step["variables"][tr.in_]
@@ -378,34 +379,63 @@ def _launch(self, *, rwf: dict[str, Any], step: dict[str, Any]) -> None:
             return

         variables: dict[str, Any] = error_or_variables
-        num_replicas: int = 0
-        # Is this a replicating step?
-        # The number of 'replicas' is zero if the step is only launched once
-        # (i.e. there are no replicas).
-
-        # replicator = get_step_replicator(step=step)
-        # if replicator:
-        #     single_step_variables = []
-        #     for replicating_param in variables[replicator]:
-        #         ssv = {**variables}
-        #         ssv[replicator] = replicating_param
-        #         single_step_variables.append(ssv)
-        # else:
-        #     single_step_variables = [variables]
-
-        assert num_replicas >= 0
-        step_replication_number: int = 1 if num_replicas else 0
-        for _ in range(1 + num_replicas):
+
+        # A replication number,
+        # used only for steps expected to replicate (even if just once).
+        step_replication_number: int = 0
+        # Does this step have a replicating driver?
+        r_driver: ReplicationDriver | None = get_step_replication_driver(step=step)
+        replication_values: list[str] = []
+        if r_driver:
+            if r_driver.origin == ReplicationOrigin.STEP_VARIABLE:
+                # We need to get the variable values from a prior step,
+                # so we need the prior step's running-workflow-step id.
+                assert r_driver.source_step_name
+                response, _ = self._wapi_adapter.get_running_workflow_step_by_name(
+                    name=r_driver.source_step_name,
+                    running_workflow_id=rwf_id,
+                )
+                assert "id" in response
+                o_rwfs_id: str = response["id"]
+                response, _ = (
+                    self._wapi_adapter.get_running_workflow_step_output_values_for_output(
+                        running_workflow_step_id=o_rwfs_id,
+                        output_variable=r_driver.source_variable,
+                    )
+                )
+                assert "output" in response
+                replication_values = response["output"]
+            else:
+                assert False, "Unsupported origin"
+
+        num_step_instances: int = max(1, len(replication_values))
+        for iteration in range(num_step_instances):
+
+            # If we are replicating this step then we must replace the step's
+            # variable with a value expected for this iteration.
+            if r_driver:
+                iter_variable: str = r_driver.variable
+                iter_value: str = replication_values[iteration]
+                _LOGGER.info(
+                    "Replicating step: %s iteration=%s variable=%s value=%s",
+                    step_name,
+                    iteration,
+                    iter_variable,
+                    iter_value,
+                )
+                # Over-write the replicating variable
+                # and set the replication number to a unique +ve non-zero value...
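+                # (For example, with replication_values of
+                # ["chunk_1.smi", "chunk_2.smi"], iteration 0 launches with
+                # the variable set to "chunk_1.smi" and replication number 1,
+                # and iteration 1 with "chunk_2.smi" and replication number 2.)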
+ variables[iter_variable] = iter_value + step_replication_number = iteration + 1 _LOGGER.info( "Launching step: %s RunningWorkflow=%s (name=%s)" - " variables=%s project=%s (step_replication_number=%s)", + " variables=%s project=%s", step_name, rwf_id, rwf["name"], variables, project_id, - step_replication_number, ) lp: LaunchParameters = LaunchParameters( @@ -436,10 +466,6 @@ def _launch(self, *, rwf: dict[str, Any], step: dict[str, Any]) -> None: lr.command, ) - # Do we need to increment the replication number? - if num_replicas: - step_replication_number += 1 - def _set_step_error( self, step_name: str, From c53b2459ab6edd2dfcef567e33d77028cbca6a7c Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Fri, 29 Aug 2025 16:20:26 +0000 Subject: [PATCH 39/57] feat: Use of decoder 2.4.0 (traits) --- poetry.lock | 543 ++++++++++++++++++++++++++++--------------------- pyproject.toml | 2 +- 2 files changed, 307 insertions(+), 238 deletions(-) diff --git a/poetry.lock b/poetry.lock index 7ca8cda..35b18b5 100644 --- a/poetry.lock +++ b/poetry.lock @@ -43,78 +43,99 @@ files = [ [[package]] name = "coverage" -version = "7.8.2" +version = "7.10.6" description = "Code coverage measurement for Python" optional = false python-versions = ">=3.9" files = [ - {file = "coverage-7.8.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bd8ec21e1443fd7a447881332f7ce9d35b8fbd2849e761bb290b584535636b0a"}, - {file = "coverage-7.8.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4c26c2396674816deaeae7ded0e2b42c26537280f8fe313335858ffff35019be"}, - {file = "coverage-7.8.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1aec326ed237e5880bfe69ad41616d333712c7937bcefc1343145e972938f9b3"}, - {file = "coverage-7.8.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5e818796f71702d7a13e50c70de2a1924f729228580bcba1607cccf32eea46e6"}, - {file = "coverage-7.8.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:546e537d9e24efc765c9c891328f30f826e3e4808e31f5d0f87c4ba12bbd1622"}, - {file = "coverage-7.8.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ab9b09a2349f58e73f8ebc06fac546dd623e23b063e5398343c5270072e3201c"}, - {file = "coverage-7.8.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:fd51355ab8a372d89fb0e6a31719e825cf8df8b6724bee942fb5b92c3f016ba3"}, - {file = "coverage-7.8.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:0774df1e093acb6c9e4d58bce7f86656aeed6c132a16e2337692c12786b32404"}, - {file = "coverage-7.8.2-cp310-cp310-win32.whl", hash = "sha256:00f2e2f2e37f47e5f54423aeefd6c32a7dbcedc033fcd3928a4f4948e8b96af7"}, - {file = "coverage-7.8.2-cp310-cp310-win_amd64.whl", hash = "sha256:145b07bea229821d51811bf15eeab346c236d523838eda395ea969d120d13347"}, - {file = "coverage-7.8.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b99058eef42e6a8dcd135afb068b3d53aff3921ce699e127602efff9956457a9"}, - {file = "coverage-7.8.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5feb7f2c3e6ea94d3b877def0270dff0947b8d8c04cfa34a17be0a4dc1836879"}, - {file = "coverage-7.8.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:670a13249b957bb9050fab12d86acef7bf8f6a879b9d1a883799276e0d4c674a"}, - {file = "coverage-7.8.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0bdc8bf760459a4a4187b452213e04d039990211f98644c7292adf1e471162b5"}, - {file = 
"coverage-7.8.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07a989c867986c2a75f158f03fdb413128aad29aca9d4dbce5fc755672d96f11"}, - {file = "coverage-7.8.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2db10dedeb619a771ef0e2949ccba7b75e33905de959c2643a4607bef2f3fb3a"}, - {file = "coverage-7.8.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e6ea7dba4e92926b7b5f0990634b78ea02f208d04af520c73a7c876d5a8d36cb"}, - {file = "coverage-7.8.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ef2f22795a7aca99fc3c84393a55a53dd18ab8c93fb431004e4d8f0774150f54"}, - {file = "coverage-7.8.2-cp311-cp311-win32.whl", hash = "sha256:641988828bc18a6368fe72355df5f1703e44411adbe49bba5644b941ce6f2e3a"}, - {file = "coverage-7.8.2-cp311-cp311-win_amd64.whl", hash = "sha256:8ab4a51cb39dc1933ba627e0875046d150e88478dbe22ce145a68393e9652975"}, - {file = "coverage-7.8.2-cp311-cp311-win_arm64.whl", hash = "sha256:8966a821e2083c74d88cca5b7dcccc0a3a888a596a04c0b9668a891de3a0cc53"}, - {file = "coverage-7.8.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e2f6fe3654468d061942591aef56686131335b7a8325684eda85dacdf311356c"}, - {file = "coverage-7.8.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:76090fab50610798cc05241bf83b603477c40ee87acd358b66196ab0ca44ffa1"}, - {file = "coverage-7.8.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2bd0a0a5054be160777a7920b731a0570284db5142abaaf81bcbb282b8d99279"}, - {file = "coverage-7.8.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:da23ce9a3d356d0affe9c7036030b5c8f14556bd970c9b224f9c8205505e3b99"}, - {file = "coverage-7.8.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9392773cffeb8d7e042a7b15b82a414011e9d2b5fdbbd3f7e6a6b17d5e21b20"}, - {file = "coverage-7.8.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:876cbfd0b09ce09d81585d266c07a32657beb3eaec896f39484b631555be0fe2"}, - {file = "coverage-7.8.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:3da9b771c98977a13fbc3830f6caa85cae6c9c83911d24cb2d218e9394259c57"}, - {file = "coverage-7.8.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9a990f6510b3292686713bfef26d0049cd63b9c7bb17e0864f133cbfd2e6167f"}, - {file = "coverage-7.8.2-cp312-cp312-win32.whl", hash = "sha256:bf8111cddd0f2b54d34e96613e7fbdd59a673f0cf5574b61134ae75b6f5a33b8"}, - {file = "coverage-7.8.2-cp312-cp312-win_amd64.whl", hash = "sha256:86a323a275e9e44cdf228af9b71c5030861d4d2610886ab920d9945672a81223"}, - {file = "coverage-7.8.2-cp312-cp312-win_arm64.whl", hash = "sha256:820157de3a589e992689ffcda8639fbabb313b323d26388d02e154164c57b07f"}, - {file = "coverage-7.8.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ea561010914ec1c26ab4188aef8b1567272ef6de096312716f90e5baa79ef8ca"}, - {file = "coverage-7.8.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cb86337a4fcdd0e598ff2caeb513ac604d2f3da6d53df2c8e368e07ee38e277d"}, - {file = "coverage-7.8.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26a4636ddb666971345541b59899e969f3b301143dd86b0ddbb570bd591f1e85"}, - {file = "coverage-7.8.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5040536cf9b13fb033f76bcb5e1e5cb3b57c4807fef37db9e0ed129c6a094257"}, - {file = 
"coverage-7.8.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc67994df9bcd7e0150a47ef41278b9e0a0ea187caba72414b71dc590b99a108"}, - {file = "coverage-7.8.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6e6c86888fd076d9e0fe848af0a2142bf606044dc5ceee0aa9eddb56e26895a0"}, - {file = "coverage-7.8.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:684ca9f58119b8e26bef860db33524ae0365601492e86ba0b71d513f525e7050"}, - {file = "coverage-7.8.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8165584ddedb49204c4e18da083913bdf6a982bfb558632a79bdaadcdafd0d48"}, - {file = "coverage-7.8.2-cp313-cp313-win32.whl", hash = "sha256:34759ee2c65362163699cc917bdb2a54114dd06d19bab860725f94ef45a3d9b7"}, - {file = "coverage-7.8.2-cp313-cp313-win_amd64.whl", hash = "sha256:2f9bc608fbafaee40eb60a9a53dbfb90f53cc66d3d32c2849dc27cf5638a21e3"}, - {file = "coverage-7.8.2-cp313-cp313-win_arm64.whl", hash = "sha256:9fe449ee461a3b0c7105690419d0b0aba1232f4ff6d120a9e241e58a556733f7"}, - {file = "coverage-7.8.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:8369a7c8ef66bded2b6484053749ff220dbf83cba84f3398c84c51a6f748a008"}, - {file = "coverage-7.8.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:159b81df53a5fcbc7d45dae3adad554fdbde9829a994e15227b3f9d816d00b36"}, - {file = "coverage-7.8.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e6fcbbd35a96192d042c691c9e0c49ef54bd7ed865846a3c9d624c30bb67ce46"}, - {file = "coverage-7.8.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:05364b9cc82f138cc86128dc4e2e1251c2981a2218bfcd556fe6b0fbaa3501be"}, - {file = "coverage-7.8.2-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46d532db4e5ff3979ce47d18e2fe8ecad283eeb7367726da0e5ef88e4fe64740"}, - {file = "coverage-7.8.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4000a31c34932e7e4fa0381a3d6deb43dc0c8f458e3e7ea6502e6238e10be625"}, - {file = "coverage-7.8.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:43ff5033d657cd51f83015c3b7a443287250dc14e69910577c3e03bd2e06f27b"}, - {file = "coverage-7.8.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:94316e13f0981cbbba132c1f9f365cac1d26716aaac130866ca812006f662199"}, - {file = "coverage-7.8.2-cp313-cp313t-win32.whl", hash = "sha256:3f5673888d3676d0a745c3d0e16da338c5eea300cb1f4ada9c872981265e76d8"}, - {file = "coverage-7.8.2-cp313-cp313t-win_amd64.whl", hash = "sha256:2c08b05ee8d7861e45dc5a2cc4195c8c66dca5ac613144eb6ebeaff2d502e73d"}, - {file = "coverage-7.8.2-cp313-cp313t-win_arm64.whl", hash = "sha256:1e1448bb72b387755e1ff3ef1268a06617afd94188164960dba8d0245a46004b"}, - {file = "coverage-7.8.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:496948261eaac5ac9cf43f5d0a9f6eb7a6d4cb3bedb2c5d294138142f5c18f2a"}, - {file = "coverage-7.8.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:eacd2de0d30871eff893bab0b67840a96445edcb3c8fd915e6b11ac4b2f3fa6d"}, - {file = "coverage-7.8.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b039ffddc99ad65d5078ef300e0c7eed08c270dc26570440e3ef18beb816c1ca"}, - {file = "coverage-7.8.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0e49824808d4375ede9dd84e9961a59c47f9113039f1a525e6be170aa4f5c34d"}, - {file = 
"coverage-7.8.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b069938961dfad881dc2f8d02b47645cd2f455d3809ba92a8a687bf513839787"}, - {file = "coverage-7.8.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:de77c3ba8bb686d1c411e78ee1b97e6e0b963fb98b1637658dd9ad2c875cf9d7"}, - {file = "coverage-7.8.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:1676628065a498943bd3f64f099bb573e08cf1bc6088bbe33cf4424e0876f4b3"}, - {file = "coverage-7.8.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:8e1a26e7e50076e35f7afafde570ca2b4d7900a491174ca357d29dece5aacee7"}, - {file = "coverage-7.8.2-cp39-cp39-win32.whl", hash = "sha256:6782a12bf76fa61ad9350d5a6ef5f3f020b57f5e6305cbc663803f2ebd0f270a"}, - {file = "coverage-7.8.2-cp39-cp39-win_amd64.whl", hash = "sha256:1efa4166ba75ccefd647f2d78b64f53f14fb82622bc94c5a5cb0a622f50f1c9e"}, - {file = "coverage-7.8.2-pp39.pp310.pp311-none-any.whl", hash = "sha256:ec455eedf3ba0bbdf8f5a570012617eb305c63cb9f03428d39bf544cb2b94837"}, - {file = "coverage-7.8.2-py3-none-any.whl", hash = "sha256:726f32ee3713f7359696331a18daf0c3b3a70bb0ae71141b9d3c52be7c595e32"}, - {file = "coverage-7.8.2.tar.gz", hash = "sha256:a886d531373a1f6ff9fad2a2ba4a045b68467b779ae729ee0b3b10ac20033b27"}, + {file = "coverage-7.10.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:70e7bfbd57126b5554aa482691145f798d7df77489a177a6bef80de78860a356"}, + {file = "coverage-7.10.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e41be6f0f19da64af13403e52f2dec38bbc2937af54df8ecef10850ff8d35301"}, + {file = "coverage-7.10.6-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:c61fc91ab80b23f5fddbee342d19662f3d3328173229caded831aa0bd7595460"}, + {file = "coverage-7.10.6-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:10356fdd33a7cc06e8051413140bbdc6f972137508a3572e3f59f805cd2832fd"}, + {file = "coverage-7.10.6-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:80b1695cf7c5ebe7b44bf2521221b9bb8cdf69b1f24231149a7e3eb1ae5fa2fb"}, + {file = "coverage-7.10.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:2e4c33e6378b9d52d3454bd08847a8651f4ed23ddbb4a0520227bd346382bbc6"}, + {file = "coverage-7.10.6-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:c8a3ec16e34ef980a46f60dc6ad86ec60f763c3f2fa0db6d261e6e754f72e945"}, + {file = "coverage-7.10.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7d79dabc0a56f5af990cc6da9ad1e40766e82773c075f09cc571e2076fef882e"}, + {file = "coverage-7.10.6-cp310-cp310-win32.whl", hash = "sha256:86b9b59f2b16e981906e9d6383eb6446d5b46c278460ae2c36487667717eccf1"}, + {file = "coverage-7.10.6-cp310-cp310-win_amd64.whl", hash = "sha256:e132b9152749bd33534e5bd8565c7576f135f157b4029b975e15ee184325f528"}, + {file = "coverage-7.10.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c706db3cabb7ceef779de68270150665e710b46d56372455cd741184f3868d8f"}, + {file = "coverage-7.10.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8e0c38dc289e0508ef68ec95834cb5d2e96fdbe792eaccaa1bccac3966bbadcc"}, + {file = "coverage-7.10.6-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:752a3005a1ded28f2f3a6e8787e24f28d6abe176ca64677bcd8d53d6fe2ec08a"}, + {file = "coverage-7.10.6-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:689920ecfd60f992cafca4f5477d55720466ad2c7fa29bb56ac8d44a1ac2b47a"}, + {file = 
"coverage-7.10.6-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ec98435796d2624d6905820a42f82149ee9fc4f2d45c2c5bc5a44481cc50db62"}, + {file = "coverage-7.10.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b37201ce4a458c7a758ecc4efa92fa8ed783c66e0fa3c42ae19fc454a0792153"}, + {file = "coverage-7.10.6-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:2904271c80898663c810a6b067920a61dd8d38341244a3605bd31ab55250dad5"}, + {file = "coverage-7.10.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:5aea98383463d6e1fa4e95416d8de66f2d0cb588774ee20ae1b28df826bcb619"}, + {file = "coverage-7.10.6-cp311-cp311-win32.whl", hash = "sha256:e3fb1fa01d3598002777dd259c0c2e6d9d5e10e7222976fc8e03992f972a2cba"}, + {file = "coverage-7.10.6-cp311-cp311-win_amd64.whl", hash = "sha256:f35ed9d945bece26553d5b4c8630453169672bea0050a564456eb88bdffd927e"}, + {file = "coverage-7.10.6-cp311-cp311-win_arm64.whl", hash = "sha256:99e1a305c7765631d74b98bf7dbf54eeea931f975e80f115437d23848ee8c27c"}, + {file = "coverage-7.10.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:5b2dd6059938063a2c9fee1af729d4f2af28fd1a545e9b7652861f0d752ebcea"}, + {file = "coverage-7.10.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:388d80e56191bf846c485c14ae2bc8898aa3124d9d35903fef7d907780477634"}, + {file = "coverage-7.10.6-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:90cb5b1a4670662719591aa92d0095bb41714970c0b065b02a2610172dbf0af6"}, + {file = "coverage-7.10.6-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:961834e2f2b863a0e14260a9a273aff07ff7818ab6e66d2addf5628590c628f9"}, + {file = "coverage-7.10.6-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bf9a19f5012dab774628491659646335b1928cfc931bf8d97b0d5918dd58033c"}, + {file = "coverage-7.10.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:99c4283e2a0e147b9c9cc6bc9c96124de9419d6044837e9799763a0e29a7321a"}, + {file = "coverage-7.10.6-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:282b1b20f45df57cc508c1e033403f02283adfb67d4c9c35a90281d81e5c52c5"}, + {file = "coverage-7.10.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8cdbe264f11afd69841bd8c0d83ca10b5b32853263ee62e6ac6a0ab63895f972"}, + {file = "coverage-7.10.6-cp312-cp312-win32.whl", hash = "sha256:a517feaf3a0a3eca1ee985d8373135cfdedfbba3882a5eab4362bda7c7cf518d"}, + {file = "coverage-7.10.6-cp312-cp312-win_amd64.whl", hash = "sha256:856986eadf41f52b214176d894a7de05331117f6035a28ac0016c0f63d887629"}, + {file = "coverage-7.10.6-cp312-cp312-win_arm64.whl", hash = "sha256:acf36b8268785aad739443fa2780c16260ee3fa09d12b3a70f772ef100939d80"}, + {file = "coverage-7.10.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ffea0575345e9ee0144dfe5701aa17f3ba546f8c3bb48db62ae101afb740e7d6"}, + {file = "coverage-7.10.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:95d91d7317cde40a1c249d6b7382750b7e6d86fad9d8eaf4fa3f8f44cf171e80"}, + {file = "coverage-7.10.6-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3e23dd5408fe71a356b41baa82892772a4cefcf758f2ca3383d2aa39e1b7a003"}, + {file = "coverage-7.10.6-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:0f3f56e4cb573755e96a16501a98bf211f100463d70275759e73f3cbc00d4f27"}, + {file = "coverage-7.10.6-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:db4a1d897bbbe7339946ffa2fe60c10cc81c43fab8b062d3fcb84188688174a4"}, + {file = "coverage-7.10.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d8fd7879082953c156d5b13c74aa6cca37f6a6f4747b39538504c3f9c63d043d"}, + {file = "coverage-7.10.6-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:28395ca3f71cd103b8c116333fa9db867f3a3e1ad6a084aa3725ae002b6583bc"}, + {file = "coverage-7.10.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:61c950fc33d29c91b9e18540e1aed7d9f6787cc870a3e4032493bbbe641d12fc"}, + {file = "coverage-7.10.6-cp313-cp313-win32.whl", hash = "sha256:160c00a5e6b6bdf4e5984b0ef21fc860bc94416c41b7df4d63f536d17c38902e"}, + {file = "coverage-7.10.6-cp313-cp313-win_amd64.whl", hash = "sha256:628055297f3e2aa181464c3808402887643405573eb3d9de060d81531fa79d32"}, + {file = "coverage-7.10.6-cp313-cp313-win_arm64.whl", hash = "sha256:df4ec1f8540b0bcbe26ca7dd0f541847cc8a108b35596f9f91f59f0c060bfdd2"}, + {file = "coverage-7.10.6-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:c9a8b7a34a4de3ed987f636f71881cd3b8339f61118b1aa311fbda12741bff0b"}, + {file = "coverage-7.10.6-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:8dd5af36092430c2b075cee966719898f2ae87b636cefb85a653f1d0ba5d5393"}, + {file = "coverage-7.10.6-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:b0353b0f0850d49ada66fdd7d0c7cdb0f86b900bb9e367024fd14a60cecc1e27"}, + {file = "coverage-7.10.6-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:d6b9ae13d5d3e8aeca9ca94198aa7b3ebbc5acfada557d724f2a1f03d2c0b0df"}, + {file = "coverage-7.10.6-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:675824a363cc05781b1527b39dc2587b8984965834a748177ee3c37b64ffeafb"}, + {file = "coverage-7.10.6-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:692d70ea725f471a547c305f0d0fc6a73480c62fb0da726370c088ab21aed282"}, + {file = "coverage-7.10.6-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:851430a9a361c7a8484a36126d1d0ff8d529d97385eacc8dfdc9bfc8c2d2cbe4"}, + {file = "coverage-7.10.6-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:d9369a23186d189b2fc95cc08b8160ba242057e887d766864f7adf3c46b2df21"}, + {file = "coverage-7.10.6-cp313-cp313t-win32.whl", hash = "sha256:92be86fcb125e9bda0da7806afd29a3fd33fdf58fba5d60318399adf40bf37d0"}, + {file = "coverage-7.10.6-cp313-cp313t-win_amd64.whl", hash = "sha256:6b3039e2ca459a70c79523d39347d83b73f2f06af5624905eba7ec34d64d80b5"}, + {file = "coverage-7.10.6-cp313-cp313t-win_arm64.whl", hash = "sha256:3fb99d0786fe17b228eab663d16bee2288e8724d26a199c29325aac4b0319b9b"}, + {file = "coverage-7.10.6-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:6008a021907be8c4c02f37cdc3ffb258493bdebfeaf9a839f9e71dfdc47b018e"}, + {file = "coverage-7.10.6-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:5e75e37f23eb144e78940b40395b42f2321951206a4f50e23cfd6e8a198d3ceb"}, + {file = "coverage-7.10.6-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:0f7cb359a448e043c576f0da00aa8bfd796a01b06aa610ca453d4dde09cc1034"}, + {file = "coverage-7.10.6-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:c68018e4fc4e14b5668f1353b41ccf4bc83ba355f0e1b3836861c6f042d89ac1"}, + {file = "coverage-7.10.6-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cd4b2b0707fc55afa160cd5fc33b27ccbf75ca11d81f4ec9863d5793fc6df56a"}, + {file = 
"coverage-7.10.6-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:4cec13817a651f8804a86e4f79d815b3b28472c910e099e4d5a0e8a3b6a1d4cb"}, + {file = "coverage-7.10.6-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:f2a6a8e06bbda06f78739f40bfb56c45d14eb8249d0f0ea6d4b3d48e1f7c695d"}, + {file = "coverage-7.10.6-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:081b98395ced0d9bcf60ada7661a0b75f36b78b9d7e39ea0790bb4ed8da14747"}, + {file = "coverage-7.10.6-cp314-cp314-win32.whl", hash = "sha256:6937347c5d7d069ee776b2bf4e1212f912a9f1f141a429c475e6089462fcecc5"}, + {file = "coverage-7.10.6-cp314-cp314-win_amd64.whl", hash = "sha256:adec1d980fa07e60b6ef865f9e5410ba760e4e1d26f60f7e5772c73b9a5b0713"}, + {file = "coverage-7.10.6-cp314-cp314-win_arm64.whl", hash = "sha256:a80f7aef9535442bdcf562e5a0d5a5538ce8abe6bb209cfbf170c462ac2c2a32"}, + {file = "coverage-7.10.6-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:0de434f4fbbe5af4fa7989521c655c8c779afb61c53ab561b64dcee6149e4c65"}, + {file = "coverage-7.10.6-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:6e31b8155150c57e5ac43ccd289d079eb3f825187d7c66e755a055d2c85794c6"}, + {file = "coverage-7.10.6-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:98cede73eb83c31e2118ae8d379c12e3e42736903a8afcca92a7218e1f2903b0"}, + {file = "coverage-7.10.6-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:f863c08f4ff6b64fa8045b1e3da480f5374779ef187f07b82e0538c68cb4ff8e"}, + {file = "coverage-7.10.6-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2b38261034fda87be356f2c3f42221fdb4171c3ce7658066ae449241485390d5"}, + {file = "coverage-7.10.6-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:0e93b1476b79eae849dc3872faeb0bf7948fd9ea34869590bc16a2a00b9c82a7"}, + {file = "coverage-7.10.6-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:ff8a991f70f4c0cf53088abf1e3886edcc87d53004c7bb94e78650b4d3dac3b5"}, + {file = "coverage-7.10.6-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:ac765b026c9f33044419cbba1da913cfb82cca1b60598ac1c7a5ed6aac4621a0"}, + {file = "coverage-7.10.6-cp314-cp314t-win32.whl", hash = "sha256:441c357d55f4936875636ef2cfb3bee36e466dcf50df9afbd398ce79dba1ebb7"}, + {file = "coverage-7.10.6-cp314-cp314t-win_amd64.whl", hash = "sha256:073711de3181b2e204e4870ac83a7c4853115b42e9cd4d145f2231e12d670930"}, + {file = "coverage-7.10.6-cp314-cp314t-win_arm64.whl", hash = "sha256:137921f2bac5559334ba66122b753db6dc5d1cf01eb7b64eb412bb0d064ef35b"}, + {file = "coverage-7.10.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:90558c35af64971d65fbd935c32010f9a2f52776103a259f1dee865fe8259352"}, + {file = "coverage-7.10.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8953746d371e5695405806c46d705a3cd170b9cc2b9f93953ad838f6c1e58612"}, + {file = "coverage-7.10.6-cp39-cp39-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:c83f6afb480eae0313114297d29d7c295670a41c11b274e6bca0c64540c1ce7b"}, + {file = "coverage-7.10.6-cp39-cp39-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7eb68d356ba0cc158ca535ce1381dbf2037fa8cb5b1ae5ddfc302e7317d04144"}, + {file = "coverage-7.10.6-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5b15a87265e96307482746d86995f4bff282f14b027db75469c446da6127433b"}, + {file = "coverage-7.10.6-cp39-cp39-musllinux_1_2_aarch64.whl", hash = 
"sha256:fc53ba868875bfbb66ee447d64d6413c2db91fddcfca57025a0e7ab5b07d5862"}, + {file = "coverage-7.10.6-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:efeda443000aa23f276f4df973cb82beca682fd800bb119d19e80504ffe53ec2"}, + {file = "coverage-7.10.6-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:9702b59d582ff1e184945d8b501ffdd08d2cee38d93a2206aa5f1365ce0b8d78"}, + {file = "coverage-7.10.6-cp39-cp39-win32.whl", hash = "sha256:2195f8e16ba1a44651ca684db2ea2b2d4b5345da12f07d9c22a395202a05b23c"}, + {file = "coverage-7.10.6-cp39-cp39-win_amd64.whl", hash = "sha256:f32ff80e7ef6a5b5b606ea69a36e97b219cd9dc799bcf2963018a4d8f788cfbf"}, + {file = "coverage-7.10.6-py3-none-any.whl", hash = "sha256:92c4ecf6bf11b2e85fd4d8204814dc26e6a19f0c9d938c207c5cb0eadfcabbe3"}, + {file = "coverage-7.10.6.tar.gz", hash = "sha256:f644a3ae5933a552a29dbb9aa2f90c677a875f80ebea028e5a52a4f429044b90"}, ] [package.extras] @@ -122,40 +143,35 @@ toml = ["tomli"] [[package]] name = "distlib" -version = "0.3.9" +version = "0.4.0" description = "Distribution utilities" optional = false python-versions = "*" files = [ - {file = "distlib-0.3.9-py2.py3-none-any.whl", hash = "sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87"}, - {file = "distlib-0.3.9.tar.gz", hash = "sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403"}, + {file = "distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16"}, + {file = "distlib-0.4.0.tar.gz", hash = "sha256:feec40075be03a04501a973d81f633735b4b69f98b05450592310c0f401a4e0d"}, ] [[package]] name = "filelock" -version = "3.18.0" +version = "3.19.1" description = "A platform independent file lock." optional = false python-versions = ">=3.9" files = [ - {file = "filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de"}, - {file = "filelock-3.18.0.tar.gz", hash = "sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2"}, + {file = "filelock-3.19.1-py3-none-any.whl", hash = "sha256:d38e30481def20772f5baf097c122c3babc4fcdb7e14e57049eb9d88c6dc017d"}, + {file = "filelock-3.19.1.tar.gz", hash = "sha256:66eda1888b0171c998b35be2bcc0f6d75c388a7ce20c3f3f37aa8e96c2dddf58"}, ] -[package.extras] -docs = ["furo (>=2024.8.6)", "sphinx (>=8.1.3)", "sphinx-autodoc-typehints (>=3)"] -testing = ["covdefaults (>=2.3)", "coverage (>=7.6.10)", "diff-cover (>=9.2.1)", "pytest (>=8.3.4)", "pytest-asyncio (>=0.25.2)", "pytest-cov (>=6)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.28.1)"] -typing = ["typing-extensions (>=4.12.2)"] - [[package]] name = "identify" -version = "2.6.12" +version = "2.6.13" description = "File identification library for Python" optional = false python-versions = ">=3.9" files = [ - {file = "identify-2.6.12-py2.py3-none-any.whl", hash = "sha256:ad9672d5a72e0d2ff7c5c8809b62dfa60458626352fb0eb7b55e69bdc45334a2"}, - {file = "identify-2.6.12.tar.gz", hash = "sha256:d8de45749f1efb108badef65ee8386f0f7bb19a7f26185f74de6367bffbaf0e6"}, + {file = "identify-2.6.13-py2.py3-none-any.whl", hash = "sha256:60381139b3ae39447482ecc406944190f690d4a2997f2584062089848361b33b"}, + {file = "identify-2.6.13.tar.gz", hash = "sha256:da8d6c828e773620e13bfa86ea601c5a5310ba4bcd65edf378198b56a1f9fb32"}, ] [package.extras] @@ -163,13 +179,13 @@ license = ["ukkonen"] [[package]] name = "im-data-manager-job-decoder" -version = "2.1.0" +version = "2.4.0" description = "Job decoding logic" optional = false python-versions = ">=3.10" 
files = [ - {file = "im_data_manager_job_decoder-2.1.0-py3-none-any.whl", hash = "sha256:b4eefdbdf3d7f5ccb9e154f1d737ca4d25f31e74a94d3a620c71a3752c49d4f8"}, - {file = "im_data_manager_job_decoder-2.1.0.tar.gz", hash = "sha256:11ce891837c7e152be241caac137df192764c06cf2ab6ce84890825bb8c12d25"}, + {file = "im_data_manager_job_decoder-2.4.0-py3-none-any.whl", hash = "sha256:4a911e2a8760dd381247f2f740b2e280a817dbdad65c65164dad97dfcf9058bf"}, + {file = "im_data_manager_job_decoder-2.4.0.tar.gz", hash = "sha256:34e7a8ac0421edc26760491ffd8b9183f0757ebc9e25dabf865235c936fad458"}, ] [package.dependencies] @@ -221,13 +237,13 @@ i18n = ["Babel (>=2.7)"] [[package]] name = "jsonschema" -version = "4.24.0" +version = "4.25.1" description = "An implementation of JSON Schema validation for Python" optional = false python-versions = ">=3.9" files = [ - {file = "jsonschema-4.24.0-py3-none-any.whl", hash = "sha256:a462455f19f5faf404a7902952b6f0e3ce868f3ee09a359b05eca6673bd8412d"}, - {file = "jsonschema-4.24.0.tar.gz", hash = "sha256:0b4e8069eb12aedfa881333004bccaec24ecef5a8a6a4b6df142b2cc9599d196"}, + {file = "jsonschema-4.25.1-py3-none-any.whl", hash = "sha256:3fba0169e345c7175110351d456342c364814cfcf3b964ba4587f22915230a63"}, + {file = "jsonschema-4.25.1.tar.gz", hash = "sha256:e4a9655ce0da0c0b67a085847e00a3a51449e1157f4f75e9fb5aa545e122eb85"}, ] [package.dependencies] @@ -238,7 +254,7 @@ rpds-py = ">=0.7.1" [package.extras] format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"] -format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "uri-template", "webcolors (>=24.6.0)"] +format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "rfc3987-syntax (>=1.1.0)", "uri-template", "webcolors (>=24.6.0)"] [[package]] name = "jsonschema-specifications" @@ -348,13 +364,13 @@ files = [ [[package]] name = "platformdirs" -version = "4.3.8" +version = "4.4.0" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." 
optional = false python-versions = ">=3.9" files = [ - {file = "platformdirs-4.3.8-py3-none-any.whl", hash = "sha256:ff7059bb7eb1179e2685604f4aaf157cfd9535242bd23742eadc3c13542139b4"}, - {file = "platformdirs-4.3.8.tar.gz", hash = "sha256:3d512d96e16bcb959a814c9f348431070822a6496326a4be0911c40b5a74c2bc"}, + {file = "platformdirs-4.4.0-py3-none-any.whl", hash = "sha256:abd01743f24e5287cd7a5db3752faf1a2d65353f38ec26d98e25a6db65958c85"}, + {file = "platformdirs-4.4.0.tar.gz", hash = "sha256:ca753cf4d81dc309bc67b0ea38fd15dc97bc30ce419a7f58d13eb3bf14c4febf"}, ] [package.extras] @@ -397,41 +413,56 @@ virtualenv = ">=20.10.0" [[package]] name = "protobuf" -version = "6.31.1" +version = "6.32.0" description = "" optional = false python-versions = ">=3.9" files = [ - {file = "protobuf-6.31.1-cp310-abi3-win32.whl", hash = "sha256:7fa17d5a29c2e04b7d90e5e32388b8bfd0e7107cd8e616feef7ed3fa6bdab5c9"}, - {file = "protobuf-6.31.1-cp310-abi3-win_amd64.whl", hash = "sha256:426f59d2964864a1a366254fa703b8632dcec0790d8862d30034d8245e1cd447"}, - {file = "protobuf-6.31.1-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:6f1227473dc43d44ed644425268eb7c2e488ae245d51c6866d19fe158e207402"}, - {file = "protobuf-6.31.1-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:a40fc12b84c154884d7d4c4ebd675d5b3b5283e155f324049ae396b95ddebc39"}, - {file = "protobuf-6.31.1-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:4ee898bf66f7a8b0bd21bce523814e6fbd8c6add948045ce958b73af7e8878c6"}, - {file = "protobuf-6.31.1-cp39-cp39-win32.whl", hash = "sha256:0414e3aa5a5f3ff423828e1e6a6e907d6c65c1d5b7e6e975793d5590bdeecc16"}, - {file = "protobuf-6.31.1-cp39-cp39-win_amd64.whl", hash = "sha256:8764cf4587791e7564051b35524b72844f845ad0bb011704c3736cce762d8fe9"}, - {file = "protobuf-6.31.1-py3-none-any.whl", hash = "sha256:720a6c7e6b77288b85063569baae8536671b39f15cc22037ec7045658d80489e"}, - {file = "protobuf-6.31.1.tar.gz", hash = "sha256:d8cac4c982f0b957a4dc73a80e2ea24fab08e679c0de9deb835f4a12d69aca9a"}, + {file = "protobuf-6.32.0-cp310-abi3-win32.whl", hash = "sha256:84f9e3c1ff6fb0308dbacb0950d8aa90694b0d0ee68e75719cb044b7078fe741"}, + {file = "protobuf-6.32.0-cp310-abi3-win_amd64.whl", hash = "sha256:a8bdbb2f009cfc22a36d031f22a625a38b615b5e19e558a7b756b3279723e68e"}, + {file = "protobuf-6.32.0-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d52691e5bee6c860fff9a1c86ad26a13afbeb4b168cd4445c922b7e2cf85aaf0"}, + {file = "protobuf-6.32.0-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:501fe6372fd1c8ea2a30b4d9be8f87955a64d6be9c88a973996cef5ef6f0abf1"}, + {file = "protobuf-6.32.0-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:75a2aab2bd1aeb1f5dc7c5f33bcb11d82ea8c055c9becbb41c26a8c43fd7092c"}, + {file = "protobuf-6.32.0-cp39-cp39-win32.whl", hash = "sha256:7db8ed09024f115ac877a1427557b838705359f047b2ff2f2b2364892d19dacb"}, + {file = "protobuf-6.32.0-cp39-cp39-win_amd64.whl", hash = "sha256:15eba1b86f193a407607112ceb9ea0ba9569aed24f93333fe9a497cf2fda37d3"}, + {file = "protobuf-6.32.0-py3-none-any.whl", hash = "sha256:ba377e5b67b908c8f3072a57b63e2c6a4cbd18aea4ed98d2584350dbf46f2783"}, + {file = "protobuf-6.32.0.tar.gz", hash = "sha256:a81439049127067fc49ec1d36e25c6ee1d1a2b7be930675f919258d03c04e7d2"}, ] +[[package]] +name = "pygments" +version = "2.19.2" +description = "Pygments is a syntax highlighting package written in Python." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b"}, + {file = "pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887"}, +] + +[package.extras] +windows-terminal = ["colorama (>=0.4.6)"] + [[package]] name = "pytest" -version = "8.3.5" +version = "8.4.1" description = "pytest: simple powerful testing with Python" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820"}, - {file = "pytest-8.3.5.tar.gz", hash = "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845"}, + {file = "pytest-8.4.1-py3-none-any.whl", hash = "sha256:539c70ba6fcead8e78eebbf1115e8b589e7565830d7d006a8723f19ac8a0afb7"}, + {file = "pytest-8.4.1.tar.gz", hash = "sha256:7c67fd69174877359ed9371ec3af8a3d2b04741818c51e5e99cc1742251fa93c"}, ] [package.dependencies] -colorama = {version = "*", markers = "sys_platform == \"win32\""} -iniconfig = "*" -packaging = "*" +colorama = {version = ">=0.4", markers = "sys_platform == \"win32\""} +iniconfig = ">=1" +packaging = ">=20" pluggy = ">=1.5,<2" +pygments = ">=2.7.2" [package.extras] -dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "requests", "setuptools", "xmlschema"] [[package]] name = "pyyaml" @@ -513,150 +544,188 @@ typing-extensions = {version = ">=4.4.0", markers = "python_version < \"3.13\""} [[package]] name = "rpds-py" -version = "0.25.1" +version = "0.27.1" description = "Python bindings to Rust's persistent data structures (rpds)" optional = false python-versions = ">=3.9" files = [ - {file = "rpds_py-0.25.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:f4ad628b5174d5315761b67f212774a32f5bad5e61396d38108bd801c0a8f5d9"}, - {file = "rpds_py-0.25.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8c742af695f7525e559c16f1562cf2323db0e3f0fbdcabdf6865b095256b2d40"}, - {file = "rpds_py-0.25.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:605ffe7769e24b1800b4d024d24034405d9404f0bc2f55b6db3362cd34145a6f"}, - {file = "rpds_py-0.25.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ccc6f3ddef93243538be76f8e47045b4aad7a66a212cd3a0f23e34469473d36b"}, - {file = "rpds_py-0.25.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f70316f760174ca04492b5ab01be631a8ae30cadab1d1081035136ba12738cfa"}, - {file = "rpds_py-0.25.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e1dafef8df605fdb46edcc0bf1573dea0d6d7b01ba87f85cd04dc855b2b4479e"}, - {file = "rpds_py-0.25.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0701942049095741a8aeb298a31b203e735d1c61f4423511d2b1a41dcd8a16da"}, - {file = "rpds_py-0.25.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e87798852ae0b37c88babb7f7bbbb3e3fecc562a1c340195b44c7e24d403e380"}, - {file = "rpds_py-0.25.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:3bcce0edc1488906c2d4c75c94c70a0417e83920dd4c88fec1078c94843a6ce9"}, - {file = "rpds_py-0.25.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e2f6a2347d3440ae789505693a02836383426249d5293541cd712e07e7aecf54"}, - {file = 
"rpds_py-0.25.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:4fd52d3455a0aa997734f3835cbc4c9f32571345143960e7d7ebfe7b5fbfa3b2"}, - {file = "rpds_py-0.25.1-cp310-cp310-win32.whl", hash = "sha256:3f0b1798cae2bbbc9b9db44ee068c556d4737911ad53a4e5093d09d04b3bbc24"}, - {file = "rpds_py-0.25.1-cp310-cp310-win_amd64.whl", hash = "sha256:3ebd879ab996537fc510a2be58c59915b5dd63bccb06d1ef514fee787e05984a"}, - {file = "rpds_py-0.25.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:5f048bbf18b1f9120685c6d6bb70cc1a52c8cc11bdd04e643d28d3be0baf666d"}, - {file = "rpds_py-0.25.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4fbb0dbba559959fcb5d0735a0f87cdbca9e95dac87982e9b95c0f8f7ad10255"}, - {file = "rpds_py-0.25.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4ca54b9cf9d80b4016a67a0193ebe0bcf29f6b0a96f09db942087e294d3d4c2"}, - {file = "rpds_py-0.25.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1ee3e26eb83d39b886d2cb6e06ea701bba82ef30a0de044d34626ede51ec98b0"}, - {file = "rpds_py-0.25.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:89706d0683c73a26f76a5315d893c051324d771196ae8b13e6ffa1ffaf5e574f"}, - {file = "rpds_py-0.25.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c2013ee878c76269c7b557a9a9c042335d732e89d482606990b70a839635feb7"}, - {file = "rpds_py-0.25.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45e484db65e5380804afbec784522de84fa95e6bb92ef1bd3325d33d13efaebd"}, - {file = "rpds_py-0.25.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:48d64155d02127c249695abb87d39f0faf410733428d499867606be138161d65"}, - {file = "rpds_py-0.25.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:048893e902132fd6548a2e661fb38bf4896a89eea95ac5816cf443524a85556f"}, - {file = "rpds_py-0.25.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:0317177b1e8691ab5879f4f33f4b6dc55ad3b344399e23df2e499de7b10a548d"}, - {file = "rpds_py-0.25.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bffcf57826d77a4151962bf1701374e0fc87f536e56ec46f1abdd6a903354042"}, - {file = "rpds_py-0.25.1-cp311-cp311-win32.whl", hash = "sha256:cda776f1967cb304816173b30994faaf2fd5bcb37e73118a47964a02c348e1bc"}, - {file = "rpds_py-0.25.1-cp311-cp311-win_amd64.whl", hash = "sha256:dc3c1ff0abc91444cd20ec643d0f805df9a3661fcacf9c95000329f3ddf268a4"}, - {file = "rpds_py-0.25.1-cp311-cp311-win_arm64.whl", hash = "sha256:5a3ddb74b0985c4387719fc536faced33cadf2172769540c62e2a94b7b9be1c4"}, - {file = "rpds_py-0.25.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:b5ffe453cde61f73fea9430223c81d29e2fbf412a6073951102146c84e19e34c"}, - {file = "rpds_py-0.25.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:115874ae5e2fdcfc16b2aedc95b5eef4aebe91b28e7e21951eda8a5dc0d3461b"}, - {file = "rpds_py-0.25.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a714bf6e5e81b0e570d01f56e0c89c6375101b8463999ead3a93a5d2a4af91fa"}, - {file = "rpds_py-0.25.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:35634369325906bcd01577da4c19e3b9541a15e99f31e91a02d010816b49bfda"}, - {file = "rpds_py-0.25.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d4cb2b3ddc16710548801c6fcc0cfcdeeff9dafbc983f77265877793f2660309"}, - {file = "rpds_py-0.25.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9ceca1cf097ed77e1a51f1dbc8d174d10cb5931c188a4505ff9f3e119dfe519b"}, - {file = 
"rpds_py-0.25.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c2cd1a4b0c2b8c5e31ffff50d09f39906fe351389ba143c195566056c13a7ea"}, - {file = "rpds_py-0.25.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1de336a4b164c9188cb23f3703adb74a7623ab32d20090d0e9bf499a2203ad65"}, - {file = "rpds_py-0.25.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9fca84a15333e925dd59ce01da0ffe2ffe0d6e5d29a9eeba2148916d1824948c"}, - {file = "rpds_py-0.25.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:88ec04afe0c59fa64e2f6ea0dd9657e04fc83e38de90f6de201954b4d4eb59bd"}, - {file = "rpds_py-0.25.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a8bd2f19e312ce3e1d2c635618e8a8d8132892bb746a7cf74780a489f0f6cdcb"}, - {file = "rpds_py-0.25.1-cp312-cp312-win32.whl", hash = "sha256:e5e2f7280d8d0d3ef06f3ec1b4fd598d386cc6f0721e54f09109a8132182fbfe"}, - {file = "rpds_py-0.25.1-cp312-cp312-win_amd64.whl", hash = "sha256:db58483f71c5db67d643857404da360dce3573031586034b7d59f245144cc192"}, - {file = "rpds_py-0.25.1-cp312-cp312-win_arm64.whl", hash = "sha256:6d50841c425d16faf3206ddbba44c21aa3310a0cebc3c1cdfc3e3f4f9f6f5728"}, - {file = "rpds_py-0.25.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:659d87430a8c8c704d52d094f5ba6fa72ef13b4d385b7e542a08fc240cb4a559"}, - {file = "rpds_py-0.25.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:68f6f060f0bbdfb0245267da014d3a6da9be127fe3e8cc4a68c6f833f8a23bb1"}, - {file = "rpds_py-0.25.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:083a9513a33e0b92cf6e7a6366036c6bb43ea595332c1ab5c8ae329e4bcc0a9c"}, - {file = "rpds_py-0.25.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:816568614ecb22b18a010c7a12559c19f6fe993526af88e95a76d5a60b8b75fb"}, - {file = "rpds_py-0.25.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3c6564c0947a7f52e4792983f8e6cf9bac140438ebf81f527a21d944f2fd0a40"}, - {file = "rpds_py-0.25.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5c4a128527fe415d73cf1f70a9a688d06130d5810be69f3b553bf7b45e8acf79"}, - {file = "rpds_py-0.25.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a49e1d7a4978ed554f095430b89ecc23f42014a50ac385eb0c4d163ce213c325"}, - {file = "rpds_py-0.25.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d74ec9bc0e2feb81d3f16946b005748119c0f52a153f6db6a29e8cd68636f295"}, - {file = "rpds_py-0.25.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:3af5b4cc10fa41e5bc64e5c198a1b2d2864337f8fcbb9a67e747e34002ce812b"}, - {file = "rpds_py-0.25.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:79dc317a5f1c51fd9c6a0c4f48209c6b8526d0524a6904fc1076476e79b00f98"}, - {file = "rpds_py-0.25.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1521031351865e0181bc585147624d66b3b00a84109b57fcb7a779c3ec3772cd"}, - {file = "rpds_py-0.25.1-cp313-cp313-win32.whl", hash = "sha256:5d473be2b13600b93a5675d78f59e63b51b1ba2d0476893415dfbb5477e65b31"}, - {file = "rpds_py-0.25.1-cp313-cp313-win_amd64.whl", hash = "sha256:a7b74e92a3b212390bdce1d93da9f6488c3878c1d434c5e751cbc202c5e09500"}, - {file = "rpds_py-0.25.1-cp313-cp313-win_arm64.whl", hash = "sha256:dd326a81afe332ede08eb39ab75b301d5676802cdffd3a8f287a5f0b694dc3f5"}, - {file = "rpds_py-0.25.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:a58d1ed49a94d4183483a3ce0af22f20318d4a1434acee255d683ad90bf78129"}, - {file = "rpds_py-0.25.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = 
"sha256:f251bf23deb8332823aef1da169d5d89fa84c89f67bdfb566c49dea1fccfd50d"}, - {file = "rpds_py-0.25.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8dbd586bfa270c1103ece2109314dd423df1fa3d9719928b5d09e4840cec0d72"}, - {file = "rpds_py-0.25.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6d273f136e912aa101a9274c3145dcbddbe4bac560e77e6d5b3c9f6e0ed06d34"}, - {file = "rpds_py-0.25.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:666fa7b1bd0a3810a7f18f6d3a25ccd8866291fbbc3c9b912b917a6715874bb9"}, - {file = "rpds_py-0.25.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:921954d7fbf3fccc7de8f717799304b14b6d9a45bbeec5a8d7408ccbf531faf5"}, - {file = "rpds_py-0.25.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3d86373ff19ca0441ebeb696ef64cb58b8b5cbacffcda5a0ec2f3911732a194"}, - {file = "rpds_py-0.25.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c8980cde3bb8575e7c956a530f2c217c1d6aac453474bf3ea0f9c89868b531b6"}, - {file = "rpds_py-0.25.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:8eb8c84ecea987a2523e057c0d950bcb3f789696c0499290b8d7b3107a719d78"}, - {file = "rpds_py-0.25.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:e43a005671a9ed5a650f3bc39e4dbccd6d4326b24fb5ea8be5f3a43a6f576c72"}, - {file = "rpds_py-0.25.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:58f77c60956501a4a627749a6dcb78dac522f249dd96b5c9f1c6af29bfacfb66"}, - {file = "rpds_py-0.25.1-cp313-cp313t-win32.whl", hash = "sha256:2cb9e5b5e26fc02c8a4345048cd9998c2aca7c2712bd1b36da0c72ee969a3523"}, - {file = "rpds_py-0.25.1-cp313-cp313t-win_amd64.whl", hash = "sha256:401ca1c4a20cc0510d3435d89c069fe0a9ae2ee6495135ac46bdd49ec0495763"}, - {file = "rpds_py-0.25.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:ce4c8e485a3c59593f1a6f683cf0ea5ab1c1dc94d11eea5619e4fb5228b40fbd"}, - {file = "rpds_py-0.25.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d8222acdb51a22929c3b2ddb236b69c59c72af4019d2cba961e2f9add9b6e634"}, - {file = "rpds_py-0.25.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4593c4eae9b27d22df41cde518b4b9e4464d139e4322e2127daa9b5b981b76be"}, - {file = "rpds_py-0.25.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bd035756830c712b64725a76327ce80e82ed12ebab361d3a1cdc0f51ea21acb0"}, - {file = "rpds_py-0.25.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:114a07e85f32b125404f28f2ed0ba431685151c037a26032b213c882f26eb908"}, - {file = "rpds_py-0.25.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dec21e02e6cc932538b5203d3a8bd6aa1480c98c4914cb88eea064ecdbc6396a"}, - {file = "rpds_py-0.25.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:09eab132f41bf792c7a0ea1578e55df3f3e7f61888e340779b06050a9a3f16e9"}, - {file = "rpds_py-0.25.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c98f126c4fc697b84c423e387337d5b07e4a61e9feac494362a59fd7a2d9ed80"}, - {file = "rpds_py-0.25.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:0e6a327af8ebf6baba1c10fadd04964c1965d375d318f4435d5f3f9651550f4a"}, - {file = "rpds_py-0.25.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:bc120d1132cff853ff617754196d0ac0ae63befe7c8498bd67731ba368abe451"}, - {file = "rpds_py-0.25.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:140f61d9bed7839446bdd44852e30195c8e520f81329b4201ceead4d64eb3a9f"}, - {file = 
"rpds_py-0.25.1-cp39-cp39-win32.whl", hash = "sha256:9c006f3aadeda131b438c3092124bd196b66312f0caa5823ef09585a669cf449"}, - {file = "rpds_py-0.25.1-cp39-cp39-win_amd64.whl", hash = "sha256:a61d0b2c7c9a0ae45732a77844917b427ff16ad5464b4d4f5e4adb955f582890"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b24bf3cd93d5b6ecfbedec73b15f143596c88ee249fa98cefa9a9dc9d92c6f28"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:0eb90e94f43e5085623932b68840b6f379f26db7b5c2e6bcef3179bd83c9330f"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d50e4864498a9ab639d6d8854b25e80642bd362ff104312d9770b05d66e5fb13"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7c9409b47ba0650544b0bb3c188243b83654dfe55dcc173a86832314e1a6a35d"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:796ad874c89127c91970652a4ee8b00d56368b7e00d3477f4415fe78164c8000"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:85608eb70a659bf4c1142b2781083d4b7c0c4e2c90eff11856a9754e965b2540"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4feb9211d15d9160bc85fa72fed46432cdc143eb9cf6d5ca377335a921ac37b"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ccfa689b9246c48947d31dd9d8b16d89a0ecc8e0e26ea5253068efb6c542b76e"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:3c5b317ecbd8226887994852e85de562f7177add602514d4ac40f87de3ae45a8"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:454601988aab2c6e8fd49e7634c65476b2b919647626208e376afcd22019eeb8"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:1c0c434a53714358532d13539272db75a5ed9df75a4a090a753ac7173ec14e11"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:f73ce1512e04fbe2bc97836e89830d6b4314c171587a99688082d090f934d20a"}, - {file = "rpds_py-0.25.1-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:ee86d81551ec68a5c25373c5643d343150cc54672b5e9a0cafc93c1870a53954"}, - {file = "rpds_py-0.25.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:89c24300cd4a8e4a51e55c31a8ff3918e6651b241ee8876a42cc2b2a078533ba"}, - {file = "rpds_py-0.25.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:771c16060ff4e79584dc48902a91ba79fd93eade3aa3a12d6d2a4aadaf7d542b"}, - {file = "rpds_py-0.25.1-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:785ffacd0ee61c3e60bdfde93baa6d7c10d86f15655bd706c89da08068dc5038"}, - {file = "rpds_py-0.25.1-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2a40046a529cc15cef88ac5ab589f83f739e2d332cb4d7399072242400ed68c9"}, - {file = "rpds_py-0.25.1-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:85fc223d9c76cabe5d0bff82214459189720dc135db45f9f66aa7cffbf9ff6c1"}, - {file = "rpds_py-0.25.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b0be9965f93c222fb9b4cc254235b3b2b215796c03ef5ee64f995b1b69af0762"}, - {file = "rpds_py-0.25.1-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:8378fa4a940f3fb509c081e06cb7f7f2adae8cf46ef258b0e0ed7519facd573e"}, - {file = "rpds_py-0.25.1-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:33358883a4490287e67a2c391dfaea4d9359860281db3292b6886bf0be3d8692"}, - {file = "rpds_py-0.25.1-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:1d1fadd539298e70cac2f2cb36f5b8a65f742b9b9f1014dd4ea1f7785e2470bf"}, - {file = "rpds_py-0.25.1-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:9a46c2fb2545e21181445515960006e85d22025bd2fe6db23e76daec6eb689fe"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:50f2c501a89c9a5f4e454b126193c5495b9fb441a75b298c60591d8a2eb92e1b"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:7d779b325cc8238227c47fbc53964c8cc9a941d5dbae87aa007a1f08f2f77b23"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:036ded36bedb727beeabc16dc1dad7cb154b3fa444e936a03b67a86dc6a5066e"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:245550f5a1ac98504147cba96ffec8fabc22b610742e9150138e5d60774686d7"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ff7c23ba0a88cb7b104281a99476cccadf29de2a0ef5ce864959a52675b1ca83"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e37caa8cdb3b7cf24786451a0bdb853f6347b8b92005eeb64225ae1db54d1c2b"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f2f48ab00181600ee266a095fe815134eb456163f7d6699f525dee471f312cf"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9e5fc7484fa7dce57e25063b0ec9638ff02a908304f861d81ea49273e43838c1"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:d3c10228d6cf6fe2b63d2e7985e94f6916fa46940df46b70449e9ff9297bd3d1"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:5d9e40f32745db28c1ef7aad23f6fc458dc1e29945bd6781060f0d15628b8ddf"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:35a8d1a24b5936b35c5003313bc177403d8bdef0f8b24f28b1c4a255f94ea992"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:6099263f526efff9cf3883dfef505518730f7a7a93049b1d90d42e50a22b4793"}, - {file = "rpds_py-0.25.1.tar.gz", hash = "sha256:8960b6dac09b62dac26e75d7e2c4a22efb835d827a7278c34f72b2b84fa160e3"}, + {file = "rpds_py-0.27.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:68afeec26d42ab3b47e541b272166a0b4400313946871cba3ed3a4fc0cab1cef"}, + {file = "rpds_py-0.27.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:74e5b2f7bb6fa38b1b10546d27acbacf2a022a8b5543efb06cfebc72a59c85be"}, + {file = "rpds_py-0.27.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9024de74731df54546fab0bfbcdb49fae19159ecaecfc8f37c18d2c7e2c0bd61"}, + {file = "rpds_py-0.27.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:31d3ebadefcd73b73928ed0b2fd696f7fefda8629229f81929ac9c1854d0cffb"}, + {file = "rpds_py-0.27.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b2e7f8f169d775dd9092a1743768d771f1d1300453ddfe6325ae3ab5332b4657"}, + {file = "rpds_py-0.27.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:3d905d16f77eb6ab2e324e09bfa277b4c8e5e6b8a78a3e7ff8f3cdf773b4c013"}, + {file = "rpds_py-0.27.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50c946f048209e6362e22576baea09193809f87687a95a8db24e5fbdb307b93a"}, + {file = "rpds_py-0.27.1-cp310-cp310-manylinux_2_31_riscv64.whl", hash = "sha256:3deab27804d65cd8289eb814c2c0e807c4b9d9916c9225e363cb0cf875eb67c1"}, + {file = "rpds_py-0.27.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8b61097f7488de4be8244c89915da8ed212832ccf1e7c7753a25a394bf9b1f10"}, + {file = "rpds_py-0.27.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:8a3f29aba6e2d7d90528d3c792555a93497fe6538aa65eb675b44505be747808"}, + {file = "rpds_py-0.27.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:dd6cd0485b7d347304067153a6dc1d73f7d4fd995a396ef32a24d24b8ac63ac8"}, + {file = "rpds_py-0.27.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:6f4461bf931108c9fa226ffb0e257c1b18dc2d44cd72b125bec50ee0ab1248a9"}, + {file = "rpds_py-0.27.1-cp310-cp310-win32.whl", hash = "sha256:ee5422d7fb21f6a00c1901bf6559c49fee13a5159d0288320737bbf6585bd3e4"}, + {file = "rpds_py-0.27.1-cp310-cp310-win_amd64.whl", hash = "sha256:3e039aabf6d5f83c745d5f9a0a381d031e9ed871967c0a5c38d201aca41f3ba1"}, + {file = "rpds_py-0.27.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:be898f271f851f68b318872ce6ebebbc62f303b654e43bf72683dbdc25b7c881"}, + {file = "rpds_py-0.27.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:62ac3d4e3e07b58ee0ddecd71d6ce3b1637de2d373501412df395a0ec5f9beb5"}, + {file = "rpds_py-0.27.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4708c5c0ceb2d034f9991623631d3d23cb16e65c83736ea020cdbe28d57c0a0e"}, + {file = "rpds_py-0.27.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:abfa1171a9952d2e0002aba2ad3780820b00cc3d9c98c6630f2e93271501f66c"}, + {file = "rpds_py-0.27.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4b507d19f817ebaca79574b16eb2ae412e5c0835542c93fe9983f1e432aca195"}, + {file = "rpds_py-0.27.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:168b025f8fd8d8d10957405f3fdcef3dc20f5982d398f90851f4abc58c566c52"}, + {file = "rpds_py-0.27.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb56c6210ef77caa58e16e8c17d35c63fe3f5b60fd9ba9d424470c3400bcf9ed"}, + {file = "rpds_py-0.27.1-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:d252f2d8ca0195faa707f8eb9368955760880b2b42a8ee16d382bf5dd807f89a"}, + {file = "rpds_py-0.27.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6e5e54da1e74b91dbc7996b56640f79b195d5925c2b78efaa8c5d53e1d88edde"}, + {file = "rpds_py-0.27.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ffce0481cc6e95e5b3f0a47ee17ffbd234399e6d532f394c8dce320c3b089c21"}, + {file = "rpds_py-0.27.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:a205fdfe55c90c2cd8e540ca9ceba65cbe6629b443bc05db1f590a3db8189ff9"}, + {file = "rpds_py-0.27.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:689fb5200a749db0415b092972e8eba85847c23885c8543a8b0f5c009b1a5948"}, + {file = "rpds_py-0.27.1-cp311-cp311-win32.whl", hash = "sha256:3182af66048c00a075010bc7f4860f33913528a4b6fc09094a6e7598e462fe39"}, + {file = "rpds_py-0.27.1-cp311-cp311-win_amd64.whl", hash = "sha256:b4938466c6b257b2f5c4ff98acd8128ec36b5059e5c8f8372d79316b1c36bb15"}, + {file = "rpds_py-0.27.1-cp311-cp311-win_arm64.whl", hash = 
"sha256:2f57af9b4d0793e53266ee4325535a31ba48e2f875da81a9177c9926dfa60746"}, + {file = "rpds_py-0.27.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:ae2775c1973e3c30316892737b91f9283f9908e3cc7625b9331271eaaed7dc90"}, + {file = "rpds_py-0.27.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2643400120f55c8a96f7c9d858f7be0c88d383cd4653ae2cf0d0c88f668073e5"}, + {file = "rpds_py-0.27.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:16323f674c089b0360674a4abd28d5042947d54ba620f72514d69be4ff64845e"}, + {file = "rpds_py-0.27.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9a1f4814b65eacac94a00fc9a526e3fdafd78e439469644032032d0d63de4881"}, + {file = "rpds_py-0.27.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7ba32c16b064267b22f1850a34051121d423b6f7338a12b9459550eb2096e7ec"}, + {file = "rpds_py-0.27.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5c20f33fd10485b80f65e800bbe5f6785af510b9f4056c5a3c612ebc83ba6cb"}, + {file = "rpds_py-0.27.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:466bfe65bd932da36ff279ddd92de56b042f2266d752719beb97b08526268ec5"}, + {file = "rpds_py-0.27.1-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:41e532bbdcb57c92ba3be62c42e9f096431b4cf478da9bc3bc6ce5c38ab7ba7a"}, + {file = "rpds_py-0.27.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f149826d742b406579466283769a8ea448eed82a789af0ed17b0cd5770433444"}, + {file = "rpds_py-0.27.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:80c60cfb5310677bd67cb1e85a1e8eb52e12529545441b43e6f14d90b878775a"}, + {file = "rpds_py-0.27.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:7ee6521b9baf06085f62ba9c7a3e5becffbc32480d2f1b351559c001c38ce4c1"}, + {file = "rpds_py-0.27.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a512c8263249a9d68cac08b05dd59d2b3f2061d99b322813cbcc14c3c7421998"}, + {file = "rpds_py-0.27.1-cp312-cp312-win32.whl", hash = "sha256:819064fa048ba01b6dadc5116f3ac48610435ac9a0058bbde98e569f9e785c39"}, + {file = "rpds_py-0.27.1-cp312-cp312-win_amd64.whl", hash = "sha256:d9199717881f13c32c4046a15f024971a3b78ad4ea029e8da6b86e5aa9cf4594"}, + {file = "rpds_py-0.27.1-cp312-cp312-win_arm64.whl", hash = "sha256:33aa65b97826a0e885ef6e278fbd934e98cdcfed80b63946025f01e2f5b29502"}, + {file = "rpds_py-0.27.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:e4b9fcfbc021633863a37e92571d6f91851fa656f0180246e84cbd8b3f6b329b"}, + {file = "rpds_py-0.27.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1441811a96eadca93c517d08df75de45e5ffe68aa3089924f963c782c4b898cf"}, + {file = "rpds_py-0.27.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55266dafa22e672f5a4f65019015f90336ed31c6383bd53f5e7826d21a0e0b83"}, + {file = "rpds_py-0.27.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d78827d7ac08627ea2c8e02c9e5b41180ea5ea1f747e9db0915e3adf36b62dcf"}, + {file = "rpds_py-0.27.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae92443798a40a92dc5f0b01d8a7c93adde0c4dc965310a29ae7c64d72b9fad2"}, + {file = "rpds_py-0.27.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c46c9dd2403b66a2a3b9720ec4b74d4ab49d4fabf9f03dfdce2d42af913fe8d0"}, + {file = "rpds_py-0.27.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2efe4eb1d01b7f5f1939f4ef30ecea6c6b3521eec451fb93191bf84b2a522418"}, + {file = 
"rpds_py-0.27.1-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:15d3b4d83582d10c601f481eca29c3f138d44c92187d197aff663a269197c02d"}, + {file = "rpds_py-0.27.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4ed2e16abbc982a169d30d1a420274a709949e2cbdef119fe2ec9d870b42f274"}, + {file = "rpds_py-0.27.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a75f305c9b013289121ec0f1181931975df78738cdf650093e6b86d74aa7d8dd"}, + {file = "rpds_py-0.27.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:67ce7620704745881a3d4b0ada80ab4d99df390838839921f99e63c474f82cf2"}, + {file = "rpds_py-0.27.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9d992ac10eb86d9b6f369647b6a3f412fc0075cfd5d799530e84d335e440a002"}, + {file = "rpds_py-0.27.1-cp313-cp313-win32.whl", hash = "sha256:4f75e4bd8ab8db624e02c8e2fc4063021b58becdbe6df793a8111d9343aec1e3"}, + {file = "rpds_py-0.27.1-cp313-cp313-win_amd64.whl", hash = "sha256:f9025faafc62ed0b75a53e541895ca272815bec18abe2249ff6501c8f2e12b83"}, + {file = "rpds_py-0.27.1-cp313-cp313-win_arm64.whl", hash = "sha256:ed10dc32829e7d222b7d3b93136d25a406ba9788f6a7ebf6809092da1f4d279d"}, + {file = "rpds_py-0.27.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:92022bbbad0d4426e616815b16bc4127f83c9a74940e1ccf3cfe0b387aba0228"}, + {file = "rpds_py-0.27.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:47162fdab9407ec3f160805ac3e154df042e577dd53341745fc7fb3f625e6d92"}, + {file = "rpds_py-0.27.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb89bec23fddc489e5d78b550a7b773557c9ab58b7946154a10a6f7a214a48b2"}, + {file = "rpds_py-0.27.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e48af21883ded2b3e9eb48cb7880ad8598b31ab752ff3be6457001d78f416723"}, + {file = "rpds_py-0.27.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6f5b7bd8e219ed50299e58551a410b64daafb5017d54bbe822e003856f06a802"}, + {file = "rpds_py-0.27.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:08f1e20bccf73b08d12d804d6e1c22ca5530e71659e6673bce31a6bb71c1e73f"}, + {file = "rpds_py-0.27.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0dc5dceeaefcc96dc192e3a80bbe1d6c410c469e97bdd47494a7d930987f18b2"}, + {file = "rpds_py-0.27.1-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:d76f9cc8665acdc0c9177043746775aa7babbf479b5520b78ae4002d889f5c21"}, + {file = "rpds_py-0.27.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:134fae0e36022edad8290a6661edf40c023562964efea0cc0ec7f5d392d2aaef"}, + {file = "rpds_py-0.27.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:eb11a4f1b2b63337cfd3b4d110af778a59aae51c81d195768e353d8b52f88081"}, + {file = "rpds_py-0.27.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:13e608ac9f50a0ed4faec0e90ece76ae33b34c0e8656e3dceb9a7db994c692cd"}, + {file = "rpds_py-0.27.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dd2135527aa40f061350c3f8f89da2644de26cd73e4de458e79606384f4f68e7"}, + {file = "rpds_py-0.27.1-cp313-cp313t-win32.whl", hash = "sha256:3020724ade63fe320a972e2ffd93b5623227e684315adce194941167fee02688"}, + {file = "rpds_py-0.27.1-cp313-cp313t-win_amd64.whl", hash = "sha256:8ee50c3e41739886606388ba3ab3ee2aae9f35fb23f833091833255a31740797"}, + {file = "rpds_py-0.27.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:acb9aafccaae278f449d9c713b64a9e68662e7799dbd5859e2c6b3c67b56d334"}, + {file = "rpds_py-0.27.1-cp314-cp314-macosx_11_0_arm64.whl", hash = 
"sha256:b7fb801aa7f845ddf601c49630deeeccde7ce10065561d92729bfe81bd21fb33"}, + {file = "rpds_py-0.27.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fe0dd05afb46597b9a2e11c351e5e4283c741237e7f617ffb3252780cca9336a"}, + {file = "rpds_py-0.27.1-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b6dfb0e058adb12d8b1d1b25f686e94ffa65d9995a5157afe99743bf7369d62b"}, + {file = "rpds_py-0.27.1-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ed090ccd235f6fa8bb5861684567f0a83e04f52dfc2e5c05f2e4b1309fcf85e7"}, + {file = "rpds_py-0.27.1-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bf876e79763eecf3e7356f157540d6a093cef395b65514f17a356f62af6cc136"}, + {file = "rpds_py-0.27.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:12ed005216a51b1d6e2b02a7bd31885fe317e45897de81d86dcce7d74618ffff"}, + {file = "rpds_py-0.27.1-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:ee4308f409a40e50593c7e3bb8cbe0b4d4c66d1674a316324f0c2f5383b486f9"}, + {file = "rpds_py-0.27.1-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0b08d152555acf1f455154d498ca855618c1378ec810646fcd7c76416ac6dc60"}, + {file = "rpds_py-0.27.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:dce51c828941973a5684d458214d3a36fcd28da3e1875d659388f4f9f12cc33e"}, + {file = "rpds_py-0.27.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:c1476d6f29eb81aa4151c9a31219b03f1f798dc43d8af1250a870735516a1212"}, + {file = "rpds_py-0.27.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:3ce0cac322b0d69b63c9cdb895ee1b65805ec9ffad37639f291dd79467bee675"}, + {file = "rpds_py-0.27.1-cp314-cp314-win32.whl", hash = "sha256:dfbfac137d2a3d0725758cd141f878bf4329ba25e34979797c89474a89a8a3a3"}, + {file = "rpds_py-0.27.1-cp314-cp314-win_amd64.whl", hash = "sha256:a6e57b0abfe7cc513450fcf529eb486b6e4d3f8aee83e92eb5f1ef848218d456"}, + {file = "rpds_py-0.27.1-cp314-cp314-win_arm64.whl", hash = "sha256:faf8d146f3d476abfee026c4ae3bdd9ca14236ae4e4c310cbd1cf75ba33d24a3"}, + {file = "rpds_py-0.27.1-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:ba81d2b56b6d4911ce735aad0a1d4495e808b8ee4dc58715998741a26874e7c2"}, + {file = "rpds_py-0.27.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:84f7d509870098de0e864cad0102711c1e24e9b1a50ee713b65928adb22269e4"}, + {file = "rpds_py-0.27.1-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9e960fc78fecd1100539f14132425e1d5fe44ecb9239f8f27f079962021523e"}, + {file = "rpds_py-0.27.1-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:62f85b665cedab1a503747617393573995dac4600ff51869d69ad2f39eb5e817"}, + {file = "rpds_py-0.27.1-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fed467af29776f6556250c9ed85ea5a4dd121ab56a5f8b206e3e7a4c551e48ec"}, + {file = "rpds_py-0.27.1-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f2729615f9d430af0ae6b36cf042cb55c0936408d543fb691e1a9e36648fd35a"}, + {file = "rpds_py-0.27.1-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b207d881a9aef7ba753d69c123a35d96ca7cb808056998f6b9e8747321f03b8"}, + {file = "rpds_py-0.27.1-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:639fd5efec029f99b79ae47e5d7e00ad8a773da899b6309f6786ecaf22948c48"}, + {file = "rpds_py-0.27.1-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fecc80cb2a90e28af8a9b366edacf33d7a91cbfe4c2c4544ea1246e949cfebeb"}, 
+ {file = "rpds_py-0.27.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:42a89282d711711d0a62d6f57d81aa43a1368686c45bc1c46b7f079d55692734"}, + {file = "rpds_py-0.27.1-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:cf9931f14223de59551ab9d38ed18d92f14f055a5f78c1d8ad6493f735021bbb"}, + {file = "rpds_py-0.27.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:f39f58a27cc6e59f432b568ed8429c7e1641324fbe38131de852cd77b2d534b0"}, + {file = "rpds_py-0.27.1-cp314-cp314t-win32.whl", hash = "sha256:d5fa0ee122dc09e23607a28e6d7b150da16c662e66409bbe85230e4c85bb528a"}, + {file = "rpds_py-0.27.1-cp314-cp314t-win_amd64.whl", hash = "sha256:6567d2bb951e21232c2f660c24cf3470bb96de56cdcb3f071a83feeaff8a2772"}, + {file = "rpds_py-0.27.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:c918c65ec2e42c2a78d19f18c553d77319119bf43aa9e2edf7fb78d624355527"}, + {file = "rpds_py-0.27.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1fea2b1a922c47c51fd07d656324531adc787e415c8b116530a1d29c0516c62d"}, + {file = "rpds_py-0.27.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bbf94c58e8e0cd6b6f38d8de67acae41b3a515c26169366ab58bdca4a6883bb8"}, + {file = "rpds_py-0.27.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c2a8fed130ce946d5c585eddc7c8eeef0051f58ac80a8ee43bd17835c144c2cc"}, + {file = "rpds_py-0.27.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:037a2361db72ee98d829bc2c5b7cc55598ae0a5e0ec1823a56ea99374cfd73c1"}, + {file = "rpds_py-0.27.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5281ed1cc1d49882f9997981c88df1a22e140ab41df19071222f7e5fc4e72125"}, + {file = "rpds_py-0.27.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fd50659a069c15eef8aa3d64bbef0d69fd27bb4a50c9ab4f17f83a16cbf8905"}, + {file = "rpds_py-0.27.1-cp39-cp39-manylinux_2_31_riscv64.whl", hash = "sha256:c4b676c4ae3921649a15d28ed10025548e9b561ded473aa413af749503c6737e"}, + {file = "rpds_py-0.27.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:079bc583a26db831a985c5257797b2b5d3affb0386e7ff886256762f82113b5e"}, + {file = "rpds_py-0.27.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:4e44099bd522cba71a2c6b97f68e19f40e7d85399de899d66cdb67b32d7cb786"}, + {file = "rpds_py-0.27.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:e202e6d4188e53c6661af813b46c37ca2c45e497fc558bacc1a7630ec2695aec"}, + {file = "rpds_py-0.27.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:f41f814b8eaa48768d1bb551591f6ba45f87ac76899453e8ccd41dba1289b04b"}, + {file = "rpds_py-0.27.1-cp39-cp39-win32.whl", hash = "sha256:9e71f5a087ead99563c11fdaceee83ee982fd39cf67601f4fd66cb386336ee52"}, + {file = "rpds_py-0.27.1-cp39-cp39-win_amd64.whl", hash = "sha256:71108900c9c3c8590697244b9519017a400d9ba26a36c48381b3f64743a44aab"}, + {file = "rpds_py-0.27.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:7ba22cb9693df986033b91ae1d7a979bc399237d45fccf875b76f62bb9e52ddf"}, + {file = "rpds_py-0.27.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:5b640501be9288c77738b5492b3fd3abc4ba95c50c2e41273c8a1459f08298d3"}, + {file = "rpds_py-0.27.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb08b65b93e0c6dd70aac7f7890a9c0938d5ec71d5cb32d45cf844fb8ae47636"}, + {file = "rpds_py-0.27.1-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d7ff07d696a7a38152ebdb8212ca9e5baab56656749f3d6004b34ab726b550b8"}, + {file = 
"rpds_py-0.27.1-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fb7c72262deae25366e3b6c0c0ba46007967aea15d1eea746e44ddba8ec58dcc"}, + {file = "rpds_py-0.27.1-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7b002cab05d6339716b03a4a3a2ce26737f6231d7b523f339fa061d53368c9d8"}, + {file = "rpds_py-0.27.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23f6b69d1c26c4704fec01311963a41d7de3ee0570a84ebde4d544e5a1859ffc"}, + {file = "rpds_py-0.27.1-pp310-pypy310_pp73-manylinux_2_31_riscv64.whl", hash = "sha256:530064db9146b247351f2a0250b8f00b289accea4596a033e94be2389977de71"}, + {file = "rpds_py-0.27.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7b90b0496570bd6b0321724a330d8b545827c4df2034b6ddfc5f5275f55da2ad"}, + {file = "rpds_py-0.27.1-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:879b0e14a2da6a1102a3fc8af580fc1ead37e6d6692a781bd8c83da37429b5ab"}, + {file = "rpds_py-0.27.1-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:0d807710df3b5faa66c731afa162ea29717ab3be17bdc15f90f2d9f183da4059"}, + {file = "rpds_py-0.27.1-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:3adc388fc3afb6540aec081fa59e6e0d3908722771aa1e37ffe22b220a436f0b"}, + {file = "rpds_py-0.27.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:c796c0c1cc68cb08b0284db4229f5af76168172670c74908fdbd4b7d7f515819"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:cdfe4bb2f9fe7458b7453ad3c33e726d6d1c7c0a72960bcc23800d77384e42df"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:8fabb8fd848a5f75a2324e4a84501ee3a5e3c78d8603f83475441866e60b94a3"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eda8719d598f2f7f3e0f885cba8646644b55a187762bec091fa14a2b819746a9"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3c64d07e95606ec402a0a1c511fe003873fa6af630bda59bac77fac8b4318ebc"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:93a2ed40de81bcff59aabebb626562d48332f3d028ca2036f1d23cbb52750be4"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:387ce8c44ae94e0ec50532d9cb0edce17311024c9794eb196b90e1058aadeb66"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aaf94f812c95b5e60ebaf8bfb1898a7d7cb9c1af5744d4a67fa47796e0465d4e"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-manylinux_2_31_riscv64.whl", hash = "sha256:4848ca84d6ded9b58e474dfdbad4b8bfb450344c0551ddc8d958bf4b36aa837c"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2bde09cbcf2248b73c7c323be49b280180ff39fadcfe04e7b6f54a678d02a7cf"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:94c44ee01fd21c9058f124d2d4f0c9dc7634bec93cd4b38eefc385dabe71acbf"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:df8b74962e35c9249425d90144e721eed198e6555a0e22a563d29fe4486b51f6"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:dc23e6820e3b40847e2f4a7726462ba0cf53089512abe9ee16318c366494c17a"}, + {file = "rpds_py-0.27.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = 
"sha256:aa8933159edc50be265ed22b401125c9eebff3171f570258854dbce3ecd55475"}, + {file = "rpds_py-0.27.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:a50431bf02583e21bf273c71b89d710e7a710ad5e39c725b14e685610555926f"}, + {file = "rpds_py-0.27.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78af06ddc7fe5cc0e967085a9115accee665fb912c22a3f54bad70cc65b05fe6"}, + {file = "rpds_py-0.27.1-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:70d0738ef8fee13c003b100c2fbd667ec4f133468109b3472d249231108283a3"}, + {file = "rpds_py-0.27.1-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e2f6fd8a1cea5bbe599b6e78a6e5ee08db434fc8ffea51ff201c8765679698b3"}, + {file = "rpds_py-0.27.1-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8177002868d1426305bb5de1e138161c2ec9eb2d939be38291d7c431c4712df8"}, + {file = "rpds_py-0.27.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:008b839781d6c9bf3b6a8984d1d8e56f0ec46dc56df61fd669c49b58ae800400"}, + {file = "rpds_py-0.27.1-pp39-pypy39_pp73-manylinux_2_31_riscv64.whl", hash = "sha256:a55b9132bb1ade6c734ddd2759c8dc132aa63687d259e725221f106b83a0e485"}, + {file = "rpds_py-0.27.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a46fdec0083a26415f11d5f236b79fa1291c32aaa4a17684d82f7017a1f818b1"}, + {file = "rpds_py-0.27.1-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:8a63b640a7845f2bdd232eb0d0a4a2dd939bcdd6c57e6bb134526487f3160ec5"}, + {file = "rpds_py-0.27.1-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:7e32721e5d4922deaaf963469d795d5bde6093207c52fec719bd22e5d1bedbc4"}, + {file = "rpds_py-0.27.1-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:2c426b99a068601b5f4623573df7a7c3d72e87533a2dd2253353a03e7502566c"}, + {file = "rpds_py-0.27.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:4fc9b7fe29478824361ead6e14e4f5aed570d477e06088826537e202d25fe859"}, + {file = "rpds_py-0.27.1.tar.gz", hash = "sha256:26a1c73171d10b7acccbded82bf6a586ab8203601e565badc74bbbf8bc5a10f8"}, ] [[package]] name = "typing-extensions" -version = "4.13.2" -description = "Backported and Experimental Type Hints for Python 3.8+" +version = "4.15.0" +description = "Backported and Experimental Type Hints for Python 3.9+" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "typing_extensions-4.13.2-py3-none-any.whl", hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c"}, - {file = "typing_extensions-4.13.2.tar.gz", hash = "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef"}, + {file = "typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548"}, + {file = "typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466"}, ] [[package]] name = "virtualenv" -version = "20.31.2" +version = "20.34.0" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.8" files = [ - {file = "virtualenv-20.31.2-py3-none-any.whl", hash = "sha256:36efd0d9650ee985f0cad72065001e66d49a6f24eb44d98980f630686243cf11"}, - {file = "virtualenv-20.31.2.tar.gz", hash = "sha256:e10c0a9d02835e592521be48b332b6caee6887f332c111aa79a09b9e79efc2af"}, + {file = "virtualenv-20.34.0-py3-none-any.whl", hash = "sha256:341f5afa7eee943e4984a9207c025feedd768baff6753cd660c857ceb3e36026"}, + {file 
= "virtualenv-20.34.0.tar.gz", hash = "sha256:44815b2c9dee7ed86e387b842a84f20b93f7f417f95886ca1996a72a4138eb1a"}, ] [package.dependencies] @@ -671,4 +740,4 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess [metadata] lock-version = "2.0" python-versions = "^3.12" -content-hash = "53c45992ce1109262a0db6e79aced43423e6fd83798b0b2bf45acca1bfc6d056" +content-hash = "fd2a28449c2fa3c9e20e3589fc27e0e773815be05e9e43a871d583a56a02dbb6" diff --git a/pyproject.toml b/pyproject.toml index f2ea162..c1deb22 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,7 @@ packages = [ [tool.poetry.dependencies] python = "^3.12" im-protobuf = "^8.2.0" -im-data-manager-job-decoder = "^2.1.0" +im-data-manager-job-decoder = "^2.4.0" jsonschema = "^4.21.1" pyyaml = ">= 5.3.1, < 7.0" From 3883412969c3657632aaf6294d664590f60ea596 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Mon, 1 Sep 2025 16:11:29 +0000 Subject: [PATCH 40/57] refactor: Switch away from workflow replicate property --- poetry.lock | 8 +-- pyproject.toml | 2 +- tests/test_workflow_engine_examples.py | 1 + .../test_workflow_validator_for_run_level.py | 24 -------- .../test_workflow_validator_for_tag_level.py | 24 -------- .../simple-python-fanout.yaml | 3 - workflow/decoder.py | 56 ------------------- workflow/workflow-schema.yaml | 19 ------- workflow/workflow_engine.py | 38 +++---------- workflow/workflow_validator.py | 35 ------------ 10 files changed, 13 insertions(+), 197 deletions(-) diff --git a/poetry.lock b/poetry.lock index 35b18b5..716a33c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -179,13 +179,13 @@ license = ["ukkonen"] [[package]] name = "im-data-manager-job-decoder" -version = "2.4.0" +version = "2.5.0" description = "Job decoding logic" optional = false python-versions = ">=3.10" files = [ - {file = "im_data_manager_job_decoder-2.4.0-py3-none-any.whl", hash = "sha256:4a911e2a8760dd381247f2f740b2e280a817dbdad65c65164dad97dfcf9058bf"}, - {file = "im_data_manager_job_decoder-2.4.0.tar.gz", hash = "sha256:34e7a8ac0421edc26760491ffd8b9183f0757ebc9e25dabf865235c936fad458"}, + {file = "im_data_manager_job_decoder-2.5.0-py3-none-any.whl", hash = "sha256:d177a37083b73c82d71c137cd36ab3bf54de0a4ab5ab55e5aec49acb238b86f6"}, + {file = "im_data_manager_job_decoder-2.5.0.tar.gz", hash = "sha256:1a0523ccead3ad851dcf6a450ec1792be1830d20a938d3ddfdf04ffcdf915a47"}, ] [package.dependencies] @@ -740,4 +740,4 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess [metadata] lock-version = "2.0" python-versions = "^3.12" -content-hash = "fd2a28449c2fa3c9e20e3589fc27e0e773815be05e9e43a871d583a56a02dbb6" +content-hash = "341541770454fac78492e8f33f1aca9418582886dfe2d24af99cc06dbd1b7137" diff --git a/pyproject.toml b/pyproject.toml index c1deb22..cde7f6b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,7 @@ packages = [ [tool.poetry.dependencies] python = "^3.12" im-protobuf = "^8.2.0" -im-data-manager-job-decoder = "^2.4.0" +im-data-manager-job-decoder = "^2.5.0" jsonschema = "^4.21.1" pyyaml = ">= 5.3.1, < 7.0" diff --git a/tests/test_workflow_engine_examples.py b/tests/test_workflow_engine_examples.py index 9d07f13..40e0573 100644 --- a/tests/test_workflow_engine_examples.py +++ b/tests/test_workflow_engine_examples.py @@ -398,6 +398,7 @@ def test_workflow_engine_simple_python_molprops_with_options(basic_engine): assert project_file_exists(output_file_2) +@pytest.mark.skip(reason="WIP") def test_workflow_engine_simple_python_fanout(basic_engine): # Arrange md, da = 
basic_engine diff --git a/tests/test_workflow_validator_for_run_level.py b/tests/test_workflow_validator_for_run_level.py index 1be6694..e76239d 100644 --- a/tests/test_workflow_validator_for_run_level.py +++ b/tests/test_workflow_validator_for_run_level.py @@ -217,30 +217,6 @@ def test_validate_simple_python_molprops_with_missing_input(): ] -def test_validate_replicate_using_undeclared_input(): - # Arrange - workflow_filename: str = os.path.join( - os.path.dirname(__file__), - "workflow-definitions", - "replicate-using-undeclared-input.yaml", - ) - with open(workflow_filename, "r", encoding="utf8") as workflow_file: - workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) - assert workflow - - # Act - error = WorkflowValidator.validate( - level=ValidationLevel.TAG, - workflow_definition=workflow, - ) - - # Assert - assert error.error_num == 7 - assert error.error_msg == [ - "Replicate input variable is not declared: y (step=step-2)" - ] - - def test_validate_duplicate_step_output_variable_names(): # Arrange workflow_filename: str = os.path.join( diff --git a/tests/test_workflow_validator_for_tag_level.py b/tests/test_workflow_validator_for_tag_level.py index 96e8e74..4c1719d 100644 --- a/tests/test_workflow_validator_for_tag_level.py +++ b/tests/test_workflow_validator_for_tag_level.py @@ -151,30 +151,6 @@ def test_validate_simple_python_molprops_with_options(): assert error.error_msg is None -def test_validate_replicate_using_undeclared_input(): - # Arrange - workflow_filename: str = os.path.join( - os.path.dirname(__file__), - "workflow-definitions", - "replicate-using-undeclared-input.yaml", - ) - with open(workflow_filename, "r", encoding="utf8") as workflow_file: - workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) - assert workflow - - # Act - error = WorkflowValidator.validate( - level=ValidationLevel.TAG, - workflow_definition=workflow, - ) - - # Assert - assert error.error_num == 7 - assert error.error_msg == [ - "Replicate input variable is not declared: y (step=step-2)" - ] - - def test_validate_duplicate_step_output_variable_names(): # Arrange workflow_filename: str = os.path.join( diff --git a/tests/workflow-definitions/simple-python-fanout.yaml b/tests/workflow-definitions/simple-python-fanout.yaml index 97ab108..0a50216 100644 --- a/tests/workflow-definitions/simple-python-fanout.yaml +++ b/tests/workflow-definitions/simple-python-fanout.yaml @@ -31,9 +31,6 @@ steps: name: desc1 value: "777" outputFile: results.smi - replicate: - using: - variable: inputFile variable-mapping: - variable: inputFile from-step: diff --git a/workflow/decoder.py b/workflow/decoder.py index 91c9464..674d1ad 100644 --- a/workflow/decoder.py +++ b/workflow/decoder.py @@ -5,7 +5,6 @@ import os from dataclasses import dataclass -from enum import Enum from typing import Any import jsonschema @@ -33,26 +32,6 @@ class Translation: out: str -class ReplicationOrigin(Enum): - """Oirgin of a replication variable.""" - - STEP_VARIABLE = 1 - WORKFLOW_VARIABLE = 2 - - -@dataclass -class ReplicationDriver: - """A step's replication driver. - The 'variable' is the variable for the step-to-be-executed - whose value is 'driven' by the values of the 'source_variable'. - The source variable is either from a step (or a workflow).""" - - origin: ReplicationOrigin - variable: str - source_variable: str - source_step_name: str | None = None - - def validate_schema(workflow: dict[str, Any]) -> str | None: """Checks the Workflow Definition against the built-in schema. 
If there's an error the error text is returned, otherwise None. @@ -173,38 +152,3 @@ def get_step_prior_step_variable_mapping( Translation(in_=step_variable, out=v_map["variable"]) ] return variable_mapping - - -def get_step_replication_driver(*, step: dict[str, Any]) -> ReplicationDriver | None: - """If the step is expected to replicate we return its replication driver, - which consists of a (prior) step name and an (output) variable name. - Otherwise it returns nothing.""" - if replicator := step.get("replicate"): - # We need the variable we replicate against, - # and the step that owns the variable. - # - # 'using' is a dict but there can be only single value for now - variable: str = replicator["using"]["variable"] - source_variable: str | None = None - # Is the variable from a prior step? - step_name: str | None = None - step_v_map = get_step_prior_step_variable_mapping(step=step) - for step_name_candidate, mappings in step_v_map.items(): - for mapping in mappings: - if mapping.out == variable: - step_name = step_name_candidate - source_variable = mapping.in_ - break - if step_name: - break - assert step_name - assert source_variable - - return ReplicationDriver( - origin=ReplicationOrigin.STEP_VARIABLE, - variable=variable, - source_step_name=step_name, - source_variable=source_variable, - ) - - return None diff --git a/workflow/workflow-schema.yaml b/workflow/workflow-schema.yaml index 27c726c..90acb29 100644 --- a/workflow/workflow-schema.yaml +++ b/workflow/workflow-schema.yaml @@ -59,17 +59,6 @@ definitions: type: string pattern: ^[a-zA-Z_][a-zA-Z0-9_]*$ - # A step replication control variable - # that is based on a step variable - replicate-using-variable: - type: object - additionalProperties: false - properties: - variable: - $ref: '#/definitions/variable-name' - required: - - variable - # A Step variable # (whose value is derived from a variable used in a prior step) step-variable-from-step: @@ -167,14 +156,6 @@ definitions: # The format of this is essentially idenical to the specification # used when a Job is launched via the DM API. $ref: '#/definitions/step-specification' - replicate: - # Used to indicate one input variable that is used to replicate/spawn - # step instances based on the number of values generated for the variable. - type: object - additionalProperties: false - properties: - using: - $ref: '#/definitions/replicate-using-variable' variable-mapping: # The map of the source of the step's variables. # all variables the step needs (that aren;t already in the specification) diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index 988829a..d33751f 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -39,11 +39,8 @@ ) from .decoder import ( - ReplicationDriver, - ReplicationOrigin, Translation, get_step_prior_step_variable_mapping, - get_step_replication_driver, get_step_workflow_variable_mapping, ) @@ -380,41 +377,20 @@ def _launch(self, *, rwf: dict[str, Any], step: dict[str, Any]) -> None: variables: dict[str, Any] = error_or_variables - # A replication number, - # use only for steps expected to replicate (even if just once) + # A step replication number, + # used only for steps expected to run in parallel (even if just once) step_replication_number: int = 0 - # Does this step have a replicating driver? 
- r_driver: ReplicationDriver | None = get_step_replication_driver(step=step) replication_values: list[str] = [] - if r_driver: - if r_driver.origin == ReplicationOrigin.STEP_VARIABLE: - # We need to get the variable values from a prior step - # We need the prior steps running-workflow-step-id - assert r_driver.source_step_name - response, _ = self._wapi_adapter.get_running_workflow_step_by_name( - name=r_driver.source_step_name, - running_workflow_id=rwf_id, - ) - assert "id" in response - o_rwfs_id: str = response["id"] - response, _ = ( - self._wapi_adapter.get_running_workflow_step_output_values_for_output( - running_workflow_step_id=o_rwfs_id, - output_variable=r_driver.source_variable, - ) - ) - assert "output" in response - replication_values = response["output"] - else: - assert False, "Unsupported origin" + source_is_splitter: bool = False + iter_variable: str | None = None num_step_instances: int = max(1, len(replication_values)) for iteration in range(num_step_instances): # If we are replicating this step then we must replace the step's variable # with a value expected for this iteration. - if r_driver: - iter_variable: str = r_driver.variable + if source_is_splitter: + assert iter_variable iter_value: str = replication_values[iteration] _LOGGER.info( "Replicating step: %s iteration=%s variable=%s value=%s", @@ -424,7 +400,7 @@ def _launch(self, *, rwf: dict[str, Any], step: dict[str, Any]) -> None: iter_value, ) # Over-write the replicating variable - # and set the replication numebr to a unique +ve non-zero value... + # and set the replication number to a unique +ve non-zero value... variables[iter_variable] = iter_value step_replication_number = iteration + 1 diff --git a/workflow/workflow_validator.py b/workflow/workflow_validator.py index 1d94973..4a646d3 100644 --- a/workflow/workflow_validator.py +++ b/workflow/workflow_validator.py @@ -6,8 +6,6 @@ from .decoder import ( get_step_output_variable_names, - get_step_prior_step_variable_mapping, - get_step_workflow_variable_mapping, get_steps, get_workflow_variable_names, validate_schema, @@ -113,39 +111,6 @@ def _validate_tag_level( error_num=2, error_msg=[f"Duplicate step names found: {', '.join(duplicate_names)}"], ) - # For each 'replicating' step the replicating variable - # must be declared in the step - which is either a workflow variable - # or a prior step variable. 
- for step in get_steps(workflow_definition):
-        if (
-            replicate_using_input := step.get("replicate", {})
-            .get("using", {})
-            .get("variable")
-        ):
-            found: bool = False
-            for translation in get_step_workflow_variable_mapping(step=step):
-                if replicate_using_input == translation.out:
-                    found = True
-                    break
-            if not found:
-                for (
-                    step_name,
-                    translations,
-                ) in get_step_prior_step_variable_mapping(step=step).items():
-                    for translation in translations:
-                        if replicate_using_input == translation.out:
-                            found = True
-                            break
-                    if found:
-                        break
-            if not found:
-                return ValidationResult(
-                    error_num=7,
-                    error_msg=[
-                        "Replicate input variable is not declared:"
-                        f" {replicate_using_input} (step={step["name"]})"
-                    ],
-                )
 
     return _VALIDATION_SUCCESS

From 6397955b44e564bcf327d7b3c54d9199d2b47f69 Mon Sep 17 00:00:00 2001
From: Alan Christie 
Date: Tue, 2 Sep 2025 09:45:09 +0000
Subject: [PATCH 41/57] refactor: Refactored using decoder 2.5.0

---
 tests/job-definitions/job-definitions.yaml |  7 ++
 tests/test_workflow_engine_examples.py     |  1 -
 workflow/decoder.py                        | 19 ++++-
 workflow/workflow_engine.py                | 87 +++++++++++++++++-----
 4 files changed, 89 insertions(+), 25 deletions(-)

diff --git a/tests/job-definitions/job-definitions.yaml b/tests/job-definitions/job-definitions.yaml
index 7e3e1b7..0c48e84 100644
--- a/tests/job-definitions/job-definitions.yaml
+++ b/tests/job-definitions/job-definitions.yaml
@@ -136,3 +136,10 @@ jobs:
   splitsmiles:
     command: >-
       copyf.py {{ inputFile }}
+    # Simulate multiple output files...
+    variables:
+      outputs:
+        properties:
+          outputBase:
+            creates: '{{ outputBase }}_*.smi'
+            type: files
diff --git a/tests/test_workflow_engine_examples.py b/tests/test_workflow_engine_examples.py
index 40e0573..9d07f13 100644
--- a/tests/test_workflow_engine_examples.py
+++ b/tests/test_workflow_engine_examples.py
@@ -398,7 +398,6 @@ def test_workflow_engine_simple_python_molprops_with_options(basic_engine):
     assert project_file_exists(output_file_2)
 
 
-@pytest.mark.skip(reason="WIP")
 def test_workflow_engine_simple_python_fanout(basic_engine):
     # Arrange
     md, da = basic_engine
diff --git a/workflow/decoder.py b/workflow/decoder.py
index 674d1ad..daf6a3d 100644
--- a/workflow/decoder.py
+++ b/workflow/decoder.py
@@ -61,6 +61,16 @@ def get_steps(definition: dict[str, Any]) -> list[dict[str, Any]]:
     return response
 
 
+def get_step(definition: dict[str, Any], name: str) -> dict[str, Any]:
+    """Given a Workflow definition this function returns a named step
+    (if it exists)."""
+    steps: list[dict[str, Any]] = get_steps(definition)
+    for step in steps:
+        if step["name"] == name:
+            return step
+    return {}
+
+
 def get_name(definition: dict[str, Any]) -> str:
     """Given a Workflow definition this function returns its name."""
     return str(definition.get("name", ""))
@@ -117,8 +127,8 @@ def get_step_input_variable_names(
 
 
 def get_step_workflow_variable_mapping(*, step: dict[str, Any]) -> list[Translation]:
-    """Returns a list of workflow vaiable name to step variable name tuples
-    for the given step."""
+    """Returns a list of workflow variable name to step variable name
+    Translation objects for the given step."""
     variable_mapping: list[Translation] = []
     if "variable-mapping" in step:
         for v_map in step["variable-mapping"]:
@@ -134,8 +144,9 @@ def get_step_workflow_variable_mapping(*, step: dict[str, Any]) -> list[Translat
 def get_step_prior_step_variable_mapping(
     *, step: dict[str, Any]
 ) -> dict[str, list[Translation]]:
-    """Returns list of translate objects, indexed by prior step name,
-    that identify source 
step vaiable name to this step's variable name.""" + """Returns list of Translation objects, indexed by prior step name, + that identify source step (output) variable name to this step's (input) + variable name.""" variable_mapping: dict[str, list[Translation]] = {} if "variable-mapping" in step: for v_map in step["variable-mapping"]: diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index d33751f..01d49a2 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -26,7 +26,8 @@ import sys from typing import Any, Optional -from decoder.decoder import TextEncoding, decode +import decoder.decoder as job_defintion_decoder +from decoder.decoder import TextEncoding from google.protobuf.message import Message from informaticsmatters.protobuf.datamanager.pod_message_pb2 import PodMessage from informaticsmatters.protobuf.datamanager.workflow_message_pb2 import WorkflowMessage @@ -40,6 +41,7 @@ from .decoder import ( Translation, + get_step, get_step_prior_step_variable_mapping, get_step_workflow_variable_mapping, ) @@ -127,7 +129,7 @@ def _handle_workflow_start_message(self, r_wfid: str) -> None: # Launch it. # If there's a launch problem the step (and running workflow) will have # and error, stopping it. There will be no Pod event as the launch has failed. - self._launch(rwf=rwf_response, step=first_step) + self._launch(wf=wf_response, rwf=rwf_response, step=first_step) def _handle_workflow_stop_message(self, r_wfid: str) -> None: """Logic to handle a STOP message.""" @@ -263,7 +265,7 @@ def _handle_pod_message(self, msg: PodMessage) -> None: # There's another step! # For this simple logic it is the next step. next_step = wf_response["steps"][step_index + 1] - self._launch(rwf=rwf_response, step=next_step) + self._launch(wf=wf_response, rwf=rwf_response, step=next_step) # Something was started (or there was a launch error and the step # and running workflow error will have been set). @@ -278,21 +280,13 @@ def _handle_pod_message(self, msg: PodMessage) -> None: success=True, ) - def _validate_step_command( - self, - *, - running_workflow_id: str, - step: dict[str, Any], - running_workflow_variables: dict[str, Any], - ) -> str | dict[str, Any]: - """Returns an error message if the command isn't valid. - Without a message we return all the variables that were (successfully) - applied to the command.""" - + def _get_step_job(self, *, step: dict[str, Any]) -> dict[str, Any]: + """Gets the Job definition for a given Step.""" # We get the Job from the step specification, which must contain # the keys "collection", "job", and "version". Here we assume that # the workflow definition has passed the RUN-level validation # which means we can get these values. + assert "specification" in step step_spec: dict[str, Any] = step["specification"] job_collection: str = step_spec["collection"] job_job: str = step_spec["job"] @@ -300,6 +294,7 @@ def _validate_step_command( job, _ = self._wapi_adapter.get_job( collection=job_collection, job=job_job, version=job_version ) + _LOGGER.debug( "API.get_job(%s, %s, %s) returned: -\n%s", job_collection, @@ -308,6 +303,19 @@ def _validate_step_command( str(job), ) + return job + + def _validate_step_command( + self, + *, + running_workflow_id: str, + step: dict[str, Any], + running_workflow_variables: dict[str, Any], + ) -> str | dict[str, Any]: + """Returns an error message if the command isn't valid. 
+ Without a message we return all the variables that were (successfully) + applied to the command.""" + # Start with any variables provided in the step's specification. # This will be ou t"all variables" map for this step, # whcih we will add to (and maybe even over-write)... @@ -345,12 +353,15 @@ def _validate_step_command( all_variables[tr.out] = prior_step["variables"][tr.in_] # Now ... can the command be compiled!? - message, success = decode( + job: dict[str, Any] = self._get_step_job(step=step) + message, success = job_defintion_decoder.decode( job["command"], all_variables, "command", TextEncoding.JINJA2_3_0 ) return all_variables if success else message - def _launch(self, *, rwf: dict[str, Any], step: dict[str, Any]) -> None: + def _launch( + self, *, wf: dict[str, Any], rwf: dict[str, Any], step: dict[str, Any] + ) -> None: step_name: str = step["name"] rwf_id: str = rwf["id"] project_id = rwf["project"]["id"] @@ -380,17 +391,53 @@ def _launch(self, *, rwf: dict[str, Any], step: dict[str, Any]) -> None: # A step replication number, # used only for steps expected to run in parallel (even if just once) step_replication_number: int = 0 + # Do we replicate this step (run it more than once)? + # We do if a variable in this step's mapping block + # refers to an output of a prior step whose type is 'files'. + # If the prior step is a 'splitter' we populate the 'replication_values' array + # with the list of files the prior step genrated for its output. replication_values: list[str] = [] - source_is_splitter: bool = False iter_variable: str | None = None + tr_map: dict[str, list[Translation]] = get_step_prior_step_variable_mapping( + step=step + ) + for p_step_name, tr_list in tr_map.items(): + # We need to get the Job definition for each step + # and then check whether the (ouptu) variable is of type 'files'... + wf_step: dict[str, Any] = get_step(wf, p_step_name) + assert wf_step + job_definition: dict[str, Any] = self._get_step_job(step=wf_step) + jd_outputs: dict[str, Any] = job_defintion_decoder.get_outputs( + job_definition + ) + for tr in tr_list: + if jd_outputs.get(tr.in_, {}).get("type") == "files": + iter_variable = tr.out + # Get the prior running step's output values + response, _ = self._wapi_adapter.get_running_workflow_step_by_name( + name=p_step_name, + running_workflow_id=rwf_id, + ) + rwfs_id = response["id"] + assert rwfs_id + result, _ = ( + self._wapi_adapter.get_running_workflow_step_output_values_for_output( + running_workflow_step_id=rwfs_id, + output_variable=tr.in_, + ) + ) + replication_values = result["output"].copy() + break + # Stop if we've got an iteration variable + if iter_variable: + break num_step_instances: int = max(1, len(replication_values)) for iteration in range(num_step_instances): # If we are replicating this step then we must replace the step's variable # with a value expected for this iteration. 
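# A standalone sketch (not part of the patch) of the fan-out rule described
# above, using hypothetical values: one instance is launched per realised
# value of the prior step's 'files' output, and the iteration variable is
# overwritten before each launch.
example_values = ["chunk_1.smi", "chunk_2.smi"]   # prior step's realised outputs
example_iter_variable = "inputFile"               # this step's driven variable
for i, value in enumerate(example_values):
    variables = {example_iter_variable: value}    # per-instance variable map
    step_replication_number = i + 1               # unique +ve; 0 = not replicated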
- if source_is_splitter: - assert iter_variable + if iter_variable: iter_value: str = replication_values[iteration] _LOGGER.info( "Replicating step: %s iteration=%s variable=%s value=%s", @@ -427,7 +474,7 @@ def _launch(self, *, rwf: dict[str, Any], step: dict[str, Any]) -> None: step_replication_number=step_replication_number, ) lr: LaunchResult = self._instance_launcher.launch(launch_parameters=lp) - rwfs_id: str | None = lr.running_workflow_step_id + rwfs_id = lr.running_workflow_step_id assert rwfs_id if lr.error_num: From 7f4b0c6e892308c6517e26c6cbdc597b172d971d Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Tue, 2 Sep 2025 09:51:30 +0000 Subject: [PATCH 42/57] docs: Doc tweak --- workflow/workflow_engine.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index 01d49a2..606b45d 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -396,6 +396,9 @@ def _launch( # refers to an output of a prior step whose type is 'files'. # If the prior step is a 'splitter' we populate the 'replication_values' array # with the list of files the prior step genrated for its output. + # + # In this engine we onlhy act on the _first_ match, i.e. there CANNOT + # be more than one prior step variable that is 'files'! replication_values: list[str] = [] iter_variable: str | None = None tr_map: dict[str, list[Translation]] = get_step_prior_step_variable_mapping( From 5e2c8bc7e973acc3bfa7575361cb3db5c9414d20 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Tue, 2 Sep 2025 10:47:26 +0000 Subject: [PATCH 43/57] refactor: variable-map is now 'plumbing' and Translation is a 'Connector' --- tests/test_workflow_engine_examples.py | 6 +-- ...cate-step-input-output-variable-names.yaml | 4 +- .../example-smiles-to-file.yaml | 2 +- .../replicate-using-undeclared-input.yaml | 4 +- .../shortcut-example-1.yaml | 2 +- .../simple-python-molprops-with-options.yaml | 4 +- .../simple-python-molprops.yaml | 4 +- .../simple-python-parallel.yaml | 6 +-- ....yaml => simple-python-split-combine.yaml} | 12 +++--- workflow/decoder.py | 35 +++++++++-------- workflow/workflow-schema.yaml | 6 +-- workflow/workflow_engine.py | 38 +++++++++---------- 12 files changed, 61 insertions(+), 62 deletions(-) rename tests/workflow-definitions/{simple-python-fanout.yaml => simple-python-split-combine.yaml} (75%) diff --git a/tests/test_workflow_engine_examples.py b/tests/test_workflow_engine_examples.py index 9d07f13..dbfd779 100644 --- a/tests/test_workflow_engine_examples.py +++ b/tests/test_workflow_engine_examples.py @@ -398,12 +398,12 @@ def test_workflow_engine_simple_python_molprops_with_options(basic_engine): assert project_file_exists(output_file_2) -def test_workflow_engine_simple_python_fanout(basic_engine): +def test_workflow_engine_simple_python_split_combine(basic_engine): # Arrange md, da = basic_engine da.mock_get_running_workflow_step_output_values_for_output( - step_name="first-step", + step_name="split", output_variable="outputBase", output=["chunk_1.smi", "chunk_2.smi"], ) @@ -427,7 +427,7 @@ def test_workflow_engine_simple_python_fanout(basic_engine): r_wfid = start_workflow( md, da, - "simple-python-fanout", + "simple-python-split-combine", {"candidateMolecules": input_file_1}, ) diff --git a/tests/workflow-definitions/duplicate-step-input-output-variable-names.yaml b/tests/workflow-definitions/duplicate-step-input-output-variable-names.yaml index 3ba3926..deaae85 100644 --- 
a/tests/workflow-definitions/duplicate-step-input-output-variable-names.yaml +++ b/tests/workflow-definitions/duplicate-step-input-output-variable-names.yaml @@ -14,7 +14,7 @@ steps: variables: name: "col1" value: 123 - variable-mapping: + plumbing: - variable: inputFile from-workflow: variable: candidateMolecules @@ -34,7 +34,7 @@ steps: variables: name: "col2" value: "999" - variable-mapping: + plumbing: - variable: inputFile from-step: name: step1 diff --git a/tests/workflow-definitions/example-smiles-to-file.yaml b/tests/workflow-definitions/example-smiles-to-file.yaml index 018d90c..29c3e98 100644 --- a/tests/workflow-definitions/example-smiles-to-file.yaml +++ b/tests/workflow-definitions/example-smiles-to-file.yaml @@ -13,7 +13,7 @@ steps: collection: workflow-engine-unit-test-jobs job: smiles-to-file version: "1.0.0" - variable-mapping: + plumbing: - variable: outputFile from-workflow: variable: outputFile diff --git a/tests/workflow-definitions/replicate-using-undeclared-input.yaml b/tests/workflow-definitions/replicate-using-undeclared-input.yaml index 447521b..0828b48 100644 --- a/tests/workflow-definitions/replicate-using-undeclared-input.yaml +++ b/tests/workflow-definitions/replicate-using-undeclared-input.yaml @@ -14,7 +14,7 @@ steps: variables: name: "col1" value: 123 - variable-mapping: + plumbing: - variable: inputFile from-workflow: variable: candidateMolecules @@ -33,7 +33,7 @@ steps: replicate: using: variable: y - variable-mapping: + plumbing: - variable: inputFile from-step: name: step-1 diff --git a/tests/workflow-definitions/shortcut-example-1.yaml b/tests/workflow-definitions/shortcut-example-1.yaml index 0b6c2c3..b9c4a87 100644 --- a/tests/workflow-definitions/shortcut-example-1.yaml +++ b/tests/workflow-definitions/shortcut-example-1.yaml @@ -19,7 +19,7 @@ steps: collection: workflow-engine-unit-test-jobs job: shortcut-example-1-process-b version: "1.0.0" - variable-mapping: + plumbing: - variable: inputFile from-step: name: example-1-step-1 diff --git a/tests/workflow-definitions/simple-python-molprops-with-options.yaml b/tests/workflow-definitions/simple-python-molprops-with-options.yaml index 9ef80e5..de1ad86 100644 --- a/tests/workflow-definitions/simple-python-molprops-with-options.yaml +++ b/tests/workflow-definitions/simple-python-molprops-with-options.yaml @@ -13,7 +13,7 @@ steps: version: "1.0.0" variables: outputFile: step1.out.smi - variable-mapping: + plumbing: - variable: inputFile from-workflow: variable: candidateMolecules @@ -33,7 +33,7 @@ steps: variables: name: "col2" value: "999" - variable-mapping: + plumbing: - variable: inputFile from-step: name: step1 diff --git a/tests/workflow-definitions/simple-python-molprops.yaml b/tests/workflow-definitions/simple-python-molprops.yaml index ba0d1d0..5639da3 100644 --- a/tests/workflow-definitions/simple-python-molprops.yaml +++ b/tests/workflow-definitions/simple-python-molprops.yaml @@ -15,7 +15,7 @@ steps: name: "col1" value: 123 outputFile: "results.smi" - variable-mapping: + plumbing: - variable: inputFile from-workflow: variable: candidateMolecules @@ -29,7 +29,7 @@ steps: variables: name: "col2" value: "999" - variable-mapping: + plumbing: - variable: inputFile from-step: name: step1 diff --git a/tests/workflow-definitions/simple-python-parallel.yaml b/tests/workflow-definitions/simple-python-parallel.yaml index c1f5c8f..dc8e3f3 100644 --- a/tests/workflow-definitions/simple-python-parallel.yaml +++ b/tests/workflow-definitions/simple-python-parallel.yaml @@ -14,7 +14,7 @@ steps: variables: 
name: "unnecessary" value: "0" - variable-mapping: + plumbing: - variable: inputFile from-workflow: variable: candidateMolecules @@ -28,7 +28,7 @@ steps: variables: name: "desc1" value: "777" - variable-mapping: + plumbing: - variable: inputFile from-step: name: first-step @@ -43,7 +43,7 @@ steps: variables: name: "desc2" value: "999" - variable-mapping: + plumbing: - variable: inputFile from-step: name: first-step diff --git a/tests/workflow-definitions/simple-python-fanout.yaml b/tests/workflow-definitions/simple-python-split-combine.yaml similarity index 75% rename from tests/workflow-definitions/simple-python-fanout.yaml rename to tests/workflow-definitions/simple-python-split-combine.yaml index 0a50216..f39a100 100644 --- a/tests/workflow-definitions/simple-python-fanout.yaml +++ b/tests/workflow-definitions/simple-python-split-combine.yaml @@ -3,10 +3,12 @@ kind: DataManagerWorkflow kind-version: "2025.2" name: python-workflow description: >- - A simple parallel workflow. Input is split into N chunks and N processes of the same job is started + A simple parallel workflow. The input is split into chunks and a number outputFile + parallel steps processes these outputs. Finally a combine step concatenates the + files. steps: -- name: first-step +- name: split description: Split an input file specification: collection: workflow-engine-unit-test-jobs @@ -16,7 +18,7 @@ steps: name: count value: "1" outputBase: chunk - variable-mapping: + plumbing: - variable: inputFile from-workflow: variable: candidateMolecules @@ -31,10 +33,10 @@ steps: name: desc1 value: "777" outputFile: results.smi - variable-mapping: + plumbing: - variable: inputFile from-step: - name: first-step + name: split variable: outputBase out: - outputFile diff --git a/workflow/decoder.py b/workflow/decoder.py index daf6a3d..1c50e96 100644 --- a/workflow/decoder.py +++ b/workflow/decoder.py @@ -25,8 +25,9 @@ @dataclass -class Translation: - """A source ("in_") to destination ("out") variable map.""" +class Connector: + """A connection - connexts a plumbing source variable ("in_") + to destination variable ("out").""" in_: str out: str @@ -83,13 +84,13 @@ def get_description(definition: dict[str, Any]) -> str | None: def get_workflow_variable_names(definition: dict[str, Any]) -> set[str]: """Given a Workflow definition this function returns all the names of the - variables that need to be defined at the workflow level. These are the 'variables' - used in every steps' variabale-mapping block. + variables defined in steps that need to be defined at the workflow level. + These are the 'variables' used in every step's 'plumbing' block. 
""" wf_variable_names: set[str] = set() steps: list[dict[str, Any]] = get_steps(definition) for step in steps: - if v_map := step.get("variable-mapping"): + if v_map := step.get("plumbing"): for v in v_map: if "from-workflow" in v: wf_variable_names.add(v["from-workflow"]["variable"]) @@ -126,40 +127,38 @@ def get_step_input_variable_names( return variable_names -def get_step_workflow_variable_mapping(*, step: dict[str, Any]) -> list[Translation]: +def get_step_workflow_plumbing(*, step: dict[str, Any]) -> list[Connector]: """Returns a list of workflow vaiable name to step variable name Translation objects for the given step.""" - variable_mapping: list[Translation] = [] - if "variable-mapping" in step: - for v_map in step["variable-mapping"]: + variable_mapping: list[Connector] = [] + if "plumbing" in step: + for v_map in step["plumbing"]: if "from-workflow" in v_map: variable_mapping.append( - Translation( + Connector( in_=v_map["from-workflow"]["variable"], out=v_map["variable"] ) ) return variable_mapping -def get_step_prior_step_variable_mapping( - *, step: dict[str, Any] -) -> dict[str, list[Translation]]: +def get_step_prior_step_plumbing(*, step: dict[str, Any]) -> dict[str, list[Connector]]: """Returns list of Translation objects, indexed by prior step name, that identify source step (output) variable name to this step's (input) variable name.""" - variable_mapping: dict[str, list[Translation]] = {} - if "variable-mapping" in step: - for v_map in step["variable-mapping"]: + variable_mapping: dict[str, list[Connector]] = {} + if "plumbing" in step: + for v_map in step["plumbing"]: if "from-step" in v_map: step_name = v_map["from-step"]["name"] step_variable = v_map["from-step"]["variable"] # Tuple is "from" -> "to" if step_name in variable_mapping: variable_mapping[step_name].append( - Translation(in_=step_variable, out=v_map["variable"]) + Connector(in_=step_variable, out=v_map["variable"]) ) else: variable_mapping[step_name] = [ - Translation(in_=step_variable, out=v_map["variable"]) + Connector(in_=step_variable, out=v_map["variable"]) ] return variable_mapping diff --git a/workflow/workflow-schema.yaml b/workflow/workflow-schema.yaml index 90acb29..97a4610 100644 --- a/workflow/workflow-schema.yaml +++ b/workflow/workflow-schema.yaml @@ -156,11 +156,11 @@ definitions: # The format of this is essentially idenical to the specification # used when a Job is launched via the DM API. $ref: '#/definitions/step-specification' - variable-mapping: + plumbing: # The map of the source of the step's variables. - # all variables the step needs (that aren;t already in the specification) + # All variables the step needs (that aren't already in the specification) # need to be declared here. They either come "from" a prior step - # or are expected in th erunning workflow variables. Here we simply + # or are expected to be a workflow variable. Here we simply # associate every required variable to a source. 
type: array items: diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index 606b45d..75f3bd3 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -40,10 +40,10 @@ ) from .decoder import ( - Translation, + Connector, get_step, - get_step_prior_step_variable_mapping, - get_step_workflow_variable_mapping, + get_step_prior_step_plumbing, + get_step_workflow_plumbing, ) _LOGGER: logging.Logger = logging.getLogger(__name__) @@ -330,27 +330,27 @@ def _validate_step_command( # "in" variables are worklfow variables, and "out" variables # are expected Job variables. We use this to add variables # to the "all variables" map. - for tr in get_step_workflow_variable_mapping(step=step): - assert tr.in_ in running_workflow_variables - all_variables[tr.out] = running_workflow_variables[tr.in_] + for connector in get_step_workflow_plumbing(step=step): + assert connector.in_ in running_workflow_variables + all_variables[connector.out] = running_workflow_variables[connector.in_] # Now we apply variables from the "variable mapping" block # related to values used in prior steps. The decoder gives # us a map indexed by prior step name that's a list of "in" "out" # tuples as above. - step_prior_v_map: dict[str, list[Translation]] = ( - get_step_prior_step_variable_mapping(step=step) + prior_step_plumbing: dict[str, list[Connector]] = get_step_prior_step_plumbing( + step=step ) - for prior_step_name, v_map in step_prior_v_map.items(): + for prior_step_name, connections in prior_step_plumbing.items(): # Retrieve the prior "running" step # in order to get the variables that were set there... prior_step, _ = self._wapi_adapter.get_running_workflow_step_by_name( name=prior_step_name, running_workflow_id=running_workflow_id ) # Copy "in" value to "out"... - for tr in v_map: - assert tr.in_ in prior_step["variables"] - all_variables[tr.out] = prior_step["variables"][tr.in_] + for connector in connections: + assert connector.in_ in prior_step["variables"] + all_variables[connector.out] = prior_step["variables"][connector.in_] # Now ... can the command be compiled!? job: dict[str, Any] = self._get_step_job(step=step) @@ -401,10 +401,8 @@ def _launch( # be more than one prior step variable that is 'files'! replication_values: list[str] = [] iter_variable: str | None = None - tr_map: dict[str, list[Translation]] = get_step_prior_step_variable_mapping( - step=step - ) - for p_step_name, tr_list in tr_map.items(): + plumbing: dict[str, list[Connector]] = get_step_prior_step_plumbing(step=step) + for p_step_name, connections in plumbing.items(): # We need to get the Job definition for each step # and then check whether the (ouptu) variable is of type 'files'... 
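# An illustration (hypothetical step, not from the patch) of the 'plumbing'
# shapes the decoder walks: a 'from-workflow' entry and a 'from-step' entry,
# and the Connector objects they are turned into.
example_plumbing = [
    {"variable": "inputFile", "from-workflow": {"variable": "candidateMolecules"}},
    {"variable": "fragments", "from-step": {"name": "step1", "variable": "outputFile"}},
]
# -> workflow connections: [Connector(in_="candidateMolecules", out="inputFile")]
# -> prior-step plumbing:  {"step1": [Connector(in_="outputFile", out="fragments")]}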
wf_step: dict[str, Any] = get_step(wf, p_step_name) @@ -413,9 +411,9 @@ def _launch( jd_outputs: dict[str, Any] = job_defintion_decoder.get_outputs( job_definition ) - for tr in tr_list: - if jd_outputs.get(tr.in_, {}).get("type") == "files": - iter_variable = tr.out + for connector in connections: + if jd_outputs.get(connector.in_, {}).get("type") == "files": + iter_variable = connector.out # Get the prior running step's output values response, _ = self._wapi_adapter.get_running_workflow_step_by_name( name=p_step_name, @@ -426,7 +424,7 @@ def _launch( result, _ = ( self._wapi_adapter.get_running_workflow_step_output_values_for_output( running_workflow_step_id=rwfs_id, - output_variable=tr.in_, + output_variable=connector.in_, ) ) replication_values = result["output"].copy() From c39ddb7aa0968de2573ba798d6fb1ed4342922ad Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Tue, 2 Sep 2025 10:54:00 +0000 Subject: [PATCH 44/57] refactor: Better function and variable naming (plumbing) --- workflow/decoder.py | 46 ++++++++++++++++++++----------------- workflow/workflow_engine.py | 28 ++++++++++++++-------- 2 files changed, 43 insertions(+), 31 deletions(-) diff --git a/workflow/decoder.py b/workflow/decoder.py index 1c50e96..b41552e 100644 --- a/workflow/decoder.py +++ b/workflow/decoder.py @@ -127,38 +127,42 @@ def get_step_input_variable_names( return variable_names -def get_step_workflow_plumbing(*, step: dict[str, Any]) -> list[Connector]: - """Returns a list of workflow vaiable name to step variable name - Translation objects for the given step.""" - variable_mapping: list[Connector] = [] - if "plumbing" in step: - for v_map in step["plumbing"]: +def get_step_workflow_variable_connections( + *, step_definition: dict[str, Any] +) -> list[Connector]: + """Returns a list of connectors that connect a workflow variable name + to a step variable name for the given step definition.""" + connections: list[Connector] = [] + if "plumbing" in step_definition: + for v_map in step_definition["plumbing"]: if "from-workflow" in v_map: - variable_mapping.append( + connections.append( Connector( in_=v_map["from-workflow"]["variable"], out=v_map["variable"] ) ) - return variable_mapping - - -def get_step_prior_step_plumbing(*, step: dict[str, Any]) -> dict[str, list[Connector]]: - """Returns list of Translation objects, indexed by prior step name, - that identify source step (output) variable name to this step's (input) - variable name.""" - variable_mapping: dict[str, list[Connector]] = {} - if "plumbing" in step: - for v_map in step["plumbing"]: + return connections + + +def get_step_prior_step_plumbing( + *, step_definition: dict[str, Any] +) -> dict[str, list[Connector]]: + """Returns list of variable Connections, indexed by prior step name, + that identify a source step variable name (an output) to an input variable in this + step (an input).""" + plumbing: dict[str, list[Connector]] = {} + if "plumbing" in step_definition: + for v_map in step_definition["plumbing"]: if "from-step" in v_map: step_name = v_map["from-step"]["name"] step_variable = v_map["from-step"]["variable"] # Tuple is "from" -> "to" - if step_name in variable_mapping: - variable_mapping[step_name].append( + if step_name in plumbing: + plumbing[step_name].append( Connector(in_=step_variable, out=v_map["variable"]) ) else: - variable_mapping[step_name] = [ + plumbing[step_name] = [ Connector(in_=step_variable, out=v_map["variable"]) ] - return variable_mapping + return plumbing diff --git a/workflow/workflow_engine.py 
b/workflow/workflow_engine.py index 75f3bd3..d295682 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -43,7 +43,7 @@ Connector, get_step, get_step_prior_step_plumbing, - get_step_workflow_plumbing, + get_step_workflow_variable_connections, ) _LOGGER: logging.Logger = logging.getLogger(__name__) @@ -129,7 +129,7 @@ def _handle_workflow_start_message(self, r_wfid: str) -> None: # Launch it. # If there's a launch problem the step (and running workflow) will have # and error, stopping it. There will be no Pod event as the launch has failed. - self._launch(wf=wf_response, rwf=rwf_response, step=first_step) + self._launch(wf=wf_response, rwf=rwf_response, step_definition=first_step) def _handle_workflow_stop_message(self, r_wfid: str) -> None: """Logic to handle a STOP message.""" @@ -265,7 +265,9 @@ def _handle_pod_message(self, msg: PodMessage) -> None: # There's another step! # For this simple logic it is the next step. next_step = wf_response["steps"][step_index + 1] - self._launch(wf=wf_response, rwf=rwf_response, step=next_step) + self._launch( + wf=wf_response, rwf=rwf_response, step_definition=next_step + ) # Something was started (or there was a launch error and the step # and running workflow error will have been set). @@ -330,7 +332,7 @@ def _validate_step_command( # "in" variables are worklfow variables, and "out" variables # are expected Job variables. We use this to add variables # to the "all variables" map. - for connector in get_step_workflow_plumbing(step=step): + for connector in get_step_workflow_variable_connections(step_definition=step): assert connector.in_ in running_workflow_variables all_variables[connector.out] = running_workflow_variables[connector.in_] @@ -339,7 +341,7 @@ def _validate_step_command( # us a map indexed by prior step name that's a list of "in" "out" # tuples as above. prior_step_plumbing: dict[str, list[Connector]] = get_step_prior_step_plumbing( - step=step + step_definition=step ) for prior_step_name, connections in prior_step_plumbing.items(): # Retrieve the prior "running" step @@ -360,9 +362,13 @@ def _validate_step_command( return all_variables if success else message def _launch( - self, *, wf: dict[str, Any], rwf: dict[str, Any], step: dict[str, Any] + self, + *, + wf: dict[str, Any], + rwf: dict[str, Any], + step_definition: dict[str, Any], ) -> None: - step_name: str = step["name"] + step_name: str = step_definition["name"] rwf_id: str = rwf["id"] project_id = rwf["project"]["id"] @@ -376,7 +382,7 @@ def _launch( rwf_variables: dict[str, Any] = rwf.get("variables", {}) error_or_variables: str | dict[str, Any] = self._validate_step_command( running_workflow_id=rwf_id, - step=step, + step=step_definition, running_workflow_variables=rwf_variables, ) if isinstance(error_or_variables, str): @@ -401,7 +407,9 @@ def _launch( # be more than one prior step variable that is 'files'! replication_values: list[str] = [] iter_variable: str | None = None - plumbing: dict[str, list[Connector]] = get_step_prior_step_plumbing(step=step) + plumbing: dict[str, list[Connector]] = get_step_prior_step_plumbing( + step_definition=step_definition + ) for p_step_name, connections in plumbing.items(): # We need to get the Job definition for each step # and then check whether the (ouptu) variable is of type 'files'... 
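# A sketch of the inspection performed above, against a hypothetical Job
# definition laid out like the test job definitions: the prior step's
# declared outputs are checked for a 'files' type.
example_job_definition = {
    "command": "copyf.py {{ inputFile }}",
    "variables": {"outputs": {"properties": {"outputBase": {"type": "files"}}}},
}
example_outputs = example_job_definition["variables"]["outputs"]["properties"]
assert example_outputs.get("outputBase", {}).get("type") == "files"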
@@ -468,7 +476,7 @@ def _launch( debug=rwf.get("debug"), launching_user_name=rwf["running_user"], launching_user_api_token=rwf["running_user_api_token"], - specification=step["specification"], + specification=step_definition["specification"], variables=variables, running_workflow_id=rwf_id, step_name=step_name, From cdd936e6d727134f37c6b94261b79e1c6d7d4e58 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Tue, 2 Sep 2025 13:46:59 +0000 Subject: [PATCH 45/57] feat: new _prepare_step_variables function --- tests/job-definitions/job-definitions.yaml | 17 ++- .../simple-python-split-combine.yaml | 4 +- workflow/workflow_engine.py | 112 ++++++++++++++---- 3 files changed, 106 insertions(+), 27 deletions(-) diff --git a/tests/job-definitions/job-definitions.yaml b/tests/job-definitions/job-definitions.yaml index 0c48e84..03a3b69 100644 --- a/tests/job-definitions/job-definitions.yaml +++ b/tests/job-definitions/job-definitions.yaml @@ -132,12 +132,27 @@ jobs: concatenate: command: >- concatenate.py {% for ifile in inputFile %}{{ ifile }} {% endfor %} --outputFile {{ outputFile }} + # Simulate a multiple input files Job (combiner)... + variables: + inputs: + properties: + inputFile: + type: files + outputs: + properties: + outputBase: + creates: '{{ outputFile }}' + type: file splitsmiles: command: >- copyf.py {{ inputFile }} - # Simulate multiple output files... + # Simulate a multiple output files Job (splitetr)... variables: + inputs: + properties: + inputFile: + type: file outputs: properties: outputBase: diff --git a/tests/workflow-definitions/simple-python-split-combine.yaml b/tests/workflow-definitions/simple-python-split-combine.yaml index f39a100..26fb3d7 100644 --- a/tests/workflow-definitions/simple-python-split-combine.yaml +++ b/tests/workflow-definitions/simple-python-split-combine.yaml @@ -15,15 +15,13 @@ steps: job: splitsmiles version: "1.0.0" variables: - name: count - value: "1" outputBase: chunk plumbing: - variable: inputFile from-workflow: variable: candidateMolecules -- name: parallel-step +- name: parallel description: Add some params specification: collection: workflow-engine-unit-test-jobs diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index d295682..3273350 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -24,6 +24,7 @@ import logging import sys +from dataclasses import dataclass from typing import Any, Optional import decoder.decoder as job_defintion_decoder @@ -51,6 +52,20 @@ _LOGGER.addHandler(logging.StreamHandler(sys.stdout)) +@dataclass +class StepPreparationResponse: + """Step preparation response object. Iterations is +ve (non-zero) if a step + can be launched - it's value indicates how many times. If a step can be launched + 'variables' will not be None. If a parallel set of steps can take place + (even just one) 'iteration_variable' will be set and 'iteration_values' + will be a list containing a value for eacdh step.""" + + iterations: int + variables: dict[str, Any] | None = None + iteration_variable: str | None = None + iteration_values: list[str] | None = None + + class WorkflowEngine: """The workflow engine.""" @@ -126,10 +141,18 @@ def _handle_workflow_start_message(self, r_wfid: str) -> None: # Now find the first step (index 0)... first_step: dict[str, Any] = wf_response["steps"][0] + sp_resp = self._prepare_step_variables( + wf=wf_response, step_definition=first_step, rwf=rwf_response + ) + assert sp_resp.variables is not None # Launch it. 
# If there's a launch problem the step (and running workflow) will have # and error, stopping it. There will be no Pod event as the launch has failed. - self._launch(wf=wf_response, rwf=rwf_response, step_definition=first_step) + self._launch( + rwf=rwf_response, + step_definition=first_step, + step_preparation_response=sp_resp, + ) def _handle_workflow_stop_message(self, r_wfid: str) -> None: """Logic to handle a STOP message.""" @@ -265,8 +288,31 @@ def _handle_pod_message(self, msg: PodMessage) -> None: # There's another step! # For this simple logic it is the next step. next_step = wf_response["steps"][step_index + 1] + + # A mojor piece of work to accomplish is to get ourselves into a position + # that allows us to check the step command can be executed. + # We do this by compiling a map of variables we belive the step needs. + + # If the step about to be launched is based on a prior step + # that generates multiple outputs (files) then we have to + # exit unless all of the step instances have completed. + # + # Do we need a 'prepare variables' function? + # One that returns a map of variables or nothing + # (e.g. 'nothing' when a step launch cannot be attempted) + sp_resp = self._prepare_step_variables( + wf=wf_response, step_definition=next_step, rwf=rwf_response + ) + if sp_resp.iterations == 0: + # Cannot prepare variables for this step, + # we have to leave. + return + assert sp_resp.variables is not None + self._launch( - wf=wf_response, rwf=rwf_response, step_definition=next_step + rwf=rwf_response, + step_definition=next_step, + step_preparation_response=sp_resp, ) # Something was started (or there was a launch error and the step @@ -361,20 +407,18 @@ def _validate_step_command( ) return all_variables if success else message - def _launch( + def _prepare_step_variables( self, *, wf: dict[str, Any], - rwf: dict[str, Any], step_definition: dict[str, Any], - ) -> None: + rwf: dict[str, Any], + ) -> StepPreparationResponse: + """Attempts to prepare a map of step variables. If variables cannot be + presented to the step we return an object with 'iterations' set to zero.""" + step_name: str = step_definition["name"] rwf_id: str = rwf["id"] - project_id = rwf["project"]["id"] - - # A mojor piece of work to accomplish is to get ourselves into a position - # that allows us to check the step command can be executed. - # We do this by compiling a map of variables we belive the step needs. # We start with all the workflow variables that were provided # by the user when they "ran" the workflow. We're given a full set of @@ -390,13 +434,10 @@ def _launch( msg = f"Failed command validation error_msg={error_msg}" _LOGGER.warning(msg) self._set_step_error(step_name, rwf_id, None, 1, msg) - return + return StepPreparationResponse(iterations=0) variables: dict[str, Any] = error_or_variables - # A step replication number, - # used only for steps expected to run in parallel (even if just once) - step_replication_number: int = 0 # Do we replicate this step (run it more than once)? # We do if a variable in this step's mapping block # refers to an output of a prior step whose type is 'files'. @@ -405,7 +446,7 @@ def _launch( # # In this engine we onlhy act on the _first_ match, i.e. there CANNOT # be more than one prior step variable that is 'files'! 
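# The detection rule restated as a runnable sketch (hypothetical data,
# with connectors reduced to plain (in_, out) pairs): a step is replicated
# when one of its connectors draws from a prior-step output of type
# 'files', and only the first such match may exist.
example_outputs = {"outputBase": {"type": "files", "creates": "chunk_*.smi"}}
example_connections = [("outputBase", "inputFile")]  # (in_, out) pairs
iter_variable = None
for in_, out in example_connections:
    if example_outputs.get(in_, {}).get("type") == "files":
        iter_variable = out   # first (and only permitted) match wins
        break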
- replication_values: list[str] = [] + iter_values: list[str] = [] iter_variable: str | None = None plumbing: dict[str, list[Connector]] = get_step_prior_step_plumbing( step_definition=step_definition @@ -435,34 +476,59 @@ def _launch( output_variable=connector.in_, ) ) - replication_values = result["output"].copy() + iter_values = result["output"].copy() break # Stop if we've got an iteration variable if iter_variable: break - num_step_instances: int = max(1, len(replication_values)) - for iteration in range(num_step_instances): + num_step_instances: int = max(1, len(iter_values)) + return StepPreparationResponse( + variables=variables, + iterations=num_step_instances, + iteration_variable=iter_variable, + iteration_values=iter_values, + ) + + def _launch( + self, + *, + rwf: dict[str, Any], + step_definition: dict[str, Any], + step_preparation_response: StepPreparationResponse, + ) -> None: + step_name: str = step_definition["name"] + rwf_id: str = rwf["id"] + project_id = rwf["project"]["id"] + + # A step replication number, + # used only for steps expected to run in parallel (even if just once) + step_replication_number: int = 0 + + variables = step_preparation_response.variables + assert variables is not None + for iteration in range(step_preparation_response.iterations): # If we are replicating this step then we must replace the step's variable # with a value expected for this iteration. - if iter_variable: - iter_value: str = replication_values[iteration] + if step_preparation_response.iteration_variable: + assert step_preparation_response.iteration_values + iter_value: str = step_preparation_response.iteration_values[iteration] _LOGGER.info( "Replicating step: %s iteration=%s variable=%s value=%s", step_name, iteration, - iter_variable, + step_preparation_response.iteration_variable, iter_value, ) # Over-write the replicating variable # and set the replication number to a unique +ve non-zero value... - variables[iter_variable] = iter_value + variables[step_preparation_response.iteration_variable] = iter_value step_replication_number = iteration + 1 _LOGGER.info( "Launching step: %s RunningWorkflow=%s (name=%s)" - " variables=%s project=%s", + " step_variables=%s project=%s", step_name, rwf_id, rwf["name"], From c08ed5c1adf2720a21f2e95d86ac6c4e9645c430 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Tue, 2 Sep 2025 14:57:36 +0000 Subject: [PATCH 46/57] feat: Refactoring --- workflow/workflow_engine.py | 127 ++++++++++++++++++------------------ 1 file changed, 65 insertions(+), 62 deletions(-) diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index 3273350..edf5715 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -141,7 +141,7 @@ def _handle_workflow_start_message(self, r_wfid: str) -> None: # Now find the first step (index 0)... first_step: dict[str, Any] = wf_response["steps"][0] - sp_resp = self._prepare_step_variables( + sp_resp = self._prepare_step( wf=wf_response, step_definition=first_step, rwf=rwf_response ) assert sp_resp.variables is not None @@ -300,7 +300,7 @@ def _handle_pod_message(self, msg: PodMessage) -> None: # Do we need a 'prepare variables' function? # One that returns a map of variables or nothing # (e.g. 
'nothing' when a step launch cannot be attempted) - sp_resp = self._prepare_step_variables( + sp_resp = self._prepare_step( wf=wf_response, step_definition=next_step, rwf=rwf_response ) if sp_resp.iterations == 0: @@ -353,93 +353,96 @@ def _get_step_job(self, *, step: dict[str, Any]) -> dict[str, Any]: return job - def _validate_step_command( + def _prepare_step( self, *, - running_workflow_id: str, - step: dict[str, Any], - running_workflow_variables: dict[str, Any], - ) -> str | dict[str, Any]: - """Returns an error message if the command isn't valid. - Without a message we return all the variables that were (successfully) - applied to the command.""" + wf: dict[str, Any], + step_definition: dict[str, Any], + rwf: dict[str, Any], + ) -> StepPreparationResponse: + """Attempts to prepare a map of step variables. If variables cannot be + presented to the step we return an object with 'iterations' set to zero.""" + + step_name: str = step_definition["name"] + rwf_id: str = rwf["id"] + + # Compile a set of variables for this step. # Start with any variables provided in the step's specification. - # This will be ou t"all variables" map for this step, - # whcih we will add to (and maybe even over-write)... - all_variables: dict[str, Any] = step["specification"].get("variables", {}) + # A map that we will add to (and maybe even over-write)... + variables: dict[str, Any] = step_definition["specification"].get( + "variables", {} + ) - # Next, we iterate through the step's "variable mapping" block. - # This tells us all the variables that are set from either the - # 'workflow' or 'a prior step'. + # All the running workflow variables + rwf_variables: dict[str, Any] = rwf.get("variables", {}) - # Start with any workflow variables in the step. - # This will be a list of Translations of "in" and "out" variable names. + # Process the step's plumbing realting to workflow variables. + # This will be a list of Connectors of "in" and "out" variable names. # "in" variables are worklfow variables, and "out" variables # are expected Job variables. We use this to add variables - # to the "all variables" map. - for connector in get_step_workflow_variable_connections(step_definition=step): - assert connector.in_ in running_workflow_variables - all_variables[connector.out] = running_workflow_variables[connector.in_] + # to the variables map. + for connector in get_step_workflow_variable_connections( + step_definition=step_definition + ): + assert connector.in_ in rwf_variables + variables[connector.out] = rwf_variables[connector.in_] - # Now we apply variables from the "variable mapping" block + # Now we apply variables from the "plumbing" block # related to values used in prior steps. The decoder gives # us a map indexed by prior step name that's a list of "in" "out" # tuples as above. prior_step_plumbing: dict[str, list[Connector]] = get_step_prior_step_plumbing( - step_definition=step + step_definition=step_definition ) for prior_step_name, connections in prior_step_plumbing.items(): # Retrieve the prior "running" step # in order to get the variables that were set there... prior_step, _ = self._wapi_adapter.get_running_workflow_step_by_name( - name=prior_step_name, running_workflow_id=running_workflow_id + name=prior_step_name, running_workflow_id=rwf_id ) # Copy "in" value to "out"... for connector in connections: assert connector.in_ in prior_step["variables"] - all_variables[connector.out] = prior_step["variables"][connector.in_] + variables[connector.out] = prior_step["variables"][connector.in_] # Now ... 
can the command be compiled!? - job: dict[str, Any] = self._get_step_job(step=step) + job: dict[str, Any] = self._get_step_job(step=step_definition) message, success = job_defintion_decoder.decode( - job["command"], all_variables, "command", TextEncoding.JINJA2_3_0 + job["command"], variables, "command", TextEncoding.JINJA2_3_0 ) - return all_variables if success else message - - def _prepare_step_variables( - self, - *, - wf: dict[str, Any], - step_definition: dict[str, Any], - rwf: dict[str, Any], - ) -> StepPreparationResponse: - """Attempts to prepare a map of step variables. If variables cannot be - presented to the step we return an object with 'iterations' set to zero.""" - - step_name: str = step_definition["name"] - rwf_id: str = rwf["id"] - - # We start with all the workflow variables that were provided - # by the user when they "ran" the workflow. We're given a full set of - # variables in response (on success) or an error string (on failure) - rwf_variables: dict[str, Any] = rwf.get("variables", {}) - error_or_variables: str | dict[str, Any] = self._validate_step_command( - running_workflow_id=rwf_id, - step=step_definition, - running_workflow_variables=rwf_variables, - ) - if isinstance(error_or_variables, str): - error_msg = error_or_variables - msg = f"Failed command validation error_msg={error_msg}" + if not success: + msg = f"Failed command validation error_msg={message}" _LOGGER.warning(msg) self._set_step_error(step_name, rwf_id, None, 1, msg) return StepPreparationResponse(iterations=0) - variables: dict[str, Any] = error_or_variables + # Our inputs + our_job_definition: dict[str, Any] = self._get_step_job(step=step_definition) + our_inputs: dict[str, Any] = job_defintion_decoder.get_inputs( + our_job_definition + ) + + # Are we a combiner step? + # + # We are if a variable in our step's plumbing refers to an input that is + # of type 'files'. A combiner's input is required to accept a space-separated + # set of files. + we_are_a_combiner: bool = False + our_plumbing: dict[str, list[Connector]] = get_step_prior_step_plumbing( + step_definition=step_definition + ) + for p_step_name, connections in our_plumbing.items(): + for connector in connections: + if our_inputs.get(connector.out, {}).get("type") == "files": + we_are_a_combiner = True + + assert not we_are_a_combiner + + # We're not a combiner... # Do we replicate this step (run it more than once)? - # We do if a variable in this step's mapping block + # We do if a variable in this step's plumbing # refers to an output of a prior step whose type is 'files'. # If the prior step is a 'splitter' we populate the 'replication_values' array # with the list of files the prior step genrated for its output. @@ -448,12 +451,9 @@ def _prepare_step_variables( # be more than one prior step variable that is 'files'! iter_values: list[str] = [] iter_variable: str | None = None - plumbing: dict[str, list[Connector]] = get_step_prior_step_plumbing( - step_definition=step_definition - ) - for p_step_name, connections in plumbing.items(): + for p_step_name, connections in our_plumbing.items(): # We need to get the Job definition for each step - # and then check whether the (ouptu) variable is of type 'files'... + # and then check whether the (output) variable is of type 'files'... 
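# The combiner test above as a standalone sketch (hypothetical data): a step
# combines when one of its own *inputs* is declared with type 'files', i.e.
# it accepts the whole set of files produced by a replicated prior step.
example_our_inputs = {"inputFile": {"type": "files"}}
example_plumbing = {"parallel": [("outputFile", "inputFile")]}  # (in_, out)
we_are_a_combiner = any(
    example_our_inputs.get(out, {}).get("type") == "files"
    for connections in example_plumbing.values()
    for _, out in connections
)
assert we_are_a_combiner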
wf_step: dict[str, Any] = get_step(wf, p_step_name) assert wf_step job_definition: dict[str, Any] = self._get_step_job(step=wf_step) @@ -497,6 +497,9 @@ def _launch( step_definition: dict[str, Any], step_preparation_response: StepPreparationResponse, ) -> None: + """Given a runningWorkflow record, a step defitnion (from the Workflow), + and the step's variables (in a preparation object) this method launches + one or more instances of the given step.""" step_name: str = step_definition["name"] rwf_id: str = rwf["id"] project_id = rwf["project"]["id"] From 19e58ae8249d600d903de82044a2898f658f8c9d Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Tue, 2 Sep 2025 16:00:36 +0000 Subject: [PATCH 47/57] refactor: More combiner logic --- tests/instance_launcher.py | 6 ++ tests/test_test_wapi_adapter.py | 66 -------------- tests/wapi_adapter.py | 44 ++-------- .../simple-python-split-combine.yaml | 14 +++ workflow/workflow_abc.py | 60 +++++++------ workflow/workflow_engine.py | 88 +++++++++++++------ 6 files changed, 119 insertions(+), 159 deletions(-) diff --git a/tests/instance_launcher.py b/tests/instance_launcher.py index c938e9e..c59d138 100644 --- a/tests/instance_launcher.py +++ b/tests/instance_launcher.py @@ -76,6 +76,12 @@ def launch(self, *, launch_parameters: LaunchParameters) -> LaunchResult: os.makedirs(EXECUTION_DIRECTORY, exist_ok=True) + if launch_parameters.step_replication_number: + assert ( + launch_parameters.step_replication_number + <= launch_parameters.total_number_of_replicas + ) + # Create a running workflow step assert launch_parameters.running_workflow_id assert launch_parameters.step_name diff --git a/tests/test_test_wapi_adapter.py b/tests/test_test_wapi_adapter.py index a8ebc33..f116d69 100644 --- a/tests/test_test_wapi_adapter.py +++ b/tests/test_test_wapi_adapter.py @@ -421,72 +421,6 @@ def test_create_instance_and_get_step_instance_directory_by_name(): assert response["instance_directory"] == f".{i_id}" -def test_get_workflow_steps_driving_this_step_when_1st_step(): - # Arrange - utaa = UnitTestWorkflowAPIAdapter() - response = utaa.create_workflow( - workflow_definition={ - "name": "blah", - "steps": [{"name": "step-1"}, {"name": "step-2"}, {"name": "step-3"}], - } - ) - response = utaa.create_running_workflow( - user_id="dlister", - workflow_id=response["id"], - project_id=TEST_PROJECT_ID, - variables={}, - ) - response, _ = utaa.create_running_workflow_step( - running_workflow_id=response["id"], step="step-1" - ) - rwfs_id = response["id"] - - # Act - response, _ = utaa.get_workflow_steps_driving_this_step( - running_workflow_step_id=rwfs_id - ) - - # Assert - assert response["caller_step_index"] == 0 - assert len(response["steps"]) == 3 - assert response["steps"][0]["name"] == "step-1" - assert response["steps"][1]["name"] == "step-2" - assert response["steps"][2]["name"] == "step-3" - - -def test_get_workflow_steps_driving_this_step_when_2nd_step(): - # Arrange - utaa = UnitTestWorkflowAPIAdapter() - response = utaa.create_workflow( - workflow_definition={ - "name": "blah", - "steps": [{"name": "step-1"}, {"name": "step-2"}, {"name": "step-3"}], - } - ) - response = utaa.create_running_workflow( - user_id="dlister", - workflow_id=response["id"], - project_id=TEST_PROJECT_ID, - variables={}, - ) - response, _ = utaa.create_running_workflow_step( - running_workflow_id=response["id"], step="step-2" - ) - rwfs_id = response["id"] - - # Act - response, _ = utaa.get_workflow_steps_driving_this_step( - running_workflow_step_id=rwfs_id - ) - - # Assert - assert 
response["caller_step_index"] == 1 - assert len(response["steps"]) == 3 - assert response["steps"][0]["name"] == "step-1" - assert response["steps"][1]["name"] == "step-2" - assert response["steps"][2]["name"] == "step-3" - - def test_get_running_workflow_step_by_name(): # Arrange utaa = UnitTestWorkflowAPIAdapter() diff --git a/tests/wapi_adapter.py b/tests/wapi_adapter.py index c283ee1..75bd0e9 100644 --- a/tests/wapi_adapter.py +++ b/tests/wapi_adapter.py @@ -112,6 +112,12 @@ def get_running_steps( # Does nothing at the moment - this is used for the STOP logic. return {"count": 0, "steps": []}, 0 + def get_status_of_all_step_instances_by_name( + self, *, running_workflow_id: str, step_name: str + ) -> tuple[dict[str, Any], int]: + # Need to implement! + return {"count": 0, "status": []}, 0 + def set_running_workflow_done( self, *, @@ -249,44 +255,6 @@ def set_running_workflow_step_done( Pickler(pickle_file).dump(running_workflow_step) UnitTestWorkflowAPIAdapter.lock.release() - def get_workflow_steps_driving_this_step( - self, - *, - running_workflow_step_id: str, - ) -> tuple[dict[str, Any], int]: - # To accomplish this we get the running workflow for the step, - # then the workflow, then the steps from that workflow. - # We return a dictionary and an HTTP response code. - UnitTestWorkflowAPIAdapter.lock.acquire() - with open(_RUNNING_WORKFLOW_STEP_PICKLE_FILE, "rb") as pickle_file: - running_workflow_step = Unpickler(pickle_file).load() - UnitTestWorkflowAPIAdapter.lock.release() - - assert running_workflow_step_id in running_workflow_step - - running_workflow_id: str = running_workflow_step[running_workflow_step_id][ - "running_workflow" - ]["id"] - rwf_response, _ = self.get_running_workflow( - running_workflow_id=running_workflow_id - ) - assert rwf_response - workflow_id: str = rwf_response["workflow"]["id"] - wf_response, _ = self.get_workflow(workflow_id=workflow_id) - assert wf_response - # Find the caller's python in the step sequence (-1 if not found) - caller_step_index: int = -1 - index: int = 0 - for step in wf_response["steps"]: - if step["name"] == running_workflow_step[running_workflow_step_id]["name"]: - caller_step_index = index - break - index += 1 - return { - "caller_step_index": caller_step_index, - "steps": wf_response["steps"].copy(), - }, 0 - def get_instance(self, *, instance_id: str) -> tuple[dict[str, Any], int]: UnitTestWorkflowAPIAdapter.lock.acquire() with open(_INSTANCE_PICKLE_FILE, "rb") as pickle_file: diff --git a/tests/workflow-definitions/simple-python-split-combine.yaml b/tests/workflow-definitions/simple-python-split-combine.yaml index 26fb3d7..739ec15 100644 --- a/tests/workflow-definitions/simple-python-split-combine.yaml +++ b/tests/workflow-definitions/simple-python-split-combine.yaml @@ -36,5 +36,19 @@ steps: from-step: name: split variable: outputBase + +#- name: combine +# description: Add some params +# specification: +# collection: workflow-engine-unit-test-jobs +# job: concatenate +# version: "1.0.0" +# variables: +# outputFile: results.smi +# plumbing: +# - variable: inputFile +# from-step: +# variable: outputFile +# name: parallel out: - outputFile diff --git a/workflow/workflow_abc.py b/workflow/workflow_abc.py index 0a0acc0..ee9c22a 100644 --- a/workflow/workflow_abc.py +++ b/workflow/workflow_abc.py @@ -42,8 +42,12 @@ class LaunchParameters: # If only one instance of the step is expected to run # this value can be left at 0 (zero). 
If this step's launch # is expected to be executed more than once the value should be - # non-zero (and unique for this workflow run). + # 1..'N'. step_replication_number: int = 0 + # The total number of replicas of this instance that are expected to be laucnhed. + # if step_replication_number is set, this has to be set. It is 'N'. + # If step_replication_number is zero this value is ignored. + total_number_of_replicas: int = 0 # The application ID (a custom resource name) # used to identify the 'type' of Instance to create. # For DM Jobs this will be 'datamanagerjobs.squonk.it' @@ -144,7 +148,7 @@ def get_running_workflow( # "running_user": "alan", # "running_user_api_token": "123456789", # "done": False, - # "success": false, + # "success": False, # "error_num": 0, # "error_msg": "", # "workflow": { @@ -171,7 +175,29 @@ def get_running_steps( # "count": 1, # "steps": [ # { - # "name:": "step-1234" + # "name": "step-1234" + # } + # ] + # } + + @abstractmethod + def get_status_of_all_step_instances_by_name( + self, *, running_workflow_id: str, step_name: str + ) -> tuple[dict[str, Any], int]: + """Get a list of step execution statuses for the named step.""" + # Should return: + # { + # "count": 2, + # "status": [ + # { + # "done": True, + # "success": True, + # "running_workflow_step_id": "step-0001" + # }, + # { + # "done": False, + # "success": False, + # "running_workflow_step_id": "step-0002" # } # ] # } @@ -195,9 +221,9 @@ def get_running_workflow_step( """Get a RunningWorkflowStep Record""" # Should return: # { - # "name:": "step-1234", + # "name": "step-1234", # "done": False, - # "success": false, + # "success": False, # "error_num": 0, # "error_msg": "", # "variables": { @@ -234,9 +260,9 @@ def get_running_workflow_step_by_name( # Should return: # { # "id": "r-workflow-step-00000000-0000-0000-0000-000000000001", - # "name:": "step-1234", + # "name": "step-1234", # "done": False, - # "success": false, + # "success": False, # "error_num": 0, # "error_msg": "", # "variables": { @@ -274,26 +300,6 @@ def set_running_workflow_step_done( """Set the success value for a RunningWorkflowStep Record, If not successful an error code and message should be provided.""" - @abstractmethod - def get_workflow_steps_driving_this_step( - self, - *, - running_workflow_step_id: str, - ) -> tuple[dict[str, Any], int]: - """Get all the step records that belong to the Workflow for the given - RunningWorkflowStep record ID. You are also given the caller's position - in the list, which will be -1 if the caller is not present.""" - # It should return: - # { - # "caller_step_index": 0, - # "steps": [ - # { - # "name": "step-name" - # "specification": "{}", - # } - # ] - # } - @abstractmethod def get_instance(self, *, instance_id: str) -> tuple[dict[str, Any], int]: """Get an Instance Record""" diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index edf5715..34d51b7 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -58,12 +58,14 @@ class StepPreparationResponse: can be launched - it's value indicates how many times. If a step can be launched 'variables' will not be None. If a parallel set of steps can take place (even just one) 'iteration_variable' will be set and 'iteration_values' - will be a list containing a value for eacdh step.""" + will be a list containing a value for each step. 
If prparation failed + 'error_msg' chould contain something useful.""" iterations: int variables: dict[str, Any] | None = None iteration_variable: str | None = None iteration_values: list[str] | None = None + error_msg: str | None = None class WorkflowEngine: @@ -145,6 +147,7 @@ def _handle_workflow_start_message(self, r_wfid: str) -> None: wf=wf_response, step_definition=first_step, rwf=rwf_response ) assert sp_resp.variables is not None + assert sp_resp.error_msg is None # Launch it. # If there's a launch problem the step (and running workflow) will have # and error, stopping it. There will be no Pod event as the launch has failed. @@ -303,7 +306,7 @@ def _handle_pod_message(self, msg: PodMessage) -> None: sp_resp = self._prepare_step( wf=wf_response, step_definition=next_step, rwf=rwf_response ) - if sp_resp.iterations == 0: + if sp_resp.iterations == 0 or sp_resp.error_msg: # Cannot prepare variables for this step, # we have to leave. return @@ -366,7 +369,59 @@ def _prepare_step( step_name: str = step_definition["name"] rwf_id: str = rwf["id"] - # Compile a set of variables for this step. + # Before we move on, are we combiner? + # + # We are if a variable in our step's plumbing refers to an input of ours + # that is of type 'files'. If we are a combiner then we use the name of the + # step we are combining (there can only be one) so that we can ensure + # all its step instances have finished (successfully). We cannot + # move on until all the files we depend on are ready. + + our_job_definition: dict[str, Any] = self._get_step_job(step=step_definition) + our_inputs: dict[str, Any] = job_defintion_decoder.get_inputs( + our_job_definition + ) + our_plumbing: dict[str, list[Connector]] = get_step_prior_step_plumbing( + step_definition=step_definition + ) + step_name_being_combined: str | None = None + for p_step_name, connections in our_plumbing.items(): + for connector in connections: + if our_inputs.get(connector.out, {}).get("type") == "files": + step_name_being_combined = p_step_name + break + if step_name_being_combined: + break + if step_name_being_combined: + response, _ = self._wapi_adapter.get_status_of_all_step_instances_by_name( + running_workflow_id=rwf_id, + step_name=step_name_being_combined, + ) + # Assume succes... + all_step_instances_done: bool = True + all_step_instances_successful: bool = True + assert "count" in response + assert response["count"] > 0 + assert "status" in response + for status in response["status"]: + if not status["done"]: + all_step_instances_done = False + break + if not status["success"]: + all_step_instances_successful = False + break + if not all_step_instances_done: + # Can't move on - but other steps need to finish. + return StepPreparationResponse(iterations=0) + elif not all_step_instances_successful: + # Can't move on - all prior steps are done, + # but at least one was in error. + return StepPreparationResponse( + iterations=0, + error_msg="A prior step 'step_name_being_combined' iteration has failed", + ) + + # Now compile a set of variables for this step. # Start with any variables provided in the step's specification. # A map that we will add to (and maybe even over-write)... @@ -417,30 +472,6 @@ def _prepare_step( self._set_step_error(step_name, rwf_id, None, 1, msg) return StepPreparationResponse(iterations=0) - # Our inputs - our_job_definition: dict[str, Any] = self._get_step_job(step=step_definition) - our_inputs: dict[str, Any] = job_defintion_decoder.get_inputs( - our_job_definition - ) - - # Are we a combiner step? 
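# The combiner gate above, reduced to a sketch against the documented
# response shape of get_status_of_all_step_instances_by_name (hypothetical
# data): waiting is signalled by iterations=0, and a failed iteration
# produces an error message instead.
example_response = {
    "count": 2,
    "status": [
        {"done": True, "success": True, "running_workflow_step_id": "step-0001"},
        {"done": False, "success": False, "running_workflow_step_id": "step-0002"},
    ],
}
all_done = all(s["done"] for s in example_response["status"])
all_successful = all(s["success"] for s in example_response["status"])
# not all_done                    -> wait (return iterations=0, no error)
# all_done and not all_successful -> iterations=0 with an error_msg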
- # - # We are if a variable in our step's plumbing refers to an input that is - # of type 'files'. A combiner's input is required to accept a space-separated - # set of files. - we_are_a_combiner: bool = False - our_plumbing: dict[str, list[Connector]] = get_step_prior_step_plumbing( - step_definition=step_definition - ) - for p_step_name, connections in our_plumbing.items(): - for connector in connections: - if our_inputs.get(connector.out, {}).get("type") == "files": - we_are_a_combiner = True - - assert not we_are_a_combiner - - # We're not a combiner... - # Do we replicate this step (run it more than once)? # We do if a variable in this step's plumbing # refers to an output of a prior step whose type is 'files'. @@ -507,7 +538,7 @@ def _launch( # A step replication number, # used only for steps expected to run in parallel (even if just once) step_replication_number: int = 0 - + total_replicas: int = step_preparation_response.iterations variables = step_preparation_response.variables assert variables is not None for iteration in range(step_preparation_response.iterations): @@ -550,6 +581,7 @@ def _launch( running_workflow_id=rwf_id, step_name=step_name, step_replication_number=step_replication_number, + total_number_of_replicas=total_replicas, ) lr: LaunchResult = self._instance_launcher.launch(launch_parameters=lp) rwfs_id = lr.running_workflow_step_id From f6707c8b41852ce7e141ad455c0e8dd39bfbe6c0 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Tue, 2 Sep 2025 16:36:46 +0000 Subject: [PATCH 48/57] feat: refactor definition of an output --- tests/test_decoder.py | 38 ---------------- tests/test_workflow_engine_examples.py | 2 +- .../test_workflow_validator_for_run_level.py | 24 ---------- .../test_workflow_validator_for_tag_level.py | 24 ---------- ...cate-step-input-output-variable-names.yaml | 44 ------------------- .../example-smiles-to-file.yaml | 4 +- .../replicate-using-undeclared-input.yaml | 4 +- .../shortcut-example-1.yaml | 4 +- .../simple-python-molprops-with-options.yaml | 4 +- .../simple-python-molprops.yaml | 4 +- .../simple-python-parallel.yaml | 14 +++--- .../simple-python-split-combine.yaml | 11 +++-- workflow/workflow-schema.yaml | 32 +++++++------- 13 files changed, 40 insertions(+), 169 deletions(-) delete mode 100644 tests/workflow-definitions/duplicate-step-input-output-variable-names.yaml diff --git a/tests/test_decoder.py b/tests/test_decoder.py index 4958731..28136b3 100644 --- a/tests/test_decoder.py +++ b/tests/test_decoder.py @@ -184,41 +184,3 @@ def test_get_workflow_steps(): assert len(steps) == 2 assert steps[0]["name"] == "step1" assert steps[1]["name"] == "step2" - - -def test_get_step_input_variable_names_when_duplicates(): - # Arrange - workflow_filename: str = os.path.join( - os.path.dirname(__file__), - "workflow-definitions", - "duplicate-step-input-output-variable-names.yaml", - ) - with open(workflow_filename, "r", encoding="utf8") as wf_file: - definition: Dict[str, Any] = yaml.safe_load(wf_file) - - # Act - inputs = decoder.get_step_input_variable_names(definition, "step-1") - - # Assert - assert len(inputs) == 2 - assert inputs[0] == "inputFile" - assert inputs[1] == "inputFile" - - -def test_get_step_output_variable_names_when_duplicates(): - # Arrange - workflow_filename: str = os.path.join( - os.path.dirname(__file__), - "workflow-definitions", - "duplicate-step-input-output-variable-names.yaml", - ) - with open(workflow_filename, "r", encoding="utf8") as wf_file: - definition: Dict[str, Any] = yaml.safe_load(wf_file) - - # Act - outputs 
= decoder.get_step_output_variable_names(definition, "step-2") - - # Assert - assert len(outputs) == 2 - assert outputs[0] == "outputFile" - assert outputs[1] == "outputFile" diff --git a/tests/test_workflow_engine_examples.py b/tests/test_workflow_engine_examples.py index dbfd779..720fd4e 100644 --- a/tests/test_workflow_engine_examples.py +++ b/tests/test_workflow_engine_examples.py @@ -428,7 +428,7 @@ def test_workflow_engine_simple_python_split_combine(basic_engine): md, da, "simple-python-split-combine", - {"candidateMolecules": input_file_1}, + {"candidateMolecules": input_file_1, "combination": "combination.smi"}, ) # Assert diff --git a/tests/test_workflow_validator_for_run_level.py b/tests/test_workflow_validator_for_run_level.py index e76239d..6b575a3 100644 --- a/tests/test_workflow_validator_for_run_level.py +++ b/tests/test_workflow_validator_for_run_level.py @@ -215,27 +215,3 @@ def test_validate_simple_python_molprops_with_missing_input(): assert error.error_msg == [ "Missing workflow variable values for: candidateMolecules" ] - - -def test_validate_duplicate_step_output_variable_names(): - # Arrange - workflow_filename: str = os.path.join( - os.path.dirname(__file__), - "workflow-definitions", - "duplicate-step-input-output-variable-names.yaml", - ) - with open(workflow_filename, "r", encoding="utf8") as workflow_file: - workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) - assert workflow - - # Act - error = WorkflowValidator.validate( - level=ValidationLevel.TAG, - workflow_definition=workflow, - ) - - # Assert - assert error.error_num == 3 - assert error.error_msg == [ - "Duplicate step output variable: outputFile (step=step-2)" - ] diff --git a/tests/test_workflow_validator_for_tag_level.py b/tests/test_workflow_validator_for_tag_level.py index 4c1719d..ddf403f 100644 --- a/tests/test_workflow_validator_for_tag_level.py +++ b/tests/test_workflow_validator_for_tag_level.py @@ -149,27 +149,3 @@ def test_validate_simple_python_molprops_with_options(): # Assert assert error.error_num == 0 assert error.error_msg is None - - -def test_validate_duplicate_step_output_variable_names(): - # Arrange - workflow_filename: str = os.path.join( - os.path.dirname(__file__), - "workflow-definitions", - "duplicate-step-input-output-variable-names.yaml", - ) - with open(workflow_filename, "r", encoding="utf8") as workflow_file: - workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) - assert workflow - - # Act - error = WorkflowValidator.validate( - level=ValidationLevel.TAG, - workflow_definition=workflow, - ) - - # Assert - assert error.error_num == 3 - assert error.error_msg == [ - "Duplicate step output variable: outputFile (step=step-2)" - ] diff --git a/tests/workflow-definitions/duplicate-step-input-output-variable-names.yaml b/tests/workflow-definitions/duplicate-step-input-output-variable-names.yaml deleted file mode 100644 index deaae85..0000000 --- a/tests/workflow-definitions/duplicate-step-input-output-variable-names.yaml +++ /dev/null @@ -1,44 +0,0 @@ ---- -kind: DataManagerWorkflow -kind-version: "2025.2" -name: duplicate-step-output-variable-names -description: A workflow where step-1 has duplicate output variable names - -steps: -- name: step-1 - description: Add column 1 - specification: - collection: workflow-engine-unit-test-jobs - job: rdkit-molprops - version: "1.0.0" - variables: - name: "col1" - value: 123 - plumbing: - - variable: inputFile - from-workflow: - variable: candidateMolecules - - variable: inputFile - 
from-workflow: - variable: candidateMolecules - in: - - inputFile - - inputFile - -- name: step-2 - description: Add column 2 - specification: - collection: workflow-engine-unit-test-jobs - job: cluster-butina - version: "1.0.0" - variables: - name: "col2" - value: "999" - plumbing: - - variable: inputFile - from-step: - name: step1 - variable: outputFile - out: - - outputFile - - outputFile diff --git a/tests/workflow-definitions/example-smiles-to-file.yaml b/tests/workflow-definitions/example-smiles-to-file.yaml index 29c3e98..0f0844a 100644 --- a/tests/workflow-definitions/example-smiles-to-file.yaml +++ b/tests/workflow-definitions/example-smiles-to-file.yaml @@ -20,5 +20,5 @@ steps: - variable: smiles from-workflow: variable: smiles - out: - - outputFile + - variable: outputFile + to-project: diff --git a/tests/workflow-definitions/replicate-using-undeclared-input.yaml b/tests/workflow-definitions/replicate-using-undeclared-input.yaml index 0828b48..5b81d9e 100644 --- a/tests/workflow-definitions/replicate-using-undeclared-input.yaml +++ b/tests/workflow-definitions/replicate-using-undeclared-input.yaml @@ -38,5 +38,5 @@ steps: from-step: name: step-1 variable: outputFile - out: - - outputFile + - variable: outputFile + to-project: diff --git a/tests/workflow-definitions/shortcut-example-1.yaml b/tests/workflow-definitions/shortcut-example-1.yaml index b9c4a87..5bb94c3 100644 --- a/tests/workflow-definitions/shortcut-example-1.yaml +++ b/tests/workflow-definitions/shortcut-example-1.yaml @@ -24,5 +24,5 @@ steps: from-step: name: example-1-step-1 variable: outputFile - out: - - outputFile + - variable: outputFile + to-project: diff --git a/tests/workflow-definitions/simple-python-molprops-with-options.yaml b/tests/workflow-definitions/simple-python-molprops-with-options.yaml index de1ad86..30cf64b 100644 --- a/tests/workflow-definitions/simple-python-molprops-with-options.yaml +++ b/tests/workflow-definitions/simple-python-molprops-with-options.yaml @@ -41,5 +41,5 @@ steps: - variable: outputFile from-workflow: variable: clusteredMolecules - out: - - outputFile + - variable: outputFile + to-project: diff --git a/tests/workflow-definitions/simple-python-molprops.yaml b/tests/workflow-definitions/simple-python-molprops.yaml index 5639da3..054bb2b 100644 --- a/tests/workflow-definitions/simple-python-molprops.yaml +++ b/tests/workflow-definitions/simple-python-molprops.yaml @@ -37,5 +37,5 @@ steps: - variable: outputFile from-workflow: variable: clusteredMolecules - out: - - outputFile + - variable: outputFile + to-project: diff --git a/tests/workflow-definitions/simple-python-parallel.yaml b/tests/workflow-definitions/simple-python-parallel.yaml index dc8e3f3..2a0fcb7 100644 --- a/tests/workflow-definitions/simple-python-parallel.yaml +++ b/tests/workflow-definitions/simple-python-parallel.yaml @@ -55,14 +55,14 @@ steps: collection: workflow-engine-unit-test-jobs job: concatenate version: "1.0.0" - inputs: - - input: inputFile + plumbing: + - variable: inputFile from: step: parallel-step-a - output: outputFile - - input: inputFile + variable: outputFile + - variable: inputFile from: step: parallel-step-b - output: outputFile - out: - - outputFile + variable: outputFile + - variable: outputFile + to-project: diff --git a/tests/workflow-definitions/simple-python-split-combine.yaml b/tests/workflow-definitions/simple-python-split-combine.yaml index 739ec15..476d27d 100644 --- a/tests/workflow-definitions/simple-python-split-combine.yaml +++ 
b/tests/workflow-definitions/simple-python-split-combine.yaml @@ -38,7 +38,7 @@ steps: variable: outputBase #- name: combine -# description: Add some params +# description: Combine the parallel files # specification: # collection: workflow-engine-unit-test-jobs # job: concatenate @@ -47,8 +47,11 @@ steps: # outputFile: results.smi # plumbing: # - variable: inputFile -# from-step: # variable: outputFile +# from-step: # name: parallel - out: - - outputFile +# - variable: outputFile +# from-workflow: +# variable: combination +# - variable: outputFile +# to-project: diff --git a/workflow/workflow-schema.yaml b/workflow/workflow-schema.yaml index 97a4610..bd55c27 100644 --- a/workflow/workflow-schema.yaml +++ b/workflow/workflow-schema.yaml @@ -102,6 +102,20 @@ definitions: - variable - from-workflow + # A Step variable + # (whose value is to be copied to the project directory) + step-variable-to-project: + type: object + additionalProperties: false + properties: + variable: + $ref: '#/definitions/variable-name' + to-project: + type: 'null' + required: + - variable + - to-project + # A step specification variable # (there must be at least one if a variables block is defined). # Typical variable syntax based on Python's definition of a variable @@ -167,23 +181,7 @@ definitions: anyOf: - $ref: "#/definitions/step-variable-from-step" - $ref: "#/definitions/step-variable-from-workflow" - minItems: 1 - in: - # An optional list of the step variables that are inputs. - # These are typically files, expected to be present in the Project directory, - # that need to be copied (by the DM) into the step's instance directory. - type: array - items: - $ref: '#/definitions/variable-name' - minItems: 1 - out: - # An optional list of the step variables that are outputs. - # These are typically files, expected to be present in the Step Instance directory, - # when it finished (successfully), that need to be copied (by the DM) - # into the Project directory via "realise_outputs()" - type: array - items: - $ref: '#/definitions/variable-name' + - $ref: "#/definitions/step-variable-to-project" minItems: 1 required: - name From 5834c8c998d004d32839468e1ea1ded25c618626 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Wed, 3 Sep 2025 11:00:21 +0000 Subject: [PATCH 49/57] fix: Add get_status_of_all_step_instances_by_name implementation (and fix step replicas) --- tests/instance_launcher.py | 1 + tests/wapi_adapter.py | 22 +++++++++++++++++++--- workflow/workflow_abc.py | 4 +++- 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/tests/instance_launcher.py b/tests/instance_launcher.py index c59d138..137a77b 100644 --- a/tests/instance_launcher.py +++ b/tests/instance_launcher.py @@ -89,6 +89,7 @@ def launch(self, *, launch_parameters: LaunchParameters) -> LaunchResult: running_workflow_id=launch_parameters.running_workflow_id, step=launch_parameters.step_name, replica=launch_parameters.step_replication_number, + replicas=launch_parameters.total_number_of_replicas, ) assert "id" in response rwfs_id: str = response["id"] diff --git a/tests/wapi_adapter.py b/tests/wapi_adapter.py index 75bd0e9..71e326b 100644 --- a/tests/wapi_adapter.py +++ b/tests/wapi_adapter.py @@ -113,10 +113,24 @@ def get_running_steps( return {"count": 0, "steps": []}, 0 def get_status_of_all_step_instances_by_name( - self, *, running_workflow_id: str, step_name: str + self, *, running_workflow_id: str, name: str ) -> tuple[dict[str, Any], int]: - # Need to implement! 
- return {"count": 0, "status": []}, 0 + UnitTestWorkflowAPIAdapter.lock.acquire() + with open(_RUNNING_WORKFLOW_STEP_PICKLE_FILE, "rb") as pickle_file: + running_workflow_step = Unpickler(pickle_file).load() + UnitTestWorkflowAPIAdapter.lock.release() + + steps: list[dict[str, Any]] = [] + for rwfs_id, record in running_workflow_step.items(): + if record["running_workflow"]["id"] != running_workflow_id: + continue + if record["name"] == name: + response = record + response["id"] = rwfs_id + if record["replica"] == 0: + _ = response.pop("replica") + steps.append(response) + return {"count": len(steps), "status": steps}, 0 def set_running_workflow_done( self, @@ -146,6 +160,7 @@ def create_running_workflow_step( running_workflow_id: str, step: str, replica: int = 0, + replicas: int = 0, prior_running_workflow_step_id: str | None = None, ) -> tuple[dict[str, Any], int]: if replica: @@ -164,6 +179,7 @@ def create_running_workflow_step( "done": False, "success": False, "replica": replica, + "replicas": replicas, "variables": {}, "running_workflow": {"id": running_workflow_id}, } diff --git a/workflow/workflow_abc.py b/workflow/workflow_abc.py index ee9c22a..2648772 100644 --- a/workflow/workflow_abc.py +++ b/workflow/workflow_abc.py @@ -182,7 +182,7 @@ def get_running_steps( @abstractmethod def get_status_of_all_step_instances_by_name( - self, *, running_workflow_id: str, step_name: str + self, *, name: str, running_workflow_id: str ) -> tuple[dict[str, Any], int]: """Get a list of step execution statuses for the named step.""" # Should return: @@ -226,6 +226,8 @@ def get_running_workflow_step( # "success": False, # "error_num": 0, # "error_msg": "", + # "replica": 0, + # "replicas": 0, # "variables": { # "x": 1, # "y": 2, From cfaeaec5c47b781d1cc400380fc5b83e904fed76 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Wed, 3 Sep 2025 11:00:54 +0000 Subject: [PATCH 50/57] docs: Doc tweak --- workflow/workflow-schema.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/workflow-schema.yaml b/workflow/workflow-schema.yaml index bd55c27..2266fca 100644 --- a/workflow/workflow-schema.yaml +++ b/workflow/workflow-schema.yaml @@ -103,7 +103,7 @@ definitions: - from-workflow # A Step variable - # (whose value is to be copied to the project directory) + # (whose value (a file) is to be copied to the project directory) step-variable-to-project: type: object additionalProperties: false From 7d0363efef546b6e49daf75fdf8ebf5a6decb8b6 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Wed, 3 Sep 2025 11:01:39 +0000 Subject: [PATCH 51/57] fix: Typo in YAML --- tests/workflow-definitions/simple-python-split-combine.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/workflow-definitions/simple-python-split-combine.yaml b/tests/workflow-definitions/simple-python-split-combine.yaml index 476d27d..da306ae 100644 --- a/tests/workflow-definitions/simple-python-split-combine.yaml +++ b/tests/workflow-definitions/simple-python-split-combine.yaml @@ -47,9 +47,9 @@ steps: # outputFile: results.smi # plumbing: # - variable: inputFile -# variable: outputFile # from-step: # name: parallel +# variable: outputFile # - variable: outputFile # from-workflow: # variable: combination From f3361199abbafd73ab59ae880b7dd09c6e9c6860 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Wed, 3 Sep 2025 11:04:03 +0000 Subject: [PATCH 52/57] feat: Minor work on combiner logic --- workflow/workflow_engine.py | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 
deletions(-) diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index 34d51b7..ce0f1fe 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -393,16 +393,19 @@ def _prepare_step( if step_name_being_combined: break if step_name_being_combined: + print("*** COMBINER") response, _ = self._wapi_adapter.get_status_of_all_step_instances_by_name( + name=step_name_being_combined, running_workflow_id=rwf_id, - step_name=step_name_being_combined, ) # Assume succes... - all_step_instances_done: bool = True - all_step_instances_successful: bool = True assert "count" in response - assert response["count"] > 0 + num_being_combined: int = response["count"] + assert num_being_combined > 0 assert "status" in response + + all_step_instances_done: bool = True + all_step_instances_successful: bool = True for status in response["status"]: if not status["done"]: all_step_instances_done = False @@ -412,15 +415,30 @@ def _prepare_step( break if not all_step_instances_done: # Can't move on - but other steps need to finish. + _LOGGER.debug( + "Assessing start of combiner step (%s)" + " but not all steps (%s) to be combined are done", + step_name, + step_name_being_combined, + ) return StepPreparationResponse(iterations=0) elif not all_step_instances_successful: # Can't move on - all prior steps are done, # but at least one was in error. + _LOGGER.debug( + "Assessing start of combiner step (%s)" + " but at least one step (%s) to be combined failed", + step_name, + step_name_being_combined, + ) return StepPreparationResponse( iterations=0, - error_msg="A prior step 'step_name_being_combined' iteration has failed", + error_msg=f"Prior instance of step '{step_name_being_combined}' has failed", ) + if step_name_being_combined: + print("*** COMBINER : Able to start") + # Now compile a set of variables for this step. # Start with any variables provided in the step's specification. @@ -447,6 +465,9 @@ def _prepare_step( # related to values used in prior steps. The decoder gives # us a map indexed by prior step name that's a list of "in" "out" # tuples as above. + # + # If this is a combiner step remember that we need to inspect + # variables from all the prior steps. 
prior_step_plumbing: dict[str, list[Connector]] = get_step_prior_step_plumbing( step_definition=step_definition ) From bea2cdb963dd7878e05e9ac23bf6f274790cfd6d Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Wed, 3 Sep 2025 12:51:49 +0000 Subject: [PATCH 53/57] fix: First very basic combiner run --- tests/test_workflow_engine_examples.py | 12 +- .../simple-python-split-combine.yaml | 36 +++--- workflow/workflow_engine.py | 120 +++++++++++------- 3 files changed, 98 insertions(+), 70 deletions(-) diff --git a/tests/test_workflow_engine_examples.py b/tests/test_workflow_engine_examples.py index 720fd4e..056c787 100644 --- a/tests/test_workflow_engine_examples.py +++ b/tests/test_workflow_engine_examples.py @@ -439,10 +439,8 @@ def test_workflow_engine_simple_python_split_combine(basic_engine): print("response") pprint(response) - assert response["count"] == 3 - assert response["running_workflow_steps"][0]["done"] - assert response["running_workflow_steps"][0]["success"] - assert response["running_workflow_steps"][1]["done"] - assert response["running_workflow_steps"][1]["success"] - assert response["running_workflow_steps"][2]["done"] - assert response["running_workflow_steps"][2]["success"] + assert response["count"] == 4 + rwf_steps = response["running_workflow_steps"] + for rwf_step in rwf_steps: + assert rwf_step["done"] + assert rwf_step["success"] diff --git a/tests/workflow-definitions/simple-python-split-combine.yaml b/tests/workflow-definitions/simple-python-split-combine.yaml index da306ae..2dcc68e 100644 --- a/tests/workflow-definitions/simple-python-split-combine.yaml +++ b/tests/workflow-definitions/simple-python-split-combine.yaml @@ -37,21 +37,21 @@ steps: name: split variable: outputBase -#- name: combine -# description: Combine the parallel files -# specification: -# collection: workflow-engine-unit-test-jobs -# job: concatenate -# version: "1.0.0" -# variables: -# outputFile: results.smi -# plumbing: -# - variable: inputFile -# from-step: -# name: parallel -# variable: outputFile -# - variable: outputFile -# from-workflow: -# variable: combination -# - variable: outputFile -# to-project: +- name: combine + description: Combine the parallel files + specification: + collection: workflow-engine-unit-test-jobs + job: concatenate + version: "1.0.0" + variables: + outputFile: results.smi + plumbing: + - variable: inputFile + from-step: + name: parallel + variable: outputFile + - variable: outputFile + from-workflow: + variable: combination + - variable: outputFile + to-project: diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index ce0f1fe..37aecfe 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -384,24 +384,28 @@ def _prepare_step( our_plumbing: dict[str, list[Connector]] = get_step_prior_step_plumbing( step_definition=step_definition ) + step_is_combiner: bool = False step_name_being_combined: str | None = None + combiner_input_variable: str | None = None + num_step_recplicas_being_combined: int = 0 for p_step_name, connections in our_plumbing.items(): for connector in connections: if our_inputs.get(connector.out, {}).get("type") == "files": step_name_being_combined = p_step_name + combiner_input_variable = connector.out + step_is_combiner = True break if step_name_being_combined: break if step_name_being_combined: - print("*** COMBINER") response, _ = self._wapi_adapter.get_status_of_all_step_instances_by_name( name=step_name_being_combined, running_workflow_id=rwf_id, ) # Assume succes... 
assert "count" in response - num_being_combined: int = response["count"] - assert num_being_combined > 0 + num_step_recplicas_being_combined = response["count"] + assert num_step_recplicas_being_combined > 0 assert "status" in response all_step_instances_done: bool = True @@ -436,10 +440,8 @@ def _prepare_step( error_msg=f"Prior instance of step '{step_name_being_combined}' has failed", ) - if step_name_being_combined: - print("*** COMBINER : Able to start") - - # Now compile a set of variables for this step. + # I think we can start this step, + # so compile a set of variables for it. # Start with any variables provided in the step's specification. # A map that we will add to (and maybe even over-write)... @@ -472,15 +474,39 @@ def _prepare_step( step_definition=step_definition ) for prior_step_name, connections in prior_step_plumbing.items(): - # Retrieve the prior "running" step - # in order to get the variables that were set there... - prior_step, _ = self._wapi_adapter.get_running_workflow_step_by_name( - name=prior_step_name, running_workflow_id=rwf_id - ) - # Copy "in" value to "out"... - for connector in connections: - assert connector.in_ in prior_step["variables"] - variables[connector.out] = prior_step["variables"][connector.in_] + if step_is_combiner and prior_step_name == step_name_being_combined: + assert combiner_input_variable + input_source_list: list[str] = [] + for replica in range(1, num_step_recplicas_being_combined + 1): + prior_step, _ = ( + self._wapi_adapter.get_running_workflow_step_by_name( + name=prior_step_name, + replica=replica, + running_workflow_id=rwf_id, + ) + ) + # Copy "in" value to "out"... + for connector in connections: + assert connector.in_ in prior_step["variables"] + if connector.out == combiner_input_variable: + input_source_list.append( + prior_step["variables"][connector.in_] + ) + else: + variables[connector.out] = prior_step["variables"][ + connector.in_ + ] + variables[combiner_input_variable] = input_source_list + else: + # Retrieve the prior "running" step + # in order to get the variables that were set there... + prior_step, _ = self._wapi_adapter.get_running_workflow_step_by_name( + name=prior_step_name, running_workflow_id=rwf_id + ) + # Copy "in" value to "out"... + for connector in connections: + assert connector.in_ in prior_step["variables"] + variables[connector.out] = prior_step["variables"][connector.in_] # Now ... can the command be compiled!? job: dict[str, Any] = self._get_step_job(step=step_definition) @@ -494,7 +520,8 @@ def _prepare_step( return StepPreparationResponse(iterations=0) # Do we replicate this step (run it more than once)? - # We do if a variable in this step's plumbing + # + # We do if this is not a combiner step and a variable in this step's plumbing # refers to an output of a prior step whose type is 'files'. # If the prior step is a 'splitter' we populate the 'replication_values' array # with the list of files the prior step genrated for its output. @@ -503,36 +530,39 @@ def _prepare_step( # be more than one prior step variable that is 'files'! iter_values: list[str] = [] iter_variable: str | None = None - for p_step_name, connections in our_plumbing.items(): - # We need to get the Job definition for each step - # and then check whether the (output) variable is of type 'files'... 
- wf_step: dict[str, Any] = get_step(wf, p_step_name) - assert wf_step - job_definition: dict[str, Any] = self._get_step_job(step=wf_step) - jd_outputs: dict[str, Any] = job_defintion_decoder.get_outputs( - job_definition - ) - for connector in connections: - if jd_outputs.get(connector.in_, {}).get("type") == "files": - iter_variable = connector.out - # Get the prior running step's output values - response, _ = self._wapi_adapter.get_running_workflow_step_by_name( - name=p_step_name, - running_workflow_id=rwf_id, - ) - rwfs_id = response["id"] - assert rwfs_id - result, _ = ( - self._wapi_adapter.get_running_workflow_step_output_values_for_output( - running_workflow_step_id=rwfs_id, - output_variable=connector.in_, + if not step_is_combiner: + for p_step_name, connections in our_plumbing.items(): + # We need to get the Job definition for each step + # and then check whether the (output) variable is of type 'files'... + wf_step: dict[str, Any] = get_step(wf, p_step_name) + assert wf_step + job_definition: dict[str, Any] = self._get_step_job(step=wf_step) + jd_outputs: dict[str, Any] = job_defintion_decoder.get_outputs( + job_definition + ) + for connector in connections: + if jd_outputs.get(connector.in_, {}).get("type") == "files": + iter_variable = connector.out + # Get the prior running step's output values + response, _ = ( + self._wapi_adapter.get_running_workflow_step_by_name( + name=p_step_name, + running_workflow_id=rwf_id, + ) ) - ) - iter_values = result["output"].copy() + rwfs_id = response["id"] + assert rwfs_id + result, _ = ( + self._wapi_adapter.get_running_workflow_step_output_values_for_output( + running_workflow_step_id=rwfs_id, + output_variable=connector.in_, + ) + ) + iter_values = result["output"].copy() + break + # Stop if we've got an iteration variable + if iter_variable: break - # Stop if we've got an iteration variable - if iter_variable: - break num_step_instances: int = max(1, len(iter_values)) return StepPreparationResponse( From 94fd202c2e2a9850b37b25d59b3fe8cee7167d62 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Wed, 3 Sep 2025 14:04:13 +0000 Subject: [PATCH 54/57] docs: Doc tweak --- workflow/workflow_engine.py | 69 ++++++++++++++++++++++++------------- 1 file changed, 45 insertions(+), 24 deletions(-) diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index 37aecfe..f24a16f 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -65,6 +65,7 @@ class StepPreparationResponse: variables: dict[str, Any] | None = None iteration_variable: str | None = None iteration_values: list[str] | None = None + error_num: int = 0 error_msg: str | None = None @@ -306,12 +307,20 @@ def _handle_pod_message(self, msg: PodMessage) -> None: sp_resp = self._prepare_step( wf=wf_response, step_definition=next_step, rwf=rwf_response ) - if sp_resp.iterations == 0 or sp_resp.error_msg: + if sp_resp.iterations == 0: # Cannot prepare variables for this step, - # we have to leave. + # it might be a combiner step and some prior steps may still + # be running ... or something's gone wrong. 
+ if sp_resp.error_num: + self._wapi_adapter.set_running_workflow_done( + running_workflow_id=r_wfid, + success=False, + error_num=sp_resp.error_num, + error_msg=sp_resp.error_msg, + ) return - assert sp_resp.variables is not None + assert sp_resp.variables is not None self._launch( rwf=rwf_response, step_definition=next_step, @@ -359,17 +368,19 @@ def _get_step_job(self, *, step: dict[str, Any]) -> dict[str, Any]: def _prepare_step( self, *, - wf: dict[str, Any], step_definition: dict[str, Any], + wf: dict[str, Any], rwf: dict[str, Any], ) -> StepPreparationResponse: """Attempts to prepare a map of step variables. If variables cannot be - presented to the step we return an object with 'iterations' set to zero.""" + presented to the step we return an object with 'iterations' set to zero. + If there's a problem that means we should be able to proceed but cannot, + we set 'error_num' and 'error_msg'.""" step_name: str = step_definition["name"] rwf_id: str = rwf["id"] - # Before we move on, are we combiner? + # Before we move on, are we a combiner? # # We are if a variable in our step's plumbing refers to an input of ours # that is of type 'files'. If we are a combiner then we use the name of the @@ -397,17 +408,24 @@ def _prepare_step( break if step_name_being_combined: break - if step_name_being_combined: + + if step_is_combiner: + assert step_name_being_combined + assert combiner_input_variable + + # Are all the step instances we're combining done? + response, _ = self._wapi_adapter.get_status_of_all_step_instances_by_name( name=step_name_being_combined, running_workflow_id=rwf_id, ) - # Assume succes... assert "count" in response num_step_recplicas_being_combined = response["count"] assert num_step_recplicas_being_combined > 0 assert "status" in response + # Assume they're all done + # and undo our assumption if not... all_step_instances_done: bool = True all_step_instances_successful: bool = True for status in response["status"]: @@ -418,7 +436,7 @@ def _prepare_step( all_step_instances_successful = False break if not all_step_instances_done: - # Can't move on - but other steps need to finish. + # Can't move on - other steps need to finish. _LOGGER.debug( "Assessing start of combiner step (%s)" " but not all steps (%s) to be combined are done", @@ -428,8 +446,8 @@ def _prepare_step( return StepPreparationResponse(iterations=0) elif not all_step_instances_successful: # Can't move on - all prior steps are done, - # but at least one was in error. - _LOGGER.debug( + # but at least one was not successful. + _LOGGER.warning( "Assessing start of combiner step (%s)" " but at least one step (%s) to be combined failed", step_name, @@ -437,6 +455,7 @@ def _prepare_step( ) return StepPreparationResponse( iterations=0, + error_num=1, error_msg=f"Prior instance of step '{step_name_being_combined}' has failed", ) @@ -448,11 +467,11 @@ def _prepare_step( variables: dict[str, Any] = step_definition["specification"].get( "variables", {} ) - - # All the running workflow variables + # ...and the running workflow variables rwf_variables: dict[str, Any] = rwf.get("variables", {}) - # Process the step's plumbing realting to workflow variables. + # Process the step's "plumbing" relating to workflow variables. + # # This will be a list of Connectors of "in" and "out" variable names. # "in" variables are worklfow variables, and "out" variables # are expected Job variables. 
We use this to add variables @@ -463,13 +482,12 @@ def _prepare_step( assert connector.in_ in rwf_variables variables[connector.out] = rwf_variables[connector.in_] - # Now we apply variables from the "plumbing" block - # related to values used in prior steps. The decoder gives - # us a map indexed by prior step name that's a list of "in" "out" - # tuples as above. + # Now process variables (from the "plumbing" block) + # that relate to values used in prior steps. # - # If this is a combiner step remember that we need to inspect - # variables from all the prior steps. + # The decoder gives us a map indexed by prior step name that's a list of + # "in" "out" connectors as above. If this is a combiner step remember + # that we need to inspect variables from all the prior steps. prior_step_plumbing: dict[str, list[Connector]] = get_step_prior_step_plumbing( step_definition=step_definition ) @@ -486,6 +504,8 @@ def _prepare_step( ) ) # Copy "in" value to "out"... + # accumulating thiose for the 'combining' variable, + # which will be set as a list when we're done. for connector in connections: assert connector.in_ in prior_step["variables"] if connector.out == combiner_input_variable: @@ -508,16 +528,17 @@ def _prepare_step( assert connector.in_ in prior_step["variables"] variables[connector.out] = prior_step["variables"][connector.in_] - # Now ... can the command be compiled!? + # All variables are set ... + # is this enough to satisfy the setp's Job command? + job: dict[str, Any] = self._get_step_job(step=step_definition) message, success = job_defintion_decoder.decode( job["command"], variables, "command", TextEncoding.JINJA2_3_0 ) if not success: - msg = f"Failed command validation error_msg={message}" + msg = f"Failed command validation for step {step_name} error_msg={message}" _LOGGER.warning(msg) - self._set_step_error(step_name, rwf_id, None, 1, msg) - return StepPreparationResponse(iterations=0) + return StepPreparationResponse(iterations=0, error_num=2, error_msg=msg) # Do we replicate this step (run it more than once)? # From 8dd930852939bdc7686a3931d2f543fa1d33ec8b Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Wed, 3 Sep 2025 14:26:45 +0000 Subject: [PATCH 55/57] fix: replica always starts at 0 --- tests/wapi_adapter.py | 6 ++-- workflow/workflow_abc.py | 6 ++-- workflow/workflow_engine.py | 68 +++++++++++++++++++------------------ 3 files changed, 41 insertions(+), 39 deletions(-) diff --git a/tests/wapi_adapter.py b/tests/wapi_adapter.py index 71e326b..4c42850 100644 --- a/tests/wapi_adapter.py +++ b/tests/wapi_adapter.py @@ -160,11 +160,11 @@ def create_running_workflow_step( running_workflow_id: str, step: str, replica: int = 0, - replicas: int = 0, + replicas: int = 1, prior_running_workflow_step_id: str | None = None, ) -> tuple[dict[str, Any], int]: - if replica: - assert replica > 0 + assert replica >= 0 + assert replicas > replica UnitTestWorkflowAPIAdapter.lock.acquire() with open(_RUNNING_WORKFLOW_STEP_PICKLE_FILE, "rb") as pickle_file: diff --git a/workflow/workflow_abc.py b/workflow/workflow_abc.py index 2648772..ae88898 100644 --- a/workflow/workflow_abc.py +++ b/workflow/workflow_abc.py @@ -45,9 +45,9 @@ class LaunchParameters: # 1..'N'. step_replication_number: int = 0 # The total number of replicas of this instance that are expected to be laucnhed. - # if step_replication_number is set, this has to be set. It is 'N'. - # If step_replication_number is zero this value is ignored. 
-    total_number_of_replicas: int = 0
+    # This cannot be less than 1 and must be greater than any value of
+    # 'step_replication_number' that will be used for the same step.
+    total_number_of_replicas: int = 1
     # The application ID (a custom resource name)
     # used to identify the 'type' of Instance to create.
     # For DM Jobs this will be 'datamanagerjobs.squonk.it'
diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py
index f24a16f..b173bf4 100644
--- a/workflow/workflow_engine.py
+++ b/workflow/workflow_engine.py
@@ -54,17 +54,17 @@

 @dataclass
 class StepPreparationResponse:
-    """Step preparation response object. Iterations is +ve (non-zero) if a step
+    """Step preparation response object. 'replicas' is +ve (non-zero) if a step
     can be launched - its value indicates how many times. If a step can be
     launched 'variables' will not be None. If a parallel set of steps can take place
-    (even just one) 'iteration_variable' will be set and 'iteration_values'
-    will be a list containing a value for each step. If prparation failed
-    'error_msg' chould contain something useful."""
+    (even just one) 'replica_variable' will be set and 'replica_values'
+    will be a list containing a value for each step instance. If preparation fails
+    'error_num' will be set, and 'error_msg' should contain something useful."""

-    iterations: int
+    replicas: int
     variables: dict[str, Any] | None = None
-    iteration_variable: str | None = None
-    iteration_values: list[str] | None = None
+    replica_variable: str | None = None
+    replica_values: list[str] | None = None
     error_num: int = 0
     error_msg: str | None = None

@@ -307,9 +307,10 @@ def _handle_pod_message(self, msg: PodMessage) -> None:
             sp_resp = self._prepare_step(
                 wf=wf_response, step_definition=next_step, rwf=rwf_response
             )
-            if sp_resp.iterations == 0:
+            if sp_resp.replicas == 0:
                 # Cannot prepare variables for this step,
-                # it might be a combiner step and some prior steps may still
+                # it might be a step dependent on more than one prior step
+                # (like a 'combiner') and some prior steps may still
                 # be running ... or something's gone wrong.
                 if sp_resp.error_num:
@@ -443,7 +444,7 @@ def _prepare_step(
                     step_name,
                     step_name_being_combined,
                 )
-                return StepPreparationResponse(iterations=0)
+                return StepPreparationResponse(replicas=0)
             elif not all_step_instances_successful:
                 # Can't move on - all prior steps are done,
                 # but at least one was not successful.
@@ -457,7 +458,7 @@ def _prepare_step(
                     step_name_being_combined,
                 )
                 return StepPreparationResponse(
-                    iterations=0,
+                    replicas=0,
                     error_num=1,
                     error_msg=f"Prior instance of step '{step_name_being_combined}' has failed",
                 )
@@ -482,12 +483,12 @@ def _prepare_step(
             assert connector.in_ in rwf_variables
             variables[connector.out] = rwf_variables[connector.in_]

-        # Now process variables (from the "plumbing" block)
+        # Now process variables (in the "plumbing" block)
         # that relate to values used in prior steps.
         #
         # The decoder gives us a map indexed by prior step name that's a list of
-        # "in" "out" connectors as above. If this is a combiner step remember
-        # that we need to inspect variables from all the prior steps.
+        # "in"/"out" connectors as above. If this is a combiner step remember
+        # that the combiner_input_variable is used as a list.
prior_step_plumbing: dict[str, list[Connector]] = get_step_prior_step_plumbing( step_definition=step_definition ) @@ -495,7 +496,7 @@ def _prepare_step( if step_is_combiner and prior_step_name == step_name_being_combined: assert combiner_input_variable input_source_list: list[str] = [] - for replica in range(1, num_step_recplicas_being_combined + 1): + for replica in range(num_step_recplicas_being_combined): prior_step, _ = ( self._wapi_adapter.get_running_workflow_step_by_name( name=prior_step_name, @@ -538,7 +539,7 @@ def _prepare_step( if not success: msg = f"Failed command validation for step {step_name} error_msg={message}" _LOGGER.warning(msg) - return StepPreparationResponse(iterations=0, error_num=2, error_msg=msg) + return StepPreparationResponse(replicas=0, error_num=2, error_msg=msg) # Do we replicate this step (run it more than once)? # @@ -588,9 +589,9 @@ def _prepare_step( num_step_instances: int = max(1, len(iter_values)) return StepPreparationResponse( variables=variables, - iterations=num_step_instances, - iteration_variable=iter_variable, - iteration_values=iter_values, + replicas=num_step_instances, + replica_variable=iter_variable, + replica_values=iter_values, ) def _launch( @@ -607,30 +608,31 @@ def _launch( rwf_id: str = rwf["id"] project_id = rwf["project"]["id"] - # A step replication number, - # used only for steps expected to run in parallel (even if just once) - step_replication_number: int = 0 - total_replicas: int = step_preparation_response.iterations + # Total replicas must be 1 or more + total_replicas: int = step_preparation_response.replicas + assert total_replicas >= 1 + variables = step_preparation_response.variables assert variables is not None - for iteration in range(step_preparation_response.iterations): + for replica in range(step_preparation_response.replicas): - # If we are replicating this step then we must replace the step's variable + # If we are replicating this step more than once + # the 'replica_variable' will be set. + # We must replace the step's variable # with a value expected for this iteration. - if step_preparation_response.iteration_variable: - assert step_preparation_response.iteration_values - iter_value: str = step_preparation_response.iteration_values[iteration] + if step_preparation_response.replica_variable: + assert step_preparation_response.replica_values + iter_value: str = step_preparation_response.replica_values[replica] _LOGGER.info( - "Replicating step: %s iteration=%s variable=%s value=%s", + "Replicating step: %s replica=%s variable=%s value=%s", step_name, - iteration, - step_preparation_response.iteration_variable, + replica, + step_preparation_response.replica_variable, iter_value, ) # Over-write the replicating variable # and set the replication number to a unique +ve non-zero value... 
- variables[step_preparation_response.iteration_variable] = iter_value - step_replication_number = iteration + 1 + variables[step_preparation_response.replica_variable] = iter_value _LOGGER.info( "Launching step: %s RunningWorkflow=%s (name=%s)" @@ -652,7 +654,7 @@ def _launch( variables=variables, running_workflow_id=rwf_id, step_name=step_name, - step_replication_number=step_replication_number, + step_replication_number=replica, total_number_of_replicas=total_replicas, ) lr: LaunchResult = self._instance_launcher.launch(launch_parameters=lp) From 179135aa4d66820ba2c80aa9c528086adf5c7cae Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Wed, 3 Sep 2025 14:32:34 +0000 Subject: [PATCH 56/57] docs: Doc tweak --- workflow/workflow_engine.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index b173bf4..6897c19 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -510,15 +510,24 @@ def _prepare_step( for connector in connections: assert connector.in_ in prior_step["variables"] if connector.out == combiner_input_variable: + # Each instance may have a different value input_source_list.append( prior_step["variables"][connector.in_] ) - else: + elif replica == 0: + # Only the first instance value are of interest, + # the rest wil be the same - only one variable + # is a list of different values. variables[connector.out] = prior_step["variables"][ connector.in_ ] + # Now we have accumulated the prior steps values (files) + # set the combiner's corresponding input variable... variables[combiner_input_variable] = input_source_list else: + # Not a preior step for a combiner, + # or not a step being combined in a combiner. + # # Retrieve the prior "running" step # in order to get the variables that were set there... prior_step, _ = self._wapi_adapter.get_running_workflow_step_by_name( From cdddc35d39d517ebf520d2f43a93ccbde6f772d7 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Thu, 4 Sep 2025 11:56:44 +0000 Subject: [PATCH 57/57] feat: Add from-link-prefix variables --- tests/job-definitions/job-definitions.yaml | 8 +- tests/jobs/concatenate.py | 11 ++- .../simple-python-split-combine.yaml | 8 +- workflow/decoder.py | 13 ++- workflow/workflow-schema.yaml | 24 ++++++ workflow/workflow_engine.py | 84 ++++++++----------- 6 files changed, 90 insertions(+), 58 deletions(-) diff --git a/tests/job-definitions/job-definitions.yaml b/tests/job-definitions/job-definitions.yaml index 03a3b69..749c68e 100644 --- a/tests/job-definitions/job-definitions.yaml +++ b/tests/job-definitions/job-definitions.yaml @@ -131,13 +131,19 @@ jobs: concatenate: command: >- - concatenate.py {% for ifile in inputFile %}{{ ifile }} {% endfor %} --outputFile {{ outputFile }} + concatenate.py --inputFile {{ inputFile }} --outputFile {{ outputFile }} # Simulate a multiple input files Job (combiner)... 
variables:
      inputs:
        properties:
          inputFile:
            type: files
+    options:
+      type: object
+      properties:
+        inputDirPrefix:
+          title: Optional input directory prefix
+          type: string
     outputs:
       properties:
         outputBase:
diff --git a/tests/jobs/concatenate.py b/tests/jobs/concatenate.py
index 2f6b22b..3fb6834 100644
--- a/tests/jobs/concatenate.py
+++ b/tests/jobs/concatenate.py
@@ -2,13 +2,16 @@

 parser = argparse.ArgumentParser(
     prog="addcol",
-    description="Takes a list of files and writes them into single outputfile",
+    description="Takes an optional directory prefix and a file,"
+    " and combines all the input files that are found"
+    " into a single output file",
 )
-parser.add_argument("inputFile", nargs="+", type=argparse.FileType("r"))
+parser.add_argument("--inputDirPrefix")
+parser.add_argument("--inputFile", required=True)
 parser.add_argument("-o", "--outputFile", required=True)
 args = parser.parse_args()

 with open(args.outputFile, "wt", encoding="utf8") as ofile:
-    for f in args.inputFile:
-        ofile.write(f.read())
+    with open(args.inputFile, "rt", encoding="utf8") as ifile:
+        ofile.write(ifile.read())
diff --git a/tests/workflow-definitions/simple-python-split-combine.yaml b/tests/workflow-definitions/simple-python-split-combine.yaml
index 2dcc68e..bcbea06 100644
--- a/tests/workflow-definitions/simple-python-split-combine.yaml
+++ b/tests/workflow-definitions/simple-python-split-combine.yaml
@@ -46,12 +46,14 @@
     variables:
       outputFile: results.smi
     plumbing:
+    - variable: outputFile
+      from-workflow:
+        variable: combination
     - variable: inputFile
       from-step:
         name: parallel
         variable: outputFile
-    - variable: outputFile
-      from-workflow:
-        variable: combination
+    - variable: inputDirPrefix
+      from-link-prefix:
     - variable: outputFile
       to-project:
diff --git a/workflow/decoder.py b/workflow/decoder.py
index b41552e..aac874b 100644
--- a/workflow/decoder.py
+++ b/workflow/decoder.py
@@ -144,7 +144,7 @@ def get_step_workflow_variable_connections(
     return connections


-def get_step_prior_step_plumbing(
+def get_step_prior_step_connections(
     *, step_definition: dict[str, Any]
 ) -> dict[str, list[Connector]]:
     """Returns list of variable Connections, indexed by prior step name,
@@ -166,3 +166,14 @@
             Connector(in_=step_variable, out=v_map["variable"])
         ]
     return plumbing
+
+
+def get_step_link_prefix_variables(*, step_definition: dict[str, Any]) -> set[str]:
+    """Returns the set of variables expected to be set to the value
+    of the instance directory prefix."""
+    variables: set[str] = set()
+    if "plumbing" in step_definition:
+        for v_map in step_definition["plumbing"]:
+            if "from-link-prefix" in v_map:
+                variables.add(v_map["variable"])
+    return variables
diff --git a/workflow/workflow-schema.yaml b/workflow/workflow-schema.yaml
index 2266fca..ba5343c 100644
--- a/workflow/workflow-schema.yaml
+++ b/workflow/workflow-schema.yaml
@@ -102,6 +102,29 @@ definitions:
       - variable
       - from-workflow

+  # A Step variable
+  # (whose value is set to the directory prefix used when the DM
+  # links the instance directories of prior step instances into this
+  # step's instance directory)
+  #
+  # This _must_ be treated by the step's job as a directory prefix,
+  # typically '.instance-', that can be used to identify directories in this step's
+  # execution directory where the execution directories of prior steps
+  # are hard-linked by the DM. A job can find all the prior step directory names
+  # using the selected variable (e.g. inspect any directory name
+  # that starts with "{variable}").
+  step-variable-from-link-prefix:
+    type: object
+    additionalProperties: false
+    properties:
+      variable:
+        $ref: '#/definitions/variable-name'
+      from-link-prefix:
+        type: 'null'
+    required:
+    - variable
+    - from-link-prefix
+
   # A Step variable
   # (whose value (a file) is to be copied to the project directory)
   step-variable-to-project:
@@ -204,6 +227,7 @@ definitions:
         anyOf:
           - $ref: "#/definitions/step-variable-from-step"
           - $ref: "#/definitions/step-variable-from-workflow"
+          - $ref: "#/definitions/step-variable-from-link-prefix"
           - $ref: "#/definitions/step-variable-to-project"
         minItems: 1
       required:
diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py
index 6897c19..997abe0 100644
--- a/workflow/workflow_engine.py
+++ b/workflow/workflow_engine.py
@@ -43,7 +43,8 @@
 from .decoder import (
     Connector,
     get_step,
-    get_step_prior_step_plumbing,
+    get_step_link_prefix_variables,
+    get_step_prior_step_connections,
     get_step_workflow_variable_connections,
 )
@@ -77,10 +78,15 @@ def __init__(
         self,
         *,
         wapi_adapter: WorkflowAPIAdapter,
         instance_launcher: InstanceLauncher,
+        step_link_prefix: str = ".instance-",
     ):
+        """Initialiser, given a Workflow API adapter, Instance launcher,
+        and a step (directory) link prefix (the directory prefix the DM uses to hard-link
+        prior step instances into the next step, typically '.instance-')"""
         # Keep the dependent objects
         self._wapi_adapter = wapi_adapter
         self._instance_launcher = instance_launcher
+        self._step_link_prefix = step_link_prefix

     def handle_message(self, msg: Message) -> None:
         """Expect Workflow and Pod messages.
@@ -393,7 +399,7 @@ def _prepare_step(
         our_inputs: dict[str, Any] = job_defintion_decoder.get_inputs(
             our_job_definition
         )
-        our_plumbing: dict[str, list[Connector]] = get_step_prior_step_plumbing(
+        our_plumbing: dict[str, list[Connector]] = get_step_prior_step_connections(
             step_definition=step_definition
         )
@@ -489,45 +495,36 @@ def _prepare_step(
             assert connector.in_ in rwf_variables
             variables[connector.out] = rwf_variables[connector.in_]

+        # Process the step's "plumbing" relating to link-prefix variables.
+        #
+        # This will be a set of variable names. We just set each one
+        # to the built-in step link prefix.
+        for link_variable in get_step_link_prefix_variables(
+            step_definition=step_definition
+        ):
+            variables[link_variable] = self._step_link_prefix
+
         # Now process variables (in the "plumbing" block)
         # that relate to values used in prior steps.
         #
         # The decoder gives us a map indexed by prior step name that's a list of
         # "in"/"out" connectors as above. If this is a combiner step remember
         # that the combiner_input_variable is used as a list.
-        prior_step_plumbing: dict[str, list[Connector]] = get_step_prior_step_plumbing(
-            step_definition=step_definition
+        prior_step_plumbing: dict[str, list[Connector]] = (
+            get_step_prior_step_connections(step_definition=step_definition)
         )
         for prior_step_name, connections in prior_step_plumbing.items():
-            if step_is_combiner and prior_step_name == step_name_being_combined:
-                assert combiner_input_variable
-                input_source_list: list[str] = []
-                for replica in range(num_step_recplicas_being_combined):
-                    prior_step, _ = (
-                        self._wapi_adapter.get_running_workflow_step_by_name(
-                            name=prior_step_name,
-                            replica=replica,
-                            running_workflow_id=rwf_id,
-                        )
-                    )
-                    # Copy "in" value to "out"...
-                    # accumulating thiose for the 'combining' variable,
-                    # which will be set as a list when we're done.
-                    for connector in connections:
-                        assert connector.in_ in prior_step["variables"]
-                        if connector.out == combiner_input_variable:
-                            # Each instance may have a different value
-                            input_source_list.append(
-                                prior_step["variables"][connector.in_]
-                            )
-                        elif replica == 0:
-                            # Only the first instance value are of interest,
-                            # the rest wil be the same - only one variable
-                            # is a list of different values.
-                            variables[connector.out] = prior_step["variables"][
-                                connector.in_
-                            ]
-                # Now we have accumulated the prior steps values (files)
-                # set the combiner's corresponding input variable...
-                variables[combiner_input_variable] = input_source_list
-            else:
-                # Not a preior step for a combiner,
-                # or not a step being combined in a combiner.
-                #
-                # Retrieve the prior "running" step
-                # in order to get the variables that were set there...
-                prior_step, _ = self._wapi_adapter.get_running_workflow_step_by_name(
-                    name=prior_step_name, running_workflow_id=rwf_id
-                )
-                # Copy "in" value to "out"...
-                for connector in connections:
-                    assert connector.in_ in prior_step["variables"]
-                    variables[connector.out] = prior_step["variables"][connector.in_]
+            # Retrieve the first prior "running" step in order to get the variables
+            # that were used for it.
+            #
+            # For a combiner step we only need to inspect the first instance of
+            # the prior step (the default replica value is '0').
+            # We assume all the combiner's prior (parallel) instances
+            # have the same variables and values.
+            prior_step, _ = self._wapi_adapter.get_running_workflow_step_by_name(
+                name=prior_step_name,
+                running_workflow_id=rwf_id,
+            )
+            # Copy "in" value to "out"...
+            for connector in connections:
+                assert connector.in_ in prior_step["variables"]
+                variables[connector.out] = prior_step["variables"][connector.in_]

         # All variables are set ...
         # is this enough to satisfy the step's Job command?
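
Two short Python illustrations of the 'from-link-prefix' feature introduced by the final patch. Both are sketches added for clarity and are not part of the patch series. The first shows what the new decoder helper returns for a hypothetical step definition (the step and variable names are invented for the example):

    from workflow.decoder import get_step_link_prefix_variables

    # A hypothetical step definition that uses the new 'from-link-prefix' plumbing.
    step = {
        "name": "combine",
        "plumbing": [
            {"variable": "inputDirPrefix", "from-link-prefix": None},
            {"variable": "outputFile", "to-project": None},
        ],
    }
    # Only 'from-link-prefix' entries contribute to the returned set...
    assert get_step_link_prefix_variables(step_definition=step) == {"inputDirPrefix"}

The second is a minimal sketch of how a combiner job might consume the prefix at run-time. It assumes, as the schema comment describes, that the DM hard-links each prior step instance directory into this step's execution directory under a name that starts with the prefix (typically '.instance-'), and that each linked directory contains the per-instance file named by --inputFile:

    import argparse
    import glob
    import os

    parser = argparse.ArgumentParser(
        description="Combine per-instance outputs found in linked instance directories"
    )
    # The engine is expected to set this from the step's 'from-link-prefix' plumbing.
    parser.add_argument("--inputDirPrefix", default=".instance-")
    parser.add_argument("--inputFile", required=True)
    parser.add_argument("-o", "--outputFile", required=True)
    args = parser.parse_args()

    with open(args.outputFile, "wt", encoding="utf8") as ofile:
        # Inspect every directory whose name starts with the link prefix
        # and concatenate the expected file from each one...
        for instance_dir in sorted(glob.glob(f"{args.inputDirPrefix}*")):
            candidate = os.path.join(instance_dir, args.inputFile)
            if os.path.isfile(candidate):
                with open(candidate, "rt", encoding="utf8") as ifile:
                    ofile.write(ifile.read())

The simplified concatenate.py test job deliberately reads only the single --inputFile value; the sketch above is one way the inputDirPrefix option could be exercised once the DM's hard-linking behaviour is in place.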