From a74ff9133689cc600e48f778d8262428dad6a558 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Mon, 23 Jun 2025 09:40:57 +0100 Subject: [PATCH 01/57] feat: kind-version now 2025.2 BREAKING CHANGE --- tests/workflow-definitions/duplicate-step-names.yaml | 2 +- .../workflow-definitions/duplicate-workflow-variable-names.yaml | 2 +- tests/workflow-definitions/example-nop-fail.yaml | 2 +- tests/workflow-definitions/example-smiles-to-file.yaml | 2 +- tests/workflow-definitions/example-two-step-nop.yaml | 2 +- tests/workflow-definitions/minimal.yaml | 2 +- tests/workflow-definitions/shortcut-example-1.yaml | 2 +- .../simple-python-molprops-with-options.yaml | 2 +- tests/workflow-definitions/simple-python-molprops.yaml | 2 +- tests/workflow-definitions/simple-python-parallel.yaml | 2 +- .../workflow-definitions/step-specification-variable-names.yaml | 2 +- tests/workflow-definitions/workflow-options.yaml | 2 +- workflow/workflow-schema.yaml | 2 +- 13 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/workflow-definitions/duplicate-step-names.yaml b/tests/workflow-definitions/duplicate-step-names.yaml index cd9920f..c9a6028 100644 --- a/tests/workflow-definitions/duplicate-step-names.yaml +++ b/tests/workflow-definitions/duplicate-step-names.yaml @@ -1,6 +1,6 @@ --- kind: DataManagerWorkflow -kind-version: "2025.1" +kind-version: "2025.2" name: duplicate-step-names steps: diff --git a/tests/workflow-definitions/duplicate-workflow-variable-names.yaml b/tests/workflow-definitions/duplicate-workflow-variable-names.yaml index dbaeafa..8179bd4 100644 --- a/tests/workflow-definitions/duplicate-workflow-variable-names.yaml +++ b/tests/workflow-definitions/duplicate-workflow-variable-names.yaml @@ -1,6 +1,6 @@ --- kind: DataManagerWorkflow -kind-version: "2025.1" +kind-version: "2025.2" name: duplicate-workflow-variable-names description: A workflow with a duplicate variable name in the input and output variable-mapping: diff --git a/tests/workflow-definitions/example-nop-fail.yaml b/tests/workflow-definitions/example-nop-fail.yaml index a0a7194..6e639c2 100644 --- a/tests/workflow-definitions/example-nop-fail.yaml +++ b/tests/workflow-definitions/example-nop-fail.yaml @@ -1,6 +1,6 @@ --- kind: DataManagerWorkflow -kind-version: "2025.1" +kind-version: "2025.2" name: nop-fail description: >- A workflow with one step that fails diff --git a/tests/workflow-definitions/example-smiles-to-file.yaml b/tests/workflow-definitions/example-smiles-to-file.yaml index 54b7ea8..b7dc70c 100644 --- a/tests/workflow-definitions/example-smiles-to-file.yaml +++ b/tests/workflow-definitions/example-smiles-to-file.yaml @@ -1,6 +1,6 @@ --- kind: DataManagerWorkflow -kind-version: "2025.1" +kind-version: "2025.2" name: smiles-to-file description: >- A workflow with one step that uses variables. diff --git a/tests/workflow-definitions/example-two-step-nop.yaml b/tests/workflow-definitions/example-two-step-nop.yaml index 7030e84..b52e83a 100644 --- a/tests/workflow-definitions/example-two-step-nop.yaml +++ b/tests/workflow-definitions/example-two-step-nop.yaml @@ -1,6 +1,6 @@ --- kind: DataManagerWorkflow -kind-version: "2025.1" +kind-version: "2025.2" name: two-step-nop description: >- A workflow with two steps. 
diff --git a/tests/workflow-definitions/minimal.yaml b/tests/workflow-definitions/minimal.yaml index 4c3c90a..f744057 100644 --- a/tests/workflow-definitions/minimal.yaml +++ b/tests/workflow-definitions/minimal.yaml @@ -1,6 +1,6 @@ --- kind: DataManagerWorkflow -kind-version: "2025.1" +kind-version: "2025.2" name: workflow-minimal steps: diff --git a/tests/workflow-definitions/shortcut-example-1.yaml b/tests/workflow-definitions/shortcut-example-1.yaml index 214fb24..e5b719d 100644 --- a/tests/workflow-definitions/shortcut-example-1.yaml +++ b/tests/workflow-definitions/shortcut-example-1.yaml @@ -1,6 +1,6 @@ --- kind: DataManagerWorkflow -kind-version: "2025.1" +kind-version: "2025.2" name: shortcut-example-1 description: The shortcut example 1 workflow diff --git a/tests/workflow-definitions/simple-python-molprops-with-options.yaml b/tests/workflow-definitions/simple-python-molprops-with-options.yaml index 69dfc4f..eeb6111 100644 --- a/tests/workflow-definitions/simple-python-molprops-with-options.yaml +++ b/tests/workflow-definitions/simple-python-molprops-with-options.yaml @@ -1,6 +1,6 @@ --- kind: DataManagerWorkflow -kind-version: "2025.1" +kind-version: "2025.2" name: python-workflow description: A simple python experimental workflow diff --git a/tests/workflow-definitions/simple-python-molprops.yaml b/tests/workflow-definitions/simple-python-molprops.yaml index 6290731..4d9451c 100644 --- a/tests/workflow-definitions/simple-python-molprops.yaml +++ b/tests/workflow-definitions/simple-python-molprops.yaml @@ -1,6 +1,6 @@ --- kind: DataManagerWorkflow -kind-version: "2025.1" +kind-version: "2025.2" name: python-workflow description: A simple python experimental workflow variable-mapping: diff --git a/tests/workflow-definitions/simple-python-parallel.yaml b/tests/workflow-definitions/simple-python-parallel.yaml index ece197a..9fea80a 100644 --- a/tests/workflow-definitions/simple-python-parallel.yaml +++ b/tests/workflow-definitions/simple-python-parallel.yaml @@ -1,6 +1,6 @@ --- kind: DataManagerWorkflow -kind-version: "2025.1" +kind-version: "2025.2" name: python-workflow description: A simple branching workflow variable-mapping: diff --git a/tests/workflow-definitions/step-specification-variable-names.yaml b/tests/workflow-definitions/step-specification-variable-names.yaml index 2064844..e899b7f 100644 --- a/tests/workflow-definitions/step-specification-variable-names.yaml +++ b/tests/workflow-definitions/step-specification-variable-names.yaml @@ -1,6 +1,6 @@ --- kind: DataManagerWorkflow -kind-version: "2025.1" +kind-version: "2025.2" name: step-variables description: Test a lot of variables whose format is supported diff --git a/tests/workflow-definitions/workflow-options.yaml b/tests/workflow-definitions/workflow-options.yaml index 4be5083..9e742fe 100644 --- a/tests/workflow-definitions/workflow-options.yaml +++ b/tests/workflow-definitions/workflow-options.yaml @@ -1,6 +1,6 @@ --- kind: DataManagerWorkflow -kind-version: "2025.1" +kind-version: "2025.2" name: workflow-options description: Illustrate the use of workflow options variable-mapping: diff --git a/workflow/workflow-schema.yaml b/workflow/workflow-schema.yaml index 6f2c468..792f866 100644 --- a/workflow/workflow-schema.yaml +++ b/workflow/workflow-schema.yaml @@ -17,7 +17,7 @@ properties: const: DataManagerWorkflow kind-version: enum: - - '2025.1' + - '2025.2' name: $ref: '#/definitions/rfc1035-label-name' description: From d955a19379ba6f2743b3e576caab5d2fddabcc3e Mon Sep 17 00:00:00 2001 From: Alan Christie Date: 
Mon, 23 Jun 2025 14:18:33 +0100 Subject: [PATCH 02/57] feat: Initial schema for replicate step declaration --- tests/test_decoder.py | 52 ++++++++++++++ .../test_workflow_validator_for_run_level.py | 69 +++++++++++++++++++ .../test_workflow_validator_for_tag_level.py | 69 +++++++++++++++++++ .../duplicate-step-output-variable-names.yaml | 56 +++++++++++++++ .../duplicate-workflow-variable-names.yaml | 1 - .../replicate-using-undeclared-input.yaml | 54 +++++++++++++++ .../simple-python-molprops-with-options.yaml | 1 - .../simple-python-molprops.yaml | 1 - .../simple-python-parallel.yaml | 1 - workflow/decoder.py | 30 ++++++++ workflow/workflow-schema.yaml | 23 +++++-- workflow/workflow_validator.py | 47 +++++++++++-- 12 files changed, 391 insertions(+), 13 deletions(-) create mode 100644 tests/workflow-definitions/duplicate-step-output-variable-names.yaml create mode 100644 tests/workflow-definitions/replicate-using-undeclared-input.yaml diff --git a/tests/test_decoder.py b/tests/test_decoder.py index f870f8a..8a25774 100644 --- a/tests/test_decoder.py +++ b/tests/test_decoder.py @@ -56,6 +56,15 @@ ) assert _DUPLICATE_WORKFLOW_VARIABLE_NAMES_WORKFLOW +_SIMPLE_PYTHON_PARALLEL_FILE: str = os.path.join( + os.path.dirname(__file__), + "workflow-definitions", + "simple-python-parallel.yaml", +) +with open(_SIMPLE_PYTHON_PARALLEL_FILE, "r", encoding="utf8") as workflow_file: + _SIMPLE_PYTHON_PARALLEL_WORKFLOW: Dict[str, Any] = yaml.safe_load(workflow_file) +assert _SIMPLE_PYTHON_PARALLEL_WORKFLOW + _STEP_SPECIFICATION_VARIABLE_NAMES_WORKFLOW_FILE: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", @@ -165,6 +174,16 @@ def test_validate_schema_for_workflow_options(): assert error is None +def test_validate_schema_for_simple_python_parallel(): + # Arrange + + # Act + error = decoder.validate_schema(_SIMPLE_PYTHON_PARALLEL_WORKFLOW) + + # Assert + assert error is None + + def test_get_workflow_variables_for_smiple_python_molprops(): # Arrange @@ -329,3 +348,36 @@ def test_get_workflow_outputs_for_step_with_unkown_step_name(): # Assert assert not outputs + + +def test_get_step_input_variable_names_when_duplicates(): + # Arrange + + # Act + inputs = decoder.get_step_input_variable_names( + _SIMPLE_PYTHON_PARALLEL_WORKFLOW, "final-step" + ) + + # Assert + assert len(inputs) == 2 + assert inputs[0] == "inputFile" + assert inputs[1] == "inputFile" + + +def test_get_step_output_variable_names_when_duplicates(): + # Arrange + workflow_filename: str = os.path.join( + os.path.dirname(__file__), + "workflow-definitions", + "duplicate-step-output-variable-names.yaml", + ) + with open(workflow_filename, "r", encoding="utf8") as wf_file: + definition: Dict[str, Any] = yaml.safe_load(wf_file) + + # Act + outputs = decoder.get_step_output_variable_names(definition, "step-1") + + # Assert + assert len(outputs) == 2 + assert outputs[0] == "outputFile" + assert outputs[1] == "outputFile" diff --git a/tests/test_workflow_validator_for_run_level.py b/tests/test_workflow_validator_for_run_level.py index e5fdfb6..2df1630 100644 --- a/tests/test_workflow_validator_for_run_level.py +++ b/tests/test_workflow_validator_for_run_level.py @@ -233,3 +233,72 @@ def test_validate_duplicate_workflow_variable_names(): # Assert assert error.error_num == 6 assert error.error_msg == ["Duplicate workflow variable names found: x"] + + +def test_validate_simple_python_parallel(): + # Arrange + workflow_file: str = os.path.join( + os.path.dirname(__file__), + "workflow-definitions", + "simple-python-parallel.yaml", + ) 
+ with open(workflow_file, "r", encoding="utf8") as workflow_file: + workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) + assert workflow + + # Act + error = WorkflowValidator.validate( + level=ValidationLevel.TAG, + workflow_definition=workflow, + ) + + # Assert + assert error.error_num == 0 + + +def test_validate_replicate_using_undeclared_input(): + # Arrange + workflow_file: str = os.path.join( + os.path.dirname(__file__), + "workflow-definitions", + "replicate-using-undeclared-input.yaml", + ) + with open(workflow_file, "r", encoding="utf8") as workflow_file: + workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) + assert workflow + + # Act + error = WorkflowValidator.validate( + level=ValidationLevel.TAG, + workflow_definition=workflow, + ) + + # Assert + assert error.error_num == 7 + assert error.error_msg == [ + "Replicate input variable is not declared: y (step=step-2)" + ] + + +def test_validate_duplicate_step_output_variable_names(): + # Arrange + workflow_file: str = os.path.join( + os.path.dirname(__file__), + "workflow-definitions", + "duplicate-step-output-variable-names.yaml", + ) + with open(workflow_file, "r", encoding="utf8") as workflow_file: + workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) + assert workflow + + # Act + error = WorkflowValidator.validate( + level=ValidationLevel.TAG, + workflow_definition=workflow, + ) + + # Assert + assert error.error_num == 3 + assert error.error_msg == [ + "Duplicate step output variable: outputFile (step=step-1)" + ] diff --git a/tests/test_workflow_validator_for_tag_level.py b/tests/test_workflow_validator_for_tag_level.py index 8c68400..4445502 100644 --- a/tests/test_workflow_validator_for_tag_level.py +++ b/tests/test_workflow_validator_for_tag_level.py @@ -109,6 +109,27 @@ def test_validate_shortcut_example_1(): assert error.error_msg is None +def test_validate_simple_python_parallel(): + # Arrange + workflow_file: str = os.path.join( + os.path.dirname(__file__), + "workflow-definitions", + "simple-python-parallel.yaml", + ) + with open(workflow_file, "r", encoding="utf8") as workflow_file: + workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) + assert workflow + + # Act + error = WorkflowValidator.validate( + level=ValidationLevel.TAG, + workflow_definition=workflow, + ) + + # Assert + assert error.error_num == 0 + + def test_validate_simple_python_molprops(): # Arrange workflow_file: str = os.path.join( @@ -171,3 +192,51 @@ def test_validate_duplicate_workflow_variable_names(): # Assert assert error.error_num == 6 assert error.error_msg == ["Duplicate workflow variable names found: x"] + + +def test_validate_replicate_using_undeclared_input(): + # Arrange + workflow_file: str = os.path.join( + os.path.dirname(__file__), + "workflow-definitions", + "replicate-using-undeclared-input.yaml", + ) + with open(workflow_file, "r", encoding="utf8") as workflow_file: + workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) + assert workflow + + # Act + error = WorkflowValidator.validate( + level=ValidationLevel.TAG, + workflow_definition=workflow, + ) + + # Assert + assert error.error_num == 7 + assert error.error_msg == [ + "Replicate input variable is not declared: y (step=step-2)" + ] + + +def test_validate_duplicate_step_output_variable_names(): + # Arrange + workflow_file: str = os.path.join( + os.path.dirname(__file__), + "workflow-definitions", + "duplicate-step-output-variable-names.yaml", + ) + with 
open(workflow_file, "r", encoding="utf8") as workflow_file: + workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) + assert workflow + + # Act + error = WorkflowValidator.validate( + level=ValidationLevel.TAG, + workflow_definition=workflow, + ) + + # Assert + assert error.error_num == 3 + assert error.error_msg == [ + "Duplicate step output variable: outputFile (step=step-1)" + ] diff --git a/tests/workflow-definitions/duplicate-step-output-variable-names.yaml b/tests/workflow-definitions/duplicate-step-output-variable-names.yaml new file mode 100644 index 0000000..d37c544 --- /dev/null +++ b/tests/workflow-definitions/duplicate-step-output-variable-names.yaml @@ -0,0 +1,56 @@ +--- +kind: DataManagerWorkflow +kind-version: "2025.2" +name: duplicate-step-output-variable-names +description: A workflow where step-1 has duplicate output variable names +variable-mapping: + inputs: + - name: x + outputs: + - name: y + from: + step: step-2 + output: outputFile + as: clustered-molecules.smi + +steps: + +- name: step-1 + description: Add column 1 + specification: + collection: workflow-engine-unit-test-jobs + job: rdkit-molprops + version: "1.0.0" + variables: + name: "col1" + value: 123 + inputs: + - input: inputFile + from: + workflow-input: candidateMolecules + - input: inputFile + from: + workflow-input: candidateMolecules + outputs: + - output: outputFile + as: __step1__out.smi + - output: outputFile + as: __step1__out.smi + +- name: step-2 + description: Add column 2 + specification: + collection: workflow-engine-unit-test-jobs + job: cluster-butina + version: "1.0.0" + variables: + name: "col2" + value: "999" + inputs: + - input: inputFile + from: + step: step1 + output: outputFile + outputs: + - output: outputFile + as: __step2__out.smi diff --git a/tests/workflow-definitions/duplicate-workflow-variable-names.yaml b/tests/workflow-definitions/duplicate-workflow-variable-names.yaml index 8179bd4..6c1206f 100644 --- a/tests/workflow-definitions/duplicate-workflow-variable-names.yaml +++ b/tests/workflow-definitions/duplicate-workflow-variable-names.yaml @@ -6,7 +6,6 @@ description: A workflow with a duplicate variable name in the input and output variable-mapping: inputs: - name: x - type: squonk/x-smiles outputs: - name: x from: diff --git a/tests/workflow-definitions/replicate-using-undeclared-input.yaml b/tests/workflow-definitions/replicate-using-undeclared-input.yaml new file mode 100644 index 0000000..cc454e5 --- /dev/null +++ b/tests/workflow-definitions/replicate-using-undeclared-input.yaml @@ -0,0 +1,54 @@ +--- +kind: DataManagerWorkflow +kind-version: "2025.2" +name: replicate-using-undeclared-input +description: A workflow that replicates from a variable that's not declared +variable-mapping: + inputs: + - name: x + outputs: + - name: y + from: + step: step2 + output: outputFile + as: clustered-molecules.smi + +steps: + +- name: step-1 + description: Add column 1 + specification: + collection: workflow-engine-unit-test-jobs + job: rdkit-molprops + version: "1.0.0" + variables: + name: "col1" + value: 123 + inputs: + - input: inputFile + from: + workflow-input: candidateMolecules + outputs: + - output: outputFile + as: __step-1__out.smi + +- name: step-2 + description: Add column 2 + specification: + collection: workflow-engine-unit-test-jobs + job: cluster-butina + version: "1.0.0" + variables: + name: "col2" + value: "999" + replicate: + using: + input: y + inputs: + - input: inputFile + from: + step: step-1 + output: outputFile + outputs: + - output: 
outputFile + as: __step-2__out.smi diff --git a/tests/workflow-definitions/simple-python-molprops-with-options.yaml b/tests/workflow-definitions/simple-python-molprops-with-options.yaml index eeb6111..afc1ae8 100644 --- a/tests/workflow-definitions/simple-python-molprops-with-options.yaml +++ b/tests/workflow-definitions/simple-python-molprops-with-options.yaml @@ -56,7 +56,6 @@ variables: variable-mapping: inputs: - name: candidateMolecules - type: squonk/x-smiles outputs: - name: clusteredMolecules from: diff --git a/tests/workflow-definitions/simple-python-molprops.yaml b/tests/workflow-definitions/simple-python-molprops.yaml index 4d9451c..cf28dd2 100644 --- a/tests/workflow-definitions/simple-python-molprops.yaml +++ b/tests/workflow-definitions/simple-python-molprops.yaml @@ -6,7 +6,6 @@ description: A simple python experimental workflow variable-mapping: inputs: - name: candidateMolecules - type: squonk/x-smiles outputs: - name: clusteredMolecules from: diff --git a/tests/workflow-definitions/simple-python-parallel.yaml b/tests/workflow-definitions/simple-python-parallel.yaml index 9fea80a..bfd3935 100644 --- a/tests/workflow-definitions/simple-python-parallel.yaml +++ b/tests/workflow-definitions/simple-python-parallel.yaml @@ -6,7 +6,6 @@ description: A simple branching workflow variable-mapping: inputs: - name: candidateMolecules - type: squonk/x-smiles outputs: - name: clusteredMolecules from: diff --git a/workflow/decoder.py b/workflow/decoder.py index 9e75133..19abc94 100644 --- a/workflow/decoder.py +++ b/workflow/decoder.py @@ -82,6 +82,36 @@ def get_variable_names(definition: dict[str, Any]) -> list[str]: return wf_variable_names +def get_step_output_variable_names( + definition: dict[str, Any], step_name: str +) -> list[str]: + """Given a Workflow definition and a Step name this function returns all the names + of the output variables defined at the Step level. This function DOES NOT + de-duplicate names, that is the role of the validator.""" + variable_names: list[str] = [] + steps: list[dict[str, Any]] = get_steps(definition) + for step in steps: + if step["name"] == step_name: + variable_names.extend( + output["output"] for output in step.get("outputs", []) + ) + return variable_names + + +def get_step_input_variable_names( + definition: dict[str, Any], step_name: str +) -> list[str]: + """Given a Workflow definition and a Step name (expected to exist) + this function returns all the names of the input + variables defined at the step level.""" + variable_names: list[str] = [] + steps: list[dict[str, Any]] = get_steps(definition) + for step in steps: + if step["name"] == step_name: + variable_names.extend(input["input"] for input in step.get("inputs", [])) + return variable_names + + def get_workflow_job_input_names_for_step( definition: dict[str, Any], name: str ) -> list[str]: diff --git a/workflow/workflow-schema.yaml b/workflow/workflow-schema.yaml index 792f866..70e7b10 100644 --- a/workflow/workflow-schema.yaml +++ b/workflow/workflow-schema.yaml @@ -90,12 +90,8 @@ definitions: properties: name: $ref: '#/definitions/template-variable-name' - type: - type: string - description: The MIME type of the parameter required: - name - - type # A workflow output parameter is essentially a file # taken from the output of a step with a default (as) value. 
@@ -176,6 +172,17 @@ definitions: - name - as + # A step replication control variable + # that is based on a step input variable + replicate-using-input: + type: object + additionalProperties: false + properties: + input: + $ref: '#/definitions/template-variable-name' + required: + - input + # A Step input (from an output of a prior step) step-input-from-step: type: object @@ -262,6 +269,14 @@ definitions: description: A description of the step specification: $ref: '#/definitions/step-specification' + replicate: + # Used to indicate one input variable that is used to replicate/spawn + # step instances based on the number of values generated for the variable. + type: object + additionalProperties: false + properties: + using: + $ref: '#/definitions/replicate-using-input' inputs: type: array items: diff --git a/workflow/workflow_validator.py b/workflow/workflow_validator.py index 78198b8..6324bd5 100644 --- a/workflow/workflow_validator.py +++ b/workflow/workflow_validator.py @@ -6,6 +6,8 @@ from .decoder import ( get_required_variable_names, + get_step_input_variable_names, + get_step_output_variable_names, get_steps, get_variable_names, validate_schema, @@ -83,21 +85,37 @@ def _validate_tag_level( assert workflow_definition # TAG level requires that each step name is unique, + # and all the output variable names in the step are unique. duplicate_names: set[str] = set() - step_names: set[str] = set() + all_step_names: set[str] = set() + variable_names: set[str] = set() for step in get_steps(workflow_definition): step_name: str = step["name"] - if step_name not in duplicate_names and step_name in step_names: + if step_name not in duplicate_names and step_name in all_step_names: duplicate_names.add(step_name) - step_names.add(step_name) + all_step_names.add(step_name) + # Are output variable names unique? + variable_names.clear() + step_variables: list[str] = get_step_output_variable_names( + workflow_definition, step_name + ) + for step_variable in step_variables: + if step_variable in variable_names: + return ValidationResult( + error_num=3, + error_msg=[ + f"Duplicate step output variable: {step_variable} (step={step_name})" + ], + ) + variable_names.add(step_variable) if duplicate_names: return ValidationResult( error_num=2, error_msg=[f"Duplicate step names found: {', '.join(duplicate_names)}"], ) # Workflow variables must be unique. - duplicate_names = set() - variable_names: set[str] = set() + duplicate_names.clear() + variable_names.clear() wf_variable_names: list[str] = get_variable_names(workflow_definition) for wf_variable_name in wf_variable_names: if ( @@ -113,6 +131,25 @@ def _validate_tag_level( f"Duplicate workflow variable names found: {', '.join(duplicate_names)}" ], ) + # For each 'replicating' step the replicating variable + # must be declared in the step. 
+ for step in get_steps(workflow_definition): + if ( + replicate_using_input := step.get("replicate", {}) + .get("using", {}) + .get("input") + ): + step_name = step["name"] + if replicate_using_input not in get_step_input_variable_names( + workflow_definition, step_name + ): + return ValidationResult( + error_num=7, + error_msg=[ + "Replicate input variable is not declared:" + f" {replicate_using_input} (step={step_name})" + ], + ) return _VALIDATION_SUCCESS From ab94bd7fecc31af77781cc36a50d9b414eefec91 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Mon, 23 Jun 2025 14:34:41 +0100 Subject: [PATCH 03/57] feat: Add get_generated_outputs_for_step_output() to API adapter --- tests/wapi_adapter.py | 7 +++++++ workflow/workflow_abc.py | 13 +++++++++++++ 2 files changed, 20 insertions(+) diff --git a/tests/wapi_adapter.py b/tests/wapi_adapter.py index 0f9915f..7b7d8d3 100644 --- a/tests/wapi_adapter.py +++ b/tests/wapi_adapter.py @@ -381,6 +381,13 @@ def get_running_workflow_steps(self, *, running_workflow_id: str) -> dict[str, A steps.append(item) return {"count": len(steps), "running_workflow_steps": steps} + def get_generated_outputs_for_step_output( + self, *, running_workflow_step_id: str, output: str + ) -> tuple[dict[str, Any], int]: + del running_workflow_step_id + del output + return {"outputs": []}, HTTPStatus.OK + def realise_outputs( self, *, running_workflow_step_id: str, outputs: list[str, str] ) -> tuple[dict[str, Any], int]: diff --git a/workflow/workflow_abc.py b/workflow/workflow_abc.py index 12e3251..65b1b3d 100644 --- a/workflow/workflow_abc.py +++ b/workflow/workflow_abc.py @@ -341,6 +341,19 @@ def get_job( # } # If not present an empty dictionary should be returned. + @abstractmethod + def get_generated_outputs_for_step_output( + self, *, running_workflow_step_id: str, output: str + ) -> tuple[dict[str, Any], int]: + """Gets the set of outputs for the output variable of a given step. + The step must have stopped for this to return any meaningful value. + Returned files might also include paths that are relative to the + Step's instance directory.""" + # Should return a (possibly empty) list of paths and filenames: + # { + # "outputs": ["file1.sdf", "file2.sdf"] + # } + @abstractmethod def realise_outputs( self, *, running_workflow_step_id: str, outputs: list[str] From b4be3117e5d2574884e40003bd4dc6ea4cc43224 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Mon, 23 Jun 2025 14:52:19 +0100 Subject: [PATCH 04/57] feat: Rename new API method --- tests/wapi_adapter.py | 2 +- workflow/workflow_abc.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/wapi_adapter.py b/tests/wapi_adapter.py index 7b7d8d3..11e2798 100644 --- a/tests/wapi_adapter.py +++ b/tests/wapi_adapter.py @@ -381,7 +381,7 @@ def get_running_workflow_steps(self, *, running_workflow_id: str) -> dict[str, A steps.append(item) return {"count": len(steps), "running_workflow_steps": steps} - def get_generated_outputs_for_step_output( + def get_running_workflow_step_outputs( self, *, running_workflow_step_id: str, output: str ) -> tuple[dict[str, Any], int]: del running_workflow_step_id diff --git a/workflow/workflow_abc.py b/workflow/workflow_abc.py index 65b1b3d..251f6c5 100644 --- a/workflow/workflow_abc.py +++ b/workflow/workflow_abc.py @@ -342,7 +342,7 @@ def get_job( # If not present an empty dictionary should be returned. 
@abstractmethod - def get_generated_outputs_for_step_output( + def get_running_workflow_step_outputs( self, *, running_workflow_step_id: str, output: str ) -> tuple[dict[str, Any], int]: """Gets the set of outputs for the output variable of a given step. From 27c5a83dd1d0561e4e62c976899713f16b1d02c2 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Mon, 23 Jun 2025 15:24:24 +0100 Subject: [PATCH 05/57] feat: Add replica to step creation (and step-by-name query) --- tests/wapi_adapter.py | 17 +++++++++++++---- workflow/workflow_abc.py | 13 ++++++++++--- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/tests/wapi_adapter.py b/tests/wapi_adapter.py index 11e2798..9150b90 100644 --- a/tests/wapi_adapter.py +++ b/tests/wapi_adapter.py @@ -136,8 +136,12 @@ def create_running_workflow_step( *, running_workflow_id: str, step: str, + replica: int = 0, prior_running_workflow_step_id: str | None = None, ) -> dict[str, Any]: + if replica: + assert replica > 0 + UnitTestWorkflowAPIAdapter.lock.acquire() with open(_RUNNING_WORKFLOW_STEP_PICKLE_FILE, "rb") as pickle_file: running_workflow_step = Unpickler(pickle_file).load() @@ -150,6 +154,7 @@ def create_running_workflow_step( "name": step, "done": False, "success": False, + "replica": replica, "variables": {}, "running_workflow": {"id": running_workflow_id}, } @@ -177,24 +182,28 @@ def get_running_workflow_step( return {}, 0 response = running_workflow_step[running_workflow_step_id] response["id"] = running_workflow_step_id + if response["replica"] == 0: + _ = response.pop("replica") return response, 0 def get_running_workflow_step_by_name( - self, *, name: str, running_workflow_id: str + self, *, name: str, running_workflow_id: str, replica: int = 0 ) -> dict[str, Any]: + if replica: + assert replica > 0 UnitTestWorkflowAPIAdapter.lock.acquire() with open(_RUNNING_WORKFLOW_STEP_PICKLE_FILE, "rb") as pickle_file: running_workflow_step = Unpickler(pickle_file).load() UnitTestWorkflowAPIAdapter.lock.release() - print(f"name={name} running_workflow_id={running_workflow_id}") for rwfs_id, record in running_workflow_step.items(): - print(f"rwfs_id={rwfs_id} record={record}") if record["running_workflow"]["id"] != running_workflow_id: continue - if record["name"] == name: + if record["name"] == name and record["replica"] == replica: response = record response["id"] = rwfs_id + if record["replica"] == 0: + _ = response.pop("replica") return response, 0 return {}, 0 diff --git a/workflow/workflow_abc.py b/workflow/workflow_abc.py index 251f6c5..f65a5a1 100644 --- a/workflow/workflow_abc.py +++ b/workflow/workflow_abc.py @@ -205,9 +205,14 @@ def create_running_workflow_step( *, running_workflow_id: str, step: str, + replica: int = 0, prior_running_workflow_step_id: str | None = None, ) -> tuple[dict[str, Any], int]: - """Create a RunningWorkflowStep Record (from a RunningWorkflow)""" + """Create a RunningWorkflowStep Record (from a RunningWorkflow). + If this is a replica (concurrent execution) of a step the replica + value must be set to a value greater than 0. 
The replica is unique + for a given step and is used to distinguish between running workflow steps + generated from the same step name.""" # Should return: # { # "id": "r-workflow-step-00000000-0000-0000-0000-000000000001", @@ -244,10 +249,12 @@ def get_running_workflow_step( @abstractmethod def get_running_workflow_step_by_name( - self, *, name: str, running_workflow_id: str + self, *, name: str, running_workflow_id: str, replica: int = 0 ) -> tuple[dict[str, Any], int]: """Get a RunningWorkflowStep Record given a step name - (and its RUnningWorkflow ID)""" + (and its RunningWorkflow ID). For steps that may be replicated + the replica, a value of 1 or higher, is used to identify the specific replica. + """ # Should return: # { # "id": "r-workflow-step-00000000-0000-0000-0000-000000000001", From 1d22716eea8c5d16460726e0b839b8c5cd8b93c5 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Mon, 23 Jun 2025 16:00:04 +0100 Subject: [PATCH 06/57] feat: Removed 'as' from workflow mapping output declaration --- tests/test_decoder.py | 13 ++++++------- tests/test_test_api_adapter.py | 1 - tests/wapi_adapter.py | 3 +-- .../duplicate-step-output-variable-names.yaml | 1 - .../duplicate-workflow-variable-names.yaml | 1 - .../replicate-using-undeclared-input.yaml | 1 - .../simple-python-molprops-with-options.yaml | 1 - .../simple-python-molprops.yaml | 1 - .../simple-python-parallel.yaml | 1 - workflow/decoder.py | 18 +++++------------- workflow/workflow-schema.yaml | 3 --- workflow/workflow_abc.py | 6 ++---- workflow/workflow_engine.py | 7 +++---- 13 files changed, 17 insertions(+), 40 deletions(-) diff --git a/tests/test_decoder.py b/tests/test_decoder.py index 8a25774..f9ead02 100644 --- a/tests/test_decoder.py +++ b/tests/test_decoder.py @@ -317,37 +317,36 @@ def test_get_workflow_outputs_for_step_with_name_step1(): # Arrange # Act - outputs = decoder.get_workflow_output_values_for_step( + has_outputs = decoder.workflow_step_has_outputs( _SIMPLE_PYTHON_MOLPROPS_WITH_OPTIONS_WORKFLOW, "step1" ) # Assert - assert not outputs + assert not has_outputs def test_get_workflow_outputs_for_step_with_name_step2(): # Arrange # Act - outputs = decoder.get_workflow_output_values_for_step( + has_outputs = decoder.workflow_step_has_outputs( _SIMPLE_PYTHON_MOLPROPS_WITH_OPTIONS_WORKFLOW, "step2" ) # Assert - assert len(outputs) == 1 - assert "clustered-molecules.smi" in outputs + assert has_outputs def test_get_workflow_outputs_for_step_with_unkown_step_name(): # Arrange # Act - outputs = decoder.get_workflow_output_values_for_step( + has_outputs = decoder.workflow_step_has_outputs( _SIMPLE_PYTHON_MOLPROPS_WITH_OPTIONS_WORKFLOW, "unknown" ) # Assert - assert not outputs + assert not has_outputs def test_get_step_input_variable_names_when_duplicates(): diff --git a/tests/test_test_api_adapter.py b/tests/test_test_api_adapter.py index 673e725..f54d4a4 100644 --- a/tests/test_test_api_adapter.py +++ b/tests/test_test_api_adapter.py @@ -450,7 +450,6 @@ def test_basic_realise(): # Act response, _ = utaa.realise_outputs( running_workflow_step_id="r-workflow-step-00000000-0000-0000-0000-000000000001", - outputs=["a.txt"], ) # Assert diff --git a/tests/wapi_adapter.py b/tests/wapi_adapter.py index 9150b90..13e0e97 100644 --- a/tests/wapi_adapter.py +++ b/tests/wapi_adapter.py @@ -398,8 +398,7 @@ def get_running_workflow_step_outputs( return {"outputs": []}, HTTPStatus.OK def realise_outputs( - self, *, running_workflow_step_id: str, outputs: list[str, str] + self, *, running_workflow_step_id: str ) -> tuple[dict[str, Any], 
int]: del running_workflow_step_id - del outputs return {}, HTTPStatus.OK diff --git a/tests/workflow-definitions/duplicate-step-output-variable-names.yaml b/tests/workflow-definitions/duplicate-step-output-variable-names.yaml index d37c544..5a371a2 100644 --- a/tests/workflow-definitions/duplicate-step-output-variable-names.yaml +++ b/tests/workflow-definitions/duplicate-step-output-variable-names.yaml @@ -11,7 +11,6 @@ variable-mapping: from: step: step-2 output: outputFile - as: clustered-molecules.smi steps: diff --git a/tests/workflow-definitions/duplicate-workflow-variable-names.yaml b/tests/workflow-definitions/duplicate-workflow-variable-names.yaml index 6c1206f..f524c44 100644 --- a/tests/workflow-definitions/duplicate-workflow-variable-names.yaml +++ b/tests/workflow-definitions/duplicate-workflow-variable-names.yaml @@ -11,7 +11,6 @@ variable-mapping: from: step: step2 output: outputFile - as: clustered-molecules.smi steps: diff --git a/tests/workflow-definitions/replicate-using-undeclared-input.yaml b/tests/workflow-definitions/replicate-using-undeclared-input.yaml index cc454e5..883ec62 100644 --- a/tests/workflow-definitions/replicate-using-undeclared-input.yaml +++ b/tests/workflow-definitions/replicate-using-undeclared-input.yaml @@ -11,7 +11,6 @@ variable-mapping: from: step: step2 output: outputFile - as: clustered-molecules.smi steps: diff --git a/tests/workflow-definitions/simple-python-molprops-with-options.yaml b/tests/workflow-definitions/simple-python-molprops-with-options.yaml index afc1ae8..2fc1155 100644 --- a/tests/workflow-definitions/simple-python-molprops-with-options.yaml +++ b/tests/workflow-definitions/simple-python-molprops-with-options.yaml @@ -61,7 +61,6 @@ variable-mapping: from: step: step2 output: outputFile - as: clustered-molecules.smi options: - name: rdkitPropertyName default: name diff --git a/tests/workflow-definitions/simple-python-molprops.yaml b/tests/workflow-definitions/simple-python-molprops.yaml index cf28dd2..dddb080 100644 --- a/tests/workflow-definitions/simple-python-molprops.yaml +++ b/tests/workflow-definitions/simple-python-molprops.yaml @@ -11,7 +11,6 @@ variable-mapping: from: step: step2 output: outputFile - as: clustered-molecules.smi steps: diff --git a/tests/workflow-definitions/simple-python-parallel.yaml b/tests/workflow-definitions/simple-python-parallel.yaml index bfd3935..e620cda 100644 --- a/tests/workflow-definitions/simple-python-parallel.yaml +++ b/tests/workflow-definitions/simple-python-parallel.yaml @@ -11,7 +11,6 @@ variable-mapping: from: step: final-step output: outputFile - as: clustered-molecules.smi steps: diff --git a/workflow/decoder.py b/workflow/decoder.py index 19abc94..6970caf 100644 --- a/workflow/decoder.py +++ b/workflow/decoder.py @@ -132,22 +132,14 @@ def get_workflow_job_input_names_for_step( return inputs -def get_workflow_output_values_for_step( - definition: dict[str, Any], name: str -) -> list[str]: - """Given a Workflow definition and a step name we return a list of workflow - out variable names the step creates. 
To do this we iterate through the workflows's - outputs to find those that are declared 'from' our step.""" +def workflow_step_has_outputs(definition: dict[str, Any], name: str) -> bool: + """Given a Workflow definition and a step name we return a boolean + that is true if the step produces outputs.""" wf_outputs = definition.get("variable-mapping", {}).get("outputs", {}) - outputs: list[str] = [] - outputs.extend( - output["as"] + return any( + "from" in output and "step" in output["from"] and output["from"]["step"] == name for output in wf_outputs - if "from" in output - and "step" in output["from"] - and output["from"]["step"] == name ) - return outputs def set_variables_from_options_for_step( diff --git a/workflow/workflow-schema.yaml b/workflow/workflow-schema.yaml index 70e7b10..70a6ad3 100644 --- a/workflow/workflow-schema.yaml +++ b/workflow/workflow-schema.yaml @@ -103,11 +103,8 @@ definitions: $ref: '#/definitions/template-variable-name' from: $ref: '#/definitions/from-step-output' - as: - $ref: '#/definitions/file-name' required: - name - - as # Declaration of a step option value from a workflow option (variable) as-step-option: diff --git a/workflow/workflow_abc.py b/workflow/workflow_abc.py index f65a5a1..680cb3b 100644 --- a/workflow/workflow_abc.py +++ b/workflow/workflow_abc.py @@ -363,11 +363,9 @@ def get_running_workflow_step_outputs( @abstractmethod def realise_outputs( - self, *, running_workflow_step_id: str, outputs: list[str] + self, *, running_workflow_step_id: str ) -> tuple[dict[str, Any], int]: - """Copy (link) the step's files as outputs into the Project directory. - A step ID is provided, along with a list of outputs - (files that will be in the step's instance directory).""" + """Copy (link) the step's files as outputs into the Project directory.""" # Should return an empty map or: # { # "error": "", diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index 7d6bc8f..e11d17a 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -41,8 +41,8 @@ from .decoder import ( get_workflow_job_input_names_for_step, - get_workflow_output_values_for_step, set_step_variables, + workflow_step_has_outputs, ) _LOGGER: logging.Logger = logging.getLogger(__name__) @@ -260,12 +260,11 @@ def _handle_pod_message(self, msg: PodMessage) -> None: error_num: int | None = None error_msg: str | None = None - if output_values := get_workflow_output_values_for_step(wf_response, step_name): - # Got some output values + if workflow_step_has_outputs(wf_response, step_name): + # The step produces at least one output. 
# Inform the DM so it can link them to the Project directory response, status_code = self._wapi_adapter.realise_outputs( running_workflow_step_id=r_wfsid, - outputs=output_values, ) if status_code != HTTPStatus.OK: error_num = status_code From 0c60799747f290afe827d1a49444debfc5929705 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Mon, 23 Jun 2025 16:38:05 +0100 Subject: [PATCH 07/57] feat: Add get_running_workflow_step_output_values_for_output() to API --- tests/test_test_api_adapter.py | 15 +++++++++++++++ tests/wapi_adapter.py | 2 +- workflow/workflow_abc.py | 6 +++--- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/tests/test_test_api_adapter.py b/tests/test_test_api_adapter.py index f54d4a4..0cf4032 100644 --- a/tests/test_test_api_adapter.py +++ b/tests/test_test_api_adapter.py @@ -443,6 +443,21 @@ def test_get_running_workflow_step_by_name(): assert response["id"] == rwfs_id +def test_basic_get_running_workflow_step_output_values_for_output_when_step_unknown(): + # Arrange + utaa = UnitTestWorkflowAPIAdapter() + + # Act + response, _ = utaa.get_running_workflow_step_output_values_for_output( + running_workflow_step_id="r-workflow-step-00000000-0000-0000-0000-000000000001", + output="outputFile", + ) + + # Assert + assert "outputs" in response + assert len(response["outputs"]) == 0 + + def test_basic_realise(): # Arrange utaa = UnitTestWorkflowAPIAdapter() diff --git a/tests/wapi_adapter.py b/tests/wapi_adapter.py index 13e0e97..d215301 100644 --- a/tests/wapi_adapter.py +++ b/tests/wapi_adapter.py @@ -390,7 +390,7 @@ def get_running_workflow_steps(self, *, running_workflow_id: str) -> dict[str, A steps.append(item) return {"count": len(steps), "running_workflow_steps": steps} - def get_running_workflow_step_outputs( + def get_running_workflow_step_output_values_for_output( self, *, running_workflow_step_id: str, output: str ) -> tuple[dict[str, Any], int]: del running_workflow_step_id diff --git a/workflow/workflow_abc.py b/workflow/workflow_abc.py index 680cb3b..e3ae8dd 100644 --- a/workflow/workflow_abc.py +++ b/workflow/workflow_abc.py @@ -349,16 +349,16 @@ def get_job( # If not present an empty dictionary should be returned. @abstractmethod - def get_running_workflow_step_outputs( + def get_running_workflow_step_output_values_for_output( self, *, running_workflow_step_id: str, output: str ) -> tuple[dict[str, Any], int]: - """Gets the set of outputs for the output variable of a given step. + """Gets the set of outputs generated for the output variable of a given step. The step must have stopped for this to return any meaningful value. 
Returned files might also include paths that are relative to the Step's instance directory.""" # Should return a (possibly empty) list of paths and filenames: # { - # "outputs": ["file1.sdf", "file2.sdf"] + # "outputs": ["dir/file1.sdf", "dir/file2.sdf"] # } @abstractmethod From 12b3602148712a52d5aca457c88ea5965eb8886d Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Mon, 23 Jun 2025 16:55:06 +0100 Subject: [PATCH 08/57] fix: Removed rogue 'print' statement --- workflow/decoder.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/workflow/decoder.py b/workflow/decoder.py index 6970caf..55512e6 100644 --- a/workflow/decoder.py +++ b/workflow/decoder.py @@ -260,7 +260,4 @@ def set_step_variables( ) result |= options - - print("final step vars", result) - return result From da1286929de3b7ff1b14fc84b5cb252903f0f0bf Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Thu, 26 Jun 2025 10:05:07 +0100 Subject: [PATCH 09/57] test: Add mock of step outputs --- tests/test_test_api_adapter.py | 64 +++++++++++++++++++++-- tests/test_workflow_engine_examples.py | 16 +++--- tests/wapi_adapter.py | 70 ++++++++++++++++++++++---- workflow/workflow_abc.py | 4 +- 4 files changed, 131 insertions(+), 23 deletions(-) diff --git a/tests/test_test_api_adapter.py b/tests/test_test_api_adapter.py index 0cf4032..88a5f61 100644 --- a/tests/test_test_api_adapter.py +++ b/tests/test_test_api_adapter.py @@ -443,19 +443,75 @@ def test_get_running_workflow_step_by_name(): assert response["id"] == rwfs_id -def test_basic_get_running_workflow_step_output_values_for_output_when_step_unknown(): +def test_mock_get_running_workflow_step_output_values_for_output(): # Arrange utaa = UnitTestWorkflowAPIAdapter() + response = utaa.create_workflow(workflow_definition={"name": "blah"}) + response = utaa.create_running_workflow( + user_id="dlister", + workflow_id=response["id"], + project_id=TEST_PROJECT_ID, + variables={}, + ) + response, _ = utaa.create_running_workflow_step( + running_workflow_id=response["id"], step="step-1" + ) # Act + utaa.mock_get_running_workflow_step_output_values_for_output( + step_name="step-1", output_variable="results", output=["a", "b"] + ) + + # Assert response, _ = utaa.get_running_workflow_step_output_values_for_output( running_workflow_step_id="r-workflow-step-00000000-0000-0000-0000-000000000001", - output="outputFile", + output_variable="results", + ) + assert "output" in response + assert len(response["output"]) == 2 + assert "a" in response["output"] + assert "b" in response["output"] + + +def test_basic_get_running_workflow_step_output_values_for_output_when_step_variable_name_unknown(): + # Arrange + utaa = UnitTestWorkflowAPIAdapter() + response = utaa.create_workflow(workflow_definition={"name": "blah"}) + response = utaa.create_running_workflow( + user_id="dlister", + workflow_id=response["id"], + project_id=TEST_PROJECT_ID, + variables={}, + ) + response, _ = utaa.create_running_workflow_step( + running_workflow_id=response["id"], step="step-1" + ) + + # Act + utaa.mock_get_running_workflow_step_output_values_for_output( + step_name="step-1", output_variable="results", output=["a", "b"] ) # Assert - assert "outputs" in response - assert len(response["outputs"]) == 0 + with pytest.raises(AssertionError): + _, _ = utaa.get_running_workflow_step_output_values_for_output( + running_workflow_step_id="r-workflow-step-00000000-0000-0000-0000-000000000001", + output_variable="unknownVariable", + ) + + +def test_basic_get_running_workflow_step_output_values_for_output_when_step_unknown(): + # 
Arrange + utaa = UnitTestWorkflowAPIAdapter() + + # Act + with pytest.raises(AssertionError): + _, _ = utaa.get_running_workflow_step_output_values_for_output( + running_workflow_step_id="r-workflow-step-00000000-0000-0000-0000-000000000001", + output_variable="outputFile", + ) + + # Assert def test_basic_realise(): diff --git a/tests/test_workflow_engine_examples.py b/tests/test_workflow_engine_examples.py index 51b1053..8fa25b9 100644 --- a/tests/test_workflow_engine_examples.py +++ b/tests/test_workflow_engine_examples.py @@ -42,7 +42,7 @@ def basic_engine(): print("Starting message queue...") message_queue.start() - yield [wapi_adapter, message_dispatcher] + yield [message_dispatcher, wapi_adapter] print("Stopping message queue...") message_queue.stop() @@ -145,7 +145,7 @@ def wait_for_workflow( def test_workflow_engine_example_two_step_nop(basic_engine): # Arrange - da, md = basic_engine + md, da = basic_engine # Act r_wfid = start_workflow(md, da, "example-two-step-nop", {}) @@ -164,7 +164,7 @@ def test_workflow_engine_example_two_step_nop(basic_engine): def test_workflow_engine_example_nop_fail(basic_engine): # Arrange - da, md = basic_engine + md, da = basic_engine # Act r_wfid = start_workflow(md, da, "example-nop-fail", {}) @@ -181,7 +181,7 @@ def test_workflow_engine_example_nop_fail(basic_engine): def test_workflow_engine_example_smiles_to_file(basic_engine): # Arrange - da, md = basic_engine + md, da = basic_engine # Make sure a file that should be generated by the test # does not exist before we run the test. output_file = "ethanol.smi" @@ -206,7 +206,7 @@ def test_workflow_engine_example_smiles_to_file(basic_engine): def test_workflow_engine_shortcut_example_1(basic_engine): # Arrange - da, md = basic_engine + md, da = basic_engine # Make sure files that should be generated by the test # do not exist before we run the test. output_file_a = "a.sdf" @@ -234,7 +234,7 @@ def test_workflow_engine_shortcut_example_1(basic_engine): def test_workflow_engine_simple_python_molprops(basic_engine): # Arrange - da, md = basic_engine + md, da = basic_engine # Make sure files that should be generated by the test # do not exist before we run the test. output_file_1 = "step1.out.smi" @@ -315,7 +315,7 @@ def test_workflow_engine_simple_python_molprops(basic_engine): def test_workflow_engine_simple_python_molprops_with_options(basic_engine): # Arrange - da, md = basic_engine + md, da = basic_engine # Make sure files that should be generated by the test # do not exist before we run the test. output_file_1 = "step1.out.smi" @@ -403,7 +403,7 @@ def test_workflow_engine_simple_python_molprops_with_options(basic_engine): def test_workflow_engine_simple_python_parallel(basic_engine): # Arrange - da, md = basic_engine + md, da = basic_engine # Make sure files that should be generated by the test # do not exist before we run the test. output_file_first = "first-step.out.smi" diff --git a/tests/wapi_adapter.py b/tests/wapi_adapter.py index d215301..f604689 100644 --- a/tests/wapi_adapter.py +++ b/tests/wapi_adapter.py @@ -15,6 +15,7 @@ method. 
""" +import copy import os from http import HTTPStatus from multiprocessing import Lock @@ -49,6 +50,7 @@ f"{_PICKLE_DIRECTORY}/running-workflow-step.pickle" ) _INSTANCE_PICKLE_FILE: str = f"{_PICKLE_DIRECTORY}/instance.pickle" +_MOCK_STEP_OUTPUT_FILE: str = f"{_PICKLE_DIRECTORY}/mock-output.pickle" class UnitTestWorkflowAPIAdapter(WorkflowAPIAdapter): @@ -73,12 +75,13 @@ def __init__(self): _RUNNING_WORKFLOW_PICKLE_FILE, _RUNNING_WORKFLOW_STEP_PICKLE_FILE, _INSTANCE_PICKLE_FILE, + _MOCK_STEP_OUTPUT_FILE, ]: with open(file, "wb") as pickle_file: Pickler(pickle_file).dump({}) UnitTestWorkflowAPIAdapter.lock.release() - def get_workflow(self, *, workflow_id: str) -> dict[str, Any]: + def get_workflow(self, *, workflow_id: str) -> tuple[dict[str, Any], int]: UnitTestWorkflowAPIAdapter.lock.acquire() with open(_WORKFLOW_PICKLE_FILE, "rb") as pickle_file: workflow = Unpickler(pickle_file).load() @@ -138,7 +141,7 @@ def create_running_workflow_step( step: str, replica: int = 0, prior_running_workflow_step_id: str | None = None, - ) -> dict[str, Any]: + ) -> tuple[dict[str, Any], int]: if replica: assert replica > 0 @@ -172,7 +175,7 @@ def create_running_workflow_step( def get_running_workflow_step( self, *, running_workflow_step_id: str - ) -> dict[str, Any]: + ) -> tuple[dict[str, Any], int]: UnitTestWorkflowAPIAdapter.lock.acquire() with open(_RUNNING_WORKFLOW_STEP_PICKLE_FILE, "rb") as pickle_file: running_workflow_step = Unpickler(pickle_file).load() @@ -188,7 +191,7 @@ def get_running_workflow_step( def get_running_workflow_step_by_name( self, *, name: str, running_workflow_id: str, replica: int = 0 - ) -> dict[str, Any]: + ) -> tuple[dict[str, Any], int]: if replica: assert replica > 0 UnitTestWorkflowAPIAdapter.lock.acquire() @@ -293,7 +296,9 @@ def get_instance(self, *, instance_id: str) -> dict[str, Any]: response = {} if instance_id not in instances else instances[instance_id] return response, 0 - def get_job(self, *, collection: str, job: str, version: str) -> dict[str, Any]: + def get_job( + self, *, collection: str, job: str, version: str + ) -> tuple[dict[str, Any], int]: assert collection == _JOB_DEFINITIONS["collection"] assert job in _JOB_DEFINITIONS["jobs"] assert version @@ -391,14 +396,61 @@ def get_running_workflow_steps(self, *, running_workflow_id: str) -> dict[str, A return {"count": len(steps), "running_workflow_steps": steps} def get_running_workflow_step_output_values_for_output( - self, *, running_workflow_step_id: str, output: str + self, *, running_workflow_step_id: str, output_variable: str ) -> tuple[dict[str, Any], int]: - del running_workflow_step_id - del output - return {"outputs": []}, HTTPStatus.OK + """We use the 'mock' data to return output values, otherwise + we return an empty list. And we need to get the step in order to get its name. + """ + # The RunningWorkflowStep must exist... + step, _ = self.get_running_workflow_step( + running_workflow_step_id=running_workflow_step_id + ) + assert step + step_name: str = step["name"] + # Now we can inspect the 'mock' data... + UnitTestWorkflowAPIAdapter.lock.acquire() + with open(_MOCK_STEP_OUTPUT_FILE, "rb") as pickle_file: + mock_output = Unpickler(pickle_file).load() + UnitTestWorkflowAPIAdapter.lock.release() + + if step_name not in mock_output: + return {"output": []}, 0 + # The record's output variable must match (there's only one record per step atm) + assert mock_output[step_name]["output_variable"] == output_variable + # Now return what was provided to the mock method... 
+        response = {"output": copy.copy(mock_output[step_name]["output"])}
+        return response, 0

     def realise_outputs(
         self, *, running_workflow_step_id: str
     ) -> tuple[dict[str, Any], int]:
         del running_workflow_step_id
         return {}, HTTPStatus.OK
+
+    # Custom (test) methods
+    # Methods not declared in the ABC
+
+    def mock_get_running_workflow_step_output_values_for_output(
+        self, *, step_name: str, output_variable: str, output: list[str]
+    ) -> None:
+        """Sets the output response for a step.
+        Limitation is that there can only be one record for each step name
+        so, for now, the output_variable is superfluous and only used
+        to check the output variable name matches."""
+        assert isinstance(step_name, str)
+        assert isinstance(output_variable, str)
+        assert isinstance(output, list)
+
+        UnitTestWorkflowAPIAdapter.lock.acquire()
+        with open(_MOCK_STEP_OUTPUT_FILE, "rb") as pickle_file:
+            mock_output = Unpickler(pickle_file).load()
+
+        record = {
+            "output_variable": output_variable,
+            "output": output,
+        }
+        mock_output[step_name] = record
+
+        with open(_MOCK_STEP_OUTPUT_FILE, "wb") as pickle_file:
+            Pickler(pickle_file).dump(mock_output)
+        UnitTestWorkflowAPIAdapter.lock.release()
diff --git a/workflow/workflow_abc.py b/workflow/workflow_abc.py
index e3ae8dd..aa01199 100644
--- a/workflow/workflow_abc.py
+++ b/workflow/workflow_abc.py
@@ -350,7 +350,7 @@ def get_job(
     @abstractmethod
     def get_running_workflow_step_output_values_for_output(
-        self, *, running_workflow_step_id: str, output: str
+        self, *, running_workflow_step_id: str, output_variable: str
     ) -> tuple[dict[str, Any], int]:
         """Gets the set of outputs generated for the output variable of a given step.
         The step must have stopped for this to return any meaningful value.
@@ -358,7 +358,7 @@ def get_running_workflow_step_output_values_for_output(
         Step's instance directory."""
         # Should return a (possibly empty) list of paths and filenames:
         # {
-        #   "outputs": ["dir/file1.sdf", "dir/file2.sdf"]
+        #   "output": ["dir/file1.sdf", "dir/file2.sdf"]
         # }

From cc5d8cc71dc5fad7f9b025e0e8b24f9bc45b8626 Mon Sep 17 00:00:00 2001
From: Kalev Takkis
Date: Fri, 18 Jul 2025 16:56:11 +0100
Subject: [PATCH 10/57] fix: it's a mess

---
 tests/job-definitions/job-definitions.yaml | 4 +
 tests/jobs/copyf.py | 30 ++++++
 tests/jobs/copyf.sh | 4 +
 tests/jobs/split-smi.sh | 72 +++++++++++++
 tests/test_workflow_engine_examples.py | 58 +++++++---
 tests/wapi_adapter.py | 6 ++
 .../simple-python-fanout.yaml | 70 ++++++++++++
 workflow/decoder.py | 23 +++-
 workflow/workflow-schema.yaml | 28 ++---
 workflow/workflow_engine.py | 101 ++++++++++++++----
 10 files changed, 344 insertions(+), 52 deletions(-)
 create mode 100644 tests/jobs/copyf.py
 create mode 100755 tests/jobs/copyf.sh
 create mode 100755 tests/jobs/split-smi.sh
 create mode 100644 tests/workflow-definitions/simple-python-fanout.yaml

diff --git a/tests/job-definitions/job-definitions.yaml b/tests/job-definitions/job-definitions.yaml
index 66afcd3..7e3e1b7 100644
--- a/tests/job-definitions/job-definitions.yaml
+++ b/tests/job-definitions/job-definitions.yaml
@@ -132,3 +132,7 @@ jobs:
   concatenate:
     command: >-
       concatenate.py {% for ifile in inputFile %}{{ ifile }} {% endfor %} --outputFile {{ outputFile }}
+
+  splitsmiles:
+    command: >-
+      copyf.py {{ inputFile }}
diff --git a/tests/jobs/copyf.py b/tests/jobs/copyf.py
new file mode 100644
index 0000000..23dc38b
--- /dev/null
+++ b/tests/jobs/copyf.py
@@ -0,0 +1,30 @@
+import shutil
+import sys
+from pathlib import Path
+
+
+def main():
+    print("copyf job running")
+    if len(sys.argv) != 2:
+        print("Usage: python copy_file.py <file>")
+        sys.exit(1)
+
+    original_path = Path(sys.argv[1])
+
+    if not original_path.exists() or not original_path.is_file():
+        print(f"Error: '{original_path}' does not exist or is not a file.")
+        sys.exit(1)
+
+    # Create a new filename like 'example_copy.txt'
+    new_name = original_path.absolute().parent.joinpath("chunk_1.smi")
+    new_path = original_path.with_name(new_name.name)
+    shutil.copyfile(original_path, new_path)
+
+    new_name = original_path.absolute().parent.joinpath("chunk_2.smi")
+    new_path = original_path.with_name(new_name.name)
+
+    shutil.copyfile(original_path, new_path)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/jobs/copyf.sh b/tests/jobs/copyf.sh
new file mode 100755
index 0000000..8994a2b
--- /dev/null
+++ b/tests/jobs/copyf.sh
@@ -0,0 +1,4 @@
+#! /bin/bash
+
+cp "$1" chunk_1.smi
+cp "$1" chunk_2.smi
diff --git a/tests/jobs/split-smi.sh b/tests/jobs/split-smi.sh
new file mode 100755
index 0000000..48a2fb3
--- /dev/null
+++ b/tests/jobs/split-smi.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+set -euo pipefail
+
+if [[ $# -lt 3 || $# -gt 4 ]]; then
+    echo "Usage: $0 <input_file> <lines_per_file> <base_name> [has_header: yes]"
+    exit 1
+fi
+
+input_file="$1"
+lines_per_file="$2"
+base_name="$3"
+has_header="${4:-no}"
+
+# Determine how to read the file (plain text or gzipped)
+if [[ "$input_file" == *.gz ]]; then
+    reader="zcat"
+else
+    reader="cat"
+fi
+
+if ! [[ -f "$input_file" ]]; then
+    echo "Error: File '$input_file' not found"
+    exit 1
+fi
+
+# Extract header if present
+if [[ "$has_header" == "yes" ]]; then
+    header="$($reader "$input_file" | head -n1)"
+    data_start=2
+else
+    header=""
+    data_start=1
+fi
+
+# Count number of data lines (excluding header if present)
+data_lines="$($reader "$input_file" | tail -n +"$data_start" | wc -l)"
+if [[ "$data_lines" -eq 0 ]]; then
+    echo "No data lines to process."
+    exit 0
+fi
+
+# Calculate number of output files and required zero padding
+num_files=$(( (data_lines + lines_per_file - 1) / lines_per_file ))
+pad_width=0
+if [[ "$num_files" -gt 1 ]]; then
+    pad_width=${#num_files}
+fi
+
+# Split logic
+$reader "$input_file" | tail -n +"$data_start" | awk -v header="$header" -v lines="$lines_per_file" -v base="$base_name" -v pad="$pad_width" '
+function new_file() {
+    suffix = (pad > 0) ? sprintf("%0*d", pad, file_index) : file_index
+    file = base "_" suffix ".smi"
+    if (header != "") {
+        print header > file
+    }
+    file_index++
+    line_count = 0
+}
+{
+    if (line_count == 0) {
+        new_file()
+    }
+    print >> file
+    line_count++
+    if (line_count == lines) {
+        close(file)
+        print file " created"
+        line_count = 0
+    }
+}
+' file_index=1
diff --git a/tests/test_workflow_engine_examples.py b/tests/test_workflow_engine_examples.py
index 8fa25b9..8e4208a 100644
--- a/tests/test_workflow_engine_examples.py
+++ b/tests/test_workflow_engine_examples.py
@@ -83,6 +83,7 @@ def start_workflow(
         variables=variables,
         level=ValidationLevel.RUN,
     )
+    print("vr_result", vr_result)
     assert vr_result.error_num == 0

     # 3.
response = da.create_running_workflow( @@ -401,22 +402,44 @@ def test_workflow_engine_simple_python_molprops_with_options(basic_engine): assert project_file_exists(output_file_2) -def test_workflow_engine_simple_python_parallel(basic_engine): +def test_workflow_engine_simple_python_fanout(basic_engine): # Arrange md, da = basic_engine + + da.mock_get_running_workflow_step_output_values_for_output( + step_name="first-step", + output_variable="outputFile", + output=["chunk_1.smi", "chunk_2.smi"], + ) + + # da.mock_get_running_workflow_step_output_values_for_output( + # step_name="parallel-step", + # output_variable="outputFile", + # output=["chunk_1_proc.smi", "chunk_2_proc.smi"] + # ) + + # da.mock_get_running_workflow_step_output_values_for_output( + # step_name="final-step", + # output_variable="outputFile", + # output=["final-step.out.smi"], + # ) + # Make sure files that should be generated by the test # do not exist before we run the test. - output_file_first = "first-step.out.smi" + output_file_first = "chunk_1.smi" + output_file_second = "chunk_2.smi" assert not project_file_exists(output_file_first) - output_file_pa = "parallel-step-a.out.smi" - assert not project_file_exists(output_file_pa) - output_file_pb = "parallel-step-b.out.smi" - assert not project_file_exists(output_file_pb) - output_file_final = "final-step.out.smi" - assert not project_file_exists(output_file_final) + assert not project_file_exists(output_file_second) + output_file_p_first = "chunk_1_proc.smi" + output_file_p_second = "chunk_2_proc.smi" + assert not project_file_exists(output_file_p_first) + assert not project_file_exists(output_file_p_second) + # output_file_final = "final-step.out.smi" + # assert not project_file_exists(output_file_final) # And create the test's input file. input_file_1 = "input1.smi" - input_file_1_content = "O=C(CSCc1ccc(Cl)s1)N1CCC(O)CC1" + input_file_1_content = """O=C(CSCc1ccc(Cl)s1)N1CCC(O)CC1 + COCN1C(=O)NC(C)(C)C1=O""" with open( f"{EXECUTION_DIRECTORY}/{input_file_1}", mode="wt", encoding="utf8" ) as input_file: @@ -426,7 +449,7 @@ def test_workflow_engine_simple_python_parallel(basic_engine): r_wfid = start_workflow( md, da, - "simple-python-parallel", + "simple-python-fanout", {"candidateMolecules": input_file_1}, ) @@ -435,16 +458,17 @@ def test_workflow_engine_simple_python_parallel(basic_engine): # Additional, detailed checks... 
# Check we only have one RunningWorkflowStep, and it succeeded response = da.get_running_workflow_steps(running_workflow_id=r_wfid) + print("response", response) - assert response["count"] == 4 + assert response["count"] == 2 assert response["running_workflow_steps"][0]["done"] assert response["running_workflow_steps"][0]["success"] assert response["running_workflow_steps"][1]["done"] assert response["running_workflow_steps"][1]["success"] - assert response["running_workflow_steps"][2]["done"] - assert response["running_workflow_steps"][2]["success"] - assert response["running_workflow_steps"][3]["done"] - assert response["running_workflow_steps"][3]["success"] + # assert response["running_workflow_steps"][2]["done"] + # assert response["running_workflow_steps"][2]["success"] + # assert response["running_workflow_steps"][3]["done"] + # assert response["running_workflow_steps"][3]["success"] # This test should generate a file in the simulated project directory - assert project_file_exists(output_file_first) - assert project_file_exists(output_file_final) + # assert project_file_exists(output_file_first) + # assert project_file_exists(output_file_final) diff --git a/tests/wapi_adapter.py b/tests/wapi_adapter.py index f604689..e91db77 100644 --- a/tests/wapi_adapter.py +++ b/tests/wapi_adapter.py @@ -202,6 +202,7 @@ def get_running_workflow_step_by_name( for rwfs_id, record in running_workflow_step.items(): if record["running_workflow"]["id"] != running_workflow_id: continue + print("running wf step by name, record:", record) if record["name"] == name and record["replica"] == replica: response = record response["id"] = rwfs_id @@ -413,6 +414,11 @@ def get_running_workflow_step_output_values_for_output( mock_output = Unpickler(pickle_file).load() UnitTestWorkflowAPIAdapter.lock.release() + print("mock output", mock_output) + print("step", step) + print("step_name", step_name) + # mock output {'first-step': {'output_variable': 'results', 'output': ['chunk_1.smi', 'chunk_2.smi']}} + if step_name not in mock_output: return {"output": []}, 0 # The record's output variable must match (there's only one record per step atm) diff --git a/tests/workflow-definitions/simple-python-fanout.yaml b/tests/workflow-definitions/simple-python-fanout.yaml new file mode 100644 index 0000000..01f576f --- /dev/null +++ b/tests/workflow-definitions/simple-python-fanout.yaml @@ -0,0 +1,70 @@ +--- +kind: DataManagerWorkflow +kind-version: "2025.2" +name: python-workflow +description: >- + A simple parallel workflow. 
+  Input is split into N chunks and N instances of the same job are started
+variable-mapping:
+  inputs:
+  - name: candidateMolecules
+  outputs:
+  - name: clusteredMolecules
+    from:
+      step: final-step
+      output: outputFile
+
+
+steps:
+
+- name: first-step
+  description: Create inputs
+  specification:
+    collection: workflow-engine-unit-test-jobs
+    job: splitsmiles
+    version: "1.0.0"
+    variables:
+      name: "count"
+      value: "1"
+  inputs:
+  - input: inputFile
+    from:
+      workflow-input: candidateMolecules
+  outputs:
+  - output: outputFile
+    # as: chunk_*.smi
+
+- name: parallel-step
+  description: Add some params
+  specification:
+    collection: workflow-engine-unit-test-jobs
+    job: append-col
+    version: "1.0.0"
+    variables:
+      name: "desc1"
+      value: "777"
+  replicate:
+    using:
+      input: inputFile
+  inputs:
+  - input: inputFile
+    from:
+      step: first-step
+      output: outputFile
+  outputs:
+  - output: outputFile
+    # as: parallel-step.out.smi
+
+# - name: final-step
+#   description: Collate results
+#   specification:
+#     collection: workflow-engine-unit-test-jobs
+#     job: concatenate
+#     version: "1.0.0"
+#   inputs:
+#   - input: inputFile
+#     from:
+#       step: parallel-step
+#       output: outputFile
+#   outputs:
+#   - output: outputFile
+#     # as: final-step.out.smi
diff --git a/workflow/decoder.py b/workflow/decoder.py
index 55512e6..3e12ecb 100644
--- a/workflow/decoder.py
+++ b/workflow/decoder.py
@@ -213,6 +213,7 @@ def set_step_variables(
     workflow: dict[str, Any],
     inputs: list[dict[str, Any]],
     outputs: list[dict[str, Any]],
+    step_outputs: dict[str, Any],
     previous_step_outputs: list[dict[str, Any]],
     workflow_variables: dict[str, Any],
     step_name: str,
@@ -224,6 +225,13 @@ def set_step_variables(
     """
     result = {}

+    print("ssv: wf vars", workflow_variables)
+    print("ssv: inputs", inputs)
+    print("ssv: outputs", outputs)
+    print("ssv: step_outputs", step_outputs)
+    print("ssv: prev step outputs", previous_step_outputs)
+    print("ssv: step_name", step_name)
+
     for item in inputs:
         p_key = item["input"]
         p_val = ""
@@ -234,7 +242,16 @@
         elif "step" in val.keys():
             for out in previous_step_outputs:
                 if out["output"] == val["output"]:
-                    p_val = out["as"]
+                    # p_val = out["as"]
+                    if step_outputs["output"]:
+                        p_val = step_outputs["output"]
+                        print("\n!!!!!!!!!!!!!if clause!!!!!!!!!!!!!!!!!!!!!\n")
+                        print(p_val)
+                    else:
+                        # what do I need to do here??
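+                        # (Reached when the previous step has recorded no
+                        # output values for this variable.)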
+ print("\n!!!!!!!!!!!!!else clause!!!!!!!!!!!!!!!!!!!!!\n") + print(out) + print(val) # this bit handles multiple inputs: if a step # requires input from multiple steps, add them to @@ -250,7 +267,9 @@ def set_step_variables( for item in outputs: p_key = item["output"] - p_val = item["as"] + # p_val = item["as"] + # p_val = step_outputs["output"] + p_val = "somefile.smi" result[p_key] = p_val options = set_variables_from_options_for_step( diff --git a/workflow/workflow-schema.yaml b/workflow/workflow-schema.yaml index 70a6ad3..d904e45 100644 --- a/workflow/workflow-schema.yaml +++ b/workflow/workflow-schema.yaml @@ -206,17 +206,17 @@ definitions: - from # A Step output (with an 'as' - a declared value) - step-output-as: - type: object - additionalProperties: false - properties: - output: - $ref: '#/definitions/template-variable-name' - as: - $ref: '#/definitions/file-name' - required: - - output - - as + # step-output-as: + # type: object + # additionalProperties: false + # properties: + # output: + # $ref: '#/definitions/template-variable-name' + # as: + # $ref: '#/definitions/file-name' + # required: + # - output + # - as # A step specification variable @@ -282,9 +282,9 @@ definitions: - $ref: "#/definitions/step-input-from-workflow" outputs: type: array - items: - anyOf: - - $ref: "#/definitions/step-output-as" + # items: + # anyOf: + # - $ref: "#/definitions/step-output-as" required: - name - specification diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index e11d17a..672e9bb 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -424,9 +424,10 @@ def _validate_step_command( running_workflow_step_id, ) + # resolve all previous steps + previous_step_names = set() if our_step_index > 0: - # resolve all previous steps - previous_step_names = set() + print("prev step inputs", inputs) for inp in inputs: if step_name := inp["from"].get("step", None): previous_step_names.add(step_name) @@ -465,14 +466,54 @@ def _validate_step_command( running_wf, _ = self._wapi_adapter.get_running_workflow( running_workflow_id=running_wf_id ) + print("running wf", running_wf) workflow_id = running_wf["workflow"]["id"] workflow, _ = self._wapi_adapter.get_workflow(workflow_id=workflow_id) + print("workflow", workflow) + + # for step in workflow["steps"]: + # if step["name"] in previous_step_names: + + previous_step_id = None + for name in previous_step_names: + result, _ = self._wapi_adapter.get_running_workflow_step_by_name( + name=name, running_workflow_id=running_wf_id, replica=0 + ) + print("by name results", result) + print("by name results, vars", result["variables"]) + print("by name results, od", result["id"]) + previous_step_id = result["id"] + # if name == 'first-step': + # previous_step_id = result["id"] + + # don't understand how this is structured + print("prev steps", previous_step_names) + print("outputs", outputs) + print() + step_outputs: dict[str, Any] = {"output": []} + if previous_step_id: + for output in outputs: + for k, v in output.items(): + print("sending params to output mock", k, v) + try: + step_outputs, _ = ( + self._wapi_adapter.get_running_workflow_step_output_values_for_output( + running_workflow_step_id=previous_step_id, + output_variable=v, # foraeach outputs key + ) + ) + + print("mockputs", running_workflow_step_id, step_outputs) + except AssertionError: + print("no output for step", running_workflow_step_id, k, v) + step_vars = set_step_variables( workflow=workflow, workflow_variables=all_variables, inputs=inputs, 
outputs=outputs, + step_outputs=step_outputs, previous_step_outputs=previous_step_outputs, step_name=running_wf_step["name"], ) @@ -515,6 +556,7 @@ def _launch( wf_step_data, _ = self._wapi_adapter.get_workflow_steps_driving_this_step( running_workflow_step_id=rwfs_id, ) + print("wf_step_data", wf_step_data) assert wf_step_data["caller_step_index"] >= 0 our_step_index: int = wf_step_data["caller_step_index"] @@ -541,6 +583,13 @@ def _launch( project_id = rwf["project"]["id"] variables: dict[str, Any] = error_or_variables + print("variables", variables) + # find out if and by which parameter this step should be replicated + replicator = "" + if replicate := step.get("replicate", {}): + if using := replicate.get("using", {}): + # using is a dict but there can be only single value for now + replicator = list(using.values())[0] _LOGGER.info( "Launching step: RunningWorkflow=%s RunningWorkflowStep=%s step=%s" @@ -587,24 +636,38 @@ def _launch( # A list of Job input variable names inputs: list[str] = [] inputs.extend(iter(get_workflow_job_input_names_for_step(wf, step_name))) - lp: LaunchParameters = LaunchParameters( - project_id=project_id, - name=step_name, - debug=rwf.get("debug"), - launching_user_name=rwf["running_user"], - launching_user_api_token=rwf["running_user_api_token"], - specification=step["specification"], - specification_variables=variables, - running_workflow_id=rwf_id, - running_workflow_step_id=rwfs_id, - running_workflow_step_prior_steps=prior_steps, - running_workflow_step_inputs=inputs, - ) - lr: LaunchResult = self._instance_launcher.launch(launch_parameters=lp) - if lr.error_num: - self._set_step_error(step_name, rwf_id, rwfs_id, lr.error_num, lr.error_msg) + if replicator: + single_step_variables = [] + for replicating_param in variables[replicator]: + ssv = {**variables} + ssv[replicator] = replicating_param + single_step_variables.append(ssv) else: - _LOGGER.info("Launched step '%s' (command=%s)", step_name, lr.command) + single_step_variables = [variables] + + print("single step variables", single_step_variables) + + for params in single_step_variables: + lp: LaunchParameters = LaunchParameters( + project_id=project_id, + name=step_name, + debug=rwf.get("debug"), + launching_user_name=rwf["running_user"], + launching_user_api_token=rwf["running_user_api_token"], + specification=step["specification"], + specification_variables=params, + running_workflow_id=rwf_id, + running_workflow_step_id=rwfs_id, + running_workflow_step_prior_steps=prior_steps, + running_workflow_step_inputs=inputs, + ) + lr: LaunchResult = self._instance_launcher.launch(launch_parameters=lp) + if lr.error_num: + self._set_step_error( + step_name, rwf_id, rwfs_id, lr.error_num, lr.error_msg + ) + else: + _LOGGER.info("Launched step '%s' (command=%s)", step_name, lr.command) def _set_step_error( self, From ad0020579a96daa7f706af5cf020a7e572a58827 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Fri, 8 Aug 2025 10:16:25 +0100 Subject: [PATCH 11/57] docs: Add instance-directory comment --- workflow/workflow_abc.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/workflow/workflow_abc.py b/workflow/workflow_abc.py index aa01199..0d7b1c8 100644 --- a/workflow/workflow_abc.py +++ b/workflow/workflow_abc.py @@ -240,6 +240,12 @@ def get_running_workflow_step( # } # If not present an empty dictionary should be returned. 
# + # Additionally, if the step has started (an instance has been created) + # the response will contain a "instance_directory" top-level property + # that is the directory within the Project that's the step's working directory. + # + # "instance_directory": ".instance-00000000-0000-0000-0000-00000000000a", + # # For steps that are not the first in a workflow the following field # can be expected in the response: - # @@ -273,6 +279,12 @@ def get_running_workflow_step_by_name( # } # If not present an empty dictionary should be returned. # + # Additionally, if the step has started (an instance has been created) + # the response will contain a "instance_directory" top-level property + # that is the directory within the Project that's the step's working directory. + # + # "instance_directory": ".instance-00000000-0000-0000-0000-00000000000a", + # # For steps that are not the first in a workflow the following field # can be expected in the response: - # @@ -322,6 +334,12 @@ def get_workflow_steps_driving_this_step( # } # ] # } + # + # Additionally, if the step has started (an instance has been created) + # each entry on the array of steps will contain a "instance_directory" property + # that is the directory within the Project that's the step's working directory. + # + # "instance_directory": ".instance-00000000-0000-0000-0000-00000000000a", @abstractmethod def get_instance(self, *, instance_id: str) -> tuple[dict[str, Any], int]: From 95eafcbcddfa7c2f6a65b0324742233051740fa9 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Fri, 8 Aug 2025 10:48:02 +0100 Subject: [PATCH 12/57] feat: Creating instances now adds instance-directory property to step record --- tests/wapi_adapter.py | 11 +++++++++++ workflow/workflow_abc.py | 6 ------ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/tests/wapi_adapter.py b/tests/wapi_adapter.py index f604689..9d87e38 100644 --- a/tests/wapi_adapter.py +++ b/tests/wapi_adapter.py @@ -377,6 +377,17 @@ def create_instance(self, *, running_workflow_step_id: str) -> dict[str, Any]: with open(_INSTANCE_PICKLE_FILE, "wb") as pickle_file: Pickler(pickle_file).dump(instances) + + # Use the instance ID as the step's instance-directory (prefixing with '.') + with open(_RUNNING_WORKFLOW_STEP_PICKLE_FILE, "rb") as pickle_file: + running_workflow_step = Unpickler(pickle_file).load() + assert running_workflow_step_id in running_workflow_step + running_workflow_step[running_workflow_step_id][ + "instance_directory" + ] = f".{instance_id}" + with open(_RUNNING_WORKFLOW_STEP_PICKLE_FILE, "wb") as pickle_file: + Pickler(pickle_file).dump(running_workflow_step) + UnitTestWorkflowAPIAdapter.lock.release() return {"id": instance_id} diff --git a/workflow/workflow_abc.py b/workflow/workflow_abc.py index 0d7b1c8..2024fba 100644 --- a/workflow/workflow_abc.py +++ b/workflow/workflow_abc.py @@ -334,12 +334,6 @@ def get_workflow_steps_driving_this_step( # } # ] # } - # - # Additionally, if the step has started (an instance has been created) - # each entry on the array of steps will contain a "instance_directory" property - # that is the directory within the Project that's the step's working directory. 
- # - # "instance_directory": ".instance-00000000-0000-0000-0000-00000000000a", @abstractmethod def get_instance(self, *, instance_id: str) -> tuple[dict[str, Any], int]: From f37ea7417b4920324bbcffc759a27e1ff753d06a Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Fri, 8 Aug 2025 11:21:30 +0100 Subject: [PATCH 13/57] fix: get_instance() response type --- tests/wapi_adapter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/wapi_adapter.py b/tests/wapi_adapter.py index 9d87e38..322507c 100644 --- a/tests/wapi_adapter.py +++ b/tests/wapi_adapter.py @@ -287,7 +287,7 @@ def get_workflow_steps_driving_this_step( "steps": wf_response["steps"].copy(), }, 0 - def get_instance(self, *, instance_id: str) -> dict[str, Any]: + def get_instance(self, *, instance_id: str) -> tuple[dict[str, Any], int]: UnitTestWorkflowAPIAdapter.lock.acquire() with open(_INSTANCE_PICKLE_FILE, "rb") as pickle_file: instances = Unpickler(pickle_file).load() From 13e44705e370919f6917cc2e10203c349959f085 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Fri, 8 Aug 2025 11:22:09 +0100 Subject: [PATCH 14/57] test: Add tests for instance_directory --- tests/test_test_api_adapter.py | 82 ++++++++++++++++++++++++++++++++-- 1 file changed, 79 insertions(+), 3 deletions(-) diff --git a/tests/test_test_api_adapter.py b/tests/test_test_api_adapter.py index 88a5f61..a8ebc33 100644 --- a/tests/test_test_api_adapter.py +++ b/tests/test_test_api_adapter.py @@ -324,9 +324,20 @@ def test_get_running_workflow_step_with_prior_step(): def test_create_instance(): # Arrange utaa = UnitTestWorkflowAPIAdapter() + response = utaa.create_workflow(workflow_definition={"name": "blah"}) + response = utaa.create_running_workflow( + user_id="dlister", + workflow_id=response["id"], + project_id=TEST_PROJECT_ID, + variables={}, + ) + response, _ = utaa.create_running_workflow_step( + running_workflow_id=response["id"], step="step-1" + ) + rwfs_id = response["id"] # Act - response = utaa.create_instance(running_workflow_step_id="r-workflow-step-000") + response = utaa.create_instance(running_workflow_step_id=rwfs_id) # Assert assert "id" in response @@ -335,14 +346,79 @@ def test_create_instance(): def test_create_and_get_instance(): # Arrange utaa = UnitTestWorkflowAPIAdapter() - response = utaa.create_instance(running_workflow_step_id="r-workflow-step-000") + response = utaa.create_workflow(workflow_definition={"name": "blah"}) + response = utaa.create_running_workflow( + user_id="dlister", + workflow_id=response["id"], + project_id=TEST_PROJECT_ID, + variables={}, + ) + response, _ = utaa.create_running_workflow_step( + running_workflow_id=response["id"], step="step-1" + ) + rwfs_id = response["id"] + response = utaa.create_instance(running_workflow_step_id=rwfs_id) instance_id = response["id"] # Act response, _ = utaa.get_instance(instance_id=instance_id) # Assert - assert response["running_workflow_step_id"] == "r-workflow-step-000" + assert response["running_workflow_step_id"] == rwfs_id + + +def test_create_instance_and_get_step_instance_directory(): + # Arrange + utaa = UnitTestWorkflowAPIAdapter() + response = utaa.create_workflow(workflow_definition={"name": "blah"}) + response = utaa.create_running_workflow( + user_id="dlister", + workflow_id=response["id"], + project_id=TEST_PROJECT_ID, + variables={}, + ) + response, _ = utaa.create_running_workflow_step( + running_workflow_id=response["id"], step="step-1" + ) + rwfs_id = response["id"] + response = utaa.create_instance(running_workflow_step_id=rwfs_id) + i_id = 
response["id"] + + # Act + response, _ = utaa.get_running_workflow_step(running_workflow_step_id=rwfs_id) + + # Assert + assert "instance_directory" in response + assert response["instance_directory"] == f".{i_id}" + + +def test_create_instance_and_get_step_instance_directory_by_name(): + # Arrange + utaa = UnitTestWorkflowAPIAdapter() + response = utaa.create_workflow(workflow_definition={"name": "blah"}) + wf_id = response["id"] + response = utaa.create_running_workflow( + user_id="dlister", + workflow_id=wf_id, + project_id=TEST_PROJECT_ID, + variables={}, + ) + rwf_id = response["id"] + response, _ = utaa.create_running_workflow_step( + running_workflow_id=rwf_id, step="step-1" + ) + rwfs_id = response["id"] + response = utaa.create_instance(running_workflow_step_id=rwfs_id) + i_id = response["id"] + + # Act + response, _ = utaa.get_running_workflow_step_by_name( + running_workflow_id=rwf_id, name="step-1" + ) + + # Assert + assert "instance_directory" in response + assert response["instance_directory"] == f".{i_id}" def test_get_workflow_steps_driving_this_step_when_1st_step(): From 84866d0235b51777ae44fdb997d82e1f820410d1 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Mon, 11 Aug 2025 17:06:24 +0100 Subject: [PATCH 15/57] build: Add experimental json copy of the schema --- workflow/workflow-schema.json | 344 ++++++++++++++++++++++++++++++++++ 1 file changed, 344 insertions(+) create mode 100644 workflow/workflow-schema.json diff --git a/workflow/workflow-schema.json b/workflow/workflow-schema.json new file mode 100644 index 0000000..b70a0b4 --- /dev/null +++ b/workflow/workflow-schema.json @@ -0,0 +1,344 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Data Manager Workflow Schema", + "description": "The Schema for Data Manager Workflows", + "type": "object", + "properties": { + "kind": { + "const": "DataManagerWorkflow" + }, + "kind-version": { + "enum": [ + "2025.2" + ] + }, + "name": { + "$ref": "#/definitions/rfc1035-label-name" + }, + "description": { + "type": "string", + "description": "A description of the workflow" + }, + "steps": { + "type": "array", + "items": { + "$ref": "#/definitions/step" + } + }, + "variables": { + "type": "object", + "additionalProperties": true + }, + "variable-mapping": { + "type": "object", + "additionalProperties": false, + "properties": { + "inputs": { + "type": "array", + "items": { + "$ref": "#/definitions/workflow-input-parameter" + } + }, + "outputs": { + "type": "array", + "items": { + "$ref": "#/definitions/workflow-output-parameter" + } + }, + "options": { + "type": "array", + "items": { + "$ref": "#/definitions/workflow-option-parameter" + } + } + } + } + }, + "required": [ + "kind", + "kind-version", + "name", + "steps" + ], + "definitions": { + "rfc1035-label-name": { + "type": "string", + "pattern": "^[a-z][a-z0-9-]{,63}(? 
Date: Mon, 11 Aug 2025 17:16:18 +0100 Subject: [PATCH 16/57] style: Removed unnecessary json schema file --- workflow/workflow-schema.json | 344 ---------------------------------- 1 file changed, 344 deletions(-) delete mode 100644 workflow/workflow-schema.json diff --git a/workflow/workflow-schema.json b/workflow/workflow-schema.json deleted file mode 100644 index b70a0b4..0000000 --- a/workflow/workflow-schema.json +++ /dev/null @@ -1,344 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "title": "Data Manager Workflow Schema", - "description": "The Schema for Data Manager Workflows", - "type": "object", - "properties": { - "kind": { - "const": "DataManagerWorkflow" - }, - "kind-version": { - "enum": [ - "2025.2" - ] - }, - "name": { - "$ref": "#/definitions/rfc1035-label-name" - }, - "description": { - "type": "string", - "description": "A description of the workflow" - }, - "steps": { - "type": "array", - "items": { - "$ref": "#/definitions/step" - } - }, - "variables": { - "type": "object", - "additionalProperties": true - }, - "variable-mapping": { - "type": "object", - "additionalProperties": false, - "properties": { - "inputs": { - "type": "array", - "items": { - "$ref": "#/definitions/workflow-input-parameter" - } - }, - "outputs": { - "type": "array", - "items": { - "$ref": "#/definitions/workflow-output-parameter" - } - }, - "options": { - "type": "array", - "items": { - "$ref": "#/definitions/workflow-option-parameter" - } - } - } - } - }, - "required": [ - "kind", - "kind-version", - "name", - "steps" - ], - "definitions": { - "rfc1035-label-name": { - "type": "string", - "pattern": "^[a-z][a-z0-9-]{,63}(? Date: Mon, 11 Aug 2025 18:58:04 +0100 Subject: [PATCH 17/57] fix: Better rfc1035-label-name pattern --- workflow/workflow-schema.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/workflow-schema.yaml b/workflow/workflow-schema.yaml index 70a6ad3..475d3a1 100644 --- a/workflow/workflow-schema.yaml +++ b/workflow/workflow-schema.yaml @@ -65,7 +65,7 @@ definitions: # See https://kubernetes.io/docs/concepts/overview/working-with-objects/names/ rfc1035-label-name: type: string - pattern: ^[a-z][a-z0-9-]{,63}(?- A value compatible with Kubernetes variables to allow it to be used ins Pod Label From de34339a16e5b7cdc732ab8eb8a2a0d4773bac4b Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Mon, 11 Aug 2025 19:21:14 +0100 Subject: [PATCH 18/57] fix: Better rfc1035-label-name regex --- workflow/workflow-schema.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/workflow-schema.yaml b/workflow/workflow-schema.yaml index 475d3a1..651308e 100644 --- a/workflow/workflow-schema.yaml +++ b/workflow/workflow-schema.yaml @@ -65,7 +65,7 @@ definitions: # See https://kubernetes.io/docs/concepts/overview/working-with-objects/names/ rfc1035-label-name: type: string - pattern: ^[a-z][a-z0-9-]{,63}$(?- A value compatible with Kubernetes variables to allow it to be used ins Pod Label From e7adc1b536982dc5d3b9b0b8526da5ce4bfda090 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Mon, 11 Aug 2025 19:25:38 +0100 Subject: [PATCH 19/57] test: Fix decoder tests --- tests/test_decoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_decoder.py b/tests/test_decoder.py index f9ead02..ddac876 100644 --- a/tests/test_decoder.py +++ b/tests/test_decoder.py @@ -130,7 +130,7 @@ def test_workflow_name_with_spaces(): # Assert assert ( - error == "'workflow with spaces' does not match '^[a-z][a-z0-9-]{,63}(? 
Date: Fri, 15 Aug 2025 16:06:21 +0100
Subject: [PATCH 20/57] test: Fix test module name (for consistency)

---
 tests/{test_test_api_adapter.py => test_test_wapi_adapter.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tests/{test_test_api_adapter.py => test_test_wapi_adapter.py} (100%)

diff --git a/tests/test_test_api_adapter.py b/tests/test_test_wapi_adapter.py
similarity index 100%
rename from tests/test_test_api_adapter.py
rename to tests/test_test_wapi_adapter.py

From 39bb840888eee2a4305495dda689d72ad5379316 Mon Sep 17 00:00:00 2001
From: "Alan B. Christie" <29806285+alanbchristie@users.noreply.github.com>
Date: Fri, 15 Aug 2025 22:25:22 +0100
Subject: [PATCH 21/57] Remove unnecessary logic

---
 tests/instance_launcher.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/tests/instance_launcher.py b/tests/instance_launcher.py
index ab7cab2..3256c0b 100644
--- a/tests/instance_launcher.py
+++ b/tests/instance_launcher.py
@@ -76,19 +76,15 @@ def launch(self, launch_parameters: LaunchParameters) -> LaunchResult:

         os.makedirs(EXECUTION_DIRECTORY, exist_ok=True)

-        # We're passed a RunningWorkflowStep ID but a record is expected to have been
-        # created bt the caller, we simply create instance records.
-        response, _ = self._api_adapter.get_running_workflow_step(
-            running_workflow_step_id=launch_parameters.running_workflow_step_id
-        )
-        # Now simulate the creation of a Task and Instance record
+        # Create an Instance record (and dummy Task ID)
         response = self._api_adapter.create_instance(
             running_workflow_step_id=launch_parameters.running_workflow_step_id
         )
         instance_id = response["id"]
         task_id = "task-00000000-0000-0000-0000-000000000001"

-        # Apply variables to the step's Job command.
+        # Get the job definition.
+        # This is expected to exist in the tests/job-definitions directory.
         job, _ = self._api_adapter.get_job(
             collection=launch_parameters.specification["collection"],
             job=launch_parameters.specification["job"],
             version=launch_parameters.specification["version"],
         )
         assert job

-        # Now apply the variables to the command
+        # Now apply the provided variables to the command.
+        # The command may not need any, but we do the decoding anyway.
         decoded_command, status = job_decoder.decode(
             job["command"],
             launch_parameters.specification_variables,

From 2a6b708afd98c89b4093988857f6c331f82755aa Mon Sep 17 00:00:00 2001
From: Kalev Takkis
Date: Mon, 18 Aug 2025 12:51:34 +0100
Subject: [PATCH 22/57] fix: stashing

---
 tests/test_workflow_engine_examples.py | 4 +++-
 workflow/decoder.py                    | 10 ++++++++++
 workflow/workflow_engine.py            | 27 +++++++++++++++++---------
 3 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/tests/test_workflow_engine_examples.py b/tests/test_workflow_engine_examples.py
index 8e4208a..e2d23ae 100644
--- a/tests/test_workflow_engine_examples.py
+++ b/tests/test_workflow_engine_examples.py
@@ -1,6 +1,7 @@
 import os
 import time
 from datetime import datetime, timezone
+from pprint import pprint
 from typing import Any

 import pytest
@@ -458,7 +459,8 @@ def test_workflow_engine_simple_python_fanout(basic_engine):

     # Additional, detailed checks...
# Check we only have one RunningWorkflowStep, and it succeeded response = da.get_running_workflow_steps(running_workflow_id=r_wfid) - print("response", response) + print("response") + pprint(response) assert response["count"] == 2 assert response["running_workflow_steps"][0]["done"] diff --git a/workflow/decoder.py b/workflow/decoder.py index 3e12ecb..6947036 100644 --- a/workflow/decoder.py +++ b/workflow/decoder.py @@ -280,3 +280,13 @@ def set_step_variables( result |= options return result + + +def get_step_replication_param(*, step: dict[str, Any]) -> str | Any: + """Return step's replication info""" + replicator = step.get("replicate", None) + if replicator: + # 'using' is a dict but there can be only single value for now + replicator = list(replicator["using"].values())[0] + + return replicator diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index 672e9bb..a502893 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -25,6 +25,7 @@ import logging import sys from http import HTTPStatus +from pprint import pprint from typing import Any, Dict, Optional from decoder.decoder import TextEncoding, decode @@ -40,6 +41,7 @@ ) from .decoder import ( + get_step_replication_param, get_workflow_job_input_names_for_step, set_step_variables, workflow_step_has_outputs, @@ -466,11 +468,13 @@ def _validate_step_command( running_wf, _ = self._wapi_adapter.get_running_workflow( running_workflow_id=running_wf_id ) - print("running wf", running_wf) + print("running wf") + pprint(running_wf) workflow_id = running_wf["workflow"]["id"] workflow, _ = self._wapi_adapter.get_workflow(workflow_id=workflow_id) - print("workflow", workflow) + print("workflow") + pprint(workflow) # for step in workflow["steps"]: # if step["name"] in previous_step_names: @@ -556,10 +560,14 @@ def _launch( wf_step_data, _ = self._wapi_adapter.get_workflow_steps_driving_this_step( running_workflow_step_id=rwfs_id, ) - print("wf_step_data", wf_step_data) + print("wf_step_data") + pprint(wf_step_data) assert wf_step_data["caller_step_index"] >= 0 our_step_index: int = wf_step_data["caller_step_index"] + print("step in _launch:", step_name) + pprint(step) + # Now check the step command can be executed # (by trying to decoding the Job command). 
# @@ -585,11 +593,7 @@ def _launch( variables: dict[str, Any] = error_or_variables print("variables", variables) # find out if and by which parameter this step should be replicated - replicator = "" - if replicate := step.get("replicate", {}): - if using := replicate.get("using", {}): - # using is a dict but there can be only single value for now - replicator = list(using.values())[0] + replicator = get_step_replication_param(step=step) _LOGGER.info( "Launching step: RunningWorkflow=%s RunningWorkflowStep=%s step=%s" @@ -634,6 +638,10 @@ def _launch( # # 'running_workflow_step_inputs' # A list of Job input variable names + + print("variables") + pprint(variables) + inputs: list[str] = [] inputs.extend(iter(get_workflow_job_input_names_for_step(wf, step_name))) if replicator: @@ -645,7 +653,8 @@ def _launch( else: single_step_variables = [variables] - print("single step variables", single_step_variables) + print("single step variables") + pprint(single_step_variables) for params in single_step_variables: lp: LaunchParameters = LaunchParameters( From 340670e795c7e5d6329564930c627845591b814a Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Tue, 19 Aug 2025 12:10:09 +0100 Subject: [PATCH 23/57] fix: stashing --- tests/test_workflow_engine_examples.py | 18 ++++ tests/wapi_adapter.py | 3 +- .../shortcut-example-1.yaml | 4 +- .../simple-python-fanout.yaml | 17 --- .../simple-python-molprops-with-options.yaml | 100 +++++++++++++----- .../simple-python-molprops.yaml | 2 - workflow/decoder.py | 60 ++++++----- workflow/workflow_engine.py | 2 + 8 files changed, 128 insertions(+), 78 deletions(-) diff --git a/tests/test_workflow_engine_examples.py b/tests/test_workflow_engine_examples.py index e2d23ae..d4c1a6f 100644 --- a/tests/test_workflow_engine_examples.py +++ b/tests/test_workflow_engine_examples.py @@ -206,9 +206,11 @@ def test_workflow_engine_example_smiles_to_file(basic_engine): assert project_file_exists(output_file) +@pytest.mark.skip("Unrealistic test") def test_workflow_engine_shortcut_example_1(basic_engine): # Arrange md, da = basic_engine + # Make sure files that should be generated by the test # do not exist before we run the test. output_file_a = "a.sdf" @@ -234,9 +236,17 @@ def test_workflow_engine_shortcut_example_1(basic_engine): assert project_file_exists(output_file_b) +@pytest.mark.skip("temporary skip") def test_workflow_engine_simple_python_molprops(basic_engine): # Arrange md, da = basic_engine + + da.mock_get_running_workflow_step_output_values_for_output( + step_name="step2", + output_variable="outputFile", + output="step1.out.smi", + ) + # Make sure files that should be generated by the test # do not exist before we run the test. output_file_1 = "step1.out.smi" @@ -318,6 +328,13 @@ def test_workflow_engine_simple_python_molprops(basic_engine): def test_workflow_engine_simple_python_molprops_with_options(basic_engine): # Arrange md, da = basic_engine + + da.mock_get_running_workflow_step_output_values_for_output( + step_name="step1", + output_variable="outputFile", + output="step1.out.smi", + ) + # Make sure files that should be generated by the test # do not exist before we run the test. 
output_file_1 = "step1.out.smi" @@ -383,6 +400,7 @@ def test_workflow_engine_simple_python_molprops_with_options(basic_engine): "simple-python-molprops-with-options", { "candidateMolecules": input_file_1, + "outputFile": output_file_1, "rdkitPropertyName": "prop", "rdkitPropertyValue": 1.2, }, diff --git a/tests/wapi_adapter.py b/tests/wapi_adapter.py index a770f3b..1f1f6d0 100644 --- a/tests/wapi_adapter.py +++ b/tests/wapi_adapter.py @@ -448,7 +448,7 @@ def realise_outputs( # Methods not declared in the ABC def mock_get_running_workflow_step_output_values_for_output( - self, *, step_name: str, output_variable: str, output: list[str] + self, *, step_name: str, output_variable: str, output: list[str] | str ) -> None: """Sets the output response for a step. Limitation is that there can only be one record for each step name @@ -456,7 +456,6 @@ def mock_get_running_workflow_step_output_values_for_output( to check the output variable name matches.""" assert isinstance(step_name, str) assert isinstance(output_variable, str) - assert isinstance(output, list) UnitTestWorkflowAPIAdapter.lock.acquire() with open(_MOCK_STEP_OUTPUT_FILE, "rb") as pickle_file: diff --git a/tests/workflow-definitions/shortcut-example-1.yaml b/tests/workflow-definitions/shortcut-example-1.yaml index e5b719d..6a0ef31 100644 --- a/tests/workflow-definitions/shortcut-example-1.yaml +++ b/tests/workflow-definitions/shortcut-example-1.yaml @@ -14,7 +14,7 @@ steps: version: "1.0.0" outputs: - output: 'outputFile' - as: 'a.sdf' + # as: 'a.sdf' - name: example-1-step-2 description: The first step @@ -29,4 +29,4 @@ steps: output: 'outputFile' outputs: - output: 'outputFile' - as: 'b.sdf' + # as: 'b.sdf' diff --git a/tests/workflow-definitions/simple-python-fanout.yaml b/tests/workflow-definitions/simple-python-fanout.yaml index 01f576f..c0fd343 100644 --- a/tests/workflow-definitions/simple-python-fanout.yaml +++ b/tests/workflow-definitions/simple-python-fanout.yaml @@ -31,7 +31,6 @@ steps: workflow-input: candidateMolecules outputs: - output: outputFile - # as: chunk_*.smi - name: parallel-step description: Add some params @@ -52,19 +51,3 @@ steps: output: outputFile outputs: - output: outputFile - # as: parallel-step.out.smi - -# - name: final-step -# description: Collate results -# specification: -# collection: workflow-engine-unit-test-jobs -# job: concatenate -# version: "1.0.0" -# inputs: -# - input: inputFile -# from: -# step: parallel-step -# output: outputFile -# outputs: -# - output: outputFile -# # as: final-step.out.smi diff --git a/tests/workflow-definitions/simple-python-molprops-with-options.yaml b/tests/workflow-definitions/simple-python-molprops-with-options.yaml index 2fc1155..0e8874d 100644 --- a/tests/workflow-definitions/simple-python-molprops-with-options.yaml +++ b/tests/workflow-definitions/simple-python-molprops-with-options.yaml @@ -53,40 +53,82 @@ variables: minimum: 0 maximum: 1 -variable-mapping: - inputs: - - name: candidateMolecules - outputs: - - name: clusteredMolecules - from: - step: step2 - output: outputFile - options: - - name: rdkitPropertyName - default: name - as: - - option: name - step: step1 - - name: rdkitPropertyValue - as: - - option: value - step: step1 +# variable-mapping: +# inputs: +# - name: candidateMolecules +# outputs: +# - name: outputFile +# from: +# step: step1 +# output: outputFile +# - name: clusteredMolecules +# from: +# step: step1 +# output: outputFile +# options: +# - name: rdkitPropertyName +# default: name +# as: +# - option: name +# step: step1 +# - name: 
rdkitPropertyValue +# as: +# - option: value +# step: step1 steps: +# - name: step1 +# description: Add column 1 +# specification: +# collection: workflow-engine-unit-test-jobs +# job: rdkit-molprops +# version: "1.0.0" +# inputs: +# - input: inputFile +# from: +# workflow-input: candidateMolecules +# # outputs: +# # - output: outputFile + +# - name: step2 +# description: Add column 2 +# specification: +# collection: workflow-engine-unit-test-jobs +# job: cluster-butina +# version: "1.0.0" +# variables: +# name: "col2" +# value: "999" +# inputs: +# - input: inputFile +# from: +# step: step1 +# output: outputFile +# outputs: +# - output: outputFile + + - name: step1 description: Add column 1 specification: collection: workflow-engine-unit-test-jobs job: rdkit-molprops version: "1.0.0" - inputs: - - input: inputFile + variable-mapping: + - name: inputFile from: - workflow-input: candidateMolecules - outputs: - - output: outputFile - as: step1.out.smi + workflow-variable: candidateMolecules + - name: name + from: + workflow-variable: rdkitPropertyName + - name: value + from: + workflow-variable: rdkitPropertyValue + - name: outputFile + from: + workflow-variable: clusteredMolecules + - name: step2 description: Add column 2 @@ -97,11 +139,11 @@ steps: variables: name: "col2" value: "999" - inputs: - - input: inputFile + variable-mapping: + - name: inputFile from: step: step1 output: outputFile - outputs: - - output: outputFile - as: step2.out.smi + - name: outputFile + from: + workflow-variable: outputFile diff --git a/tests/workflow-definitions/simple-python-molprops.yaml b/tests/workflow-definitions/simple-python-molprops.yaml index dddb080..8abd637 100644 --- a/tests/workflow-definitions/simple-python-molprops.yaml +++ b/tests/workflow-definitions/simple-python-molprops.yaml @@ -29,7 +29,6 @@ steps: workflow-input: candidateMolecules outputs: - output: outputFile - as: step1.out.smi - name: step2 description: Add column 2 @@ -47,4 +46,3 @@ steps: output: outputFile outputs: - output: outputFile - as: step2.out.smi diff --git a/workflow/decoder.py b/workflow/decoder.py index 6947036..d03b314 100644 --- a/workflow/decoder.py +++ b/workflow/decoder.py @@ -4,6 +4,7 @@ """ import os +from pprint import pprint from typing import Any import jsonschema @@ -225,8 +226,10 @@ def set_step_variables( """ result = {} - print("ssv: wf vars", workflow_variables) - print("ssv: inputs", inputs) + print("ssv: wf vars:") + pprint(workflow_variables) + print("ssv: inputs:") + pprint(inputs) print("ssv: outputs", outputs) print("ssv: step_outputs", step_outputs) print("ssv: prev step outputs", previous_step_outputs) @@ -240,30 +243,35 @@ def set_step_variables( p_val = workflow_variables[val["workflow-input"]] result[p_key] = p_val elif "step" in val.keys(): - for out in previous_step_outputs: - if out["output"] == val["output"]: - # p_val = out["as"] - if step_outputs["output"]: - p_val = step_outputs["output"] - print("\n!!!!!!!!!!!!!if clause!!!!!!!!!!!!!!!!!!!!!\n") - print(p_val) - else: - # what do I need to do here?? - print("\n!!!!!!!!!!!!!else clause!!!!!!!!!!!!!!!!!!!!!\n") - print(out) - print(val) - - # this bit handles multiple inputs: if a step - # requires input from multiple steps, add them to - # the list in result dict. 
this is the reason for - # mypy ignore statements, mypy doesn't understand - # redefinition - if p_key in result: - if not isinstance(result[p_key], set): - result[p_key] = {result[p_key]} # type: ignore [assignment] - result[p_key].add(p_val) # type: ignore [attr-defined] - else: - result[p_key] = p_val + # this links the variable to previous step output + if previous_step_outputs: + for out in previous_step_outputs: + if out["output"] == val["output"]: + # p_val = out["as"] + if step_outputs["output"]: + p_val = step_outputs["output"] + print("\n!!!!!!!!!!!!!if clause!!!!!!!!!!!!!!!!!!!!!\n") + print(p_val) + else: + # what do I need to do here?? + print("\n!!!!!!!!!!!!!else clause!!!!!!!!!!!!!!!!!!!!!\n") + print(out) + print(val) + + # this bit handles multiple inputs: if a step + # requires input from multiple steps, add them to + # the list in result dict. this is the reason for + # mypy ignore statements, mypy doesn't understand + # redefinition + if p_key in result: + if not isinstance(result[p_key], set): + result[p_key] = {result[p_key]} # type: ignore [assignment] + result[p_key].add(p_val) # type: ignore [attr-defined] + else: + result[p_key] = p_val + else: + if val["output"] in workflow_variables: + result[p_key] = workflow_variables[val["output"]] for item in outputs: p_key = item["output"] diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index a502893..4736851 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -512,6 +512,8 @@ def _validate_step_command( except AssertionError: print("no output for step", running_workflow_step_id, k, v) + print("final prev step outputs", previous_step_outputs) + step_vars = set_step_variables( workflow=workflow, workflow_variables=all_variables, From e3ece79dedc54a59a6e29d4e33525bee5c6c7a83 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Tue, 19 Aug 2025 15:53:23 +0100 Subject: [PATCH 24/57] refactor: Major refactor (new variable-mapping schema) --- tests/test_decoder.py | 137 ++--------- ...est_workflow_validator_for_create_level.py | 24 +- .../test_workflow_validator_for_run_level.py | 78 +++---- .../test_workflow_validator_for_tag_level.py | 67 ++---- ...ate-step-input-output-variable-names.yaml} | 47 ++-- .../duplicate-workflow-variable-names.yaml | 50 ---- .../replicate-using-undeclared-input.yaml | 37 ++- .../shortcut-example-1.yaml | 18 +- .../simple-python-fanout.yaml | 34 +-- .../simple-python-molprops-with-options.yaml | 96 ++------ .../simple-python-molprops.yaml | 36 ++- .../simple-python-parallel.yaml | 52 ++--- .../step-specification-variable-names.yaml | 1 - .../workflow-options.yaml | 54 ----- workflow/decoder.py | 156 +++---------- workflow/workflow-schema.yaml | 217 ++++++------------ workflow/workflow_engine.py | 4 +- workflow/workflow_validator.py | 23 +- 18 files changed, 283 insertions(+), 848 deletions(-) rename tests/workflow-definitions/{duplicate-step-output-variable-names.yaml => duplicate-step-input-output-variable-names.yaml} (52%) delete mode 100644 tests/workflow-definitions/duplicate-workflow-variable-names.yaml delete mode 100644 tests/workflow-definitions/workflow-options.yaml diff --git a/tests/test_decoder.py b/tests/test_decoder.py index ddac876..5f5da9f 100644 --- a/tests/test_decoder.py +++ b/tests/test_decoder.py @@ -43,19 +43,6 @@ ) assert _SIMPLE_PYTHON_MOLPROPS_WITH_OPTIONS_WORKFLOW -_DUPLICATE_WORKFLOW_VARIABLE_NAMES_WORKFLOW_FILE: str = os.path.join( - os.path.dirname(__file__), - "workflow-definitions", - 
"duplicate-workflow-variable-names.yaml", -) -with open( - _DUPLICATE_WORKFLOW_VARIABLE_NAMES_WORKFLOW_FILE, "r", encoding="utf8" -) as workflow_file: - _DUPLICATE_WORKFLOW_VARIABLE_NAMES_WORKFLOW: Dict[str, Any] = yaml.safe_load( - workflow_file - ) -assert _DUPLICATE_WORKFLOW_VARIABLE_NAMES_WORKFLOW - _SIMPLE_PYTHON_PARALLEL_FILE: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", @@ -78,15 +65,6 @@ ) assert _STEP_SPECIFICATION_VARIABLE_NAMES_WORKFLOW -_WORKFLOW_OPTIONS_WORKFLOW_FILE: str = os.path.join( - os.path.dirname(__file__), - "workflow-definitions", - "workflow-options.yaml", -) -with open(_WORKFLOW_OPTIONS_WORKFLOW_FILE, "r", encoding="utf8") as workflow_file: - _WORKFLOW_OPTIONS: Dict[str, Any] = yaml.safe_load(workflow_file) -assert _WORKFLOW_OPTIONS - def test_validate_schema_for_minimal(): # Arrange @@ -144,7 +122,7 @@ def test_validate_schema_for_shortcut_example_1(): assert error is None -def test_validate_schema_for_python_simple_molprops(): +def test_validate_schema_for_simple_python_molprops(): # Arrange # Act @@ -164,16 +142,7 @@ def test_validate_schema_for_step_specification_variable_names(): assert error is None -def test_validate_schema_for_workflow_options(): - # Arrange - - # Act - error = decoder.validate_schema(_WORKFLOW_OPTIONS) - - # Assert - assert error is None - - +@pytest.mark.skip(reason="DO not support combination atm") def test_validate_schema_for_simple_python_parallel(): # Arrange @@ -188,7 +157,7 @@ def test_get_workflow_variables_for_smiple_python_molprops(): # Arrange # Act - wf_variables = decoder.get_variable_names(_SIMPLE_PYTHON_MOLPROPS_WORKFLOW) + wf_variables = decoder.get_workflow_variable_names(_SIMPLE_PYTHON_MOLPROPS_WORKFLOW) # Assert assert len(wf_variables) == 2 @@ -228,91 +197,6 @@ def test_get_workflow_steps(): assert steps[1]["name"] == "step2" -def test_get_workflow_variables_for_duplicate_variables(): - # Arrange - - # Act - names = decoder.get_variable_names(_DUPLICATE_WORKFLOW_VARIABLE_NAMES_WORKFLOW) - - # Assert - assert len(names) == 2 - assert names[0] == "x" - assert names[1] == "x" - - -def test_get_required_variable_names_for_simnple_python_molprops_with_options(): - # Arrange - - # Act - rqd_variables = decoder.get_required_variable_names( - _SIMPLE_PYTHON_MOLPROPS_WITH_OPTIONS_WORKFLOW - ) - - # Assert - assert len(rqd_variables) == 2 - assert "candidateMolecules" in rqd_variables - assert "rdkitPropertyValue" in rqd_variables - - -def test_set_variables_from_options_for_step_for_simnple_python_molprops_with_options(): - # Arrange - variables = { - "rdkitPropertyName": "propertyName", - "rdkitPropertyValue": "propertyValue", - } - - # Act - new_variables = decoder.set_variables_from_options_for_step( - _SIMPLE_PYTHON_MOLPROPS_WITH_OPTIONS_WORKFLOW, - variables, - "step1", - ) - - # Assert - assert len(new_variables) == 2 - assert "name" in new_variables - assert "value" in new_variables - assert new_variables["name"] == "propertyName" - assert new_variables["value"] == "propertyValue" - - -def test_get_workflow_inputs_for_step_with_name_step1(): - # Arrange - - # Act - inputs = decoder.get_workflow_job_input_names_for_step( - _SIMPLE_PYTHON_MOLPROPS_WITH_OPTIONS_WORKFLOW, "step1" - ) - - # Assert - assert len(inputs) == 1 - assert "inputFile" in inputs - - -def test_get_workflow_inputs_for_step_with_name_step2(): - # Arrange - - # Act - inputs = decoder.get_workflow_job_input_names_for_step( - _SIMPLE_PYTHON_MOLPROPS_WITH_OPTIONS_WORKFLOW, "step2" - ) - - # Assert - assert not inputs - - -def 
test_get_workflow_inputs_for_step_with_unkown_step_name(): - # Arrange - - # Act - inputs = decoder.get_workflow_job_input_names_for_step( - _SIMPLE_PYTHON_MOLPROPS_WITH_OPTIONS_WORKFLOW, "unknown" - ) - - # Assert - assert not inputs - - def test_get_workflow_outputs_for_step_with_name_step1(): # Arrange @@ -351,11 +235,16 @@ def test_get_workflow_outputs_for_step_with_unkown_step_name(): def test_get_step_input_variable_names_when_duplicates(): # Arrange + workflow_filename: str = os.path.join( + os.path.dirname(__file__), + "workflow-definitions", + "duplicate-step-input-output-variable-names.yaml", + ) + with open(workflow_filename, "r", encoding="utf8") as wf_file: + definition: Dict[str, Any] = yaml.safe_load(wf_file) # Act - inputs = decoder.get_step_input_variable_names( - _SIMPLE_PYTHON_PARALLEL_WORKFLOW, "final-step" - ) + inputs = decoder.get_step_input_variable_names(definition, "step-1") # Assert assert len(inputs) == 2 @@ -368,13 +257,13 @@ def test_get_step_output_variable_names_when_duplicates(): workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", - "duplicate-step-output-variable-names.yaml", + "duplicate-step-input-output-variable-names.yaml", ) with open(workflow_filename, "r", encoding="utf8") as wf_file: definition: Dict[str, Any] = yaml.safe_load(wf_file) # Act - outputs = decoder.get_step_output_variable_names(definition, "step-1") + outputs = decoder.get_step_output_variable_names(definition, "step-2") # Assert assert len(outputs) == 2 diff --git a/tests/test_workflow_validator_for_create_level.py b/tests/test_workflow_validator_for_create_level.py index ad429c9..18f621f 100644 --- a/tests/test_workflow_validator_for_create_level.py +++ b/tests/test_workflow_validator_for_create_level.py @@ -26,10 +26,10 @@ def test_validate_minimal(): def test_validate_example_nop_file(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "example-nop-fail.yaml" ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -46,10 +46,10 @@ def test_validate_example_nop_file(): def test_validate_example_smiles_to_file(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "example-smiles-to-file.yaml" ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -66,10 +66,10 @@ def test_validate_example_smiles_to_file(): def test_validate_example_two_step_nop(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "example-two-step-nop.yaml" ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -86,10 +86,10 @@ def test_validate_example_two_step_nop(): def test_validate_shortcut_example_1(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "shortcut-example-1.yaml" ) - with 
open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -106,10 +106,10 @@ def test_validate_shortcut_example_1(): def test_validate_simple_python_molprops(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "simple-python-molprops.yaml" ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -126,12 +126,12 @@ def test_validate_simple_python_molprops(): def test_validate_simple_python_molprops_with_options(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "simple-python-molprops-with-options.yaml", ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow diff --git a/tests/test_workflow_validator_for_run_level.py b/tests/test_workflow_validator_for_run_level.py index 2df1630..b1f6118 100644 --- a/tests/test_workflow_validator_for_run_level.py +++ b/tests/test_workflow_validator_for_run_level.py @@ -11,10 +11,10 @@ def test_validate_example_nop_file(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "example-nop-fail.yaml" ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -31,10 +31,10 @@ def test_validate_example_nop_file(): def test_validate_duplicate_step_names(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "duplicate-step-names.yaml" ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -51,10 +51,10 @@ def test_validate_duplicate_step_names(): def test_validate_example_smiles_to_file(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "example-smiles-to-file.yaml" ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -71,10 +71,10 @@ def test_validate_example_smiles_to_file(): def test_validate_example_two_step_nop(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "example-two-step-nop.yaml" ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -91,10 +91,10 @@ def test_validate_example_two_step_nop(): def 
test_validate_shortcut_example_1(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "shortcut-example-1.yaml" ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -111,10 +111,10 @@ def test_validate_shortcut_example_1(): def test_validate_simple_python_molprops(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "simple-python-molprops.yaml" ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow variables = {"candidateMolecules": "input.sdf", "clusteredMolecules": "output.sdf"} @@ -133,17 +133,19 @@ def test_validate_simple_python_molprops(): def test_validate_simple_python_molprops_with_options_when_missing_required(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "simple-python-molprops-with-options.yaml", ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow variables = { "candidateMolecules": "input.sdf", "clusteredMolecules": "output.sdf", + "outputFile": "results.sdf", + "rdkitPropertyName": "name", } # Act @@ -162,12 +164,12 @@ def test_validate_simple_python_molprops_with_options_when_missing_required(): def test_validate_simple_python_molprops_with_options(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "simple-python-molprops-with-options.yaml", ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow variables = { @@ -175,6 +177,7 @@ def test_validate_simple_python_molprops_with_options(): "clusteredMolecules": "output.sdf", "rdkitPropertyName": "col1", "rdkitPropertyValue": 123, + "outputFile": "results.sdf", } # Act @@ -191,10 +194,10 @@ def test_validate_simple_python_molprops_with_options(): def test_validate_simple_python_molprops_with_missing_input(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "simple-python-molprops.yaml" ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow variables = {"clusteredMolecules": "output.sdf"} @@ -213,36 +216,15 @@ def test_validate_simple_python_molprops_with_missing_input(): ] -def test_validate_duplicate_workflow_variable_names(): - # Arrange - workflow_file: str = os.path.join( - os.path.dirname(__file__), - "workflow-definitions", - "duplicate-workflow-variable-names.yaml", - ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: - workflow: dict[str, Any] = yaml.load(workflow_file, 
Loader=yaml.FullLoader) - assert workflow - - # Act - error = WorkflowValidator.validate( - level=ValidationLevel.TAG, - workflow_definition=workflow, - ) - - # Assert - assert error.error_num == 6 - assert error.error_msg == ["Duplicate workflow variable names found: x"] - - +@pytest.mark.skip("Unsupported workflow") def test_validate_simple_python_parallel(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "simple-python-parallel.yaml", ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -258,12 +240,12 @@ def test_validate_simple_python_parallel(): def test_validate_replicate_using_undeclared_input(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "replicate-using-undeclared-input.yaml", ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -282,12 +264,12 @@ def test_validate_replicate_using_undeclared_input(): def test_validate_duplicate_step_output_variable_names(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", - "duplicate-step-output-variable-names.yaml", + "duplicate-step-input-output-variable-names.yaml", ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -300,5 +282,5 @@ def test_validate_duplicate_step_output_variable_names(): # Assert assert error.error_num == 3 assert error.error_msg == [ - "Duplicate step output variable: outputFile (step=step-1)" + "Duplicate step output variable: outputFile (step=step-2)" ] diff --git a/tests/test_workflow_validator_for_tag_level.py b/tests/test_workflow_validator_for_tag_level.py index 4445502..8a5d264 100644 --- a/tests/test_workflow_validator_for_tag_level.py +++ b/tests/test_workflow_validator_for_tag_level.py @@ -11,10 +11,10 @@ def test_validate_example_nop_file(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "example-nop-fail.yaml" ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -31,10 +31,10 @@ def test_validate_example_nop_file(): def test_validate_duplicate_step_names(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "duplicate-step-names.yaml" ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -51,10 +51,10 @@ def test_validate_duplicate_step_names(): def test_validate_example_smiles_to_file(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = 
os.path.join( os.path.dirname(__file__), "workflow-definitions", "example-smiles-to-file.yaml" ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -71,10 +71,10 @@ def test_validate_example_smiles_to_file(): def test_validate_example_two_step_nop(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "example-two-step-nop.yaml" ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -91,10 +91,10 @@ def test_validate_example_two_step_nop(): def test_validate_shortcut_example_1(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "shortcut-example-1.yaml" ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -109,14 +109,15 @@ def test_validate_shortcut_example_1(): assert error.error_msg is None +@pytest.mark.skip("Unsupported workflow") def test_validate_simple_python_parallel(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "simple-python-parallel.yaml", ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -132,10 +133,10 @@ def test_validate_simple_python_parallel(): def test_validate_simple_python_molprops(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "simple-python-molprops.yaml" ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -152,12 +153,12 @@ def test_validate_simple_python_molprops(): def test_validate_simple_python_molprops_with_options(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "simple-python-molprops-with-options.yaml", ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -172,36 +173,14 @@ def test_validate_simple_python_molprops_with_options(): assert error.error_msg is None -def test_validate_duplicate_workflow_variable_names(): - # Arrange - workflow_file: str = os.path.join( - os.path.dirname(__file__), - "workflow-definitions", - "duplicate-workflow-variable-names.yaml", - ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: - workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) - assert workflow - - # Act - error = WorkflowValidator.validate( - level=ValidationLevel.TAG, - workflow_definition=workflow, - 
) - - # Assert - assert error.error_num == 6 - assert error.error_msg == ["Duplicate workflow variable names found: x"] - - def test_validate_replicate_using_undeclared_input(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", "replicate-using-undeclared-input.yaml", ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -220,12 +199,12 @@ def test_validate_replicate_using_undeclared_input(): def test_validate_duplicate_step_output_variable_names(): # Arrange - workflow_file: str = os.path.join( + workflow_filename: str = os.path.join( os.path.dirname(__file__), "workflow-definitions", - "duplicate-step-output-variable-names.yaml", + "duplicate-step-input-output-variable-names.yaml", ) - with open(workflow_file, "r", encoding="utf8") as workflow_file: + with open(workflow_filename, "r", encoding="utf8") as workflow_file: workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) assert workflow @@ -238,5 +217,5 @@ def test_validate_duplicate_step_output_variable_names(): # Assert assert error.error_num == 3 assert error.error_msg == [ - "Duplicate step output variable: outputFile (step=step-1)" + "Duplicate step output variable: outputFile (step=step-2)" ] diff --git a/tests/workflow-definitions/duplicate-step-output-variable-names.yaml b/tests/workflow-definitions/duplicate-step-input-output-variable-names.yaml similarity index 52% rename from tests/workflow-definitions/duplicate-step-output-variable-names.yaml rename to tests/workflow-definitions/duplicate-step-input-output-variable-names.yaml index 5a371a2..bc0b761 100644 --- a/tests/workflow-definitions/duplicate-step-output-variable-names.yaml +++ b/tests/workflow-definitions/duplicate-step-input-output-variable-names.yaml @@ -3,17 +3,8 @@ kind: DataManagerWorkflow kind-version: "2025.2" name: duplicate-step-output-variable-names description: A workflow where step-1 has duplicate output variable names -variable-mapping: - inputs: - - name: x - outputs: - - name: y - from: - step: step-2 - output: outputFile steps: - - name: step-1 description: Add column 1 specification: @@ -23,18 +14,16 @@ steps: variables: name: "col1" value: 123 - inputs: - - input: inputFile - from: - workflow-input: candidateMolecules - - input: inputFile - from: - workflow-input: candidateMolecules - outputs: - - output: outputFile - as: __step1__out.smi - - output: outputFile - as: __step1__out.smi + variable-mapping: + - variable: inputFile + from-workflow-variable: + variable: candidateMolecules + - variable: inputFile + from-workflow-variable: + variable: candidateMolecules + in: + - inputFile + - inputFile - name: step-2 description: Add column 2 @@ -45,11 +34,11 @@ steps: variables: name: "col2" value: "999" - inputs: - - input: inputFile - from: - step: step1 - output: outputFile - outputs: - - output: outputFile - as: __step2__out.smi + variable-mapping: + - variable: inputFile + from-step: + name: step1 + variable: outputFile + out: + - outputFile + - outputFile diff --git a/tests/workflow-definitions/duplicate-workflow-variable-names.yaml b/tests/workflow-definitions/duplicate-workflow-variable-names.yaml deleted file mode 100644 index f524c44..0000000 --- a/tests/workflow-definitions/duplicate-workflow-variable-names.yaml +++ /dev/null @@ -1,50 +0,0 @@ ---- -kind: 
DataManagerWorkflow -kind-version: "2025.2" -name: duplicate-workflow-variable-names -description: A workflow with a duplicate variable name in the input and output -variable-mapping: - inputs: - - name: x - outputs: - - name: x - from: - step: step2 - output: outputFile - -steps: - -- name: step1 - description: Add column 1 - specification: - collection: workflow-engine-unit-test-jobs - job: rdkit-molprops - version: "1.0.0" - variables: - name: "col1" - value: 123 - inputs: - - input: inputFile - from: - workflow-input: candidateMolecules - outputs: - - output: outputFile - as: __step1__out.smi - -- name: step2 - description: Add column 2 - specification: - collection: workflow-engine-unit-test-jobs - job: cluster-butina - version: "1.0.0" - variables: - name: "col2" - value: "999" - inputs: - - input: inputFile - from: - step: step1 - output: outputFile - outputs: - - output: outputFile - as: __step2__out.smi diff --git a/tests/workflow-definitions/replicate-using-undeclared-input.yaml b/tests/workflow-definitions/replicate-using-undeclared-input.yaml index 883ec62..488ddba 100644 --- a/tests/workflow-definitions/replicate-using-undeclared-input.yaml +++ b/tests/workflow-definitions/replicate-using-undeclared-input.yaml @@ -3,17 +3,8 @@ kind: DataManagerWorkflow kind-version: "2025.2" name: replicate-using-undeclared-input description: A workflow that replicates from a variable that's not declared -variable-mapping: - inputs: - - name: x - outputs: - - name: y - from: - step: step2 - output: outputFile steps: - - name: step-1 description: Add column 1 specification: @@ -23,13 +14,12 @@ steps: variables: name: "col1" value: 123 - inputs: - - input: inputFile - from: - workflow-input: candidateMolecules - outputs: - - output: outputFile - as: __step-1__out.smi + variable-mapping: + - variable: inputFile + from-workflow-variable: + variable: candidateMolecules + in: + - outputFile - name: step-2 description: Add column 2 @@ -43,11 +33,10 @@ steps: replicate: using: input: y - inputs: - - input: inputFile - from: - step: step-1 - output: outputFile - outputs: - - output: outputFile - as: __step-2__out.smi + variable-mapping: + - variable: inputFile + from-step: + name: step-1 + variable: outputFile + out: + - outputFile diff --git a/tests/workflow-definitions/shortcut-example-1.yaml b/tests/workflow-definitions/shortcut-example-1.yaml index 6a0ef31..0b6c2c3 100644 --- a/tests/workflow-definitions/shortcut-example-1.yaml +++ b/tests/workflow-definitions/shortcut-example-1.yaml @@ -12,9 +12,6 @@ steps: collection: workflow-engine-unit-test-jobs job: shortcut-example-1-process-a version: "1.0.0" - outputs: - - output: 'outputFile' - # as: 'a.sdf' - name: example-1-step-2 description: The first step @@ -22,11 +19,10 @@ steps: collection: workflow-engine-unit-test-jobs job: shortcut-example-1-process-b version: "1.0.0" - inputs: - - input: 'inputFile' - from: - step: example-1-step-1 - output: 'outputFile' - outputs: - - output: 'outputFile' - # as: 'b.sdf' + variable-mapping: + - variable: inputFile + from-step: + name: example-1-step-1 + variable: outputFile + out: + - outputFile diff --git a/tests/workflow-definitions/simple-python-fanout.yaml b/tests/workflow-definitions/simple-python-fanout.yaml index c0fd343..9e2042f 100644 --- a/tests/workflow-definitions/simple-python-fanout.yaml +++ b/tests/workflow-definitions/simple-python-fanout.yaml @@ -4,18 +4,8 @@ kind-version: "2025.2" name: python-workflow description: >- A simple parallel workflow. 
Input is split into N chunks and N processes of the same job is started -variable-mapping: - inputs: - - name: candidateMolecules - outputs: - - name: clusteredMolecules - from: - step: final-step - output: outputFile - steps: - - name: first-step description: Create inputs specification: @@ -25,12 +15,10 @@ steps: variables: name: "count" value: "1" - inputs: - - input: inputFile - from: - workflow-input: candidateMolecules - outputs: - - output: outputFile + variable-mapping: + - variable: inputFile + from-workflow-variable: + variable: candidateMolecules - name: parallel-step description: Add some params @@ -44,10 +32,10 @@ steps: replicate: using: input: inputFile - inputs: - - input: inputFile - from: - step: first-step - output: outputFile - outputs: - - output: outputFile + variable-mapping: + - variable: inputFile + from-step: + name: first-step + variable: outputFile + out: + - outputFile diff --git a/tests/workflow-definitions/simple-python-molprops-with-options.yaml b/tests/workflow-definitions/simple-python-molprops-with-options.yaml index 0e8874d..696855d 100644 --- a/tests/workflow-definitions/simple-python-molprops-with-options.yaml +++ b/tests/workflow-definitions/simple-python-molprops-with-options.yaml @@ -53,62 +53,7 @@ variables: minimum: 0 maximum: 1 -# variable-mapping: -# inputs: -# - name: candidateMolecules -# outputs: -# - name: outputFile -# from: -# step: step1 -# output: outputFile -# - name: clusteredMolecules -# from: -# step: step1 -# output: outputFile -# options: -# - name: rdkitPropertyName -# default: name -# as: -# - option: name -# step: step1 -# - name: rdkitPropertyValue -# as: -# - option: value -# step: step1 - steps: - -# - name: step1 -# description: Add column 1 -# specification: -# collection: workflow-engine-unit-test-jobs -# job: rdkit-molprops -# version: "1.0.0" -# inputs: -# - input: inputFile -# from: -# workflow-input: candidateMolecules -# # outputs: -# # - output: outputFile - -# - name: step2 -# description: Add column 2 -# specification: -# collection: workflow-engine-unit-test-jobs -# job: cluster-butina -# version: "1.0.0" -# variables: -# name: "col2" -# value: "999" -# inputs: -# - input: inputFile -# from: -# step: step1 -# output: outputFile -# outputs: -# - output: outputFile - - - name: step1 description: Add column 1 specification: @@ -116,19 +61,18 @@ steps: job: rdkit-molprops version: "1.0.0" variable-mapping: - - name: inputFile - from: - workflow-variable: candidateMolecules - - name: name - from: - workflow-variable: rdkitPropertyName - - name: value - from: - workflow-variable: rdkitPropertyValue - - name: outputFile - from: - workflow-variable: clusteredMolecules - + - variable: inputFile + from-workflow-variable: + variable: candidateMolecules + - variable: name + from-workflow-variable: + variable: rdkitPropertyName + - variable: value + from-workflow-variable: + variable: rdkitPropertyValue + - variable: outputFile + from-workflow-variable: + variable: clusteredMolecules - name: step2 description: Add column 2 @@ -140,10 +84,12 @@ steps: name: "col2" value: "999" variable-mapping: - - name: inputFile - from: - step: step1 - output: outputFile - - name: outputFile - from: - workflow-variable: outputFile + - variable: inputFile + from-step: + name: step1 + variable: outputFile + - variable: outputFile + from-workflow-variable: + variable: outputFile + out: + - outputFile diff --git a/tests/workflow-definitions/simple-python-molprops.yaml b/tests/workflow-definitions/simple-python-molprops.yaml index 8abd637..5a0797e 
100644 --- a/tests/workflow-definitions/simple-python-molprops.yaml +++ b/tests/workflow-definitions/simple-python-molprops.yaml @@ -3,17 +3,8 @@ kind: DataManagerWorkflow kind-version: "2025.2" name: python-workflow description: A simple python experimental workflow -variable-mapping: - inputs: - - name: candidateMolecules - outputs: - - name: clusteredMolecules - from: - step: step2 - output: outputFile steps: - - name: step1 description: Add column 1 specification: @@ -23,12 +14,10 @@ steps: variables: name: "col1" value: 123 - inputs: - - input: inputFile - from: - workflow-input: candidateMolecules - outputs: - - output: outputFile + variable-mapping: + - variable: inputFile + from-workflow-variable: + variable: candidateMolecules - name: step2 description: Add column 2 @@ -39,10 +28,13 @@ steps: variables: name: "col2" value: "999" - inputs: - - input: inputFile - from: - step: step1 - output: outputFile - outputs: - - output: outputFile + variable-mapping: + - variable: inputFile + from-step: + name: step1 + variable: outputFile + - variable: outputFile + from-workflow-variable: + variable: clusteredMolecules + out: + - outputFile diff --git a/tests/workflow-definitions/simple-python-parallel.yaml b/tests/workflow-definitions/simple-python-parallel.yaml index e620cda..1137413 100644 --- a/tests/workflow-definitions/simple-python-parallel.yaml +++ b/tests/workflow-definitions/simple-python-parallel.yaml @@ -3,18 +3,8 @@ kind: DataManagerWorkflow kind-version: "2025.2" name: python-workflow description: A simple branching workflow -variable-mapping: - inputs: - - name: candidateMolecules - outputs: - - name: clusteredMolecules - from: - step: final-step - output: outputFile - steps: - - name: first-step description: Create inputs specification: @@ -24,13 +14,10 @@ steps: variables: name: "unnecessary" value: "0" - inputs: - - input: inputFile - from: - workflow-input: candidateMolecules - outputs: - - output: outputFile - as: first-step.out.smi + variable-mapping: + - variable: inputFile + from-workflow-variable: + variable: candidateMolecules - name: parallel-step-a description: Add some params @@ -41,14 +28,11 @@ steps: variables: name: "desc1" value: "777" - inputs: - - input: inputFile - from: - step: first-step - output: outputFile - outputs: - - output: outputFile - as: parallel-step-a.out.smi + variable-mapping: + - variable: inputFile + from-step: + name: first-step + variable: outputFile - name: parallel-step-b description: Add some other params @@ -59,14 +43,11 @@ steps: variables: name: "desc2" value: "999" - inputs: - - input: inputFile - from: - step: first-step - output: outputFile - outputs: - - output: outputFile - as: parallel-step-b.out.smi + variable-mapping: + - variable: inputFile + from-step: + name: first-step + variable: outputFile - name: final-step description: Collate results @@ -83,6 +64,5 @@ steps: from: step: parallel-step-b output: outputFile - outputs: - - output: outputFile - as: final-step.out.smi + out: + - outputFile diff --git a/tests/workflow-definitions/step-specification-variable-names.yaml b/tests/workflow-definitions/step-specification-variable-names.yaml index e899b7f..99ae052 100644 --- a/tests/workflow-definitions/step-specification-variable-names.yaml +++ b/tests/workflow-definitions/step-specification-variable-names.yaml @@ -5,7 +5,6 @@ name: step-variables description: Test a lot of variables whose format is supported steps: - - name: step-1 specification: collection: a diff --git a/tests/workflow-definitions/workflow-options.yaml 
b/tests/workflow-definitions/workflow-options.yaml deleted file mode 100644 index 9e742fe..0000000 --- a/tests/workflow-definitions/workflow-options.yaml +++ /dev/null @@ -1,54 +0,0 @@ ---- -kind: DataManagerWorkflow -kind-version: "2025.2" -name: workflow-options -description: Illustrate the use of workflow options -variable-mapping: - options: - - name: variableWithoutDefault - as: - - option: variable1 - step: step-1 - - option: variable2 - step: step-2 - - name: variableWithIntegerDefault - default: 7 - as: - - option: variable3 - step: step-1 - - name: variableWithIntegerDefaultAndRange - default: 7 - minimum: 1 - maximum: 8 - as: - - option: variable4 - step: step-1 - - name: variableWithFloatDefault - default: 1.0 - as: - - option: variable5 - step: step-1 - - name: variableWithBooleanDefault - default: true - as: - - option: variable6 - step: step-1 - - name: variableWithStringDefault - default: Hello, World! - as: - - option: variable7 - step: step-1 - -steps: - -- name: step-1 - specification: - collection: a - job: b - version: '1.0.0' - -- name: step-2 - specification: - collection: a - job: b - version: '1.0.0' diff --git a/workflow/decoder.py b/workflow/decoder.py index d03b314..0fa00a5 100644 --- a/workflow/decoder.py +++ b/workflow/decoder.py @@ -63,23 +63,18 @@ def get_description(definition: dict[str, Any]) -> str | None: return definition.get("description") -def get_variable_names(definition: dict[str, Any]) -> list[str]: +def get_workflow_variable_names(definition: dict[str, Any]) -> set[str]: """Given a Workflow definition this function returns all the names of the - variables defined at the workflow level. These are the 'names' for inputs, - outputs and options. This function DOES NOT de-duplicate names, - that is the role of the validator.""" - wf_variable_names: list[str] = [] - variables: dict[str, Any] | None = definition.get("variable-mapping") - if variables: - wf_variable_names.extend( - input_variable["name"] for input_variable in variables.get("inputs", []) - ) - wf_variable_names.extend( - output_variable["name"] for output_variable in variables.get("outputs", []) - ) - wf_variable_names.extend( - option_variable["name"] for option_variable in variables.get("options", []) - ) + variables that need to be defined at the workflow level. These are the 'variables' + used in every step's variable-mapping block. + """ + wf_variable_names: set[str] = set() + steps: list[dict[str, Any]] = get_steps(definition) + for step in steps: + if v_map := step.get("variable-mapping"): + for v in v_map: + if "from-workflow-variable" in v: + wf_variable_names.add(v["from-workflow-variable"]["variable"]) return wf_variable_names @@ -87,128 +82,41 @@ def get_step_output_variable_names( definition: dict[str, Any], step_name: str ) -> list[str]: """Given a Workflow definition and a Step name this function returns all the names - of the output variables defined at the Step level. This function DOES NOT - de-duplicate names, that is the role of the validator.""" + of the output variables defined at the Step level. 
These are the names + of variables that have files associated with them that need copying to + the Project directory (from the Instance).""" variable_names: list[str] = [] steps: list[dict[str, Any]] = get_steps(definition) for step in steps: if step["name"] == step_name: - variable_names.extend( - output["output"] for output in step.get("outputs", []) - ) + variable_names.extend(step.get("out", [])) return variable_names def get_step_input_variable_names( definition: dict[str, Any], step_name: str ) -> list[str]: - """Given a Workflow definition and a Step name (expected to exist) - this function returns all the names of the input - variables defined at the step level.""" + """Given a Workflow definition and a Step name this function returns all the names + of the input variables defined at the Step level. These are the names + of variables that have files associated with them that need copying to + the Instance directory (from the Project).""" variable_names: list[str] = [] steps: list[dict[str, Any]] = get_steps(definition) for step in steps: if step["name"] == step_name: - variable_names.extend(input["input"] for input in step.get("inputs", [])) + variable_names.extend(step.get("in", [])) return variable_names -def get_workflow_job_input_names_for_step( - definition: dict[str, Any], name: str -) -> list[str]: - """Given a Workflow definition and a step name we return a list of step Job input - variable names the step expects. To do this we iterate through the step's - inputs to find those that are declared 'from->workflow-input'.""" - inputs: list[str] = [] - for step in definition.get("steps", {}): - if step["name"] == name and "inputs" in step: - # Find all the workflow inputs. - # This gives us the name of the workflow input variable - # and the name of the step input (Job) variable. - inputs.extend( - step_input["input"] - for step_input in step["inputs"] - if "from" in step_input and "workflow-input" in step_input["from"] - ) - return inputs - - def workflow_step_has_outputs(definition: dict[str, Any], name: str) -> bool: """Given a Workflow definition and a step name we return a boolean - that is true if the step produces outputs.""" - wf_outputs = definition.get("variable-mapping", {}).get("outputs", {}) - return any( - "from" in output and "step" in output["from"] and output["from"]["step"] == name - for output in wf_outputs + that is true if the step produces outputs. This requires inspection + of the 'as-yet-unused' variables block.""" + return ( + len(get_step_output_variable_names(definition=definition, step_name=name)) > 0 ) -def set_variables_from_options_for_step( - definition: dict[str, Any], variables: dict[str, Any], step_name: str -) -> dict[str, Any]: - """Given a Workflow definition, an existing map of variables and values, - and a step name this function returns a new set of variables by adding - variables and values that are required for the step that have been defined in the - workflow's variables->options block. - - As an example, the following option, which is used if the step name is 'step1', - expects 'rdkitPropertyName' to exist in the current set of variables, - and should be copied into the new set of variables using the key 'propertyName' - and value that is the same as the one provided in the original 'rdkitPropertyName': - - - name: rdkitPropertyName - default: propertyName - as: - - option: propertyName - step: step1 - - And ... in the above example ... 
if the input variables map - is {"rdkitPropertyName": "rings"} then the output map would be - {"rdkitPropertyName": "rings", "propertyName": "rings"} - - The function returns a new variable map, with and an optional error string on error. - """ - - assert isinstance(definition, dict) - assert step_name - - result = {} - options = definition.get("variable-mapping", {}).get("options", []) - - for opt in options: - for step_alias in opt["as"]: - if step_alias["step"] == step_name: - result[step_alias["option"]] = variables[opt["name"]] - # can break the loop because a variable can be a step - # variable only once - break - - # Success... - return result - - -def get_required_variable_names(definition: dict[str, Any]) -> list[str]: - """Given a Workflow definition this function returns all the names of the - variables that are required to be defined when it is RUN - i.e. - all those the user needs to provide.""" - required_variables: list[str] = [] - variables: dict[str, Any] | None = definition.get("variable-mapping") - if variables: - # All inputs are required (no defaults atm)... - required_variables.extend( - input_variable["name"] for input_variable in variables.get("inputs", []) - ) - # Options without defaults are required... - # It is the role of the engine to provide the actual default for those - # that have defaults but no user-defined value. - required_variables.extend( - option_variable["name"] - for option_variable in variables.get("options", []) - if "default" not in option_variable - ) - return required_variables - - def set_step_variables( *, workflow: dict[str, Any], @@ -224,6 +132,8 @@ def set_step_variables( Inputs are defined in step definition but their values may come from previous step outputs. """ + assert workflow + result = {} print("ssv: wf vars:") @@ -280,13 +190,13 @@ def set_step_variables( p_val = "somefile.smi" result[p_key] = p_val - options = set_variables_from_options_for_step( - definition=workflow, - variables=workflow_variables, - step_name=step_name, - ) - - result |= options + # options = set_variables_from_options_for_step( + # definition=workflow, + # variables=workflow_variables, + # step_name=step_name, + # ) + # + # result |= options return result diff --git a/workflow/workflow-schema.yaml b/workflow/workflow-schema.yaml index c839158..1896a76 100644 --- a/workflow/workflow-schema.yaml +++ b/workflow/workflow-schema.yaml @@ -1,5 +1,5 @@ --- -# The JSONSchema for 'Workflow' YAML files. +# The schema for 'Workflow' YAML files. # # See https://json-schema.org/understanding-json-schema/index.html @@ -33,24 +33,6 @@ properties: # and, like Jobs, has no current schema so we permit anything here. type: object additionalProperties: true - variable-mapping: - # Workflow-specific variable declarations, - # used (at the moment) to map workflow variables to steps. - type: object - additionalProperties: false - properties: - inputs: - type: array - items: - $ref: "#/definitions/workflow-input-parameter" - outputs: - type: array - items: - $ref: "#/definitions/workflow-output-parameter" - options: - type: array - items: - $ref: "#/definitions/workflow-option-parameter" required: - kind - kind-version @@ -73,102 +55,10 @@ definitions: # What does a Job specification template variable look like? # The values found in Jinja variables like '{{ x }}'. # Stuff like 'candidateMolecules' or 'clustered_molecules' - template-variable-name: + variable-name: type: string pattern: ^[a-zA-Z_][a-zA-Z0-9_]*$ - # What does a filename look like? 
- # We do not (at the moment) permit spaces! - file-name: - type: string - pattern: ^[a-zA-Z0-9._-]+$ - - # An input parameter has a name and a (MIME) type. - workflow-input-parameter: - type: object - additionalProperties: false - properties: - name: - $ref: '#/definitions/template-variable-name' - required: - - name - - # A workflow output parameter is essentially a file - # taken from the output of a step with a default (as) value. - workflow-output-parameter: - type: object - additionalProperties: false - properties: - name: - $ref: '#/definitions/template-variable-name' - from: - $ref: '#/definitions/from-step-output' - required: - - name - - # Declaration of a step option value from a workflow option (variable) - as-step-option: - type: object - additionalProperties: false - properties: - option: - $ref: '#/definitions/template-variable-name' - step: - $ref: '#/definitions/rfc1035-label-name' - required: - - option - - step - - - # Declaration of a value from a workflow input (variable) - from-workflow-input: - type: object - additionalProperties: false - properties: - workflow-input: - $ref: '#/definitions/template-variable-name' - required: - - workflow-input - - # Declaration of a value from another step - from-step-output: - type: object - additionalProperties: false - properties: - step: - $ref: '#/definitions/rfc1035-label-name' - output: - $ref: '#/definitions/template-variable-name' - required: - - step - - output - - # A workflow option used as a step option - workflow-option-parameter: - type: object - additionalProperties: false - properties: - name: - $ref: '#/definitions/template-variable-name' - description: - type: string - default: - oneOf: - - type: string - - type: number - - type: boolean - minimum: - type: number - maximum: - type: number - as: - type: array - items: - $ref: '#/definitions/as-step-option' - required: - - name - - as - # A step replication control variable # that is based on a step input variable replicate-using-input: @@ -176,48 +66,52 @@ definitions: additionalProperties: false properties: input: - $ref: '#/definitions/template-variable-name' + $ref: '#/definitions/variable-name' required: - input - # A Step input (from an output of a prior step) - step-input-from-step: + # A Step variable + # (whose value is derived from a variable used in a prior step) + step-variable-from-step: type: object additionalProperties: false properties: - input: - $ref: '#/definitions/template-variable-name' - from: - $ref: '#/definitions/from-step-output' + variable: + $ref: '#/definitions/variable-name' + from-step: + type: object + additionalProperties: false + properties: + name: + $ref: '#/definitions/rfc1035-label-name' + variable: + $ref: '#/definitions/variable-name' + required: + - name + - variable required: - - input + - variable + - from-step - # A Step input (from a workflow input) - step-input-from-workflow: + # A Step variable + # (whose value is derived from a workflow variable) + step-variable-from-workflow: type: object additionalProperties: false properties: - input: - $ref: '#/definitions/template-variable-name' - from: - $ref: '#/definitions/from-workflow-input' + variable: + $ref: '#/definitions/variable-name' + from-workflow-variable: + type: object + additionalProperties: false + properties: + variable: + $ref: '#/definitions/variable-name' + required: + - variable required: - - input - - from - - # A Step output (with an 'as' - a declared value) - # step-output-as: - # type: object - # additionalProperties: false - # properties: - # output: - 
# $ref: '#/definitions/template-variable-name' - # as: - # $ref: '#/definitions/file-name' - # required: - # - output - # - as - + - variable + - from-workflow-variable # A step specification variable # (there must be at least one if a variables block is defined). @@ -260,11 +154,18 @@ definitions: additionalProperties: false properties: name: + # A unique name for the step $ref: '#/definitions/rfc1035-label-name' description: + # An optional description type: string description: A description of the step specification: + # The step Job specification. + # This MUST define a 'collection', a 'job', and a 'version'. + # 'variables' (a map of name and value) can also be provided. + # The format of this is essentially identical to the specification + # used when a Job is launched via the DM API. $ref: '#/definitions/step-specification' replicate: # Used to indicate one input variable that is used to replicate/spawn @@ -274,17 +175,35 @@ properties: using: $ref: '#/definitions/replicate-using-input' - inputs: + variable-mapping: + # The map of the source of the step's variables. + # All variables the step needs (that aren't already in the specification) + # need to be declared here. They either come "from" a prior step + # or are expected in the running workflow variables. Here we simply + # associate every required variable with a source. type: array items: anyOf: - - $ref: "#/definitions/step-input-from-step" - - $ref: "#/definitions/step-input-from-workflow" - outputs: + - $ref: "#/definitions/step-variable-from-step" + - $ref: "#/definitions/step-variable-from-workflow" + minItems: 1 + in: + # An optional list of the step variables that are inputs. + # These are typically files, expected to be present in the Project directory, + # that need to be copied (by the DM) into the step's instance directory. + type: array + items: + $ref: '#/definitions/variable-name' + minItems: 1 + out: + # An optional list of the step variables that are outputs. 
+ # These are typically files, expected to be present in the Step Instance directory, + # when it has finished (successfully), that need to be copied (by the DM) + # into the Project directory via "realise_outputs()". type: array - # items: - # anyOf: - # - $ref: "#/definitions/step-output-as" + items: + $ref: '#/definitions/variable-name' + minItems: 1 required: - name - specification diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index 4736851..e1f5c1b 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -41,8 +41,8 @@ ) from .decoder import ( + get_step_input_variable_names, get_step_replication_param, - get_workflow_job_input_names_for_step, set_step_variables, workflow_step_has_outputs, ) @@ -645,7 +645,7 @@ def _launch( pprint(variables) inputs: list[str] = [] - inputs.extend(iter(get_workflow_job_input_names_for_step(wf, step_name))) + inputs.extend(iter(get_step_input_variable_names(wf, step_name))) if replicator: single_step_variables = [] for replicating_param in variables[replicator]: diff --git a/workflow/workflow_validator.py b/workflow/workflow_validator.py index 6324bd5..1efd18c 100644 --- a/workflow/workflow_validator.py +++ b/workflow/workflow_validator.py @@ -5,11 +5,10 @@ from typing import Any from .decoder import ( - get_required_variable_names, get_step_input_variable_names, get_step_output_variable_names, get_steps, - get_variable_names, + get_workflow_variable_names, validate_schema, ) @@ -113,24 +112,6 @@ def _validate_tag_level( error_num=2, error_msg=[f"Duplicate step names found: {', '.join(duplicate_names)}"], ) - # Workflow variables must be unique. - duplicate_names.clear() - variable_names.clear() - wf_variable_names: list[str] = get_variable_names(workflow_definition) - for wf_variable_name in wf_variable_names: - if ( - wf_variable_name not in duplicate_names - and wf_variable_name in variable_names - ): - duplicate_names.add(wf_variable_name) - variable_names.add(wf_variable_name) - if duplicate_names: - return ValidationResult( - error_num=6, - error_msg=[ - f"Duplicate workflow variable names found: {', '.join(duplicate_names)}" - ], - ) # For each 'replicating' step the replicating variable # must be declared in the step. for step in get_steps(workflow_definition): @@ -163,7 +144,7 @@ def _validate_run_level( assert workflow_definition # We must have values for all the variables defined in the workflow. - wf_variables: list[str] = get_required_variable_names(workflow_definition) + wf_variables: set[str] = get_workflow_variable_names(workflow_definition) missing_values: list[str] = [] missing_values.extend( wf_variable From f9d4aca320a7cd8b7029010914e1db761f3f4506 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Tue, 19 Aug 2025 16:27:27 +0100 Subject: [PATCH 25/57] refactor: from-workflow-variable becomes from-workflow --- workflow/workflow-schema.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflow/workflow-schema.yaml b/workflow/workflow-schema.yaml index 1896a76..910dc66 100644 --- a/workflow/workflow-schema.yaml +++ b/workflow/workflow-schema.yaml @@ -101,7 +101,7 @@ definitions: properties: variable: $ref: '#/definitions/variable-name' - from-workflow-variable: + from-workflow: type: object additionalProperties: false properties: @@ -111,7 +111,7 @@ definitions: - variable required: - variable - - from-workflow-variable + - from-workflow # A step specification variable # (there must be at least one if a variables block is defined). 
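The decoding behaviour behind these two schema patches can be made concrete with a small example. What follows is a minimal, self-contained sketch, not code from this repository: the two-step definition is hypothetical, get_steps() is inlined as a plain dictionary lookup, and the 'from-workflow' key assumes the schema rename made in PATCH 25 above.

from typing import Any


def collect_workflow_variable_names(definition: dict[str, Any]) -> set[str]:
    # Walk every step's variable-mapping entries and collect the names of
    # the workflow-level variables they bind (the 'from-workflow' form).
    names: set[str] = set()
    for step in definition.get("steps", []):
        for mapping in step.get("variable-mapping", []):
            if "from-workflow" in mapping:
                names.add(mapping["from-workflow"]["variable"])
    return names


# A hypothetical two-step definition: step-2 takes its input from step-1,
# so only step-1 contributes a workflow-level variable name.
example_definition: dict[str, Any] = {
    "steps": [
        {
            "name": "step-1",
            "variable-mapping": [
                {
                    "variable": "inputFile",
                    "from-workflow": {"variable": "candidateMolecules"},
                },
            ],
        },
        {
            "name": "step-2",
            "variable-mapping": [
                {
                    "variable": "inputFile",
                    "from-step": {"name": "step-1", "variable": "outputFile"},
                },
            ],
        },
    ],
}

assert collect_workflow_variable_names(example_definition) == {"candidateMolecules"}

Variables satisfied by a prior step (the 'from-step' form) are deliberately excluded: only 'from-workflow' bindings need user-supplied values, and that is exactly the set the run-level validator checks.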
From 2c4c867518466b1d3f63bdfaa9850686fbfd14d4 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Tue, 19 Aug 2025 16:28:02 +0100 Subject: [PATCH 26/57] refactor: from-workflow-variable becomes from-workflow --- .../duplicate-step-input-output-variable-names.yaml | 4 ++-- .../replicate-using-undeclared-input.yaml | 2 +- tests/workflow-definitions/simple-python-fanout.yaml | 2 +- .../simple-python-molprops-with-options.yaml | 10 +++++----- tests/workflow-definitions/simple-python-molprops.yaml | 4 ++-- tests/workflow-definitions/simple-python-parallel.yaml | 2 +- workflow/decoder.py | 4 ++-- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/workflow-definitions/duplicate-step-input-output-variable-names.yaml b/tests/workflow-definitions/duplicate-step-input-output-variable-names.yaml index bc0b761..3ba3926 100644 --- a/tests/workflow-definitions/duplicate-step-input-output-variable-names.yaml +++ b/tests/workflow-definitions/duplicate-step-input-output-variable-names.yaml @@ -16,10 +16,10 @@ steps: value: 123 variable-mapping: - variable: inputFile - from-workflow-variable: + from-workflow: variable: candidateMolecules - variable: inputFile - from-workflow-variable: + from-workflow: variable: candidateMolecules in: - inputFile diff --git a/tests/workflow-definitions/replicate-using-undeclared-input.yaml b/tests/workflow-definitions/replicate-using-undeclared-input.yaml index 488ddba..1382f0c 100644 --- a/tests/workflow-definitions/replicate-using-undeclared-input.yaml +++ b/tests/workflow-definitions/replicate-using-undeclared-input.yaml @@ -16,7 +16,7 @@ steps: value: 123 variable-mapping: - variable: inputFile - from-workflow-variable: + from-workflow: variable: candidateMolecules in: - outputFile diff --git a/tests/workflow-definitions/simple-python-fanout.yaml b/tests/workflow-definitions/simple-python-fanout.yaml index 9e2042f..598f0e6 100644 --- a/tests/workflow-definitions/simple-python-fanout.yaml +++ b/tests/workflow-definitions/simple-python-fanout.yaml @@ -17,7 +17,7 @@ steps: value: "1" variable-mapping: - variable: inputFile - from-workflow-variable: + from-workflow: variable: candidateMolecules - name: parallel-step diff --git a/tests/workflow-definitions/simple-python-molprops-with-options.yaml b/tests/workflow-definitions/simple-python-molprops-with-options.yaml index 696855d..3a3904a 100644 --- a/tests/workflow-definitions/simple-python-molprops-with-options.yaml +++ b/tests/workflow-definitions/simple-python-molprops-with-options.yaml @@ -62,16 +62,16 @@ steps: version: "1.0.0" variable-mapping: - variable: inputFile - from-workflow-variable: + from-workflow: variable: candidateMolecules - variable: name - from-workflow-variable: + from-workflow: variable: rdkitPropertyName - variable: value - from-workflow-variable: + from-workflow: variable: rdkitPropertyValue - variable: outputFile - from-workflow-variable: + from-workflow: variable: clusteredMolecules - name: step2 @@ -89,7 +89,7 @@ steps: name: step1 variable: outputFile - variable: outputFile - from-workflow-variable: + from-workflow: variable: outputFile out: - outputFile diff --git a/tests/workflow-definitions/simple-python-molprops.yaml b/tests/workflow-definitions/simple-python-molprops.yaml index 5a0797e..39fb6cd 100644 --- a/tests/workflow-definitions/simple-python-molprops.yaml +++ b/tests/workflow-definitions/simple-python-molprops.yaml @@ -16,7 +16,7 @@ steps: value: 123 variable-mapping: - variable: inputFile - from-workflow-variable: + from-workflow: variable: candidateMolecules - name: 
step2 @@ -34,7 +34,7 @@ steps: name: step1 variable: outputFile - variable: outputFile - from-workflow-variable: + from-workflow: variable: clusteredMolecules out: - outputFile diff --git a/tests/workflow-definitions/simple-python-parallel.yaml b/tests/workflow-definitions/simple-python-parallel.yaml index 1137413..c1f5c8f 100644 --- a/tests/workflow-definitions/simple-python-parallel.yaml +++ b/tests/workflow-definitions/simple-python-parallel.yaml @@ -16,7 +16,7 @@ steps: value: "0" variable-mapping: - variable: inputFile - from-workflow-variable: + from-workflow: variable: candidateMolecules - name: parallel-step-a diff --git a/workflow/decoder.py b/workflow/decoder.py index 0fa00a5..510c41f 100644 --- a/workflow/decoder.py +++ b/workflow/decoder.py @@ -73,8 +73,8 @@ def get_workflow_variable_names(definition: dict[str, Any]) -> set[str]: for step in steps: if v_map := step.get("variable-mapping"): for v in v_map: - if "from-workflow-variable" in v: - wf_variable_names.add(v["from-workflow-variable"]["variable"]) + if "from-workflow" in v: + wf_variable_names.add(v["from-workflow"]["variable"]) return wf_variable_names From c2eb21c6f4152d59fe08ef37877402017029c9c2 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Tue, 19 Aug 2025 17:23:35 +0100 Subject: [PATCH 27/57] refactor: Replicate now uses variable not input --- .../replicate-using-undeclared-input.yaml | 2 +- tests/workflow-definitions/simple-python-fanout.yaml | 2 +- workflow/decoder.py | 4 ++-- workflow/workflow-schema.yaml | 10 +++++----- workflow/workflow_engine.py | 4 ++-- workflow/workflow_validator.py | 2 +- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/workflow-definitions/replicate-using-undeclared-input.yaml b/tests/workflow-definitions/replicate-using-undeclared-input.yaml index 1382f0c..447521b 100644 --- a/tests/workflow-definitions/replicate-using-undeclared-input.yaml +++ b/tests/workflow-definitions/replicate-using-undeclared-input.yaml @@ -32,7 +32,7 @@ steps: value: "999" replicate: using: - input: y + variable: y variable-mapping: - variable: inputFile from-step: diff --git a/tests/workflow-definitions/simple-python-fanout.yaml b/tests/workflow-definitions/simple-python-fanout.yaml index 598f0e6..1ad06bb 100644 --- a/tests/workflow-definitions/simple-python-fanout.yaml +++ b/tests/workflow-definitions/simple-python-fanout.yaml @@ -31,7 +31,7 @@ steps: value: "777" replicate: using: - input: inputFile + variable: inputFile variable-mapping: - variable: inputFile from-step: diff --git a/workflow/decoder.py b/workflow/decoder.py index 510c41f..9270d0d 100644 --- a/workflow/decoder.py +++ b/workflow/decoder.py @@ -200,9 +200,9 @@ def set_step_variables( return result -def get_step_replication_param(*, step: dict[str, Any]) -> str | Any: +def get_step_replicator(*, step: dict[str, Any]) -> str | Any: """Return step's replication info""" - replicator = step.get("replicate", None) + replicator = step.get("replicate") if replicator: # 'using' is a dict but there can be only single value for now replicator = list(replicator["using"].values())[0] diff --git a/workflow/workflow-schema.yaml b/workflow/workflow-schema.yaml index 910dc66..27c726c 100644 --- a/workflow/workflow-schema.yaml +++ b/workflow/workflow-schema.yaml @@ -60,15 +60,15 @@ definitions: pattern: ^[a-zA-Z_][a-zA-Z0-9_]*$ # A step replication control variable - # that is based on a step input variable - replicate-using-input: + # that is based on a step variable + replicate-using-variable: type: object additionalProperties: false 
properties: - input: + variable: $ref: '#/definitions/variable-name' required: - - input + - variable # A Step variable # (whose value is derived from a variable used in a prior step) @@ -174,7 +174,7 @@ definitions: additionalProperties: false properties: using: - $ref: '#/definitions/replicate-using-input' + $ref: '#/definitions/replicate-using-variable' variable-mapping: # The map of the source of the step's variables. # all variables the step needs (that aren;t already in the specification) diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index e1f5c1b..b8a90dc 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -42,7 +42,7 @@ from .decoder import ( get_step_input_variable_names, - get_step_replication_param, + get_step_replicator, set_step_variables, workflow_step_has_outputs, ) @@ -595,7 +595,7 @@ def _launch( variables: dict[str, Any] = error_or_variables print("variables", variables) # find out if and by which parameter this step should be replicated - replicator = get_step_replication_param(step=step) + replicator = get_step_replicator(step=step) _LOGGER.info( "Launching step: RunningWorkflow=%s RunningWorkflowStep=%s step=%s" diff --git a/workflow/workflow_validator.py b/workflow/workflow_validator.py index 1efd18c..702ef88 100644 --- a/workflow/workflow_validator.py +++ b/workflow/workflow_validator.py @@ -118,7 +118,7 @@ def _validate_tag_level( if ( replicate_using_input := step.get("replicate", {}) .get("using", {}) - .get("input") + .get("variable") ): step_name = step["name"] if replicate_using_input not in get_step_input_variable_names( From 8d05f15b07c297f0014d1dc321b5989ccf5cd112 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Tue, 19 Aug 2025 18:05:33 +0100 Subject: [PATCH 28/57] fix: No longer need realise-outputs --- tests/test_decoder.py | 36 ------------------- .../test_workflow_validator_for_run_level.py | 4 +-- workflow/decoder.py | 9 ----- workflow/workflow_abc.py | 10 ------ workflow/workflow_engine.py | 35 ++---------------- workflow/workflow_validator.py | 2 +- 6 files changed, 6 insertions(+), 90 deletions(-) diff --git a/tests/test_decoder.py b/tests/test_decoder.py index 5f5da9f..8aae4cb 100644 --- a/tests/test_decoder.py +++ b/tests/test_decoder.py @@ -197,42 +197,6 @@ def test_get_workflow_steps(): assert steps[1]["name"] == "step2" -def test_get_workflow_outputs_for_step_with_name_step1(): - # Arrange - - # Act - has_outputs = decoder.workflow_step_has_outputs( - _SIMPLE_PYTHON_MOLPROPS_WITH_OPTIONS_WORKFLOW, "step1" - ) - - # Assert - assert not has_outputs - - -def test_get_workflow_outputs_for_step_with_name_step2(): - # Arrange - - # Act - has_outputs = decoder.workflow_step_has_outputs( - _SIMPLE_PYTHON_MOLPROPS_WITH_OPTIONS_WORKFLOW, "step2" - ) - - # Assert - assert has_outputs - - -def test_get_workflow_outputs_for_step_with_unkown_step_name(): - # Arrange - - # Act - has_outputs = decoder.workflow_step_has_outputs( - _SIMPLE_PYTHON_MOLPROPS_WITH_OPTIONS_WORKFLOW, "unknown" - ) - - # Assert - assert not has_outputs - - def test_get_step_input_variable_names_when_duplicates(): # Arrange workflow_filename: str = os.path.join( diff --git a/tests/test_workflow_validator_for_run_level.py b/tests/test_workflow_validator_for_run_level.py index b1f6118..ee28a15 100644 --- a/tests/test_workflow_validator_for_run_level.py +++ b/tests/test_workflow_validator_for_run_level.py @@ -156,7 +156,7 @@ def test_validate_simple_python_molprops_with_options_when_missing_required(): ) # Assert - assert error.error_num 
== 7 + assert error.error_num == 8 assert error.error_msg == [ "Missing workflow variable values for: rdkitPropertyValue" ] @@ -210,7 +210,7 @@ def test_validate_simple_python_molprops_with_missing_input(): ) # Assert - assert error.error_num == 7 + assert error.error_num == 8 assert error.error_msg == [ "Missing workflow variable values for: candidateMolecules" ] diff --git a/workflow/decoder.py b/workflow/decoder.py index 9270d0d..c1b3c87 100644 --- a/workflow/decoder.py +++ b/workflow/decoder.py @@ -108,15 +108,6 @@ def get_step_input_variable_names( return variable_names -def workflow_step_has_outputs(definition: dict[str, Any], name: str) -> bool: - """Given a Workflow definition and a step name we return a boolean - that is true if the step produces outputs. This requires inspection - of the 'as-yet-unused' variables block.""" - return ( - len(get_step_output_variable_names(definition=definition, step_name=name)) > 0 - ) - - def set_step_variables( *, workflow: dict[str, Any], diff --git a/workflow/workflow_abc.py b/workflow/workflow_abc.py index 2024fba..4c4f6ff 100644 --- a/workflow/workflow_abc.py +++ b/workflow/workflow_abc.py @@ -373,16 +373,6 @@ def get_running_workflow_step_output_values_for_output( # "output": ["dir/file1.sdf", "dir/file2.sdf"] # } - @abstractmethod - def realise_outputs( - self, *, running_workflow_step_id: str - ) -> tuple[dict[str, Any], int]: - """Copy (link) the step's files as outputs into the Project directory.""" - # Should return an empty map or: - # { - # "error": "", - # } - class MessageDispatcher(ABC): """The class handling the sending of messages (on the Data Manager message bus).""" diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index b8a90dc..66d3e3e 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -24,7 +24,6 @@ import logging import sys -from http import HTTPStatus from pprint import pprint from typing import Any, Dict, Optional @@ -44,7 +43,6 @@ get_step_input_variable_names, get_step_replicator, set_step_variables, - workflow_step_has_outputs, ) _LOGGER: logging.Logger = logging.getLogger(__name__) @@ -250,44 +248,17 @@ def _handle_pod_message(self, msg: PodMessage) -> None: self._set_step_error(step_name, r_wfid, r_wfsid, exit_code, "Job failed") return - # If we get here the prior step completed successfully and we can decide - # whether the step has outputs (files) that need to be written to the - # Project directory, while also marking the Step as DONE (successfully). - # We pass the outputs to the DM via a call to the API adapter's realise_outputs(). - # In return it copies (links) these files to the Project directory. + # If we get here the prior step completed successfully so we + # mark the Step as DONE (successfully). wfid = rwf_response["workflow"]["id"] assert wfid wf_response, _ = self._wapi_adapter.get_workflow(workflow_id=wfid) _LOGGER.debug("API.get_workflow(%s) returned: -\n%s", wfid, str(wf_response)) - error_num: int | None = None - error_msg: str | None = None - if workflow_step_has_outputs(wf_response, step_name): - # The step produces at least one output. 
- # Inform the DM so it can link them to the Project directory - response, status_code = self._wapi_adapter.realise_outputs( - running_workflow_step_id=r_wfsid, - ) - if status_code != HTTPStatus.OK: - error_num = status_code - error_msg = ( - response["error"] - if "error" in response - else "Undisclosed error when realising outputs" - ) - - if error_num is not None: - # The job was successful but linking outputs (back to the Project directory) - # appears to have failed. - self._set_step_error(step_name, r_wfid, r_wfsid, error_num, error_msg) - return - # We then inspect the Workflow to determine the next step. self._wapi_adapter.set_running_workflow_step_done( running_workflow_step_id=r_wfsid, - success=error_num is None, - error_num=error_num, - error_msg=error_msg, + success=True, ) # We have the step from the Instance that's just finished, diff --git a/workflow/workflow_validator.py b/workflow/workflow_validator.py index 702ef88..773f7e3 100644 --- a/workflow/workflow_validator.py +++ b/workflow/workflow_validator.py @@ -153,7 +153,7 @@ def _validate_run_level( ) if missing_values: return ValidationResult( - error_num=7, + error_num=8, error_msg=[ f"Missing workflow variable values for: {', '.join(missing_values)}" ], From 0da1a595fe46fd9015fb58ebc1e8da108cf06aff Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Thu, 21 Aug 2025 17:02:36 +0000 Subject: [PATCH 29/57] build: Add devcontainer --- .devcontainer/devcontainer.json | 49 +++++++++++++++++++++++++++++++++ .gitignore | 1 + requirements.txt | 2 ++ 3 files changed, 52 insertions(+) create mode 100644 .devcontainer/devcontainer.json create mode 100644 requirements.txt diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..cc5a44e --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,49 @@ +// For format details, see https://aka.ms/devcontainer.json. 
For config options, see the
+// README at: https://github.com/devcontainers/templates/tree/main/src/python
+{
+    "name": "Python 3",
+    "image": "mcr.microsoft.com/devcontainers/python:1-3.13-bullseye",
+    "features": {
+        "ghcr.io/devcontainers/features/docker-in-docker:2": {
+            "moby": true,
+            "azureDnsAutoDetection": true,
+            "installDockerBuildx": true,
+            "installDockerComposeSwitch": true,
+            "version": "latest",
+            "dockerDashComposeVersion": "v2"
+        },
+        "ghcr.io/devcontainers/features/git:1": {
+            "ppa": true,
+            "version": "os-provided"
+        }
+    },
+    // We mount bash history in an attempt to preserve history
+    // between container restarts
+    // (see https://code.visualstudio.com/remote/advancedcontainers/persist-bash-history)
+    "mounts": [
+        "source=projectname-bashhistory,target=/commandhistory,type=volume"
+    ],
+    "customizations": {
+        "vscode": {
+            "extensions": [
+                "codezombiech.gitignore",
+                "donjayamanne.githistory",
+                "donjayamanne.git-extension-pack",
+                "eamodio.gitlens",
+                "github.vscode-github-actions",
+                "ms-kubernetes-tools.vscode-kubernetes-tools",
+                "ms-python.vscode-pylance",
+                "sourcery.sourcery",
+                "streetsidesoftware.code-spell-checker",
+                "trond-snekvik.simple-rst",
+                "vivaxy.vscode-conventional-commits",
+                "yzhang.markdown-all-in-one"
+            ]
+        }
+    },
+    "postCreateCommand": {
+        "Install Python requirements": "pip3 install --user -r requirements.txt",
+        "Fix Volume Permissions": "sudo chown -R $(whoami): /commandhistory"
+    },
+    "forwardPorts": []
+}
diff --git a/.gitignore b/.gitignore
index 6e25b42..27fa4a4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,7 @@ dist/
 **/__pycache__/
 **/*.pickle
 tests/project-root/project-*/
+**/.DS_Store

 # temp files
 *~
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..801c6a7
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+poetry == 1.8.5
+pre-commit == 4.2.0
From 6ddca27390fb816e846b6dc34840e2d10f5a3728 Mon Sep 17 00:00:00 2001
From: Alan Christie
Date: Thu, 21 Aug 2025 17:12:14 +0000
Subject: [PATCH 30/57] build: No need for DinD

---
 .devcontainer/devcontainer.json | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index cc5a44e..c15be0a 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -1,17 +1,9 @@
 // For format details, see https://aka.ms/devcontainer.json. For config options, see the
 // README at: https://github.com/devcontainers/templates/tree/main/src/python
 {
-    "name": "Python 3",
+    "name": "WorkflowEngine Python 3.13",
     "image": "mcr.microsoft.com/devcontainers/python:1-3.13-bullseye",
     "features": {
-        "ghcr.io/devcontainers/features/docker-in-docker:2": {
-            "moby": true,
-            "azureDnsAutoDetection": true,
-            "installDockerBuildx": true,
-            "installDockerComposeSwitch": true,
-            "version": "latest",
-            "dockerDashComposeVersion": "v2"
-        },
         "ghcr.io/devcontainers/features/git:1": {
             "ppa": true,
             "version": "os-provided"
From bca4f9c6295ca02b58edef5b0870412c062c1907 Mon Sep 17 00:00:00 2001
From: Alan Christie
Date: Thu, 21 Aug 2025 17:12:32 +0000
Subject: [PATCH 31/57] docs: Docs on devcontainer

---
 README.rst | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/README.rst b/README.rst
index c86b422..1f471d3 100644
--- a/README.rst
+++ b/README.rst
@@ -38,10 +38,9 @@ The project's written in Python and uses `Poetry`_ for dependency and package
management.
We also use `pre-commit`_ to manage our pre-commit hooks, which
 rely on `black`_, `mypy`_, `pylint`_, amongst others.

-Create your environment::
+From within a VS Code `devcontainer`_] environment (recommended)::

-    poetry shell
-    poetry install --with dev
+    poetry install --with dev --sync
     pre-commit install -t commit-msg -t pre-commit

 And then start by running the pre-commit hooks to ensure you're starting with a
 _clean_ project::

@@ -51,9 +50,10 @@

 And then run the tests::

-    coverage run -m pytest
-    coverage report
+    poetry run coverage run -m pytest
+    poetry run coverage report

+.. _devcontainer: https://code.visualstudio.com/docs/devcontainers/containers
 .. _Poetry: https://python-poetry.org
 .. _pre-commit: https://pre-commit.com
 .. _black: https://github.com/psf/black
From b9e3f00e6d750fedbd6b23a4ade8eb33877d0230 Mon Sep 17 00:00:00 2001
From: Alan Christie
Date: Fri, 22 Aug 2025 16:34:21 +0000
Subject: [PATCH 32/57] docs: Doc tweak

---
 README.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.rst b/README.rst
index 1f471d3..086a375 100644
--- a/README.rst
+++ b/README.rst
@@ -38,7 +38,7 @@ The project's written in Python and uses `Poetry`_ for dependency and package
 management. We also use `pre-commit`_ to manage our pre-commit hooks, which
 rely on `black`_, `mypy`_, `pylint`_, amongst others.

-From within a VS Code `devcontainer`_] environment (recommended)::
+From within a VS Code `devcontainer`_ environment (recommended)::

     poetry install --with dev --sync
     pre-commit install -t commit-msg -t pre-commit
From 5a9903b3098b845d34cde9558c3447de402ccb83 Mon Sep 17 00:00:00 2001
From: Alan Christie
Date: Fri, 22 Aug 2025 16:38:47 +0000
Subject: [PATCH 33/57] feat: Some work on the refactored engine

---
 tests/test_workflow_engine_examples.py               |   4 +-
 .../example-smiles-to-file.yaml                      |   9 ++
 .../simple-python-molprops-with-options.yaml         |  56 +--------
 workflow/decoder.py                                  | 117 ++++++------------
 workflow/workflow_engine.py                          |  59 +++++----
 5 files changed, 88 insertions(+), 157 deletions(-)

diff --git a/tests/test_workflow_engine_examples.py b/tests/test_workflow_engine_examples.py
index d4c1a6f..641e608 100644
--- a/tests/test_workflow_engine_examples.py
+++ b/tests/test_workflow_engine_examples.py
@@ -130,6 +130,7 @@ def wait_for_workflow(
     # are the responsibility of the caller.
     attempts = 0
     done = False
+    response = None
     while not done:
         response, _ = da.get_running_workflow(running_workflow_id=r_wfid)
         if response["done"]:
@@ -141,6 +142,7 @@
             time.sleep(completion_poll_period_s)
     # When we get here the workflow must have finished (not timed-out),
     # and it must have passed (or failed) according to the caller's expectation.
+ assert response assert response["done"] assert response["success"] == expect_success @@ -400,7 +402,7 @@ def test_workflow_engine_simple_python_molprops_with_options(basic_engine): "simple-python-molprops-with-options", { "candidateMolecules": input_file_1, - "outputFile": output_file_1, + "clusteredMolecules": output_file_2, "rdkitPropertyName": "prop", "rdkitPropertyValue": 1.2, }, diff --git a/tests/workflow-definitions/example-smiles-to-file.yaml b/tests/workflow-definitions/example-smiles-to-file.yaml index b7dc70c..018d90c 100644 --- a/tests/workflow-definitions/example-smiles-to-file.yaml +++ b/tests/workflow-definitions/example-smiles-to-file.yaml @@ -13,3 +13,12 @@ steps: collection: workflow-engine-unit-test-jobs job: smiles-to-file version: "1.0.0" + variable-mapping: + - variable: outputFile + from-workflow: + variable: outputFile + - variable: smiles + from-workflow: + variable: smiles + out: + - outputFile diff --git a/tests/workflow-definitions/simple-python-molprops-with-options.yaml b/tests/workflow-definitions/simple-python-molprops-with-options.yaml index 3a3904a..9ef80e5 100644 --- a/tests/workflow-definitions/simple-python-molprops-with-options.yaml +++ b/tests/workflow-definitions/simple-python-molprops-with-options.yaml @@ -4,55 +4,6 @@ kind-version: "2025.2" name: python-workflow description: A simple python experimental workflow -# Some meaningless variables. -# Just to make sure the decoder accepts this. -# The Workflow engin eis not (yet) interested in this block. -variables: - inputs: - type: object - required: - - inputFile - properties: - inputFile: - title: Molecules to pick from - mime-types: - - squonk/x-smiles - type: file - seeds: - title: Molecules that are already picked - mime-types: - - squonk/x-smiles - type: file - multiple: true - outputs: - type: object - properties: - outputFile: - title: Output file - mime-types: - - chemical/x-csv - creates: '{{ outputFile }}' - type: file - options: - type: object - required: - - count - properties: - outputFile: - title: Output file name - type: string - pattern: "^[A-Za-z0-9_/\\.\\-]+$" - default: diverse.smi - count: - title: Number of molecules to pick - type: integer - minimum: 1 - threshold: - title: Similarity threshold - type: number - minimum: 0 - maximum: 1 - steps: - name: step1 description: Add column 1 @@ -60,6 +11,8 @@ steps: collection: workflow-engine-unit-test-jobs job: rdkit-molprops version: "1.0.0" + variables: + outputFile: step1.out.smi variable-mapping: - variable: inputFile from-workflow: @@ -70,9 +23,6 @@ steps: - variable: value from-workflow: variable: rdkitPropertyValue - - variable: outputFile - from-workflow: - variable: clusteredMolecules - name: step2 description: Add column 2 @@ -90,6 +40,6 @@ steps: variable: outputFile - variable: outputFile from-workflow: - variable: outputFile + variable: clusteredMolecules out: - outputFile diff --git a/workflow/decoder.py b/workflow/decoder.py index c1b3c87..d7e320f 100644 --- a/workflow/decoder.py +++ b/workflow/decoder.py @@ -4,7 +4,6 @@ """ import os -from pprint import pprint from typing import Any import jsonschema @@ -108,87 +107,41 @@ def get_step_input_variable_names( return variable_names -def set_step_variables( - *, - workflow: dict[str, Any], - inputs: list[dict[str, Any]], - outputs: list[dict[str, Any]], - step_outputs: dict[str, Any], - previous_step_outputs: list[dict[str, Any]], - workflow_variables: dict[str, Any], - step_name: str, -) -> dict[str, Any]: - """Prepare input- and output variables for the following step. 
-
-    Inputs are defined in step definition but their values may
-    come from previous step outputs.
-    """
-    assert workflow
-
-    result = {}
-
-    print("ssv: wf vars:")
-    pprint(workflow_variables)
-    print("ssv: inputs:")
-    pprint(inputs)
-    print("ssv: outputs", outputs)
-    print("ssv: step_outputs", step_outputs)
-    print("ssv: prev step outputs", previous_step_outputs)
-    print("ssv: step_name", step_name)
-
-    for item in inputs:
-        p_key = item["input"]
-        p_val = ""
-        val = item["from"]
-        if "workflow-input" in val.keys():
-            p_val = workflow_variables[val["workflow-input"]]
-            result[p_key] = p_val
-        elif "step" in val.keys():
-            # this links the variable to previous step output
-            if previous_step_outputs:
-                for out in previous_step_outputs:
-                    if out["output"] == val["output"]:
-                        # p_val = out["as"]
-                        if step_outputs["output"]:
-                            p_val = step_outputs["output"]
-                            print("\n!!!!!!!!!!!!!if clause!!!!!!!!!!!!!!!!!!!!!\n")
-                            print(p_val)
-                        else:
-                            # what do I need to do here??
-                            print("\n!!!!!!!!!!!!!else clause!!!!!!!!!!!!!!!!!!!!!\n")
-                            print(out)
-                            print(val)
-
-            # this bit handles multiple inputs: if a step
-            # requires input from multiple steps, add them to
-            # the list in result dict. this is the reason for
-            # mypy ignore statements, mypy doesn't understand
-            # redefinition
-            if p_key in result:
-                if not isinstance(result[p_key], set):
-                    result[p_key] = {result[p_key]}  # type: ignore [assignment]
-                result[p_key].add(p_val)  # type: ignore [attr-defined]
-            else:
-                result[p_key] = p_val
-        else:
-            if val["output"] in workflow_variables:
-                result[p_key] = workflow_variables[val["output"]]
-
-    for item in outputs:
-        p_key = item["output"]
-        # p_val = item["as"]
-        # p_val = step_outputs["output"]
-        p_val = "somefile.smi"
-        result[p_key] = p_val
-
-    # options = set_variables_from_options_for_step(
-    #     definition=workflow,
-    #     variables=workflow_variables,
-    #     step_name=step_name,
-    # )
-    #
-    # result |= options
-    return result
+def get_step_workflow_variable_mapping(
+    *, step: dict[str, Any]
+) -> list[tuple[str, str]]:
+    """Returns a list of workflow variable name to step variable name tuples
+    for the given step."""
+    variable_mapping: list[tuple[str, str]] = []
+    if "variable-mapping" in step:
+        for v_map in step["variable-mapping"]:
+            if "from-workflow" in v_map:
+                # Tuple is "from" -> "to"
+                variable_mapping.append(
+                    (v_map["from-workflow"]["variable"], v_map["variable"])
+                )
+    return variable_mapping
+
+
+def get_step_prior_step_variable_mapping(
+    *, step: dict[str, Any]
+) -> dict[str, list[tuple[str, str]]]:
+    """Returns list of tuples, indexed by prior step name, of source step variable name
+    to this step's variable name."""
+    variable_mapping: dict[str, list[tuple[str, str]]] = {}
+    if "variable-mapping" in step:
+        for v_map in step["variable-mapping"]:
+            if "from-step" in v_map:
+                step_name = v_map["from-step"]["name"]
+                step_variable = v_map["from-step"]["variable"]
+                # Tuple is "from" -> "to"
+                if step_name in variable_mapping:
+                    variable_mapping[step_name].append(
+                        (step_variable, v_map["variable"])
+                    )
+                else:
+                    variable_mapping[step_name] = [(step_variable, v_map["variable"])]
+    return variable_mapping


 def get_step_replicator(*, step: dict[str, Any]) -> str | Any:
diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py
index 66d3e3e..71ee0ac 100644
--- a/workflow/workflow_engine.py
+++ b/workflow/workflow_engine.py
@@ -41,8 +41,9 @@

 from .decoder import (
     get_step_input_variable_names,
+    get_step_prior_step_variable_mapping,
     get_step_replicator,
-    set_step_variables,
+
get_step_workflow_variable_mapping,
 )

 _LOGGER: logging.Logger = logging.getLogger(__name__)
@@ -485,16 +486,6 @@ def _validate_step_command(

         print("final prev step outputs", previous_step_outputs)

-        step_vars = set_step_variables(
-            workflow=workflow,
-            workflow_variables=all_variables,
-            inputs=inputs,
-            outputs=outputs,
-            step_outputs=step_outputs,
-            previous_step_outputs=previous_step_outputs,
-            step_name=running_wf_step["name"],
-        )
-        all_variables |= step_vars
         _LOGGER.debug(
             "Index %s (%s) all_variables=%s",
             our_step_index,
@@ -533,27 +524,52 @@ def _launch(
         wf_step_data, _ = self._wapi_adapter.get_workflow_steps_driving_this_step(
             running_workflow_step_id=rwfs_id,
         )
-        print("wf_step_data")
-        pprint(wf_step_data)
         assert wf_step_data["caller_step_index"] >= 0
         our_step_index: int = wf_step_data["caller_step_index"]

         print("step in _launch:", step_name)
         pprint(step)

+        # Workflow variables set by the user...
+        rwf_variables: dict[str, Any] = rwf.get("variables", {})
+
         # Now check the step command can be executed
         # (by trying to decode the Job command).
+        # Before we do this we have to construct the variable map
+        # for this step.
+        #
-        # We pass in the workflow variables (these are provided by the user
-        # when the workflow is run. All workflow variables will be present in the
-        # running workflow record)
-        running_workflow_variables: dict[str, Any] | None = rwf.get("variables")
+        # We start with any variables provided in the step specification
+        all_variables: dict[str, Any] = step["specification"].get("variables", {})
+        # We now have to iterate through the step's variable mapping block.
+        # This will name any workflow variables (from)
+        # and their corresponding step variable (to).
+        step_wf_v_map: list[tuple[str, str]] = get_step_workflow_variable_mapping(
+            step=step
+        )
+        for from_to in step_wf_v_map:
+            all_variables[from_to[1]] = rwf_variables[from_to[0]]
+        # We must now apply variables from prior steps identified in the
+        # current step's mapping block. We're given a map indexed by
+        # prior step name that's a list of tuples naming the prior step's
+        # variable (from) and the current step variable (to).
+        step_prior_v_map: dict[str, list[tuple[str, str]]] = (
+            get_step_prior_step_variable_mapping(step=step)
+        )
+        for prior_step_name, v_map in step_prior_v_map.items():
+            # Load the prior step
+            prior_step, _ = self._wapi_adapter.get_running_workflow_step_by_name(
+                name=prior_step_name, running_workflow_id=rwf_id
+            )
+            # Get its variables and copy the value
+            for from_to in v_map:
+                all_variables[from_to[1]] = prior_step["variables"][from_to[0]]
+
         error_or_variables: str | dict[str, Any] = self._validate_step_command(
             running_workflow_step_id=rwfs_id,
             step=step,
             workflow_steps=wf_step_data["steps"],
             our_step_index=our_step_index,
-            running_workflow_variables=running_workflow_variables,
+            running_workflow_variables=all_variables,
         )
         if isinstance(error_or_variables, str):
             error_msg = error_or_variables
@@ -570,13 +586,14 @@

         _LOGGER.info(
             "Launching step: RunningWorkflow=%s RunningWorkflowStep=%s step=%s"
-            " (name=%s project=%s, variables=%s)",
+            " variables=%s name=%s project=%s, (all_variables=%s)",
             rwf_id,
             rwfs_id,
             step_name,
+            variables,
             rwf["name"],
             project_id,
-            variables,
+            all_variables,
         )

         # When we launch a step we need to identify all the prior steps in the workflow,
@@ -593,7 +610,7 @@
         prior_steps: list[str] = []
         if our_step_index > 0:
             # We need the step ID of the prior step.
- prior_step_name: str = wf_step_data["steps"][our_step_index - 1]["name"] + prior_step_name = wf_step_data["steps"][our_step_index - 1]["name"] step_response, _ = self._wapi_adapter.get_running_workflow_step_by_name( name=prior_step_name, running_workflow_id=rwf_id, From f3bbc6da45f9e4bccc1a54117f24880ffe26a3c6 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Wed, 27 Aug 2025 13:46:28 +0000 Subject: [PATCH 34/57] fix: More fixes for engine --- tests/test_workflow_engine_examples.py | 2 ++ .../test_workflow_validator_for_run_level.py | 1 + workflow/decoder.py | 4 +-- workflow/workflow_validator.py | 29 ++++++++++++++----- 4 files changed, 27 insertions(+), 9 deletions(-) diff --git a/tests/test_workflow_engine_examples.py b/tests/test_workflow_engine_examples.py index 641e608..3aaf046 100644 --- a/tests/test_workflow_engine_examples.py +++ b/tests/test_workflow_engine_examples.py @@ -327,6 +327,7 @@ def test_workflow_engine_simple_python_molprops(basic_engine): assert project_file_exists(output_file_2) +@pytest.mark.skip("Unrealistic test") def test_workflow_engine_simple_python_molprops_with_options(basic_engine): # Arrange md, da = basic_engine @@ -423,6 +424,7 @@ def test_workflow_engine_simple_python_molprops_with_options(basic_engine): assert project_file_exists(output_file_2) +@pytest.mark.skip("Unrealistic test") def test_workflow_engine_simple_python_fanout(basic_engine): # Arrange md, da = basic_engine diff --git a/tests/test_workflow_validator_for_run_level.py b/tests/test_workflow_validator_for_run_level.py index ee28a15..175d828 100644 --- a/tests/test_workflow_validator_for_run_level.py +++ b/tests/test_workflow_validator_for_run_level.py @@ -62,6 +62,7 @@ def test_validate_example_smiles_to_file(): error = WorkflowValidator.validate( level=ValidationLevel.RUN, workflow_definition=workflow, + variables={"smiles": "C", "outputFile": "blob.smi"}, ) # Assert diff --git a/workflow/decoder.py b/workflow/decoder.py index d7e320f..10d6d4e 100644 --- a/workflow/decoder.py +++ b/workflow/decoder.py @@ -137,10 +137,10 @@ def get_step_prior_step_variable_mapping( # Tuple is "from" -> "to" if step_name in variable_mapping: variable_mapping[step_name].append( - (step_variable, v_map["variable"]) + (v_map["variable"], step_variable) ) else: - variable_mapping[step_name] = [(step_variable, v_map["variable"])] + variable_mapping[step_name] = [(v_map["variable"], step_variable)] return variable_mapping diff --git a/workflow/workflow_validator.py b/workflow/workflow_validator.py index 773f7e3..10a2482 100644 --- a/workflow/workflow_validator.py +++ b/workflow/workflow_validator.py @@ -5,8 +5,9 @@ from typing import Any from .decoder import ( - get_step_input_variable_names, get_step_output_variable_names, + get_step_prior_step_variable_mapping, + get_step_workflow_variable_mapping, get_steps, get_workflow_variable_names, validate_schema, @@ -113,22 +114,36 @@ def _validate_tag_level( error_msg=[f"Duplicate step names found: {', '.join(duplicate_names)}"], ) # For each 'replicating' step the replicating variable - # must be declared in the step. + # must be declared in the step - which is either a workflow variable + # or a prior step variable. 
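+                # An illustrative (assumed) example, mirroring the
+                # simple-python-fanout.yaml definition: a step declaring
+                #   replicate: using: variable: inputFile
+                # must also declare "inputFile" as the "variable" (destination)
+                # name of one of its variable-mapping entries, i.e. a
+                # "from-workflow" or "from-step" mapping.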
for step in get_steps(workflow_definition):
             if (
                 replicate_using_input := step.get("replicate", {})
                 .get("using", {})
                 .get("variable")
             ):
-                step_name = step["name"]
-                if replicate_using_input not in get_step_input_variable_names(
-                    workflow_definition, step_name
-                ):
+                found: bool = False
+                for variable_map in get_step_workflow_variable_mapping(step=step):
+                    if replicate_using_input == variable_map[0]:
+                        found = True
+                        break
+                if not found:
+                    for (
+                        step_name,
+                        variable_map_list,
+                    ) in get_step_prior_step_variable_mapping(step=step).items():
+                        for variable_map in variable_map_list:
+                            if replicate_using_input == variable_map[0]:
+                                found = True
+                                break
+                        if found:
+                            break
+                if not found:
                     return ValidationResult(
                         error_num=7,
                         error_msg=[
                             "Replicate input variable is not declared:"
-                            f" {replicate_using_input} (step={step_name})"
+                            f" {replicate_using_input} (step={step['name']})"
                         ],
                     )
From 880944009d504ce456be0125c43cd14720b58a79 Mon Sep 17 00:00:00 2001
From: Alan Christie
Date: Wed, 27 Aug 2025 15:12:04 +0000
Subject: [PATCH 35/57] fix: More work on the decoder

---
 tests/test_decoder.py                                |   2 +-
 tests/test_workflow_engine_examples.py               |  59 ++---
 tests/wapi_adapter.py                                |   6 -
 .../simple-python-molprops.yaml                      |   1 +
 workflow/decoder.py                                  |   4 +-
 workflow/workflow_engine.py                          | 228 ++++--------------
 6 files changed, 65 insertions(+), 235 deletions(-)

diff --git a/tests/test_decoder.py b/tests/test_decoder.py
index 8aae4cb..46c05d9 100644
--- a/tests/test_decoder.py
+++ b/tests/test_decoder.py
@@ -142,7 +142,7 @@ def test_validate_schema_for_step_specification_variable_names():
     assert error is None


-@pytest.mark.skip(reason="DO not support combination atm")
+@pytest.mark.skip(reason="We do not support combination atm")
 def test_validate_schema_for_simple_python_parallel():
     # Arrange

diff --git a/tests/test_workflow_engine_examples.py b/tests/test_workflow_engine_examples.py
index 641e608..3aaf046 100644
--- a/tests/test_workflow_engine_examples.py
+++ b/tests/test_workflow_engine_examples.py
@@ -208,37 +208,6 @@ def test_workflow_engine_example_smiles_to_file(basic_engine):
     assert project_file_exists(output_file)


-@pytest.mark.skip("Unrealistic test")
-def test_workflow_engine_shortcut_example_1(basic_engine):
-    # Arrange
-    md, da = basic_engine
-
-    # Make sure files that should be generated by the test
-    # do not exist before we run the test.
-    output_file_a = "a.sdf"
-    assert not project_file_exists(output_file_a)
-    output_file_b = "b.sdf"
-    assert not project_file_exists(output_file_b)
-
-    # Act
-    r_wfid = start_workflow(md, da, "shortcut-example-1", {})
-
-    # Assert
-    wait_for_workflow(da, r_wfid)
-    # Additional, detailed checks...
- # Check we only have one RunningWorkflowStep, and it succeeded - response = da.get_running_workflow_steps(running_workflow_id=r_wfid) - assert response["count"] == 2 - assert response["running_workflow_steps"][0]["done"] - assert response["running_workflow_steps"][0]["success"] - assert response["running_workflow_steps"][1]["done"] - assert response["running_workflow_steps"][1]["success"] - # This test should generate a file in the simulated project directory - assert project_file_exists(output_file_a) - assert project_file_exists(output_file_b) - - -@pytest.mark.skip("temporary skip") def test_workflow_engine_simple_python_molprops(basic_engine): # Arrange md, da = basic_engine @@ -249,13 +218,7 @@ def test_workflow_engine_simple_python_molprops(basic_engine): output="step1.out.smi", ) - # Make sure files that should be generated by the test - # do not exist before we run the test. - output_file_1 = "step1.out.smi" - assert not project_file_exists(output_file_1) - output_file_2 = "step2.out.smi" - assert not project_file_exists(output_file_2) - # And create the test's input file. + # Create the test's input file. input_file_1 = "input1.smi" input_file_1_content = """O=C(CSCc1ccc(Cl)s1)N1CCC(O)CC1 RDKit 3D @@ -307,9 +270,22 @@ def test_workflow_engine_simple_python_molprops(basic_engine): ) as input_file: input_file.writelines(input_file_1_content) + # Make sure files that should be generated by the test + # do not exist before we run the test. + output_file_1 = "results.smi" + assert not project_file_exists(output_file_1) + output_file_2 = "clustered-results.smi" + assert not project_file_exists(output_file_2) + # Act r_wfid = start_workflow( - md, da, "simple-python-molprops", {"candidateMolecules": input_file_1} + md, + da, + "simple-python-molprops", + { + "candidateMolecules": input_file_1, + "clusteredMolecules": "clustered-results.smi", + }, ) # Assert @@ -322,12 +298,10 @@ def test_workflow_engine_simple_python_molprops(basic_engine): assert response["running_workflow_steps"][0]["success"] assert response["running_workflow_steps"][1]["done"] assert response["running_workflow_steps"][1]["success"] - # This test should generate a file in the simulated project directory - assert project_file_exists(output_file_1) + # This test should generate the expected file in the simulated project directory assert project_file_exists(output_file_2) -@pytest.mark.skip("Unrealistic test") def test_workflow_engine_simple_python_molprops_with_options(basic_engine): # Arrange md, da = basic_engine @@ -424,7 +398,6 @@ def test_workflow_engine_simple_python_molprops_with_options(basic_engine): assert project_file_exists(output_file_2) -@pytest.mark.skip("Unrealistic test") def test_workflow_engine_simple_python_fanout(basic_engine): # Arrange md, da = basic_engine diff --git a/tests/wapi_adapter.py b/tests/wapi_adapter.py index 1f1f6d0..c283ee1 100644 --- a/tests/wapi_adapter.py +++ b/tests/wapi_adapter.py @@ -202,7 +202,6 @@ def get_running_workflow_step_by_name( for rwfs_id, record in running_workflow_step.items(): if record["running_workflow"]["id"] != running_workflow_id: continue - print("running wf step by name, record:", record) if record["name"] == name and record["replica"] == replica: response = record response["id"] = rwfs_id @@ -425,11 +424,6 @@ def get_running_workflow_step_output_values_for_output( mock_output = Unpickler(pickle_file).load() UnitTestWorkflowAPIAdapter.lock.release() - print("mock output", mock_output) - print("step", step) - print("step_name", step_name) - # mock output 
{'first-step': {'output_variable': 'results', 'output': ['chunk_1.smi', 'chunk_2.smi']}} - if step_name not in mock_output: return {"output": []}, 0 # The record's output variable must match (there's only one record per step atm) diff --git a/tests/workflow-definitions/simple-python-molprops.yaml b/tests/workflow-definitions/simple-python-molprops.yaml index 39fb6cd..ba0d1d0 100644 --- a/tests/workflow-definitions/simple-python-molprops.yaml +++ b/tests/workflow-definitions/simple-python-molprops.yaml @@ -14,6 +14,7 @@ steps: variables: name: "col1" value: 123 + outputFile: "results.smi" variable-mapping: - variable: inputFile from-workflow: diff --git a/workflow/decoder.py b/workflow/decoder.py index 10d6d4e..d7e320f 100644 --- a/workflow/decoder.py +++ b/workflow/decoder.py @@ -137,10 +137,10 @@ def get_step_prior_step_variable_mapping( # Tuple is "from" -> "to" if step_name in variable_mapping: variable_mapping[step_name].append( - (v_map["variable"], step_variable) + (step_variable, v_map["variable"]) ) else: - variable_mapping[step_name] = [(v_map["variable"], step_variable)] + variable_mapping[step_name] = [(step_variable, v_map["variable"])] return variable_mapping diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index 71ee0ac..dcb2047 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -24,7 +24,6 @@ import logging import sys -from pprint import pprint from typing import Any, Dict, Optional from decoder.decoder import TextEncoding, decode @@ -317,11 +316,11 @@ def _handle_pod_message(self, msg: PodMessage) -> None: def _validate_step_command( self, *, + running_workflow_id: str, running_workflow_step_id: str, step: dict[str, Any], - workflow_steps: list[dict[str, Any]], our_step_index: int, - running_workflow_variables: dict[str, Any] | None = None, + running_workflow_variables: dict[str, Any], ) -> str | dict[str, Any]: """Returns an error message if the command isn't valid. Without a message we return all the variables that were (successfully) @@ -351,143 +350,42 @@ def _validate_step_command( str(job), ) - # The step's 'specification' is a string - pass it directly to the - # launcher along with any (optional) 'workflow variables'. The launcher - # will apply the variables to the step's Job command but we need to handle - # any launch problems. The validator should have checked to ensure that - # variable expansion will work, but we must prepare for the unexpected. - # - # What the engine has to do here is make sure that the Job - # that's about to be launched has all its configuration requirements - # satisfied (inputs, outputs and options). Basically we must ensure - # that the Job definition's 'command' can be compiled by applying - # the available variables. - # - # To prevent launcher errors relating to decoding we get the command ourselves - # and then apply the current set of variables. And we use the JobDecoder's - # 'decode()' method to do this. It returns a tuple (str and boolean). - # If the boolean is True then the command can be compiled - # (i.e. it has no missing variables) and the launcher should not complain - # about the command (as we'll pass the same variables to it. - # If the returned boolean is False then we can expect the returned str - # to contain an error message. - # - # The full set of step variables can be obtained - # (in ascending order of priority) from... - # - # 1. The Job Step Specification - # 2. 
The RunningWorkflow
-        #
-        # If variable 'x' is defined in all three then the RunningWorkflow's
-        # value must be used.
-
-        # 1. Get any variables from the step specification.
-        all_variables = step_spec.pop("variables") if "variables" in step_spec else {}
-        # 2. Merge running workflow variables on top of these
-        if running_workflow_variables:
-            all_variables |= running_workflow_variables
-
-        # We must always process the current step's variables
-        _LOGGER.debug("Validating step %s (%s)", step, running_workflow_step_id)
-        inputs = step.get("inputs", [])
-        outputs = step.get("outputs", [])
-        previous_step_outputs = []
-        _LOGGER.debug(
-            "We are at workflow step index %d (%s)",
-            our_step_index,
-            running_workflow_step_id,
-        )
-
-        # resolve all previous steps
-        previous_step_names = set()
-        if our_step_index > 0:
-            print("prev step inputs", inputs)
-            for inp in inputs:
-                if step_name := inp["from"].get("step", None):
-                    previous_step_names.add(step_name)
-
-        for step in workflow_steps:
-            if step["name"] in previous_step_names:
-                previous_step_outputs.extend(step.get("outputs", []))
+        # Start with any variables provided in the step's specification.
+        # This will be our "all variables" map for this step,
+        # which we will add to (and maybe even over-write)...
+        all_variables: dict[str, Any] = step["specification"].get("variables", {})

+        # Next, we iterate through the step's "variable mapping" block.
+        # This tells us all the variables that are set from either the
+        # 'workflow' or 'a prior step'.

+        # Start with any workflow variables in the step.
+        # This will be a list of tuples of "in" and "out" variable names.
+        # "in" variables are workflow variables, and "out" variables
+        # are expected Job variables. We use this to add variables
+        # to the "all variables" map.
+        for from_to in get_step_workflow_variable_mapping(step=step):
+            all_variables[from_to[1]] = running_workflow_variables[from_to[0]]
+
+        # Now we apply variables from the "variable mapping" block
+        # related to values used in prior steps. The decoder gives
+        # us a map indexed by prior step name that's a list of "in" "out"
+        # tuples as above.
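+        # For example (illustrative values only, taken from the
+        # simple-python-molprops test workflow): a mapping entry of
+        #   {"variable": "inputFile",
+        #    "from-step": {"name": "step1", "variable": "outputFile"}}
+        # yields {"step1": [("outputFile", "inputFile")]}, and if step1 ran
+        # with {"outputFile": "step1.out.smi"} we would end up setting
+        #   all_variables["inputFile"] = "step1.out.smi"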
+        step_prior_v_map: dict[str, list[tuple[str, str]]] = (
+            get_step_prior_step_variable_mapping(step=step)
+        )
+        for prior_step_name, v_map in step_prior_v_map.items():
+            # Retrieve the prior "running" step
+            # in order to get the variables that were set there...
+            prior_step, _ = self._wapi_adapter.get_running_workflow_step_by_name(
+                name=prior_step_name, running_workflow_id=running_workflow_id
+            )
+            # Copy "in" value to "out"...
+            for from_to in v_map:
+                all_variables[from_to[1]] = prior_step["variables"][from_to[0]]

-        # there should probably be an easier way to access this
-        running_wf_step, _ = self._wapi_adapter.get_running_workflow_step(
-            running_workflow_step_id=running_workflow_step_id
-        )
-        running_wf_id = running_wf_step["running_workflow"]["id"]
-        running_wf, _ = self._wapi_adapter.get_running_workflow(
-            running_workflow_id=running_wf_id
-        )
-        print("running wf")
-        pprint(running_wf)
-        workflow_id = running_wf["workflow"]["id"]
-        workflow, _ = self._wapi_adapter.get_workflow(workflow_id=workflow_id)
-
-        print("workflow")
-        pprint(workflow)
-
-        # for step in workflow["steps"]:
-        #     if step["name"] in previous_step_names:
-
-        previous_step_id = None
-        for name in previous_step_names:
-            result, _ = self._wapi_adapter.get_running_workflow_step_by_name(
-                name=name, running_workflow_id=running_wf_id, replica=0
-            )
-            print("by name results", result)
-            print("by name results, vars", result["variables"])
-            print("by name results, od", result["id"])
-            previous_step_id = result["id"]
-            # if name == 'first-step':
-            #     previous_step_id = result["id"]
-
-        # don't understand how this is structured
-        print("prev steps", previous_step_names)
-        print("outputs", outputs)
-        print()
-        step_outputs: dict[str, Any] = {"output": []}
-        if previous_step_id:
-            for output in outputs:
-                for k, v in output.items():
-                    print("sending params to output mock", k, v)
-                    try:
-                        step_outputs, _ = (
-                            self._wapi_adapter.get_running_workflow_step_output_values_for_output(
-                                running_workflow_step_id=previous_step_id,
-                                output_variable=v,  # foraeach outputs key
-                            )
-                        )
-
-                        print("mockputs", running_workflow_step_id, step_outputs)
-                    except AssertionError:
-                        print("no output for step", running_workflow_step_id, k, v)
-
-        print("final prev step outputs", previous_step_outputs)

         _LOGGER.debug(
-            "Index %s (%s) all_variables=%s",
+            "Index %s (%s) workflow_variables=%s",
             our_step_index,
             running_workflow_step_id,
             all_variables,
         )
@@ -533,49 +426,19 @@ def _launch(

-        print("step in _launch:", step_name)
-        pprint(step)
+        # A major piece of work is to get ourselves into a position
+        # that allows us to check the step command can be executed.
+        # We do this by compiling a map of variables we believe the step needs.

-        # Workflow variables set by the user...
+        # Get all the workflow variables that were provided
+        # by the user when they "ran" the workflow...
         rwf_variables: dict[str, Any] = rwf.get("variables", {})
-
-        # Now check the step command can be executed
-        # (by trying to decode the Job command).
-        # Before we do this we have to construct the variable map
-        # for this step.
-        #
-        # We start with any variables provided in the step specification
-        all_variables: dict[str, Any] = step["specification"].get("variables", {})
-        # We now have to iterate through the step's variable mapping block.
-        # This will name any workflow variables (from)
-        # and their corresponding step variable (to).
-        step_wf_v_map: list[tuple[str, str]] = get_step_workflow_variable_mapping(
-            step=step
-        )
-        for from_to in step_wf_v_map:
-            all_variables[from_to[1]] = rwf_variables[from_to[0]]
-        # We must now apply variables from prior steps identified in the
-        # current step's mapping block.
We're given a map indexed by
-        # prior step name that's a list of tuples naming the prior step's
-        # variable (from) and the current step variable (to).
-        step_prior_v_map: dict[str, list[tuple[str, str]]] = (
-            get_step_prior_step_variable_mapping(step=step)
-        )
-        for prior_step_name, v_map in step_prior_v_map.items():
-            # Load the prior step
-            prior_step, _ = self._wapi_adapter.get_running_workflow_step_by_name(
-                name=prior_step_name, running_workflow_id=rwf_id
-            )
-            # Get its variables and copy the value
-            for from_to in v_map:
-                all_variables[from_to[1]] = prior_step["variables"][from_to[0]]
-
         error_or_variables: str | dict[str, Any] = self._validate_step_command(
+            running_workflow_id=rwf_id,
             running_workflow_step_id=rwfs_id,
             step=step,
-            workflow_steps=wf_step_data["steps"],
             our_step_index=our_step_index,
-            running_workflow_variables=all_variables,
+            running_workflow_variables=rwf_variables,
         )
         if isinstance(error_or_variables, str):
             error_msg = error_or_variables
@@ -580,22 +448,22 @@
         project_id = rwf["project"]["id"]

         variables: dict[str, Any] = error_or_variables
-        print("variables", variables)
-        # find out if and by which parameter this step should be replicated
-        replicator = get_step_replicator(step=step)

         _LOGGER.info(
             "Launching step: RunningWorkflow=%s RunningWorkflowStep=%s step=%s"
-            " variables=%s name=%s project=%s, (all_variables=%s)",
+            " variables=%s name=%s project=%s, (variables=%s)",
             rwf_id,
             rwfs_id,
             step_name,
             variables,
             rwf["name"],
             project_id,
-            all_variables,
+            variables,
         )

+        # Is this a replicating step?
+        replicator = get_step_replicator(step=step)
+
         # When we launch a step we need to identify all the prior steps in the workflow,
@@ -629,9 +497,6 @@
         # 'running_workflow_step_inputs'
         # A list of Job input variable names

-        print("variables")
-        pprint(variables)
-
         inputs: list[str] = []
         inputs.extend(iter(get_step_input_variable_names(wf, step_name)))
         if replicator:
@@ -643,9 +508,6 @@
         else:
             single_step_variables = [variables]

-        print("single step variables")
-        pprint(single_step_variables)
-
         for params in single_step_variables:
             lp: LaunchParameters = LaunchParameters(
                 project_id=project_id,
From 77e3cff78fcaa318c7cbbbca20a83fa5fd0de2b8 Mon Sep 17 00:00:00 2001
From: Alan Christie
Date: Wed, 27 Aug 2025 16:00:30 +0000
Subject: [PATCH 36/57] fix: Variable mapping now exposed as a Translation
 dataclass

---
 workflow/decoder.py            | 34 ++++++++++++++++++++++------------
 workflow/workflow_engine.py    | 11 ++++++-----
 workflow/workflow_validator.py | 10 +++++-----
 3 files changed, 33 insertions(+), 22 deletions(-)

diff --git a/workflow/decoder.py b/workflow/decoder.py
index d7e320f..78fb211 100644
--- a/workflow/decoder.py
+++ b/workflow/decoder.py
@@ -4,6 +4,7 @@
 """

 import os
+from dataclasses import dataclass
 from typing import Any

 import jsonschema
@@ -23,6 +24,14 @@
 assert _WORKFLOW_SCHEMA


+@dataclass
+class Translation:
+    """A source ("in_") to destination ("out") variable map."""
+
+    in_: str
+    out: str
+
+
 def validate_schema(workflow: dict[str, Any]) -> str | None:
     """Checks the Workflow Definition against the built-in schema.
     If there's an error the error text is returned, otherwise None.
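+# A usage sketch (illustrative only): given a "from-workflow" mapping entry
+# such as {"variable": "inputFile",
+#          "from-workflow": {"variable": "candidateMolecules"}}
+# get_step_workflow_variable_mapping() now yields
+#   [Translation(in_="candidateMolecules", out="inputFile")]
+# rather than the earlier ("candidateMolecules", "inputFile") tuple.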
@@ -107,28 +116,27 @@ def get_step_input_variable_names(
     return variable_names


-def get_step_workflow_variable_mapping(
-    *, step: dict[str, Any]
-) -> list[tuple[str, str]]:
+def get_step_workflow_variable_mapping(*, step: dict[str, Any]) -> list[Translation]:
     """Returns a list of workflow variable name to step variable name tuples
     for the given step."""
-    variable_mapping: list[tuple[str, str]] = []
+    variable_mapping: list[Translation] = []
     if "variable-mapping" in step:
         for v_map in step["variable-mapping"]:
             if "from-workflow" in v_map:
-                # Tuple is "from" -> "to"
                 variable_mapping.append(
-                    (v_map["from-workflow"]["variable"], v_map["variable"])
+                    Translation(
+                        in_=v_map["from-workflow"]["variable"], out=v_map["variable"]
+                    )
                 )
     return variable_mapping


 def get_step_prior_step_variable_mapping(
     *, step: dict[str, Any]
-) -> dict[str, list[tuple[str, str]]]:
-    """Returns list of tuples, indexed by prior step name,
-    of source step variable name to this step's variable name."""
-    variable_mapping: dict[str, list[tuple[str, str]]] = {}
+) -> dict[str, list[Translation]]:
+    """Returns list of translate objects, indexed by prior step name,
+    that identify source step variable name to this step's variable name."""
+    variable_mapping: dict[str, list[Translation]] = {}
     if "variable-mapping" in step:
         for v_map in step["variable-mapping"]:
             if "from-step" in v_map:
@@ -137,10 +145,12 @@
                 # Tuple is "from" -> "to"
                 if step_name in variable_mapping:
                     variable_mapping[step_name].append(
-                        (step_variable, v_map["variable"])
+                        Translation(in_=step_variable, out=v_map["variable"])
                     )
                 else:
-                    variable_mapping[step_name] = [(step_variable, v_map["variable"])]
+                    variable_mapping[step_name] = [
+                        Translation(in_=step_variable, out=v_map["variable"])
+                    ]
     return variable_mapping
diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py
index dcb2047..fbf200f 100644
--- a/workflow/workflow_engine.py
+++ b/workflow/workflow_engine.py
@@ -39,6 +39,7 @@
 )

 from .decoder import (
+    Translation,
     get_step_input_variable_names,
     get_step_prior_step_variable_mapping,
     get_step_replicator,
@@ -364,14 +365,14 @@
         # "in" variables are workflow variables, and "out" variables
         # are expected Job variables. We use this to add variables
         # to the "all variables" map.
-        for from_to in get_step_workflow_variable_mapping(step=step):
-            all_variables[from_to[1]] = running_workflow_variables[from_to[0]]
+        for tr in get_step_workflow_variable_mapping(step=step):
+            all_variables[tr.out] = running_workflow_variables[tr.in_]

         # Now we apply variables from the "variable mapping" block
         # related to values used in prior steps. The decoder gives
         # us a map indexed by prior step name that's a list of "in" "out"
         # tuples as above.
-        step_prior_v_map: dict[str, list[tuple[str, str]]] = (
+        step_prior_v_map: dict[str, list[Translation]] = (
             get_step_prior_step_variable_mapping(step=step)
         )
         for prior_step_name, v_map in step_prior_v_map.items():
@@ -381,8 +382,8 @@
             name=prior_step_name, running_workflow_id=running_workflow_id
         )
             # Copy "in" value to "out"...
- for from_to in v_map: - all_variables[from_to[1]] = prior_step["variables"][from_to[0]] + for tr in v_map: + all_variables[tr.out] = prior_step["variables"][tr.in_] _LOGGER.debug( "Index %s (%s) workflow_variables=%s", diff --git a/workflow/workflow_validator.py b/workflow/workflow_validator.py index 10a2482..1d94973 100644 --- a/workflow/workflow_validator.py +++ b/workflow/workflow_validator.py @@ -123,17 +123,17 @@ def _validate_tag_level( .get("variable") ): found: bool = False - for variable_map in get_step_workflow_variable_mapping(step=step): - if replicate_using_input == variable_map[0]: + for translation in get_step_workflow_variable_mapping(step=step): + if replicate_using_input == translation.out: found = True break if not found: for ( step_name, - variable_map_list, + translations, ) in get_step_prior_step_variable_mapping(step=step).items(): - for variable_map in variable_map_list: - if replicate_using_input == variable_map[0]: + for translation in translations: + if replicate_using_input == translation.out: found = True break if found: From 8f1c09865d15d6bf74cb6f43742fa5b094223ea7 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Thu, 28 Aug 2025 13:04:31 +0000 Subject: [PATCH 37/57] fix: Major refactoring of logic (for new launch/workflow API) --- tests/instance_launcher.py | 27 +- tests/test_test_instance_launcher.py | 28 +- tests/test_workflow_engine_examples.py | 1 + .../simple-python-fanout.yaml | 6 +- workflow/workflow_abc.py | 65 +---- workflow/workflow_engine.py | 241 ++++++------------ 6 files changed, 121 insertions(+), 247 deletions(-) diff --git a/tests/instance_launcher.py b/tests/instance_launcher.py index 3256c0b..c938e9e 100644 --- a/tests/instance_launcher.py +++ b/tests/instance_launcher.py @@ -68,7 +68,7 @@ def __init__( elif os.path.isdir(file_path): shutil.rmtree(file_path) - def launch(self, launch_parameters: LaunchParameters) -> LaunchResult: + def launch(self, *, launch_parameters: LaunchParameters) -> LaunchResult: assert launch_parameters assert launch_parameters.project_id == TEST_PROJECT_ID assert launch_parameters.specification @@ -76,10 +76,24 @@ def launch(self, launch_parameters: LaunchParameters) -> LaunchResult: os.makedirs(EXECUTION_DIRECTORY, exist_ok=True) - # Create an Instance record (and dummy Task ID) - response = self._api_adapter.create_instance( - running_workflow_step_id=launch_parameters.running_workflow_step_id + # Create a running workflow step + assert launch_parameters.running_workflow_id + assert launch_parameters.step_name + response, _ = self._api_adapter.create_running_workflow_step( + running_workflow_id=launch_parameters.running_workflow_id, + step=launch_parameters.step_name, + replica=launch_parameters.step_replication_number, ) + assert "id" in response + rwfs_id: str = response["id"] + # And add the variables we've been provided with + if launch_parameters.variables: + _ = self._api_adapter.set_running_workflow_step_variables( + running_workflow_step_id=rwfs_id, variables=launch_parameters.variables + ) + + # Create an Instance record (and dummy Task ID) + response = self._api_adapter.create_instance(running_workflow_step_id=rwfs_id) instance_id = response["id"] task_id = "task-00000000-0000-0000-0000-000000000001" @@ -96,8 +110,8 @@ def launch(self, launch_parameters: LaunchParameters) -> LaunchResult: # The command may not need any, but we do the decoding anyway. 
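        # (A note on the contract assumed here: decode() returns a 2-tuple of
        # rendered text and a success flag; when the flag is False the text is
        # an error message rather than a usable command.)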
decoded_command, status = job_decoder.decode( job["command"], - launch_parameters.specification_variables, - launch_parameters.running_workflow_step_id, + launch_parameters.variables, + rwfs_id, TextEncoding.JINJA2_3_0, ) print(f"Decoded command: {decoded_command}") @@ -129,6 +143,7 @@ def launch(self, launch_parameters: LaunchParameters) -> LaunchResult: self._msg_dispatcher.send(pod_message) return LaunchResult( + running_workflow_step_id=rwfs_id, instance_id=instance_id, task_id=task_id, command=" ".join(subprocess_cmd), diff --git a/tests/test_test_instance_launcher.py b/tests/test_test_instance_launcher.py index 38b8e06..33a34b1 100644 --- a/tests/test_test_instance_launcher.py +++ b/tests/test_test_instance_launcher.py @@ -33,23 +33,18 @@ def test_launch_nop(basic_launcher): project_id=TEST_PROJECT_ID, variables={}, ) - response, _ = utaa.create_running_workflow_step( - running_workflow_id=response["id"], step="step-1" - ) - rwfsid = response["id"] lp: LaunchParameters = LaunchParameters( project_id=TEST_PROJECT_ID, name="Test Instance", launching_user_name="dlister", launching_user_api_token="1234567890", running_workflow_id=rwfid, - running_workflow_step_id=rwfsid, + step_name="step-1", specification={"collection": "workflow-engine-unit-test-jobs", "job": "nop"}, - specification_variables={}, ) # Act - result = launcher.launch(lp) + result = launcher.launch(launch_parameters=lp) # Assert assert result.error_num == 0 @@ -69,26 +64,21 @@ def test_launch_nop_fail(basic_launcher): variables={}, ) rwfid = response["id"] - response, _ = utaa.create_running_workflow_step( - running_workflow_id=response["id"], step="step-1" - ) - rwfsid = response["id"] lp: LaunchParameters = LaunchParameters( project_id=TEST_PROJECT_ID, name="Test Instance", launching_user_name="dlister", launching_user_api_token="1234567890", running_workflow_id=rwfid, - running_workflow_step_id=rwfsid, + step_name="step-1", specification={ "collection": "workflow-engine-unit-test-jobs", "job": "nop-fail", }, - specification_variables={}, ) # Act - result = launcher.launch(lp) + result = launcher.launch(launch_parameters=lp) # Assert assert result.error_num == 0 @@ -108,26 +98,22 @@ def test_launch_smiles_to_file(basic_launcher): variables={}, ) rwfid = response["id"] - response, _ = utaa.create_running_workflow_step( - running_workflow_id=response["id"], step="step-1" - ) - rwfsid = response["id"] lp: LaunchParameters = LaunchParameters( project_id=TEST_PROJECT_ID, name="Test Instance", launching_user_name="dlister", launching_user_api_token="1234567890", running_workflow_id=rwfid, - running_workflow_step_id=rwfsid, + step_name="step-1", specification={ "collection": "workflow-engine-unit-test-jobs", "job": "smiles-to-file", }, - specification_variables={"smiles": "C1=CC=CC=C1", "outputFile": "output.smi"}, + variables={"smiles": "C1=CC=CC=C1", "outputFile": "output.smi"}, ) # Act - result = launcher.launch(lp) + result = launcher.launch(launch_parameters=lp) # Assert assert result.error_num == 0 diff --git a/tests/test_workflow_engine_examples.py b/tests/test_workflow_engine_examples.py index bcd2498..20985bf 100644 --- a/tests/test_workflow_engine_examples.py +++ b/tests/test_workflow_engine_examples.py @@ -398,6 +398,7 @@ def test_workflow_engine_simple_python_molprops_with_options(basic_engine): assert project_file_exists(output_file_2) +@pytest.mark.skip(reason="WIP") def test_workflow_engine_simple_python_fanout(basic_engine): # Arrange md, da = basic_engine diff --git 
a/tests/workflow-definitions/simple-python-fanout.yaml b/tests/workflow-definitions/simple-python-fanout.yaml index 1ad06bb..1adb7a6 100644 --- a/tests/workflow-definitions/simple-python-fanout.yaml +++ b/tests/workflow-definitions/simple-python-fanout.yaml @@ -13,8 +13,9 @@ steps: job: splitsmiles version: "1.0.0" variables: - name: "count" + name: count value: "1" + outputFile: results.smi variable-mapping: - variable: inputFile from-workflow: @@ -27,8 +28,9 @@ steps: job: append-col version: "1.0.0" variables: - name: "desc1" + name: desc1 value: "777" + outputFile: results.smi replicate: using: variable: inputFile diff --git a/workflow/workflow_abc.py b/workflow/workflow_abc.py index 4c4f6ff..0a0acc0 100644 --- a/workflow/workflow_abc.py +++ b/workflow/workflow_abc.py @@ -27,7 +27,7 @@ class LaunchParameters: specification: dict[str, Any] # An alternative way to pass variables to the specification. # If used it will replace any 'variables' already present in the specification. - specification_variables: dict[str, Any] | None = None + variables: dict[str, Any] | None = None # A string. In DM v4 converted to a boolean and set in the # instance Pod as a label. Setting this means the Instances # that are created will not be automatically removed by the Job operator. @@ -35,28 +35,15 @@ class LaunchParameters: # The RunningWorkflow UUID. # Required if the Instance is part of a Workflow step. running_workflow_id: str | None = None - # The RunningWorkflowStep UUID. + # The RunningWorkflow's step name. # Required if the Instance is part of a Workflow step. - running_workflow_step_id: str | None = None - # A list of prior workflow steps that this step depends upon. - # - # This list gives the InstanceLauncher an opportunity to take the outputs - # of a prior instance and link them to the instance directory for the - # instance to be launched. We need to do this for Workflows because Instances - # run as apart of a Workflow do not automatically have their outputs copied (linked) - # to the Project directory when they complete. As an example, a step that relies - # on the output files from two prior steps will provide the following list: - - # - # ["r-workflow-step-a04d", "r-workflow-step-d904"] - running_workflow_step_prior_steps: list[str] | None = None - # Workflow step Job inputs (for this step Instance). These Workflow Inputs (files) - # are a list of Job input variable names for file variables where the - # file is expected to be present in the Project directory. It is simply a list of - # Job variable names. The launcher is expected to find the 'value' of these - # variables and then move the file to the instance directory. - # - # ["inputFile"] - running_workflow_step_inputs: list[str] | None = None + step_name: str | None = None + # The step replication number. + # If only one instance of the step is expected to run + # this value can be left at 0 (zero). If this step's launch + # is expected to be executed more than once the value should be + # non-zero (and unique for this workflow run). + step_replication_number: int = 0 # The application ID (a custom resource name) # used to identify the 'type' of Instance to create. # For DM Jobs this will be 'datamanagerjobs.squonk.it' @@ -75,6 +62,9 @@ class LaunchResult: # The following optional properties # may not be present if there's a launch error. # + # A running workflow step UUID + # (if the step is part of a running workflow) + running_workflow_step_id: str | None = None # The Instance UUID that was created for you. 
instance_id: str | None = None
     # The Task UUID that is handling the Instance launch
@@ -94,7 +84,6 @@
     def launch(
         self,
         *,
         launch_parameters: LaunchParameters,
-        **kwargs: str,
     ) -> LaunchResult:
         """Launch a (Job) Instance"""

@@ -199,25 +188,6 @@
         """Set the success value for a RunningWorkflow Record.
         If not successful an error code and message should be provided."""

-    @abstractmethod
-    def create_running_workflow_step(
-        self,
-        *,
-        running_workflow_id: str,
-        step: str,
-        replica: int = 0,
-        prior_running_workflow_step_id: str | None = None,
-    ) -> tuple[dict[str, Any], int]:
-        """Create a RunningWorkflowStep Record (from a RunningWorkflow).
-        If this is a replica (concurrent execution) of a step the replica
-        value must be set to a value greater than 0. The replica is unique
-        for a given step and is used to distinguish between running workflow steps
-        generated from the same step name."""
-        # Should return:
-        # {
-        #   "id": "r-workflow-step-00000000-0000-0000-0000-000000000001",
-        # }
-
     @abstractmethod
     def get_running_workflow_step(
         self, *, running_workflow_step_id: str
@@ -292,17 +262,6 @@ def get_running_workflow_step_by_name(
     #   "id": "r-workflow-step-00000000-0000-0000-0000-000000000001",
     # },

-    @abstractmethod
-    def set_running_workflow_step_variables(
-        self,
-        *,
-        running_workflow_step_id: str,
-        variables: dict[str, Any],
-    ) -> None:
-        """Set the variables used prior to decoding the step command for each step.
-        This can be used to understand step failures but will also be vital
-        when adding variables values to subsequent steps from prior step values."""
-
     @abstractmethod
     def set_running_workflow_step_done(
         self,
diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py
index fbf200f..d4e8cca 100644
--- a/workflow/workflow_engine.py
+++ b/workflow/workflow_engine.py
@@ -24,7 +24,7 @@
 import logging
 import sys
-from typing import Any, Dict, Optional
+from typing import Any, Optional

 from decoder.decoder import TextEncoding, decode
 from google.protobuf.message import Message
@@ -40,9 +40,7 @@

 from .decoder import (
     Translation,
-    get_step_input_variable_names,
     get_step_prior_step_variable_mapping,
-    get_step_replicator,
     get_step_workflow_variable_mapping,
 )

@@ -123,28 +121,13 @@ def _handle_workflow_start_message(self, r_wfid: str) -> None:
         wf_response, _ = self._wapi_adapter.get_workflow(workflow_id=wfid)
         _LOGGER.debug("API.get_workflow(%s) returned: -\n%s", wfid, str(wf_response))

-        # Now find the first step,
-        # and create a corresponding RunningWorkflowStep record...
-        first_step: Dict[str, Any] = wf_response["steps"][0]
-        first_step_name: str = first_step["name"]
-        # We need this even if the following goes wrong.
-        response, _ = self._wapi_adapter.create_running_workflow_step(
-            running_workflow_id=r_wfid,
-            step=first_step_name,
-        )
-        _LOGGER.debug(
-            "API.create_running_workflow_step(%s, %s) returned: -\n%s",
-            r_wfid,
-            first_step_name,
-            str(response),
-        )
-        assert "id" in response
-        r_wfsid: str = response["id"]
+        # Now find the first step (index 0)...
+        first_step: dict[str, Any] = wf_response["steps"][0]

-        # Launch the first step.
+        # Launch it.
         # If there's a launch problem the step (and running workflow) will have
         # an error, stopping it. There will be no Pod event as the launch has failed.
- self._launch(wf=wf_response, rwf=rwf_response, rwfs_id=r_wfsid, step=first_step) + self._launch(rwf=rwf_response, step=first_step) def _handle_workflow_stop_message(self, r_wfid: str) -> None: """Logic to handle a STOP message.""" @@ -273,33 +256,14 @@ def _handle_pod_message(self, msg: PodMessage) -> None: launch_attempted: bool = False for step in wf_response["steps"]: if step["name"] == step_name: + step_index = wf_response["steps"].index(step) if step_index + 1 < len(wf_response["steps"]): - # There's another step - for this simple logic it is the next step. - + # There's another step! + # For this simple logic it is the next step. next_step = wf_response["steps"][step_index + 1] - next_step_name = next_step["name"] - rwfs_response, _ = self._wapi_adapter.create_running_workflow_step( - running_workflow_id=r_wfid, - step=next_step_name, - ) - assert "id" in rwfs_response - r_wfsid = rwfs_response["id"] - assert r_wfsid - _LOGGER.debug( - "API.create_running_workflow_step(%s, %s) returned: -\n%s", - r_wfid, - next_step_name, - str(response), - ) - - self._launch( - wf=wf_response, - rwf=rwf_response, - rwfs_id=r_wfsid, - step=next_step, - ) + self._launch(rwf=rwf_response, step=next_step) # Something was started (or there was a launch error and the step # and running workflow error will have been set). @@ -318,18 +282,12 @@ def _validate_step_command( self, *, running_workflow_id: str, - running_workflow_step_id: str, step: dict[str, Any], - our_step_index: int, running_workflow_variables: dict[str, Any], ) -> str | dict[str, Any]: """Returns an error message if the command isn't valid. Without a message we return all the variables that were (successfully) - applied to the command. - - We are also given a list of steps in workflow_steps and out position in - the list with our_step_index.""" - assert our_step_index >= 0 + applied to the command.""" # We get the Job from the step specification, which must contain # the keys "collection", "job", and "version". Here we assume that @@ -343,11 +301,10 @@ def _validate_step_command( collection=job_collection, job=job_job, version=job_version ) _LOGGER.debug( - "API.get_job(%s, %s, %s) for %s returned: -\n%s", + "API.get_job(%s, %s, %s) returned: -\n%s", job_collection, job_job, job_version, - running_workflow_step_id, str(job), ) @@ -361,11 +318,12 @@ def _validate_step_command( # 'workflow' or 'a prior step'. # Start with any workflow variables in the step. - # This will be a list of tuples of "in" and "out" variable names. + # This will be a list of Translations of "in" and "out" variable names. # "in" variables are worklfow variables, and "out" variables # are expected Job variables. We use this to add variables # to the "all variables" map. for tr in get_step_workflow_variable_mapping(step=step): + assert tr.in_ in running_workflow_variables all_variables[tr.out] = running_workflow_variables[tr.in_] # Now we apply variables from the "variable mapping" block @@ -382,134 +340,74 @@ def _validate_step_command( name=prior_step_name, running_workflow_id=running_workflow_id ) # Copy "in" value to "out"... 
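            # For example, with a mapping such as
            # Translation(in_="outputFile", out="inputFile") and a prior step
            # that recorded {"outputFile": "results.smi"}, this copy yields
            # all_variables["inputFile"] == "results.smi" (values here are
            # illustrative only).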
+            print(v_map)
+            print(prior_step["variables"])
             for tr in v_map:
+                assert tr.in_ in prior_step["variables"]
                 all_variables[tr.out] = prior_step["variables"][tr.in_]

-        _LOGGER.debug(
-            "Index %s (%s) workflow_variables=%s",
-            our_step_index,
-            running_workflow_step_id,
-            all_variables,
-        )
-
-        # Set the variables for this step (so they can be inspected on error)
-        self._wapi_adapter.set_running_workflow_step_variables(
-            running_workflow_step_id=running_workflow_step_id,
-            variables=all_variables,
-        )
-
         # Now ... can the command be compiled!?
         message, success = decode(
             job["command"], all_variables, "command", TextEncoding.JINJA2_3_0
         )
         return all_variables if success else message

-    def _launch(
-        self,
-        *,
-        wf: dict[str, Any],
-        rwf: dict[str, Any],
-        rwfs_id: str,
-        step: dict[str, Any],
-    ) -> None:
+    def _launch(self, *, rwf: dict[str, Any], step: dict[str, Any]) -> None:
         step_name: str = step["name"]
         rwf_id: str = rwf["id"]
+        project_id = rwf["project"]["id"]

-        _LOGGER.info("Validating step command: %s (step=%s)...", rwf_id, step_name)
-
-        # Get step data - importantly, giving us the sequence of steps in the response.
-        # Steps will be in wf_step_data["steps"] and our position in the list
-        # is wf_step_data["caller_step_index"]
-        wf_step_data, _ = self._wapi_adapter.get_workflow_steps_driving_this_step(
-            running_workflow_step_id=rwfs_id,
-        )
-        assert wf_step_data["caller_step_index"] >= 0
-        our_step_index: int = wf_step_data["caller_step_index"]
-
-        # A mojor pievce of work is to get ourselves into a position
+        # A major piece of work to accomplish is to get ourselves into a position
         # that allows us to check the step command can be executed.
-        # We do this by compiling a map of varibales we belive the step needs.
+        # We do this by compiling a map of variables we believe the step needs.

-        # Get all the workflow variables that were provided
-        # by the user when they "ran" the workflow...
+        # We start with all the workflow variables that were provided
+        # by the user when they "ran" the workflow. We're given a full set of
+        # variables in response (on success) or an error string (on failure).
         rwf_variables: dict[str, Any] = rwf.get("variables", {})
         error_or_variables: str | dict[str, Any] = self._validate_step_command(
             running_workflow_id=rwf_id,
-            running_workflow_step_id=rwfs_id,
             step=step,
-            our_step_index=our_step_index,
             running_workflow_variables=rwf_variables,
         )
         if isinstance(error_or_variables, str):
             error_msg = error_or_variables
             msg = f"Failed command validation error_msg={error_msg}"
             _LOGGER.warning(msg)
-            self._set_step_error(step_name, rwf_id, rwfs_id, 1, msg)
+            self._set_step_error(step_name, rwf_id, None, 1, msg)
             return

-        project_id = rwf["project"]["id"]
         variables: dict[str, Any] = error_or_variables
-
-        _LOGGER.info(
-            "Launching step: RunningWorkflow=%s RunningWorkflowStep=%s step=%s"
-            " variables=%s name=%s project=%s, (variables=%s)",
-            rwf_id,
-            rwfs_id,
-            step_name,
-            variables,
-            rwf["name"],
-            project_id,
-            variables,
-        )
-
+        num_replicas: int = 0
         # Is this a replicating step?
-        replicator = get_step_replicator(step=step)
-
-        # When we launch a step we need to identify all the prior steps in the workflow,
-        # those we depend on. The DataManager will then link their outputs to
-        # out instance directory. For simple workflows there is only one prior step,
-        # and it's the one immediately prior to this one.
-        #
-        # We put all the prior step IDs in:
-
-        # 'running_workflow_step_prior_steps'
-        # A list of step UUID strings.
- # - # In this 'simple' linear implementation that is simply the immediately - # preceding step. - prior_steps: list[str] = [] - if our_step_index > 0: - # We need the step ID of the prior step. - prior_step_name = wf_step_data["steps"][our_step_index - 1]["name"] - step_response, _ = self._wapi_adapter.get_running_workflow_step_by_name( - name=prior_step_name, - running_workflow_id=rwf_id, + # The number of 'replicas' is zero if the step is only launched once + # (i.e. there are no replicas). + + # replicator = get_step_replicator(step=step) + # if replicator: + # single_step_variables = [] + # for replicating_param in variables[replicator]: + # ssv = {**variables} + # ssv[replicator] = replicating_param + # single_step_variables.append(ssv) + # else: + # single_step_variables = [variables] + + assert num_replicas >= 0 + step_replication_number: int = 1 if num_replicas else 0 + for _ in range(1 + num_replicas): + + _LOGGER.info( + "Launching step: %s RunningWorkflow=%s (name=%s)" + " variables=%s project=%s (step_replication_number=%s)", + step_name, + rwf_id, + rwf["name"], + variables, + project_id, + step_replication_number, ) - assert "id" in step_response - prior_steps.append(step_response["id"]) - - # We must also identify workflow inputs that are required by the step we are - # about to launch and pass those using a launch parameter. The launcher - # will ensure these are copied into out instance directory before we are run. - # We cannot provide the variable values (even though we have them) because - # the DM passes input through 'InputHandlers', which may translate the value. - # So we have to pass the name and let the DM move the files after - # the InputHandler has run. - # - # 'running_workflow_step_inputs' - # A list of Job input variable names - - inputs: list[str] = [] - inputs.extend(iter(get_step_input_variable_names(wf, step_name))) - if replicator: - single_step_variables = [] - for replicating_param in variables[replicator]: - ssv = {**variables} - ssv[replicator] = replicating_param - single_step_variables.append(ssv) - else: - single_step_variables = [variables] - for params in single_step_variables: lp: LaunchParameters = LaunchParameters( project_id=project_id, name=step_name, @@ -517,25 +415,36 @@ def _launch( launching_user_name=rwf["running_user"], launching_user_api_token=rwf["running_user_api_token"], specification=step["specification"], - specification_variables=params, + variables=variables, running_workflow_id=rwf_id, - running_workflow_step_id=rwfs_id, - running_workflow_step_prior_steps=prior_steps, - running_workflow_step_inputs=inputs, + step_name=step_name, + step_replication_number=step_replication_number, ) lr: LaunchResult = self._instance_launcher.launch(launch_parameters=lp) + rwfs_id: str | None = lr.running_workflow_step_id + assert rwfs_id + if lr.error_num: self._set_step_error( step_name, rwf_id, rwfs_id, lr.error_num, lr.error_msg ) else: - _LOGGER.info("Launched step '%s' (command=%s)", step_name, lr.command) + _LOGGER.info( + "Launched step '%s' step_id=%s (command=%s)", + step_name, + rwfs_id, + lr.command, + ) + + # Do we need to increment the replication number? 
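+            # (A step launched exactly once keeps the number at 0; replicas
+            # are numbered 1..N, one unique value per launch, matching the
+            # LaunchParameters.step_replication_number contract above.)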
+ if num_replicas: + step_replication_number += 1 def _set_step_error( self, step_name: str, r_wfid: str, - r_wfsid: str, + r_wfsid: str | None, error_num: Optional[int], error_msg: Optional[str], ) -> None: @@ -548,12 +457,14 @@ def _set_step_error( error_msg, ) r_wf_error: str = f"Step '{step_name}' ERROR({error_num}): {error_msg}" - self._wapi_adapter.set_running_workflow_step_done( - running_workflow_step_id=r_wfsid, - success=False, - error_num=error_num, - error_msg=r_wf_error, - ) + # There may be a pre-step error (so assume the ID can also be None) + if r_wfsid: + self._wapi_adapter.set_running_workflow_step_done( + running_workflow_step_id=r_wfsid, + success=False, + error_num=error_num, + error_msg=r_wf_error, + ) # We must also set the running workflow as done (failed) self._wapi_adapter.set_running_workflow_done( running_workflow_id=r_wfid, From 7770d7f448742ecbaea87c2109ce203849aaa1fc Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Thu, 28 Aug 2025 15:34:22 +0000 Subject: [PATCH 38/57] feat: First successful replicating workflow test --- tests/test_decoder.py | 11 --- tests/test_workflow_engine_examples.py | 32 +------- .../test_workflow_validator_for_run_level.py | 22 ------ .../test_workflow_validator_for_tag_level.py | 22 ------ .../simple-python-fanout.yaml | 6 +- workflow/decoder.py | 58 ++++++++++++-- workflow/workflow_engine.py | 78 ++++++++++++------- 7 files changed, 111 insertions(+), 118 deletions(-) diff --git a/tests/test_decoder.py b/tests/test_decoder.py index 46c05d9..4958731 100644 --- a/tests/test_decoder.py +++ b/tests/test_decoder.py @@ -142,17 +142,6 @@ def test_validate_schema_for_step_specification_variable_names(): assert error is None -@pytest.mark.skip(reason="We do not support combination atm") -def test_validate_schema_for_simple_python_parallel(): - # Arrange - - # Act - error = decoder.validate_schema(_SIMPLE_PYTHON_PARALLEL_WORKFLOW) - - # Assert - assert error is None - - def test_get_workflow_variables_for_smiple_python_molprops(): # Arrange diff --git a/tests/test_workflow_engine_examples.py b/tests/test_workflow_engine_examples.py index 20985bf..9d07f13 100644 --- a/tests/test_workflow_engine_examples.py +++ b/tests/test_workflow_engine_examples.py @@ -398,41 +398,22 @@ def test_workflow_engine_simple_python_molprops_with_options(basic_engine): assert project_file_exists(output_file_2) -@pytest.mark.skip(reason="WIP") def test_workflow_engine_simple_python_fanout(basic_engine): # Arrange md, da = basic_engine da.mock_get_running_workflow_step_output_values_for_output( step_name="first-step", - output_variable="outputFile", + output_variable="outputBase", output=["chunk_1.smi", "chunk_2.smi"], ) - # da.mock_get_running_workflow_step_output_values_for_output( - # step_name="parallel-step", - # output_variable="outputFile", - # output=["chunk_1_proc.smi", "chunk_2_proc.smi"] - # ) - - # da.mock_get_running_workflow_step_output_values_for_output( - # step_name="final-step", - # output_variable="outputFile", - # output=["final-step.out.smi"], - # ) - # Make sure files that should be generated by the test # do not exist before we run the test. 
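    # (first-step is mocked above to report two 'outputBase' output values,
    # so the fan-out should produce exactly the two chunk files named below.)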
output_file_first = "chunk_1.smi" output_file_second = "chunk_2.smi" assert not project_file_exists(output_file_first) assert not project_file_exists(output_file_second) - output_file_p_first = "chunk_1_proc.smi" - output_file_p_second = "chunk_2_proc.smi" - assert not project_file_exists(output_file_p_first) - assert not project_file_exists(output_file_p_second) - # output_file_final = "final-step.out.smi" - # assert not project_file_exists(output_file_final) # And create the test's input file. input_file_1 = "input1.smi" input_file_1_content = """O=C(CSCc1ccc(Cl)s1)N1CCC(O)CC1 @@ -458,15 +439,10 @@ def test_workflow_engine_simple_python_fanout(basic_engine): print("response") pprint(response) - assert response["count"] == 2 + assert response["count"] == 3 assert response["running_workflow_steps"][0]["done"] assert response["running_workflow_steps"][0]["success"] assert response["running_workflow_steps"][1]["done"] assert response["running_workflow_steps"][1]["success"] - # assert response["running_workflow_steps"][2]["done"] - # assert response["running_workflow_steps"][2]["success"] - # assert response["running_workflow_steps"][3]["done"] - # assert response["running_workflow_steps"][3]["success"] - # This test should generate a file in the simulated project directory - # assert project_file_exists(output_file_first) - # assert project_file_exists(output_file_final) + assert response["running_workflow_steps"][2]["done"] + assert response["running_workflow_steps"][2]["success"] diff --git a/tests/test_workflow_validator_for_run_level.py b/tests/test_workflow_validator_for_run_level.py index 175d828..1be6694 100644 --- a/tests/test_workflow_validator_for_run_level.py +++ b/tests/test_workflow_validator_for_run_level.py @@ -217,28 +217,6 @@ def test_validate_simple_python_molprops_with_missing_input(): ] -@pytest.mark.skip("Unsupported workflow") -def test_validate_simple_python_parallel(): - # Arrange - workflow_filename: str = os.path.join( - os.path.dirname(__file__), - "workflow-definitions", - "simple-python-parallel.yaml", - ) - with open(workflow_filename, "r", encoding="utf8") as workflow_file: - workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) - assert workflow - - # Act - error = WorkflowValidator.validate( - level=ValidationLevel.TAG, - workflow_definition=workflow, - ) - - # Assert - assert error.error_num == 0 - - def test_validate_replicate_using_undeclared_input(): # Arrange workflow_filename: str = os.path.join( diff --git a/tests/test_workflow_validator_for_tag_level.py b/tests/test_workflow_validator_for_tag_level.py index 8a5d264..96e8e74 100644 --- a/tests/test_workflow_validator_for_tag_level.py +++ b/tests/test_workflow_validator_for_tag_level.py @@ -109,28 +109,6 @@ def test_validate_shortcut_example_1(): assert error.error_msg is None -@pytest.mark.skip("Unsupported workflow") -def test_validate_simple_python_parallel(): - # Arrange - workflow_filename: str = os.path.join( - os.path.dirname(__file__), - "workflow-definitions", - "simple-python-parallel.yaml", - ) - with open(workflow_filename, "r", encoding="utf8") as workflow_file: - workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) - assert workflow - - # Act - error = WorkflowValidator.validate( - level=ValidationLevel.TAG, - workflow_definition=workflow, - ) - - # Assert - assert error.error_num == 0 - - def test_validate_simple_python_molprops(): # Arrange workflow_filename: str = os.path.join( diff --git a/tests/workflow-definitions/simple-python-fanout.yaml 
b/tests/workflow-definitions/simple-python-fanout.yaml
index 1adb7a6..97ab108 100644
--- a/tests/workflow-definitions/simple-python-fanout.yaml
+++ b/tests/workflow-definitions/simple-python-fanout.yaml
@@ -7,7 +7,7 @@ description: >-
 steps:

 - name: first-step
-  description: Create inputs
+  description: Split an input file
   specification:
     collection: workflow-engine-unit-test-jobs
     job: splitsmiles
@@ -15,7 +15,7 @@ steps:
     variables:
       name: count
       value: "1"
-      outputFile: results.smi
+      outputBase: chunk
   variable-mapping:
   - variable: inputFile
     from-workflow:
@@ -38,6 +38,6 @@ steps:
   - variable: inputFile
     from-step:
       name: first-step
-      variable: outputFile
+      variable: outputBase
   out:
   - outputFile
diff --git a/workflow/decoder.py b/workflow/decoder.py
index 78fb211..91c9464 100644
--- a/workflow/decoder.py
+++ b/workflow/decoder.py
@@ -5,6 +5,7 @@
 import os

 from dataclasses import dataclass
+from enum import Enum
 from typing import Any

 import jsonschema
@@ -32,6 +33,26 @@ class Translation:
     out: str


+class ReplicationOrigin(Enum):
+    """Origin of a replication variable."""
+
+    STEP_VARIABLE = 1
+    WORKFLOW_VARIABLE = 2
+
+
+@dataclass
+class ReplicationDriver:
+    """A step's replication driver.
+    The 'variable' is the variable for the step-to-be-executed
+    whose value is 'driven' by the values of the 'source_variable'.
+    The source variable comes either from a step or from the workflow."""
+
+    origin: ReplicationOrigin
+    variable: str
+    source_variable: str
+    source_step_name: str | None = None
+
+
 def validate_schema(workflow: dict[str, Any]) -> str | None:
     """Checks the Workflow Definition against the built-in schema.
     If there's an error the error text is returned, otherwise None.
@@ -154,11 +175,36 @@ def get_step_prior_step_variable_mapping(
     return variable_mapping


-def get_step_replicator(*, step: dict[str, Any]) -> str | Any:
-    """Return step's replication info"""
-    replicator = step.get("replicate")
-    if replicator:
+def get_step_replication_driver(*, step: dict[str, Any]) -> ReplicationDriver | None:
+    """If the step is expected to replicate we return its replication driver,
+    which consists of a (prior) step name and an (output) variable name.
+    Otherwise None is returned."""
+    if replicator := step.get("replicate"):
+        # We need the variable we replicate against,
+        # and the step that owns the variable.
+        #
         # 'using' is a dict but there can be only single value for now
-        replicator = list(replicator["using"].values())[0]
+        variable: str = replicator["using"]["variable"]
+        source_variable: str | None = None
+        # Is the variable from a prior step?
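+        # For example, the fan-out definition above replicates using
+        # 'inputFile', which is mapped from first-step's 'outputBase',
+        # so the driver we return would be:
+        #
+        #   ReplicationDriver(
+        #       origin=ReplicationOrigin.STEP_VARIABLE,
+        #       variable="inputFile",
+        #       source_step_name="first-step",
+        #       source_variable="outputBase",
+        #   )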
+        step_name: str | None = None
+        step_v_map = get_step_prior_step_variable_mapping(step=step)
+        for step_name_candidate, mappings in step_v_map.items():
+            for mapping in mappings:
+                if mapping.out == variable:
+                    step_name = step_name_candidate
+                    source_variable = mapping.in_
+                    break
+            if step_name:
+                break
+        assert step_name
+        assert source_variable
+
+        return ReplicationDriver(
+            origin=ReplicationOrigin.STEP_VARIABLE,
+            variable=variable,
+            source_step_name=step_name,
+            source_variable=source_variable,
+        )

-    return replicator
+    return None
diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py
index d4e8cca..988829a 100644
--- a/workflow/workflow_engine.py
+++ b/workflow/workflow_engine.py
@@ -39,8 +39,11 @@
 )

 from .decoder import (
+    ReplicationDriver,
+    ReplicationOrigin,
     Translation,
     get_step_prior_step_variable_mapping,
+    get_step_replication_driver,
     get_step_workflow_variable_mapping,
 )

@@ -340,8 +343,6 @@ def _validate_step_command(
                 name=prior_step_name, running_workflow_id=running_workflow_id
             )
             # Copy "in" value to "out"...
-            print(v_map)
-            print(prior_step["variables"])
             for tr in v_map:
                 assert tr.in_ in prior_step["variables"]
                 all_variables[tr.out] = prior_step["variables"][tr.in_]
@@ -378,34 +379,63 @@ def _launch(self, *, rwf: dict[str, Any], step: dict[str, Any]) -> None:
             return

         variables: dict[str, Any] = error_or_variables
-        num_replicas: int = 0
-        # Is this a replicating step?
-        # The number of 'replicas' is zero if the step is only launched once
-        # (i.e. there are no replicas).
-
-        # replicator = get_step_replicator(step=step)
-        # if replicator:
-        #     single_step_variables = []
-        #     for replicating_param in variables[replicator]:
-        #         ssv = {**variables}
-        #         ssv[replicator] = replicating_param
-        #         single_step_variables.append(ssv)
-        # else:
-        #     single_step_variables = [variables]
-
-        assert num_replicas >= 0
-        step_replication_number: int = 1 if num_replicas else 0
-        for _ in range(1 + num_replicas):
+
+        # A replication number,
+        # used only for steps expected to replicate (even if just once).
+        step_replication_number: int = 0
+        # Does this step have a replicating driver?
+        r_driver: ReplicationDriver | None = get_step_replication_driver(step=step)
+        replication_values: list[str] = []
+        if r_driver:
+            if r_driver.origin == ReplicationOrigin.STEP_VARIABLE:
+                # We need to get the variable values from a prior step,
+                # so we need the prior step's running-workflow-step id.
+                assert r_driver.source_step_name
+                response, _ = self._wapi_adapter.get_running_workflow_step_by_name(
+                    name=r_driver.source_step_name,
+                    running_workflow_id=rwf_id,
+                )
+                assert "id" in response
+                o_rwfs_id: str = response["id"]
+                response, _ = (
+                    self._wapi_adapter.get_running_workflow_step_output_values_for_output(
+                        running_workflow_step_id=o_rwfs_id,
+                        output_variable=r_driver.source_variable,
+                    )
+                )
+                assert "output" in response
+                replication_values = response["output"]
+            else:
+                assert False, "Unsupported origin"
+
+        num_step_instances: int = max(1, len(replication_values))
+        for iteration in range(num_step_instances):
+
+            # If we are replicating this step then we must replace the step's
+            # variable with a value expected for this iteration.
+            if r_driver:
+                iter_variable: str = r_driver.variable
+                iter_value: str = replication_values[iteration]
+                _LOGGER.info(
+                    "Replicating step: %s iteration=%s variable=%s value=%s",
+                    step_name,
+                    iteration,
+                    iter_variable,
+                    iter_value,
+                )
+                # Over-write the replicating variable
+                # and set the replication number to a unique +ve non-zero value...
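+                # (For example, with replication_values of
+                # ["chunk_1.smi", "chunk_2.smi"], iteration 0 launches with
+                # the variable set to "chunk_1.smi" and replication number 1,
+                # and iteration 1 with "chunk_2.smi" and replication number 2.)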
+ variables[iter_variable] = iter_value + step_replication_number = iteration + 1 _LOGGER.info( "Launching step: %s RunningWorkflow=%s (name=%s)" - " variables=%s project=%s (step_replication_number=%s)", + " variables=%s project=%s", step_name, rwf_id, rwf["name"], variables, project_id, - step_replication_number, ) lp: LaunchParameters = LaunchParameters( @@ -436,10 +466,6 @@ def _launch(self, *, rwf: dict[str, Any], step: dict[str, Any]) -> None: lr.command, ) - # Do we need to increment the replication number? - if num_replicas: - step_replication_number += 1 - def _set_step_error( self, step_name: str, From c53b2459ab6edd2dfcef567e33d77028cbca6a7c Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Fri, 29 Aug 2025 16:20:26 +0000 Subject: [PATCH 39/57] feat: Use of decoder 2.4.0 (traits) --- poetry.lock | 543 ++++++++++++++++++++++++++++--------------------- pyproject.toml | 2 +- 2 files changed, 307 insertions(+), 238 deletions(-) diff --git a/poetry.lock b/poetry.lock index 7ca8cda..35b18b5 100644 --- a/poetry.lock +++ b/poetry.lock @@ -43,78 +43,99 @@ files = [ [[package]] name = "coverage" -version = "7.8.2" +version = "7.10.6" description = "Code coverage measurement for Python" optional = false python-versions = ">=3.9" files = [ - {file = "coverage-7.8.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bd8ec21e1443fd7a447881332f7ce9d35b8fbd2849e761bb290b584535636b0a"}, - {file = "coverage-7.8.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4c26c2396674816deaeae7ded0e2b42c26537280f8fe313335858ffff35019be"}, - {file = "coverage-7.8.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1aec326ed237e5880bfe69ad41616d333712c7937bcefc1343145e972938f9b3"}, - {file = "coverage-7.8.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5e818796f71702d7a13e50c70de2a1924f729228580bcba1607cccf32eea46e6"}, - {file = "coverage-7.8.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:546e537d9e24efc765c9c891328f30f826e3e4808e31f5d0f87c4ba12bbd1622"}, - {file = "coverage-7.8.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ab9b09a2349f58e73f8ebc06fac546dd623e23b063e5398343c5270072e3201c"}, - {file = "coverage-7.8.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:fd51355ab8a372d89fb0e6a31719e825cf8df8b6724bee942fb5b92c3f016ba3"}, - {file = "coverage-7.8.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:0774df1e093acb6c9e4d58bce7f86656aeed6c132a16e2337692c12786b32404"}, - {file = "coverage-7.8.2-cp310-cp310-win32.whl", hash = "sha256:00f2e2f2e37f47e5f54423aeefd6c32a7dbcedc033fcd3928a4f4948e8b96af7"}, - {file = "coverage-7.8.2-cp310-cp310-win_amd64.whl", hash = "sha256:145b07bea229821d51811bf15eeab346c236d523838eda395ea969d120d13347"}, - {file = "coverage-7.8.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b99058eef42e6a8dcd135afb068b3d53aff3921ce699e127602efff9956457a9"}, - {file = "coverage-7.8.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5feb7f2c3e6ea94d3b877def0270dff0947b8d8c04cfa34a17be0a4dc1836879"}, - {file = "coverage-7.8.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:670a13249b957bb9050fab12d86acef7bf8f6a879b9d1a883799276e0d4c674a"}, - {file = "coverage-7.8.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0bdc8bf760459a4a4187b452213e04d039990211f98644c7292adf1e471162b5"}, - {file = 
"coverage-7.8.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07a989c867986c2a75f158f03fdb413128aad29aca9d4dbce5fc755672d96f11"}, - {file = "coverage-7.8.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2db10dedeb619a771ef0e2949ccba7b75e33905de959c2643a4607bef2f3fb3a"}, - {file = "coverage-7.8.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e6ea7dba4e92926b7b5f0990634b78ea02f208d04af520c73a7c876d5a8d36cb"}, - {file = "coverage-7.8.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ef2f22795a7aca99fc3c84393a55a53dd18ab8c93fb431004e4d8f0774150f54"}, - {file = "coverage-7.8.2-cp311-cp311-win32.whl", hash = "sha256:641988828bc18a6368fe72355df5f1703e44411adbe49bba5644b941ce6f2e3a"}, - {file = "coverage-7.8.2-cp311-cp311-win_amd64.whl", hash = "sha256:8ab4a51cb39dc1933ba627e0875046d150e88478dbe22ce145a68393e9652975"}, - {file = "coverage-7.8.2-cp311-cp311-win_arm64.whl", hash = "sha256:8966a821e2083c74d88cca5b7dcccc0a3a888a596a04c0b9668a891de3a0cc53"}, - {file = "coverage-7.8.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e2f6fe3654468d061942591aef56686131335b7a8325684eda85dacdf311356c"}, - {file = "coverage-7.8.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:76090fab50610798cc05241bf83b603477c40ee87acd358b66196ab0ca44ffa1"}, - {file = "coverage-7.8.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2bd0a0a5054be160777a7920b731a0570284db5142abaaf81bcbb282b8d99279"}, - {file = "coverage-7.8.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:da23ce9a3d356d0affe9c7036030b5c8f14556bd970c9b224f9c8205505e3b99"}, - {file = "coverage-7.8.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9392773cffeb8d7e042a7b15b82a414011e9d2b5fdbbd3f7e6a6b17d5e21b20"}, - {file = "coverage-7.8.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:876cbfd0b09ce09d81585d266c07a32657beb3eaec896f39484b631555be0fe2"}, - {file = "coverage-7.8.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:3da9b771c98977a13fbc3830f6caa85cae6c9c83911d24cb2d218e9394259c57"}, - {file = "coverage-7.8.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9a990f6510b3292686713bfef26d0049cd63b9c7bb17e0864f133cbfd2e6167f"}, - {file = "coverage-7.8.2-cp312-cp312-win32.whl", hash = "sha256:bf8111cddd0f2b54d34e96613e7fbdd59a673f0cf5574b61134ae75b6f5a33b8"}, - {file = "coverage-7.8.2-cp312-cp312-win_amd64.whl", hash = "sha256:86a323a275e9e44cdf228af9b71c5030861d4d2610886ab920d9945672a81223"}, - {file = "coverage-7.8.2-cp312-cp312-win_arm64.whl", hash = "sha256:820157de3a589e992689ffcda8639fbabb313b323d26388d02e154164c57b07f"}, - {file = "coverage-7.8.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ea561010914ec1c26ab4188aef8b1567272ef6de096312716f90e5baa79ef8ca"}, - {file = "coverage-7.8.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cb86337a4fcdd0e598ff2caeb513ac604d2f3da6d53df2c8e368e07ee38e277d"}, - {file = "coverage-7.8.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26a4636ddb666971345541b59899e969f3b301143dd86b0ddbb570bd591f1e85"}, - {file = "coverage-7.8.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5040536cf9b13fb033f76bcb5e1e5cb3b57c4807fef37db9e0ed129c6a094257"}, - {file = 
"coverage-7.8.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc67994df9bcd7e0150a47ef41278b9e0a0ea187caba72414b71dc590b99a108"}, - {file = "coverage-7.8.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6e6c86888fd076d9e0fe848af0a2142bf606044dc5ceee0aa9eddb56e26895a0"}, - {file = "coverage-7.8.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:684ca9f58119b8e26bef860db33524ae0365601492e86ba0b71d513f525e7050"}, - {file = "coverage-7.8.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8165584ddedb49204c4e18da083913bdf6a982bfb558632a79bdaadcdafd0d48"}, - {file = "coverage-7.8.2-cp313-cp313-win32.whl", hash = "sha256:34759ee2c65362163699cc917bdb2a54114dd06d19bab860725f94ef45a3d9b7"}, - {file = "coverage-7.8.2-cp313-cp313-win_amd64.whl", hash = "sha256:2f9bc608fbafaee40eb60a9a53dbfb90f53cc66d3d32c2849dc27cf5638a21e3"}, - {file = "coverage-7.8.2-cp313-cp313-win_arm64.whl", hash = "sha256:9fe449ee461a3b0c7105690419d0b0aba1232f4ff6d120a9e241e58a556733f7"}, - {file = "coverage-7.8.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:8369a7c8ef66bded2b6484053749ff220dbf83cba84f3398c84c51a6f748a008"}, - {file = "coverage-7.8.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:159b81df53a5fcbc7d45dae3adad554fdbde9829a994e15227b3f9d816d00b36"}, - {file = "coverage-7.8.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e6fcbbd35a96192d042c691c9e0c49ef54bd7ed865846a3c9d624c30bb67ce46"}, - {file = "coverage-7.8.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:05364b9cc82f138cc86128dc4e2e1251c2981a2218bfcd556fe6b0fbaa3501be"}, - {file = "coverage-7.8.2-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46d532db4e5ff3979ce47d18e2fe8ecad283eeb7367726da0e5ef88e4fe64740"}, - {file = "coverage-7.8.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4000a31c34932e7e4fa0381a3d6deb43dc0c8f458e3e7ea6502e6238e10be625"}, - {file = "coverage-7.8.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:43ff5033d657cd51f83015c3b7a443287250dc14e69910577c3e03bd2e06f27b"}, - {file = "coverage-7.8.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:94316e13f0981cbbba132c1f9f365cac1d26716aaac130866ca812006f662199"}, - {file = "coverage-7.8.2-cp313-cp313t-win32.whl", hash = "sha256:3f5673888d3676d0a745c3d0e16da338c5eea300cb1f4ada9c872981265e76d8"}, - {file = "coverage-7.8.2-cp313-cp313t-win_amd64.whl", hash = "sha256:2c08b05ee8d7861e45dc5a2cc4195c8c66dca5ac613144eb6ebeaff2d502e73d"}, - {file = "coverage-7.8.2-cp313-cp313t-win_arm64.whl", hash = "sha256:1e1448bb72b387755e1ff3ef1268a06617afd94188164960dba8d0245a46004b"}, - {file = "coverage-7.8.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:496948261eaac5ac9cf43f5d0a9f6eb7a6d4cb3bedb2c5d294138142f5c18f2a"}, - {file = "coverage-7.8.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:eacd2de0d30871eff893bab0b67840a96445edcb3c8fd915e6b11ac4b2f3fa6d"}, - {file = "coverage-7.8.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b039ffddc99ad65d5078ef300e0c7eed08c270dc26570440e3ef18beb816c1ca"}, - {file = "coverage-7.8.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0e49824808d4375ede9dd84e9961a59c47f9113039f1a525e6be170aa4f5c34d"}, - {file = 
"coverage-7.8.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b069938961dfad881dc2f8d02b47645cd2f455d3809ba92a8a687bf513839787"}, - {file = "coverage-7.8.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:de77c3ba8bb686d1c411e78ee1b97e6e0b963fb98b1637658dd9ad2c875cf9d7"}, - {file = "coverage-7.8.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:1676628065a498943bd3f64f099bb573e08cf1bc6088bbe33cf4424e0876f4b3"}, - {file = "coverage-7.8.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:8e1a26e7e50076e35f7afafde570ca2b4d7900a491174ca357d29dece5aacee7"}, - {file = "coverage-7.8.2-cp39-cp39-win32.whl", hash = "sha256:6782a12bf76fa61ad9350d5a6ef5f3f020b57f5e6305cbc663803f2ebd0f270a"}, - {file = "coverage-7.8.2-cp39-cp39-win_amd64.whl", hash = "sha256:1efa4166ba75ccefd647f2d78b64f53f14fb82622bc94c5a5cb0a622f50f1c9e"}, - {file = "coverage-7.8.2-pp39.pp310.pp311-none-any.whl", hash = "sha256:ec455eedf3ba0bbdf8f5a570012617eb305c63cb9f03428d39bf544cb2b94837"}, - {file = "coverage-7.8.2-py3-none-any.whl", hash = "sha256:726f32ee3713f7359696331a18daf0c3b3a70bb0ae71141b9d3c52be7c595e32"}, - {file = "coverage-7.8.2.tar.gz", hash = "sha256:a886d531373a1f6ff9fad2a2ba4a045b68467b779ae729ee0b3b10ac20033b27"}, + {file = "coverage-7.10.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:70e7bfbd57126b5554aa482691145f798d7df77489a177a6bef80de78860a356"}, + {file = "coverage-7.10.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e41be6f0f19da64af13403e52f2dec38bbc2937af54df8ecef10850ff8d35301"}, + {file = "coverage-7.10.6-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:c61fc91ab80b23f5fddbee342d19662f3d3328173229caded831aa0bd7595460"}, + {file = "coverage-7.10.6-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:10356fdd33a7cc06e8051413140bbdc6f972137508a3572e3f59f805cd2832fd"}, + {file = "coverage-7.10.6-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:80b1695cf7c5ebe7b44bf2521221b9bb8cdf69b1f24231149a7e3eb1ae5fa2fb"}, + {file = "coverage-7.10.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:2e4c33e6378b9d52d3454bd08847a8651f4ed23ddbb4a0520227bd346382bbc6"}, + {file = "coverage-7.10.6-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:c8a3ec16e34ef980a46f60dc6ad86ec60f763c3f2fa0db6d261e6e754f72e945"}, + {file = "coverage-7.10.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7d79dabc0a56f5af990cc6da9ad1e40766e82773c075f09cc571e2076fef882e"}, + {file = "coverage-7.10.6-cp310-cp310-win32.whl", hash = "sha256:86b9b59f2b16e981906e9d6383eb6446d5b46c278460ae2c36487667717eccf1"}, + {file = "coverage-7.10.6-cp310-cp310-win_amd64.whl", hash = "sha256:e132b9152749bd33534e5bd8565c7576f135f157b4029b975e15ee184325f528"}, + {file = "coverage-7.10.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c706db3cabb7ceef779de68270150665e710b46d56372455cd741184f3868d8f"}, + {file = "coverage-7.10.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8e0c38dc289e0508ef68ec95834cb5d2e96fdbe792eaccaa1bccac3966bbadcc"}, + {file = "coverage-7.10.6-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:752a3005a1ded28f2f3a6e8787e24f28d6abe176ca64677bcd8d53d6fe2ec08a"}, + {file = "coverage-7.10.6-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:689920ecfd60f992cafca4f5477d55720466ad2c7fa29bb56ac8d44a1ac2b47a"}, + {file = 
"coverage-7.10.6-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ec98435796d2624d6905820a42f82149ee9fc4f2d45c2c5bc5a44481cc50db62"}, + {file = "coverage-7.10.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b37201ce4a458c7a758ecc4efa92fa8ed783c66e0fa3c42ae19fc454a0792153"}, + {file = "coverage-7.10.6-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:2904271c80898663c810a6b067920a61dd8d38341244a3605bd31ab55250dad5"}, + {file = "coverage-7.10.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:5aea98383463d6e1fa4e95416d8de66f2d0cb588774ee20ae1b28df826bcb619"}, + {file = "coverage-7.10.6-cp311-cp311-win32.whl", hash = "sha256:e3fb1fa01d3598002777dd259c0c2e6d9d5e10e7222976fc8e03992f972a2cba"}, + {file = "coverage-7.10.6-cp311-cp311-win_amd64.whl", hash = "sha256:f35ed9d945bece26553d5b4c8630453169672bea0050a564456eb88bdffd927e"}, + {file = "coverage-7.10.6-cp311-cp311-win_arm64.whl", hash = "sha256:99e1a305c7765631d74b98bf7dbf54eeea931f975e80f115437d23848ee8c27c"}, + {file = "coverage-7.10.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:5b2dd6059938063a2c9fee1af729d4f2af28fd1a545e9b7652861f0d752ebcea"}, + {file = "coverage-7.10.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:388d80e56191bf846c485c14ae2bc8898aa3124d9d35903fef7d907780477634"}, + {file = "coverage-7.10.6-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:90cb5b1a4670662719591aa92d0095bb41714970c0b065b02a2610172dbf0af6"}, + {file = "coverage-7.10.6-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:961834e2f2b863a0e14260a9a273aff07ff7818ab6e66d2addf5628590c628f9"}, + {file = "coverage-7.10.6-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bf9a19f5012dab774628491659646335b1928cfc931bf8d97b0d5918dd58033c"}, + {file = "coverage-7.10.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:99c4283e2a0e147b9c9cc6bc9c96124de9419d6044837e9799763a0e29a7321a"}, + {file = "coverage-7.10.6-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:282b1b20f45df57cc508c1e033403f02283adfb67d4c9c35a90281d81e5c52c5"}, + {file = "coverage-7.10.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8cdbe264f11afd69841bd8c0d83ca10b5b32853263ee62e6ac6a0ab63895f972"}, + {file = "coverage-7.10.6-cp312-cp312-win32.whl", hash = "sha256:a517feaf3a0a3eca1ee985d8373135cfdedfbba3882a5eab4362bda7c7cf518d"}, + {file = "coverage-7.10.6-cp312-cp312-win_amd64.whl", hash = "sha256:856986eadf41f52b214176d894a7de05331117f6035a28ac0016c0f63d887629"}, + {file = "coverage-7.10.6-cp312-cp312-win_arm64.whl", hash = "sha256:acf36b8268785aad739443fa2780c16260ee3fa09d12b3a70f772ef100939d80"}, + {file = "coverage-7.10.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ffea0575345e9ee0144dfe5701aa17f3ba546f8c3bb48db62ae101afb740e7d6"}, + {file = "coverage-7.10.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:95d91d7317cde40a1c249d6b7382750b7e6d86fad9d8eaf4fa3f8f44cf171e80"}, + {file = "coverage-7.10.6-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3e23dd5408fe71a356b41baa82892772a4cefcf758f2ca3383d2aa39e1b7a003"}, + {file = "coverage-7.10.6-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:0f3f56e4cb573755e96a16501a98bf211f100463d70275759e73f3cbc00d4f27"}, + {file = "coverage-7.10.6-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:db4a1d897bbbe7339946ffa2fe60c10cc81c43fab8b062d3fcb84188688174a4"}, + {file = "coverage-7.10.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d8fd7879082953c156d5b13c74aa6cca37f6a6f4747b39538504c3f9c63d043d"}, + {file = "coverage-7.10.6-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:28395ca3f71cd103b8c116333fa9db867f3a3e1ad6a084aa3725ae002b6583bc"}, + {file = "coverage-7.10.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:61c950fc33d29c91b9e18540e1aed7d9f6787cc870a3e4032493bbbe641d12fc"}, + {file = "coverage-7.10.6-cp313-cp313-win32.whl", hash = "sha256:160c00a5e6b6bdf4e5984b0ef21fc860bc94416c41b7df4d63f536d17c38902e"}, + {file = "coverage-7.10.6-cp313-cp313-win_amd64.whl", hash = "sha256:628055297f3e2aa181464c3808402887643405573eb3d9de060d81531fa79d32"}, + {file = "coverage-7.10.6-cp313-cp313-win_arm64.whl", hash = "sha256:df4ec1f8540b0bcbe26ca7dd0f541847cc8a108b35596f9f91f59f0c060bfdd2"}, + {file = "coverage-7.10.6-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:c9a8b7a34a4de3ed987f636f71881cd3b8339f61118b1aa311fbda12741bff0b"}, + {file = "coverage-7.10.6-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:8dd5af36092430c2b075cee966719898f2ae87b636cefb85a653f1d0ba5d5393"}, + {file = "coverage-7.10.6-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:b0353b0f0850d49ada66fdd7d0c7cdb0f86b900bb9e367024fd14a60cecc1e27"}, + {file = "coverage-7.10.6-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:d6b9ae13d5d3e8aeca9ca94198aa7b3ebbc5acfada557d724f2a1f03d2c0b0df"}, + {file = "coverage-7.10.6-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:675824a363cc05781b1527b39dc2587b8984965834a748177ee3c37b64ffeafb"}, + {file = "coverage-7.10.6-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:692d70ea725f471a547c305f0d0fc6a73480c62fb0da726370c088ab21aed282"}, + {file = "coverage-7.10.6-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:851430a9a361c7a8484a36126d1d0ff8d529d97385eacc8dfdc9bfc8c2d2cbe4"}, + {file = "coverage-7.10.6-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:d9369a23186d189b2fc95cc08b8160ba242057e887d766864f7adf3c46b2df21"}, + {file = "coverage-7.10.6-cp313-cp313t-win32.whl", hash = "sha256:92be86fcb125e9bda0da7806afd29a3fd33fdf58fba5d60318399adf40bf37d0"}, + {file = "coverage-7.10.6-cp313-cp313t-win_amd64.whl", hash = "sha256:6b3039e2ca459a70c79523d39347d83b73f2f06af5624905eba7ec34d64d80b5"}, + {file = "coverage-7.10.6-cp313-cp313t-win_arm64.whl", hash = "sha256:3fb99d0786fe17b228eab663d16bee2288e8724d26a199c29325aac4b0319b9b"}, + {file = "coverage-7.10.6-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:6008a021907be8c4c02f37cdc3ffb258493bdebfeaf9a839f9e71dfdc47b018e"}, + {file = "coverage-7.10.6-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:5e75e37f23eb144e78940b40395b42f2321951206a4f50e23cfd6e8a198d3ceb"}, + {file = "coverage-7.10.6-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:0f7cb359a448e043c576f0da00aa8bfd796a01b06aa610ca453d4dde09cc1034"}, + {file = "coverage-7.10.6-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:c68018e4fc4e14b5668f1353b41ccf4bc83ba355f0e1b3836861c6f042d89ac1"}, + {file = "coverage-7.10.6-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cd4b2b0707fc55afa160cd5fc33b27ccbf75ca11d81f4ec9863d5793fc6df56a"}, + {file = 
"coverage-7.10.6-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:4cec13817a651f8804a86e4f79d815b3b28472c910e099e4d5a0e8a3b6a1d4cb"}, + {file = "coverage-7.10.6-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:f2a6a8e06bbda06f78739f40bfb56c45d14eb8249d0f0ea6d4b3d48e1f7c695d"}, + {file = "coverage-7.10.6-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:081b98395ced0d9bcf60ada7661a0b75f36b78b9d7e39ea0790bb4ed8da14747"}, + {file = "coverage-7.10.6-cp314-cp314-win32.whl", hash = "sha256:6937347c5d7d069ee776b2bf4e1212f912a9f1f141a429c475e6089462fcecc5"}, + {file = "coverage-7.10.6-cp314-cp314-win_amd64.whl", hash = "sha256:adec1d980fa07e60b6ef865f9e5410ba760e4e1d26f60f7e5772c73b9a5b0713"}, + {file = "coverage-7.10.6-cp314-cp314-win_arm64.whl", hash = "sha256:a80f7aef9535442bdcf562e5a0d5a5538ce8abe6bb209cfbf170c462ac2c2a32"}, + {file = "coverage-7.10.6-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:0de434f4fbbe5af4fa7989521c655c8c779afb61c53ab561b64dcee6149e4c65"}, + {file = "coverage-7.10.6-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:6e31b8155150c57e5ac43ccd289d079eb3f825187d7c66e755a055d2c85794c6"}, + {file = "coverage-7.10.6-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:98cede73eb83c31e2118ae8d379c12e3e42736903a8afcca92a7218e1f2903b0"}, + {file = "coverage-7.10.6-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:f863c08f4ff6b64fa8045b1e3da480f5374779ef187f07b82e0538c68cb4ff8e"}, + {file = "coverage-7.10.6-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2b38261034fda87be356f2c3f42221fdb4171c3ce7658066ae449241485390d5"}, + {file = "coverage-7.10.6-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:0e93b1476b79eae849dc3872faeb0bf7948fd9ea34869590bc16a2a00b9c82a7"}, + {file = "coverage-7.10.6-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:ff8a991f70f4c0cf53088abf1e3886edcc87d53004c7bb94e78650b4d3dac3b5"}, + {file = "coverage-7.10.6-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:ac765b026c9f33044419cbba1da913cfb82cca1b60598ac1c7a5ed6aac4621a0"}, + {file = "coverage-7.10.6-cp314-cp314t-win32.whl", hash = "sha256:441c357d55f4936875636ef2cfb3bee36e466dcf50df9afbd398ce79dba1ebb7"}, + {file = "coverage-7.10.6-cp314-cp314t-win_amd64.whl", hash = "sha256:073711de3181b2e204e4870ac83a7c4853115b42e9cd4d145f2231e12d670930"}, + {file = "coverage-7.10.6-cp314-cp314t-win_arm64.whl", hash = "sha256:137921f2bac5559334ba66122b753db6dc5d1cf01eb7b64eb412bb0d064ef35b"}, + {file = "coverage-7.10.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:90558c35af64971d65fbd935c32010f9a2f52776103a259f1dee865fe8259352"}, + {file = "coverage-7.10.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8953746d371e5695405806c46d705a3cd170b9cc2b9f93953ad838f6c1e58612"}, + {file = "coverage-7.10.6-cp39-cp39-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:c83f6afb480eae0313114297d29d7c295670a41c11b274e6bca0c64540c1ce7b"}, + {file = "coverage-7.10.6-cp39-cp39-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7eb68d356ba0cc158ca535ce1381dbf2037fa8cb5b1ae5ddfc302e7317d04144"}, + {file = "coverage-7.10.6-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5b15a87265e96307482746d86995f4bff282f14b027db75469c446da6127433b"}, + {file = "coverage-7.10.6-cp39-cp39-musllinux_1_2_aarch64.whl", hash = 
"sha256:fc53ba868875bfbb66ee447d64d6413c2db91fddcfca57025a0e7ab5b07d5862"}, + {file = "coverage-7.10.6-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:efeda443000aa23f276f4df973cb82beca682fd800bb119d19e80504ffe53ec2"}, + {file = "coverage-7.10.6-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:9702b59d582ff1e184945d8b501ffdd08d2cee38d93a2206aa5f1365ce0b8d78"}, + {file = "coverage-7.10.6-cp39-cp39-win32.whl", hash = "sha256:2195f8e16ba1a44651ca684db2ea2b2d4b5345da12f07d9c22a395202a05b23c"}, + {file = "coverage-7.10.6-cp39-cp39-win_amd64.whl", hash = "sha256:f32ff80e7ef6a5b5b606ea69a36e97b219cd9dc799bcf2963018a4d8f788cfbf"}, + {file = "coverage-7.10.6-py3-none-any.whl", hash = "sha256:92c4ecf6bf11b2e85fd4d8204814dc26e6a19f0c9d938c207c5cb0eadfcabbe3"}, + {file = "coverage-7.10.6.tar.gz", hash = "sha256:f644a3ae5933a552a29dbb9aa2f90c677a875f80ebea028e5a52a4f429044b90"}, ] [package.extras] @@ -122,40 +143,35 @@ toml = ["tomli"] [[package]] name = "distlib" -version = "0.3.9" +version = "0.4.0" description = "Distribution utilities" optional = false python-versions = "*" files = [ - {file = "distlib-0.3.9-py2.py3-none-any.whl", hash = "sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87"}, - {file = "distlib-0.3.9.tar.gz", hash = "sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403"}, + {file = "distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16"}, + {file = "distlib-0.4.0.tar.gz", hash = "sha256:feec40075be03a04501a973d81f633735b4b69f98b05450592310c0f401a4e0d"}, ] [[package]] name = "filelock" -version = "3.18.0" +version = "3.19.1" description = "A platform independent file lock." optional = false python-versions = ">=3.9" files = [ - {file = "filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de"}, - {file = "filelock-3.18.0.tar.gz", hash = "sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2"}, + {file = "filelock-3.19.1-py3-none-any.whl", hash = "sha256:d38e30481def20772f5baf097c122c3babc4fcdb7e14e57049eb9d88c6dc017d"}, + {file = "filelock-3.19.1.tar.gz", hash = "sha256:66eda1888b0171c998b35be2bcc0f6d75c388a7ce20c3f3f37aa8e96c2dddf58"}, ] -[package.extras] -docs = ["furo (>=2024.8.6)", "sphinx (>=8.1.3)", "sphinx-autodoc-typehints (>=3)"] -testing = ["covdefaults (>=2.3)", "coverage (>=7.6.10)", "diff-cover (>=9.2.1)", "pytest (>=8.3.4)", "pytest-asyncio (>=0.25.2)", "pytest-cov (>=6)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.28.1)"] -typing = ["typing-extensions (>=4.12.2)"] - [[package]] name = "identify" -version = "2.6.12" +version = "2.6.13" description = "File identification library for Python" optional = false python-versions = ">=3.9" files = [ - {file = "identify-2.6.12-py2.py3-none-any.whl", hash = "sha256:ad9672d5a72e0d2ff7c5c8809b62dfa60458626352fb0eb7b55e69bdc45334a2"}, - {file = "identify-2.6.12.tar.gz", hash = "sha256:d8de45749f1efb108badef65ee8386f0f7bb19a7f26185f74de6367bffbaf0e6"}, + {file = "identify-2.6.13-py2.py3-none-any.whl", hash = "sha256:60381139b3ae39447482ecc406944190f690d4a2997f2584062089848361b33b"}, + {file = "identify-2.6.13.tar.gz", hash = "sha256:da8d6c828e773620e13bfa86ea601c5a5310ba4bcd65edf378198b56a1f9fb32"}, ] [package.extras] @@ -163,13 +179,13 @@ license = ["ukkonen"] [[package]] name = "im-data-manager-job-decoder" -version = "2.1.0" +version = "2.4.0" description = "Job decoding logic" optional = false python-versions = ">=3.10" 
files = [ - {file = "im_data_manager_job_decoder-2.1.0-py3-none-any.whl", hash = "sha256:b4eefdbdf3d7f5ccb9e154f1d737ca4d25f31e74a94d3a620c71a3752c49d4f8"}, - {file = "im_data_manager_job_decoder-2.1.0.tar.gz", hash = "sha256:11ce891837c7e152be241caac137df192764c06cf2ab6ce84890825bb8c12d25"}, + {file = "im_data_manager_job_decoder-2.4.0-py3-none-any.whl", hash = "sha256:4a911e2a8760dd381247f2f740b2e280a817dbdad65c65164dad97dfcf9058bf"}, + {file = "im_data_manager_job_decoder-2.4.0.tar.gz", hash = "sha256:34e7a8ac0421edc26760491ffd8b9183f0757ebc9e25dabf865235c936fad458"}, ] [package.dependencies] @@ -221,13 +237,13 @@ i18n = ["Babel (>=2.7)"] [[package]] name = "jsonschema" -version = "4.24.0" +version = "4.25.1" description = "An implementation of JSON Schema validation for Python" optional = false python-versions = ">=3.9" files = [ - {file = "jsonschema-4.24.0-py3-none-any.whl", hash = "sha256:a462455f19f5faf404a7902952b6f0e3ce868f3ee09a359b05eca6673bd8412d"}, - {file = "jsonschema-4.24.0.tar.gz", hash = "sha256:0b4e8069eb12aedfa881333004bccaec24ecef5a8a6a4b6df142b2cc9599d196"}, + {file = "jsonschema-4.25.1-py3-none-any.whl", hash = "sha256:3fba0169e345c7175110351d456342c364814cfcf3b964ba4587f22915230a63"}, + {file = "jsonschema-4.25.1.tar.gz", hash = "sha256:e4a9655ce0da0c0b67a085847e00a3a51449e1157f4f75e9fb5aa545e122eb85"}, ] [package.dependencies] @@ -238,7 +254,7 @@ rpds-py = ">=0.7.1" [package.extras] format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"] -format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "uri-template", "webcolors (>=24.6.0)"] +format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "rfc3987-syntax (>=1.1.0)", "uri-template", "webcolors (>=24.6.0)"] [[package]] name = "jsonschema-specifications" @@ -348,13 +364,13 @@ files = [ [[package]] name = "platformdirs" -version = "4.3.8" +version = "4.4.0" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." 
optional = false python-versions = ">=3.9" files = [ - {file = "platformdirs-4.3.8-py3-none-any.whl", hash = "sha256:ff7059bb7eb1179e2685604f4aaf157cfd9535242bd23742eadc3c13542139b4"}, - {file = "platformdirs-4.3.8.tar.gz", hash = "sha256:3d512d96e16bcb959a814c9f348431070822a6496326a4be0911c40b5a74c2bc"}, + {file = "platformdirs-4.4.0-py3-none-any.whl", hash = "sha256:abd01743f24e5287cd7a5db3752faf1a2d65353f38ec26d98e25a6db65958c85"}, + {file = "platformdirs-4.4.0.tar.gz", hash = "sha256:ca753cf4d81dc309bc67b0ea38fd15dc97bc30ce419a7f58d13eb3bf14c4febf"}, ] [package.extras] @@ -397,41 +413,56 @@ virtualenv = ">=20.10.0" [[package]] name = "protobuf" -version = "6.31.1" +version = "6.32.0" description = "" optional = false python-versions = ">=3.9" files = [ - {file = "protobuf-6.31.1-cp310-abi3-win32.whl", hash = "sha256:7fa17d5a29c2e04b7d90e5e32388b8bfd0e7107cd8e616feef7ed3fa6bdab5c9"}, - {file = "protobuf-6.31.1-cp310-abi3-win_amd64.whl", hash = "sha256:426f59d2964864a1a366254fa703b8632dcec0790d8862d30034d8245e1cd447"}, - {file = "protobuf-6.31.1-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:6f1227473dc43d44ed644425268eb7c2e488ae245d51c6866d19fe158e207402"}, - {file = "protobuf-6.31.1-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:a40fc12b84c154884d7d4c4ebd675d5b3b5283e155f324049ae396b95ddebc39"}, - {file = "protobuf-6.31.1-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:4ee898bf66f7a8b0bd21bce523814e6fbd8c6add948045ce958b73af7e8878c6"}, - {file = "protobuf-6.31.1-cp39-cp39-win32.whl", hash = "sha256:0414e3aa5a5f3ff423828e1e6a6e907d6c65c1d5b7e6e975793d5590bdeecc16"}, - {file = "protobuf-6.31.1-cp39-cp39-win_amd64.whl", hash = "sha256:8764cf4587791e7564051b35524b72844f845ad0bb011704c3736cce762d8fe9"}, - {file = "protobuf-6.31.1-py3-none-any.whl", hash = "sha256:720a6c7e6b77288b85063569baae8536671b39f15cc22037ec7045658d80489e"}, - {file = "protobuf-6.31.1.tar.gz", hash = "sha256:d8cac4c982f0b957a4dc73a80e2ea24fab08e679c0de9deb835f4a12d69aca9a"}, + {file = "protobuf-6.32.0-cp310-abi3-win32.whl", hash = "sha256:84f9e3c1ff6fb0308dbacb0950d8aa90694b0d0ee68e75719cb044b7078fe741"}, + {file = "protobuf-6.32.0-cp310-abi3-win_amd64.whl", hash = "sha256:a8bdbb2f009cfc22a36d031f22a625a38b615b5e19e558a7b756b3279723e68e"}, + {file = "protobuf-6.32.0-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d52691e5bee6c860fff9a1c86ad26a13afbeb4b168cd4445c922b7e2cf85aaf0"}, + {file = "protobuf-6.32.0-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:501fe6372fd1c8ea2a30b4d9be8f87955a64d6be9c88a973996cef5ef6f0abf1"}, + {file = "protobuf-6.32.0-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:75a2aab2bd1aeb1f5dc7c5f33bcb11d82ea8c055c9becbb41c26a8c43fd7092c"}, + {file = "protobuf-6.32.0-cp39-cp39-win32.whl", hash = "sha256:7db8ed09024f115ac877a1427557b838705359f047b2ff2f2b2364892d19dacb"}, + {file = "protobuf-6.32.0-cp39-cp39-win_amd64.whl", hash = "sha256:15eba1b86f193a407607112ceb9ea0ba9569aed24f93333fe9a497cf2fda37d3"}, + {file = "protobuf-6.32.0-py3-none-any.whl", hash = "sha256:ba377e5b67b908c8f3072a57b63e2c6a4cbd18aea4ed98d2584350dbf46f2783"}, + {file = "protobuf-6.32.0.tar.gz", hash = "sha256:a81439049127067fc49ec1d36e25c6ee1d1a2b7be930675f919258d03c04e7d2"}, ] +[[package]] +name = "pygments" +version = "2.19.2" +description = "Pygments is a syntax highlighting package written in Python." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b"}, + {file = "pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887"}, +] + +[package.extras] +windows-terminal = ["colorama (>=0.4.6)"] + [[package]] name = "pytest" -version = "8.3.5" +version = "8.4.1" description = "pytest: simple powerful testing with Python" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820"}, - {file = "pytest-8.3.5.tar.gz", hash = "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845"}, + {file = "pytest-8.4.1-py3-none-any.whl", hash = "sha256:539c70ba6fcead8e78eebbf1115e8b589e7565830d7d006a8723f19ac8a0afb7"}, + {file = "pytest-8.4.1.tar.gz", hash = "sha256:7c67fd69174877359ed9371ec3af8a3d2b04741818c51e5e99cc1742251fa93c"}, ] [package.dependencies] -colorama = {version = "*", markers = "sys_platform == \"win32\""} -iniconfig = "*" -packaging = "*" +colorama = {version = ">=0.4", markers = "sys_platform == \"win32\""} +iniconfig = ">=1" +packaging = ">=20" pluggy = ">=1.5,<2" +pygments = ">=2.7.2" [package.extras] -dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "requests", "setuptools", "xmlschema"] [[package]] name = "pyyaml" @@ -513,150 +544,188 @@ typing-extensions = {version = ">=4.4.0", markers = "python_version < \"3.13\""} [[package]] name = "rpds-py" -version = "0.25.1" +version = "0.27.1" description = "Python bindings to Rust's persistent data structures (rpds)" optional = false python-versions = ">=3.9" files = [ - {file = "rpds_py-0.25.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:f4ad628b5174d5315761b67f212774a32f5bad5e61396d38108bd801c0a8f5d9"}, - {file = "rpds_py-0.25.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8c742af695f7525e559c16f1562cf2323db0e3f0fbdcabdf6865b095256b2d40"}, - {file = "rpds_py-0.25.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:605ffe7769e24b1800b4d024d24034405d9404f0bc2f55b6db3362cd34145a6f"}, - {file = "rpds_py-0.25.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ccc6f3ddef93243538be76f8e47045b4aad7a66a212cd3a0f23e34469473d36b"}, - {file = "rpds_py-0.25.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f70316f760174ca04492b5ab01be631a8ae30cadab1d1081035136ba12738cfa"}, - {file = "rpds_py-0.25.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e1dafef8df605fdb46edcc0bf1573dea0d6d7b01ba87f85cd04dc855b2b4479e"}, - {file = "rpds_py-0.25.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0701942049095741a8aeb298a31b203e735d1c61f4423511d2b1a41dcd8a16da"}, - {file = "rpds_py-0.25.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e87798852ae0b37c88babb7f7bbbb3e3fecc562a1c340195b44c7e24d403e380"}, - {file = "rpds_py-0.25.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:3bcce0edc1488906c2d4c75c94c70a0417e83920dd4c88fec1078c94843a6ce9"}, - {file = "rpds_py-0.25.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e2f6a2347d3440ae789505693a02836383426249d5293541cd712e07e7aecf54"}, - {file = 
"rpds_py-0.25.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:4fd52d3455a0aa997734f3835cbc4c9f32571345143960e7d7ebfe7b5fbfa3b2"}, - {file = "rpds_py-0.25.1-cp310-cp310-win32.whl", hash = "sha256:3f0b1798cae2bbbc9b9db44ee068c556d4737911ad53a4e5093d09d04b3bbc24"}, - {file = "rpds_py-0.25.1-cp310-cp310-win_amd64.whl", hash = "sha256:3ebd879ab996537fc510a2be58c59915b5dd63bccb06d1ef514fee787e05984a"}, - {file = "rpds_py-0.25.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:5f048bbf18b1f9120685c6d6bb70cc1a52c8cc11bdd04e643d28d3be0baf666d"}, - {file = "rpds_py-0.25.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4fbb0dbba559959fcb5d0735a0f87cdbca9e95dac87982e9b95c0f8f7ad10255"}, - {file = "rpds_py-0.25.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4ca54b9cf9d80b4016a67a0193ebe0bcf29f6b0a96f09db942087e294d3d4c2"}, - {file = "rpds_py-0.25.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1ee3e26eb83d39b886d2cb6e06ea701bba82ef30a0de044d34626ede51ec98b0"}, - {file = "rpds_py-0.25.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:89706d0683c73a26f76a5315d893c051324d771196ae8b13e6ffa1ffaf5e574f"}, - {file = "rpds_py-0.25.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c2013ee878c76269c7b557a9a9c042335d732e89d482606990b70a839635feb7"}, - {file = "rpds_py-0.25.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45e484db65e5380804afbec784522de84fa95e6bb92ef1bd3325d33d13efaebd"}, - {file = "rpds_py-0.25.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:48d64155d02127c249695abb87d39f0faf410733428d499867606be138161d65"}, - {file = "rpds_py-0.25.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:048893e902132fd6548a2e661fb38bf4896a89eea95ac5816cf443524a85556f"}, - {file = "rpds_py-0.25.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:0317177b1e8691ab5879f4f33f4b6dc55ad3b344399e23df2e499de7b10a548d"}, - {file = "rpds_py-0.25.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bffcf57826d77a4151962bf1701374e0fc87f536e56ec46f1abdd6a903354042"}, - {file = "rpds_py-0.25.1-cp311-cp311-win32.whl", hash = "sha256:cda776f1967cb304816173b30994faaf2fd5bcb37e73118a47964a02c348e1bc"}, - {file = "rpds_py-0.25.1-cp311-cp311-win_amd64.whl", hash = "sha256:dc3c1ff0abc91444cd20ec643d0f805df9a3661fcacf9c95000329f3ddf268a4"}, - {file = "rpds_py-0.25.1-cp311-cp311-win_arm64.whl", hash = "sha256:5a3ddb74b0985c4387719fc536faced33cadf2172769540c62e2a94b7b9be1c4"}, - {file = "rpds_py-0.25.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:b5ffe453cde61f73fea9430223c81d29e2fbf412a6073951102146c84e19e34c"}, - {file = "rpds_py-0.25.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:115874ae5e2fdcfc16b2aedc95b5eef4aebe91b28e7e21951eda8a5dc0d3461b"}, - {file = "rpds_py-0.25.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a714bf6e5e81b0e570d01f56e0c89c6375101b8463999ead3a93a5d2a4af91fa"}, - {file = "rpds_py-0.25.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:35634369325906bcd01577da4c19e3b9541a15e99f31e91a02d010816b49bfda"}, - {file = "rpds_py-0.25.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d4cb2b3ddc16710548801c6fcc0cfcdeeff9dafbc983f77265877793f2660309"}, - {file = "rpds_py-0.25.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9ceca1cf097ed77e1a51f1dbc8d174d10cb5931c188a4505ff9f3e119dfe519b"}, - {file = 
"rpds_py-0.25.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c2cd1a4b0c2b8c5e31ffff50d09f39906fe351389ba143c195566056c13a7ea"}, - {file = "rpds_py-0.25.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1de336a4b164c9188cb23f3703adb74a7623ab32d20090d0e9bf499a2203ad65"}, - {file = "rpds_py-0.25.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9fca84a15333e925dd59ce01da0ffe2ffe0d6e5d29a9eeba2148916d1824948c"}, - {file = "rpds_py-0.25.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:88ec04afe0c59fa64e2f6ea0dd9657e04fc83e38de90f6de201954b4d4eb59bd"}, - {file = "rpds_py-0.25.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a8bd2f19e312ce3e1d2c635618e8a8d8132892bb746a7cf74780a489f0f6cdcb"}, - {file = "rpds_py-0.25.1-cp312-cp312-win32.whl", hash = "sha256:e5e2f7280d8d0d3ef06f3ec1b4fd598d386cc6f0721e54f09109a8132182fbfe"}, - {file = "rpds_py-0.25.1-cp312-cp312-win_amd64.whl", hash = "sha256:db58483f71c5db67d643857404da360dce3573031586034b7d59f245144cc192"}, - {file = "rpds_py-0.25.1-cp312-cp312-win_arm64.whl", hash = "sha256:6d50841c425d16faf3206ddbba44c21aa3310a0cebc3c1cdfc3e3f4f9f6f5728"}, - {file = "rpds_py-0.25.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:659d87430a8c8c704d52d094f5ba6fa72ef13b4d385b7e542a08fc240cb4a559"}, - {file = "rpds_py-0.25.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:68f6f060f0bbdfb0245267da014d3a6da9be127fe3e8cc4a68c6f833f8a23bb1"}, - {file = "rpds_py-0.25.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:083a9513a33e0b92cf6e7a6366036c6bb43ea595332c1ab5c8ae329e4bcc0a9c"}, - {file = "rpds_py-0.25.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:816568614ecb22b18a010c7a12559c19f6fe993526af88e95a76d5a60b8b75fb"}, - {file = "rpds_py-0.25.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3c6564c0947a7f52e4792983f8e6cf9bac140438ebf81f527a21d944f2fd0a40"}, - {file = "rpds_py-0.25.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5c4a128527fe415d73cf1f70a9a688d06130d5810be69f3b553bf7b45e8acf79"}, - {file = "rpds_py-0.25.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a49e1d7a4978ed554f095430b89ecc23f42014a50ac385eb0c4d163ce213c325"}, - {file = "rpds_py-0.25.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d74ec9bc0e2feb81d3f16946b005748119c0f52a153f6db6a29e8cd68636f295"}, - {file = "rpds_py-0.25.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:3af5b4cc10fa41e5bc64e5c198a1b2d2864337f8fcbb9a67e747e34002ce812b"}, - {file = "rpds_py-0.25.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:79dc317a5f1c51fd9c6a0c4f48209c6b8526d0524a6904fc1076476e79b00f98"}, - {file = "rpds_py-0.25.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1521031351865e0181bc585147624d66b3b00a84109b57fcb7a779c3ec3772cd"}, - {file = "rpds_py-0.25.1-cp313-cp313-win32.whl", hash = "sha256:5d473be2b13600b93a5675d78f59e63b51b1ba2d0476893415dfbb5477e65b31"}, - {file = "rpds_py-0.25.1-cp313-cp313-win_amd64.whl", hash = "sha256:a7b74e92a3b212390bdce1d93da9f6488c3878c1d434c5e751cbc202c5e09500"}, - {file = "rpds_py-0.25.1-cp313-cp313-win_arm64.whl", hash = "sha256:dd326a81afe332ede08eb39ab75b301d5676802cdffd3a8f287a5f0b694dc3f5"}, - {file = "rpds_py-0.25.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:a58d1ed49a94d4183483a3ce0af22f20318d4a1434acee255d683ad90bf78129"}, - {file = "rpds_py-0.25.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = 
"sha256:f251bf23deb8332823aef1da169d5d89fa84c89f67bdfb566c49dea1fccfd50d"}, - {file = "rpds_py-0.25.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8dbd586bfa270c1103ece2109314dd423df1fa3d9719928b5d09e4840cec0d72"}, - {file = "rpds_py-0.25.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6d273f136e912aa101a9274c3145dcbddbe4bac560e77e6d5b3c9f6e0ed06d34"}, - {file = "rpds_py-0.25.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:666fa7b1bd0a3810a7f18f6d3a25ccd8866291fbbc3c9b912b917a6715874bb9"}, - {file = "rpds_py-0.25.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:921954d7fbf3fccc7de8f717799304b14b6d9a45bbeec5a8d7408ccbf531faf5"}, - {file = "rpds_py-0.25.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3d86373ff19ca0441ebeb696ef64cb58b8b5cbacffcda5a0ec2f3911732a194"}, - {file = "rpds_py-0.25.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c8980cde3bb8575e7c956a530f2c217c1d6aac453474bf3ea0f9c89868b531b6"}, - {file = "rpds_py-0.25.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:8eb8c84ecea987a2523e057c0d950bcb3f789696c0499290b8d7b3107a719d78"}, - {file = "rpds_py-0.25.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:e43a005671a9ed5a650f3bc39e4dbccd6d4326b24fb5ea8be5f3a43a6f576c72"}, - {file = "rpds_py-0.25.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:58f77c60956501a4a627749a6dcb78dac522f249dd96b5c9f1c6af29bfacfb66"}, - {file = "rpds_py-0.25.1-cp313-cp313t-win32.whl", hash = "sha256:2cb9e5b5e26fc02c8a4345048cd9998c2aca7c2712bd1b36da0c72ee969a3523"}, - {file = "rpds_py-0.25.1-cp313-cp313t-win_amd64.whl", hash = "sha256:401ca1c4a20cc0510d3435d89c069fe0a9ae2ee6495135ac46bdd49ec0495763"}, - {file = "rpds_py-0.25.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:ce4c8e485a3c59593f1a6f683cf0ea5ab1c1dc94d11eea5619e4fb5228b40fbd"}, - {file = "rpds_py-0.25.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d8222acdb51a22929c3b2ddb236b69c59c72af4019d2cba961e2f9add9b6e634"}, - {file = "rpds_py-0.25.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4593c4eae9b27d22df41cde518b4b9e4464d139e4322e2127daa9b5b981b76be"}, - {file = "rpds_py-0.25.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bd035756830c712b64725a76327ce80e82ed12ebab361d3a1cdc0f51ea21acb0"}, - {file = "rpds_py-0.25.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:114a07e85f32b125404f28f2ed0ba431685151c037a26032b213c882f26eb908"}, - {file = "rpds_py-0.25.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dec21e02e6cc932538b5203d3a8bd6aa1480c98c4914cb88eea064ecdbc6396a"}, - {file = "rpds_py-0.25.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:09eab132f41bf792c7a0ea1578e55df3f3e7f61888e340779b06050a9a3f16e9"}, - {file = "rpds_py-0.25.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c98f126c4fc697b84c423e387337d5b07e4a61e9feac494362a59fd7a2d9ed80"}, - {file = "rpds_py-0.25.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:0e6a327af8ebf6baba1c10fadd04964c1965d375d318f4435d5f3f9651550f4a"}, - {file = "rpds_py-0.25.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:bc120d1132cff853ff617754196d0ac0ae63befe7c8498bd67731ba368abe451"}, - {file = "rpds_py-0.25.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:140f61d9bed7839446bdd44852e30195c8e520f81329b4201ceead4d64eb3a9f"}, - {file = 
"rpds_py-0.25.1-cp39-cp39-win32.whl", hash = "sha256:9c006f3aadeda131b438c3092124bd196b66312f0caa5823ef09585a669cf449"}, - {file = "rpds_py-0.25.1-cp39-cp39-win_amd64.whl", hash = "sha256:a61d0b2c7c9a0ae45732a77844917b427ff16ad5464b4d4f5e4adb955f582890"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b24bf3cd93d5b6ecfbedec73b15f143596c88ee249fa98cefa9a9dc9d92c6f28"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:0eb90e94f43e5085623932b68840b6f379f26db7b5c2e6bcef3179bd83c9330f"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d50e4864498a9ab639d6d8854b25e80642bd362ff104312d9770b05d66e5fb13"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7c9409b47ba0650544b0bb3c188243b83654dfe55dcc173a86832314e1a6a35d"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:796ad874c89127c91970652a4ee8b00d56368b7e00d3477f4415fe78164c8000"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:85608eb70a659bf4c1142b2781083d4b7c0c4e2c90eff11856a9754e965b2540"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4feb9211d15d9160bc85fa72fed46432cdc143eb9cf6d5ca377335a921ac37b"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ccfa689b9246c48947d31dd9d8b16d89a0ecc8e0e26ea5253068efb6c542b76e"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:3c5b317ecbd8226887994852e85de562f7177add602514d4ac40f87de3ae45a8"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:454601988aab2c6e8fd49e7634c65476b2b919647626208e376afcd22019eeb8"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:1c0c434a53714358532d13539272db75a5ed9df75a4a090a753ac7173ec14e11"}, - {file = "rpds_py-0.25.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:f73ce1512e04fbe2bc97836e89830d6b4314c171587a99688082d090f934d20a"}, - {file = "rpds_py-0.25.1-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:ee86d81551ec68a5c25373c5643d343150cc54672b5e9a0cafc93c1870a53954"}, - {file = "rpds_py-0.25.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:89c24300cd4a8e4a51e55c31a8ff3918e6651b241ee8876a42cc2b2a078533ba"}, - {file = "rpds_py-0.25.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:771c16060ff4e79584dc48902a91ba79fd93eade3aa3a12d6d2a4aadaf7d542b"}, - {file = "rpds_py-0.25.1-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:785ffacd0ee61c3e60bdfde93baa6d7c10d86f15655bd706c89da08068dc5038"}, - {file = "rpds_py-0.25.1-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2a40046a529cc15cef88ac5ab589f83f739e2d332cb4d7399072242400ed68c9"}, - {file = "rpds_py-0.25.1-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:85fc223d9c76cabe5d0bff82214459189720dc135db45f9f66aa7cffbf9ff6c1"}, - {file = "rpds_py-0.25.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b0be9965f93c222fb9b4cc254235b3b2b215796c03ef5ee64f995b1b69af0762"}, - {file = "rpds_py-0.25.1-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:8378fa4a940f3fb509c081e06cb7f7f2adae8cf46ef258b0e0ed7519facd573e"}, - {file = "rpds_py-0.25.1-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:33358883a4490287e67a2c391dfaea4d9359860281db3292b6886bf0be3d8692"}, - {file = "rpds_py-0.25.1-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:1d1fadd539298e70cac2f2cb36f5b8a65f742b9b9f1014dd4ea1f7785e2470bf"}, - {file = "rpds_py-0.25.1-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:9a46c2fb2545e21181445515960006e85d22025bd2fe6db23e76daec6eb689fe"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:50f2c501a89c9a5f4e454b126193c5495b9fb441a75b298c60591d8a2eb92e1b"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:7d779b325cc8238227c47fbc53964c8cc9a941d5dbae87aa007a1f08f2f77b23"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:036ded36bedb727beeabc16dc1dad7cb154b3fa444e936a03b67a86dc6a5066e"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:245550f5a1ac98504147cba96ffec8fabc22b610742e9150138e5d60774686d7"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ff7c23ba0a88cb7b104281a99476cccadf29de2a0ef5ce864959a52675b1ca83"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e37caa8cdb3b7cf24786451a0bdb853f6347b8b92005eeb64225ae1db54d1c2b"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f2f48ab00181600ee266a095fe815134eb456163f7d6699f525dee471f312cf"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9e5fc7484fa7dce57e25063b0ec9638ff02a908304f861d81ea49273e43838c1"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:d3c10228d6cf6fe2b63d2e7985e94f6916fa46940df46b70449e9ff9297bd3d1"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:5d9e40f32745db28c1ef7aad23f6fc458dc1e29945bd6781060f0d15628b8ddf"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:35a8d1a24b5936b35c5003313bc177403d8bdef0f8b24f28b1c4a255f94ea992"}, - {file = "rpds_py-0.25.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:6099263f526efff9cf3883dfef505518730f7a7a93049b1d90d42e50a22b4793"}, - {file = "rpds_py-0.25.1.tar.gz", hash = "sha256:8960b6dac09b62dac26e75d7e2c4a22efb835d827a7278c34f72b2b84fa160e3"}, + {file = "rpds_py-0.27.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:68afeec26d42ab3b47e541b272166a0b4400313946871cba3ed3a4fc0cab1cef"}, + {file = "rpds_py-0.27.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:74e5b2f7bb6fa38b1b10546d27acbacf2a022a8b5543efb06cfebc72a59c85be"}, + {file = "rpds_py-0.27.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9024de74731df54546fab0bfbcdb49fae19159ecaecfc8f37c18d2c7e2c0bd61"}, + {file = "rpds_py-0.27.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:31d3ebadefcd73b73928ed0b2fd696f7fefda8629229f81929ac9c1854d0cffb"}, + {file = "rpds_py-0.27.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b2e7f8f169d775dd9092a1743768d771f1d1300453ddfe6325ae3ab5332b4657"}, + {file = "rpds_py-0.27.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:3d905d16f77eb6ab2e324e09bfa277b4c8e5e6b8a78a3e7ff8f3cdf773b4c013"}, + {file = "rpds_py-0.27.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50c946f048209e6362e22576baea09193809f87687a95a8db24e5fbdb307b93a"}, + {file = "rpds_py-0.27.1-cp310-cp310-manylinux_2_31_riscv64.whl", hash = "sha256:3deab27804d65cd8289eb814c2c0e807c4b9d9916c9225e363cb0cf875eb67c1"}, + {file = "rpds_py-0.27.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8b61097f7488de4be8244c89915da8ed212832ccf1e7c7753a25a394bf9b1f10"}, + {file = "rpds_py-0.27.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:8a3f29aba6e2d7d90528d3c792555a93497fe6538aa65eb675b44505be747808"}, + {file = "rpds_py-0.27.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:dd6cd0485b7d347304067153a6dc1d73f7d4fd995a396ef32a24d24b8ac63ac8"}, + {file = "rpds_py-0.27.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:6f4461bf931108c9fa226ffb0e257c1b18dc2d44cd72b125bec50ee0ab1248a9"}, + {file = "rpds_py-0.27.1-cp310-cp310-win32.whl", hash = "sha256:ee5422d7fb21f6a00c1901bf6559c49fee13a5159d0288320737bbf6585bd3e4"}, + {file = "rpds_py-0.27.1-cp310-cp310-win_amd64.whl", hash = "sha256:3e039aabf6d5f83c745d5f9a0a381d031e9ed871967c0a5c38d201aca41f3ba1"}, + {file = "rpds_py-0.27.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:be898f271f851f68b318872ce6ebebbc62f303b654e43bf72683dbdc25b7c881"}, + {file = "rpds_py-0.27.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:62ac3d4e3e07b58ee0ddecd71d6ce3b1637de2d373501412df395a0ec5f9beb5"}, + {file = "rpds_py-0.27.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4708c5c0ceb2d034f9991623631d3d23cb16e65c83736ea020cdbe28d57c0a0e"}, + {file = "rpds_py-0.27.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:abfa1171a9952d2e0002aba2ad3780820b00cc3d9c98c6630f2e93271501f66c"}, + {file = "rpds_py-0.27.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4b507d19f817ebaca79574b16eb2ae412e5c0835542c93fe9983f1e432aca195"}, + {file = "rpds_py-0.27.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:168b025f8fd8d8d10957405f3fdcef3dc20f5982d398f90851f4abc58c566c52"}, + {file = "rpds_py-0.27.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb56c6210ef77caa58e16e8c17d35c63fe3f5b60fd9ba9d424470c3400bcf9ed"}, + {file = "rpds_py-0.27.1-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:d252f2d8ca0195faa707f8eb9368955760880b2b42a8ee16d382bf5dd807f89a"}, + {file = "rpds_py-0.27.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6e5e54da1e74b91dbc7996b56640f79b195d5925c2b78efaa8c5d53e1d88edde"}, + {file = "rpds_py-0.27.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ffce0481cc6e95e5b3f0a47ee17ffbd234399e6d532f394c8dce320c3b089c21"}, + {file = "rpds_py-0.27.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:a205fdfe55c90c2cd8e540ca9ceba65cbe6629b443bc05db1f590a3db8189ff9"}, + {file = "rpds_py-0.27.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:689fb5200a749db0415b092972e8eba85847c23885c8543a8b0f5c009b1a5948"}, + {file = "rpds_py-0.27.1-cp311-cp311-win32.whl", hash = "sha256:3182af66048c00a075010bc7f4860f33913528a4b6fc09094a6e7598e462fe39"}, + {file = "rpds_py-0.27.1-cp311-cp311-win_amd64.whl", hash = "sha256:b4938466c6b257b2f5c4ff98acd8128ec36b5059e5c8f8372d79316b1c36bb15"}, + {file = "rpds_py-0.27.1-cp311-cp311-win_arm64.whl", hash = 
"sha256:2f57af9b4d0793e53266ee4325535a31ba48e2f875da81a9177c9926dfa60746"}, + {file = "rpds_py-0.27.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:ae2775c1973e3c30316892737b91f9283f9908e3cc7625b9331271eaaed7dc90"}, + {file = "rpds_py-0.27.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2643400120f55c8a96f7c9d858f7be0c88d383cd4653ae2cf0d0c88f668073e5"}, + {file = "rpds_py-0.27.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:16323f674c089b0360674a4abd28d5042947d54ba620f72514d69be4ff64845e"}, + {file = "rpds_py-0.27.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9a1f4814b65eacac94a00fc9a526e3fdafd78e439469644032032d0d63de4881"}, + {file = "rpds_py-0.27.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7ba32c16b064267b22f1850a34051121d423b6f7338a12b9459550eb2096e7ec"}, + {file = "rpds_py-0.27.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5c20f33fd10485b80f65e800bbe5f6785af510b9f4056c5a3c612ebc83ba6cb"}, + {file = "rpds_py-0.27.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:466bfe65bd932da36ff279ddd92de56b042f2266d752719beb97b08526268ec5"}, + {file = "rpds_py-0.27.1-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:41e532bbdcb57c92ba3be62c42e9f096431b4cf478da9bc3bc6ce5c38ab7ba7a"}, + {file = "rpds_py-0.27.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f149826d742b406579466283769a8ea448eed82a789af0ed17b0cd5770433444"}, + {file = "rpds_py-0.27.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:80c60cfb5310677bd67cb1e85a1e8eb52e12529545441b43e6f14d90b878775a"}, + {file = "rpds_py-0.27.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:7ee6521b9baf06085f62ba9c7a3e5becffbc32480d2f1b351559c001c38ce4c1"}, + {file = "rpds_py-0.27.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a512c8263249a9d68cac08b05dd59d2b3f2061d99b322813cbcc14c3c7421998"}, + {file = "rpds_py-0.27.1-cp312-cp312-win32.whl", hash = "sha256:819064fa048ba01b6dadc5116f3ac48610435ac9a0058bbde98e569f9e785c39"}, + {file = "rpds_py-0.27.1-cp312-cp312-win_amd64.whl", hash = "sha256:d9199717881f13c32c4046a15f024971a3b78ad4ea029e8da6b86e5aa9cf4594"}, + {file = "rpds_py-0.27.1-cp312-cp312-win_arm64.whl", hash = "sha256:33aa65b97826a0e885ef6e278fbd934e98cdcfed80b63946025f01e2f5b29502"}, + {file = "rpds_py-0.27.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:e4b9fcfbc021633863a37e92571d6f91851fa656f0180246e84cbd8b3f6b329b"}, + {file = "rpds_py-0.27.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1441811a96eadca93c517d08df75de45e5ffe68aa3089924f963c782c4b898cf"}, + {file = "rpds_py-0.27.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55266dafa22e672f5a4f65019015f90336ed31c6383bd53f5e7826d21a0e0b83"}, + {file = "rpds_py-0.27.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d78827d7ac08627ea2c8e02c9e5b41180ea5ea1f747e9db0915e3adf36b62dcf"}, + {file = "rpds_py-0.27.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae92443798a40a92dc5f0b01d8a7c93adde0c4dc965310a29ae7c64d72b9fad2"}, + {file = "rpds_py-0.27.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c46c9dd2403b66a2a3b9720ec4b74d4ab49d4fabf9f03dfdce2d42af913fe8d0"}, + {file = "rpds_py-0.27.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2efe4eb1d01b7f5f1939f4ef30ecea6c6b3521eec451fb93191bf84b2a522418"}, + {file = 
"rpds_py-0.27.1-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:15d3b4d83582d10c601f481eca29c3f138d44c92187d197aff663a269197c02d"}, + {file = "rpds_py-0.27.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4ed2e16abbc982a169d30d1a420274a709949e2cbdef119fe2ec9d870b42f274"}, + {file = "rpds_py-0.27.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a75f305c9b013289121ec0f1181931975df78738cdf650093e6b86d74aa7d8dd"}, + {file = "rpds_py-0.27.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:67ce7620704745881a3d4b0ada80ab4d99df390838839921f99e63c474f82cf2"}, + {file = "rpds_py-0.27.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9d992ac10eb86d9b6f369647b6a3f412fc0075cfd5d799530e84d335e440a002"}, + {file = "rpds_py-0.27.1-cp313-cp313-win32.whl", hash = "sha256:4f75e4bd8ab8db624e02c8e2fc4063021b58becdbe6df793a8111d9343aec1e3"}, + {file = "rpds_py-0.27.1-cp313-cp313-win_amd64.whl", hash = "sha256:f9025faafc62ed0b75a53e541895ca272815bec18abe2249ff6501c8f2e12b83"}, + {file = "rpds_py-0.27.1-cp313-cp313-win_arm64.whl", hash = "sha256:ed10dc32829e7d222b7d3b93136d25a406ba9788f6a7ebf6809092da1f4d279d"}, + {file = "rpds_py-0.27.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:92022bbbad0d4426e616815b16bc4127f83c9a74940e1ccf3cfe0b387aba0228"}, + {file = "rpds_py-0.27.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:47162fdab9407ec3f160805ac3e154df042e577dd53341745fc7fb3f625e6d92"}, + {file = "rpds_py-0.27.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb89bec23fddc489e5d78b550a7b773557c9ab58b7946154a10a6f7a214a48b2"}, + {file = "rpds_py-0.27.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e48af21883ded2b3e9eb48cb7880ad8598b31ab752ff3be6457001d78f416723"}, + {file = "rpds_py-0.27.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6f5b7bd8e219ed50299e58551a410b64daafb5017d54bbe822e003856f06a802"}, + {file = "rpds_py-0.27.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:08f1e20bccf73b08d12d804d6e1c22ca5530e71659e6673bce31a6bb71c1e73f"}, + {file = "rpds_py-0.27.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0dc5dceeaefcc96dc192e3a80bbe1d6c410c469e97bdd47494a7d930987f18b2"}, + {file = "rpds_py-0.27.1-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:d76f9cc8665acdc0c9177043746775aa7babbf479b5520b78ae4002d889f5c21"}, + {file = "rpds_py-0.27.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:134fae0e36022edad8290a6661edf40c023562964efea0cc0ec7f5d392d2aaef"}, + {file = "rpds_py-0.27.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:eb11a4f1b2b63337cfd3b4d110af778a59aae51c81d195768e353d8b52f88081"}, + {file = "rpds_py-0.27.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:13e608ac9f50a0ed4faec0e90ece76ae33b34c0e8656e3dceb9a7db994c692cd"}, + {file = "rpds_py-0.27.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dd2135527aa40f061350c3f8f89da2644de26cd73e4de458e79606384f4f68e7"}, + {file = "rpds_py-0.27.1-cp313-cp313t-win32.whl", hash = "sha256:3020724ade63fe320a972e2ffd93b5623227e684315adce194941167fee02688"}, + {file = "rpds_py-0.27.1-cp313-cp313t-win_amd64.whl", hash = "sha256:8ee50c3e41739886606388ba3ab3ee2aae9f35fb23f833091833255a31740797"}, + {file = "rpds_py-0.27.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:acb9aafccaae278f449d9c713b64a9e68662e7799dbd5859e2c6b3c67b56d334"}, + {file = "rpds_py-0.27.1-cp314-cp314-macosx_11_0_arm64.whl", hash = 
"sha256:b7fb801aa7f845ddf601c49630deeeccde7ce10065561d92729bfe81bd21fb33"}, + {file = "rpds_py-0.27.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fe0dd05afb46597b9a2e11c351e5e4283c741237e7f617ffb3252780cca9336a"}, + {file = "rpds_py-0.27.1-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b6dfb0e058adb12d8b1d1b25f686e94ffa65d9995a5157afe99743bf7369d62b"}, + {file = "rpds_py-0.27.1-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ed090ccd235f6fa8bb5861684567f0a83e04f52dfc2e5c05f2e4b1309fcf85e7"}, + {file = "rpds_py-0.27.1-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bf876e79763eecf3e7356f157540d6a093cef395b65514f17a356f62af6cc136"}, + {file = "rpds_py-0.27.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:12ed005216a51b1d6e2b02a7bd31885fe317e45897de81d86dcce7d74618ffff"}, + {file = "rpds_py-0.27.1-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:ee4308f409a40e50593c7e3bb8cbe0b4d4c66d1674a316324f0c2f5383b486f9"}, + {file = "rpds_py-0.27.1-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0b08d152555acf1f455154d498ca855618c1378ec810646fcd7c76416ac6dc60"}, + {file = "rpds_py-0.27.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:dce51c828941973a5684d458214d3a36fcd28da3e1875d659388f4f9f12cc33e"}, + {file = "rpds_py-0.27.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:c1476d6f29eb81aa4151c9a31219b03f1f798dc43d8af1250a870735516a1212"}, + {file = "rpds_py-0.27.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:3ce0cac322b0d69b63c9cdb895ee1b65805ec9ffad37639f291dd79467bee675"}, + {file = "rpds_py-0.27.1-cp314-cp314-win32.whl", hash = "sha256:dfbfac137d2a3d0725758cd141f878bf4329ba25e34979797c89474a89a8a3a3"}, + {file = "rpds_py-0.27.1-cp314-cp314-win_amd64.whl", hash = "sha256:a6e57b0abfe7cc513450fcf529eb486b6e4d3f8aee83e92eb5f1ef848218d456"}, + {file = "rpds_py-0.27.1-cp314-cp314-win_arm64.whl", hash = "sha256:faf8d146f3d476abfee026c4ae3bdd9ca14236ae4e4c310cbd1cf75ba33d24a3"}, + {file = "rpds_py-0.27.1-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:ba81d2b56b6d4911ce735aad0a1d4495e808b8ee4dc58715998741a26874e7c2"}, + {file = "rpds_py-0.27.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:84f7d509870098de0e864cad0102711c1e24e9b1a50ee713b65928adb22269e4"}, + {file = "rpds_py-0.27.1-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9e960fc78fecd1100539f14132425e1d5fe44ecb9239f8f27f079962021523e"}, + {file = "rpds_py-0.27.1-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:62f85b665cedab1a503747617393573995dac4600ff51869d69ad2f39eb5e817"}, + {file = "rpds_py-0.27.1-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fed467af29776f6556250c9ed85ea5a4dd121ab56a5f8b206e3e7a4c551e48ec"}, + {file = "rpds_py-0.27.1-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f2729615f9d430af0ae6b36cf042cb55c0936408d543fb691e1a9e36648fd35a"}, + {file = "rpds_py-0.27.1-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b207d881a9aef7ba753d69c123a35d96ca7cb808056998f6b9e8747321f03b8"}, + {file = "rpds_py-0.27.1-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:639fd5efec029f99b79ae47e5d7e00ad8a773da899b6309f6786ecaf22948c48"}, + {file = "rpds_py-0.27.1-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fecc80cb2a90e28af8a9b366edacf33d7a91cbfe4c2c4544ea1246e949cfebeb"}, 
+ {file = "rpds_py-0.27.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:42a89282d711711d0a62d6f57d81aa43a1368686c45bc1c46b7f079d55692734"}, + {file = "rpds_py-0.27.1-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:cf9931f14223de59551ab9d38ed18d92f14f055a5f78c1d8ad6493f735021bbb"}, + {file = "rpds_py-0.27.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:f39f58a27cc6e59f432b568ed8429c7e1641324fbe38131de852cd77b2d534b0"}, + {file = "rpds_py-0.27.1-cp314-cp314t-win32.whl", hash = "sha256:d5fa0ee122dc09e23607a28e6d7b150da16c662e66409bbe85230e4c85bb528a"}, + {file = "rpds_py-0.27.1-cp314-cp314t-win_amd64.whl", hash = "sha256:6567d2bb951e21232c2f660c24cf3470bb96de56cdcb3f071a83feeaff8a2772"}, + {file = "rpds_py-0.27.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:c918c65ec2e42c2a78d19f18c553d77319119bf43aa9e2edf7fb78d624355527"}, + {file = "rpds_py-0.27.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1fea2b1a922c47c51fd07d656324531adc787e415c8b116530a1d29c0516c62d"}, + {file = "rpds_py-0.27.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bbf94c58e8e0cd6b6f38d8de67acae41b3a515c26169366ab58bdca4a6883bb8"}, + {file = "rpds_py-0.27.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c2a8fed130ce946d5c585eddc7c8eeef0051f58ac80a8ee43bd17835c144c2cc"}, + {file = "rpds_py-0.27.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:037a2361db72ee98d829bc2c5b7cc55598ae0a5e0ec1823a56ea99374cfd73c1"}, + {file = "rpds_py-0.27.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5281ed1cc1d49882f9997981c88df1a22e140ab41df19071222f7e5fc4e72125"}, + {file = "rpds_py-0.27.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fd50659a069c15eef8aa3d64bbef0d69fd27bb4a50c9ab4f17f83a16cbf8905"}, + {file = "rpds_py-0.27.1-cp39-cp39-manylinux_2_31_riscv64.whl", hash = "sha256:c4b676c4ae3921649a15d28ed10025548e9b561ded473aa413af749503c6737e"}, + {file = "rpds_py-0.27.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:079bc583a26db831a985c5257797b2b5d3affb0386e7ff886256762f82113b5e"}, + {file = "rpds_py-0.27.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:4e44099bd522cba71a2c6b97f68e19f40e7d85399de899d66cdb67b32d7cb786"}, + {file = "rpds_py-0.27.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:e202e6d4188e53c6661af813b46c37ca2c45e497fc558bacc1a7630ec2695aec"}, + {file = "rpds_py-0.27.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:f41f814b8eaa48768d1bb551591f6ba45f87ac76899453e8ccd41dba1289b04b"}, + {file = "rpds_py-0.27.1-cp39-cp39-win32.whl", hash = "sha256:9e71f5a087ead99563c11fdaceee83ee982fd39cf67601f4fd66cb386336ee52"}, + {file = "rpds_py-0.27.1-cp39-cp39-win_amd64.whl", hash = "sha256:71108900c9c3c8590697244b9519017a400d9ba26a36c48381b3f64743a44aab"}, + {file = "rpds_py-0.27.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:7ba22cb9693df986033b91ae1d7a979bc399237d45fccf875b76f62bb9e52ddf"}, + {file = "rpds_py-0.27.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:5b640501be9288c77738b5492b3fd3abc4ba95c50c2e41273c8a1459f08298d3"}, + {file = "rpds_py-0.27.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb08b65b93e0c6dd70aac7f7890a9c0938d5ec71d5cb32d45cf844fb8ae47636"}, + {file = "rpds_py-0.27.1-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d7ff07d696a7a38152ebdb8212ca9e5baab56656749f3d6004b34ab726b550b8"}, + {file = 
"rpds_py-0.27.1-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fb7c72262deae25366e3b6c0c0ba46007967aea15d1eea746e44ddba8ec58dcc"}, + {file = "rpds_py-0.27.1-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7b002cab05d6339716b03a4a3a2ce26737f6231d7b523f339fa061d53368c9d8"}, + {file = "rpds_py-0.27.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23f6b69d1c26c4704fec01311963a41d7de3ee0570a84ebde4d544e5a1859ffc"}, + {file = "rpds_py-0.27.1-pp310-pypy310_pp73-manylinux_2_31_riscv64.whl", hash = "sha256:530064db9146b247351f2a0250b8f00b289accea4596a033e94be2389977de71"}, + {file = "rpds_py-0.27.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7b90b0496570bd6b0321724a330d8b545827c4df2034b6ddfc5f5275f55da2ad"}, + {file = "rpds_py-0.27.1-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:879b0e14a2da6a1102a3fc8af580fc1ead37e6d6692a781bd8c83da37429b5ab"}, + {file = "rpds_py-0.27.1-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:0d807710df3b5faa66c731afa162ea29717ab3be17bdc15f90f2d9f183da4059"}, + {file = "rpds_py-0.27.1-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:3adc388fc3afb6540aec081fa59e6e0d3908722771aa1e37ffe22b220a436f0b"}, + {file = "rpds_py-0.27.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:c796c0c1cc68cb08b0284db4229f5af76168172670c74908fdbd4b7d7f515819"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:cdfe4bb2f9fe7458b7453ad3c33e726d6d1c7c0a72960bcc23800d77384e42df"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:8fabb8fd848a5f75a2324e4a84501ee3a5e3c78d8603f83475441866e60b94a3"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eda8719d598f2f7f3e0f885cba8646644b55a187762bec091fa14a2b819746a9"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3c64d07e95606ec402a0a1c511fe003873fa6af630bda59bac77fac8b4318ebc"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:93a2ed40de81bcff59aabebb626562d48332f3d028ca2036f1d23cbb52750be4"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:387ce8c44ae94e0ec50532d9cb0edce17311024c9794eb196b90e1058aadeb66"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aaf94f812c95b5e60ebaf8bfb1898a7d7cb9c1af5744d4a67fa47796e0465d4e"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-manylinux_2_31_riscv64.whl", hash = "sha256:4848ca84d6ded9b58e474dfdbad4b8bfb450344c0551ddc8d958bf4b36aa837c"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2bde09cbcf2248b73c7c323be49b280180ff39fadcfe04e7b6f54a678d02a7cf"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:94c44ee01fd21c9058f124d2d4f0c9dc7634bec93cd4b38eefc385dabe71acbf"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:df8b74962e35c9249425d90144e721eed198e6555a0e22a563d29fe4486b51f6"}, + {file = "rpds_py-0.27.1-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:dc23e6820e3b40847e2f4a7726462ba0cf53089512abe9ee16318c366494c17a"}, + {file = "rpds_py-0.27.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = 
"sha256:aa8933159edc50be265ed22b401125c9eebff3171f570258854dbce3ecd55475"}, + {file = "rpds_py-0.27.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:a50431bf02583e21bf273c71b89d710e7a710ad5e39c725b14e685610555926f"}, + {file = "rpds_py-0.27.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78af06ddc7fe5cc0e967085a9115accee665fb912c22a3f54bad70cc65b05fe6"}, + {file = "rpds_py-0.27.1-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:70d0738ef8fee13c003b100c2fbd667ec4f133468109b3472d249231108283a3"}, + {file = "rpds_py-0.27.1-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e2f6fd8a1cea5bbe599b6e78a6e5ee08db434fc8ffea51ff201c8765679698b3"}, + {file = "rpds_py-0.27.1-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8177002868d1426305bb5de1e138161c2ec9eb2d939be38291d7c431c4712df8"}, + {file = "rpds_py-0.27.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:008b839781d6c9bf3b6a8984d1d8e56f0ec46dc56df61fd669c49b58ae800400"}, + {file = "rpds_py-0.27.1-pp39-pypy39_pp73-manylinux_2_31_riscv64.whl", hash = "sha256:a55b9132bb1ade6c734ddd2759c8dc132aa63687d259e725221f106b83a0e485"}, + {file = "rpds_py-0.27.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a46fdec0083a26415f11d5f236b79fa1291c32aaa4a17684d82f7017a1f818b1"}, + {file = "rpds_py-0.27.1-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:8a63b640a7845f2bdd232eb0d0a4a2dd939bcdd6c57e6bb134526487f3160ec5"}, + {file = "rpds_py-0.27.1-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:7e32721e5d4922deaaf963469d795d5bde6093207c52fec719bd22e5d1bedbc4"}, + {file = "rpds_py-0.27.1-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:2c426b99a068601b5f4623573df7a7c3d72e87533a2dd2253353a03e7502566c"}, + {file = "rpds_py-0.27.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:4fc9b7fe29478824361ead6e14e4f5aed570d477e06088826537e202d25fe859"}, + {file = "rpds_py-0.27.1.tar.gz", hash = "sha256:26a1c73171d10b7acccbded82bf6a586ab8203601e565badc74bbbf8bc5a10f8"}, ] [[package]] name = "typing-extensions" -version = "4.13.2" -description = "Backported and Experimental Type Hints for Python 3.8+" +version = "4.15.0" +description = "Backported and Experimental Type Hints for Python 3.9+" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "typing_extensions-4.13.2-py3-none-any.whl", hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c"}, - {file = "typing_extensions-4.13.2.tar.gz", hash = "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef"}, + {file = "typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548"}, + {file = "typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466"}, ] [[package]] name = "virtualenv" -version = "20.31.2" +version = "20.34.0" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.8" files = [ - {file = "virtualenv-20.31.2-py3-none-any.whl", hash = "sha256:36efd0d9650ee985f0cad72065001e66d49a6f24eb44d98980f630686243cf11"}, - {file = "virtualenv-20.31.2.tar.gz", hash = "sha256:e10c0a9d02835e592521be48b332b6caee6887f332c111aa79a09b9e79efc2af"}, + {file = "virtualenv-20.34.0-py3-none-any.whl", hash = "sha256:341f5afa7eee943e4984a9207c025feedd768baff6753cd660c857ceb3e36026"}, + {file 
= "virtualenv-20.34.0.tar.gz", hash = "sha256:44815b2c9dee7ed86e387b842a84f20b93f7f417f95886ca1996a72a4138eb1a"}, ] [package.dependencies] @@ -671,4 +740,4 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess [metadata] lock-version = "2.0" python-versions = "^3.12" -content-hash = "53c45992ce1109262a0db6e79aced43423e6fd83798b0b2bf45acca1bfc6d056" +content-hash = "fd2a28449c2fa3c9e20e3589fc27e0e773815be05e9e43a871d583a56a02dbb6" diff --git a/pyproject.toml b/pyproject.toml index f2ea162..c1deb22 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,7 @@ packages = [ [tool.poetry.dependencies] python = "^3.12" im-protobuf = "^8.2.0" -im-data-manager-job-decoder = "^2.1.0" +im-data-manager-job-decoder = "^2.4.0" jsonschema = "^4.21.1" pyyaml = ">= 5.3.1, < 7.0" From 3883412969c3657632aaf6294d664590f60ea596 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Mon, 1 Sep 2025 16:11:29 +0000 Subject: [PATCH 40/57] refactor: Switch away from workflow replicate property --- poetry.lock | 8 +-- pyproject.toml | 2 +- tests/test_workflow_engine_examples.py | 1 + .../test_workflow_validator_for_run_level.py | 24 -------- .../test_workflow_validator_for_tag_level.py | 24 -------- .../simple-python-fanout.yaml | 3 - workflow/decoder.py | 56 ------------------- workflow/workflow-schema.yaml | 19 ------- workflow/workflow_engine.py | 38 +++---------- workflow/workflow_validator.py | 35 ------------ 10 files changed, 13 insertions(+), 197 deletions(-) diff --git a/poetry.lock b/poetry.lock index 35b18b5..716a33c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -179,13 +179,13 @@ license = ["ukkonen"] [[package]] name = "im-data-manager-job-decoder" -version = "2.4.0" +version = "2.5.0" description = "Job decoding logic" optional = false python-versions = ">=3.10" files = [ - {file = "im_data_manager_job_decoder-2.4.0-py3-none-any.whl", hash = "sha256:4a911e2a8760dd381247f2f740b2e280a817dbdad65c65164dad97dfcf9058bf"}, - {file = "im_data_manager_job_decoder-2.4.0.tar.gz", hash = "sha256:34e7a8ac0421edc26760491ffd8b9183f0757ebc9e25dabf865235c936fad458"}, + {file = "im_data_manager_job_decoder-2.5.0-py3-none-any.whl", hash = "sha256:d177a37083b73c82d71c137cd36ab3bf54de0a4ab5ab55e5aec49acb238b86f6"}, + {file = "im_data_manager_job_decoder-2.5.0.tar.gz", hash = "sha256:1a0523ccead3ad851dcf6a450ec1792be1830d20a938d3ddfdf04ffcdf915a47"}, ] [package.dependencies] @@ -740,4 +740,4 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess [metadata] lock-version = "2.0" python-versions = "^3.12" -content-hash = "fd2a28449c2fa3c9e20e3589fc27e0e773815be05e9e43a871d583a56a02dbb6" +content-hash = "341541770454fac78492e8f33f1aca9418582886dfe2d24af99cc06dbd1b7137" diff --git a/pyproject.toml b/pyproject.toml index c1deb22..cde7f6b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,7 @@ packages = [ [tool.poetry.dependencies] python = "^3.12" im-protobuf = "^8.2.0" -im-data-manager-job-decoder = "^2.4.0" +im-data-manager-job-decoder = "^2.5.0" jsonschema = "^4.21.1" pyyaml = ">= 5.3.1, < 7.0" diff --git a/tests/test_workflow_engine_examples.py b/tests/test_workflow_engine_examples.py index 9d07f13..40e0573 100644 --- a/tests/test_workflow_engine_examples.py +++ b/tests/test_workflow_engine_examples.py @@ -398,6 +398,7 @@ def test_workflow_engine_simple_python_molprops_with_options(basic_engine): assert project_file_exists(output_file_2) +@pytest.mark.skip(reason="WIP") def test_workflow_engine_simple_python_fanout(basic_engine): # Arrange md, da = 
basic_engine diff --git a/tests/test_workflow_validator_for_run_level.py b/tests/test_workflow_validator_for_run_level.py index 1be6694..e76239d 100644 --- a/tests/test_workflow_validator_for_run_level.py +++ b/tests/test_workflow_validator_for_run_level.py @@ -217,30 +217,6 @@ def test_validate_simple_python_molprops_with_missing_input(): ] -def test_validate_replicate_using_undeclared_input(): - # Arrange - workflow_filename: str = os.path.join( - os.path.dirname(__file__), - "workflow-definitions", - "replicate-using-undeclared-input.yaml", - ) - with open(workflow_filename, "r", encoding="utf8") as workflow_file: - workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) - assert workflow - - # Act - error = WorkflowValidator.validate( - level=ValidationLevel.TAG, - workflow_definition=workflow, - ) - - # Assert - assert error.error_num == 7 - assert error.error_msg == [ - "Replicate input variable is not declared: y (step=step-2)" - ] - - def test_validate_duplicate_step_output_variable_names(): # Arrange workflow_filename: str = os.path.join( diff --git a/tests/test_workflow_validator_for_tag_level.py b/tests/test_workflow_validator_for_tag_level.py index 96e8e74..4c1719d 100644 --- a/tests/test_workflow_validator_for_tag_level.py +++ b/tests/test_workflow_validator_for_tag_level.py @@ -151,30 +151,6 @@ def test_validate_simple_python_molprops_with_options(): assert error.error_msg is None -def test_validate_replicate_using_undeclared_input(): - # Arrange - workflow_filename: str = os.path.join( - os.path.dirname(__file__), - "workflow-definitions", - "replicate-using-undeclared-input.yaml", - ) - with open(workflow_filename, "r", encoding="utf8") as workflow_file: - workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) - assert workflow - - # Act - error = WorkflowValidator.validate( - level=ValidationLevel.TAG, - workflow_definition=workflow, - ) - - # Assert - assert error.error_num == 7 - assert error.error_msg == [ - "Replicate input variable is not declared: y (step=step-2)" - ] - - def test_validate_duplicate_step_output_variable_names(): # Arrange workflow_filename: str = os.path.join( diff --git a/tests/workflow-definitions/simple-python-fanout.yaml b/tests/workflow-definitions/simple-python-fanout.yaml index 97ab108..0a50216 100644 --- a/tests/workflow-definitions/simple-python-fanout.yaml +++ b/tests/workflow-definitions/simple-python-fanout.yaml @@ -31,9 +31,6 @@ steps: name: desc1 value: "777" outputFile: results.smi - replicate: - using: - variable: inputFile variable-mapping: - variable: inputFile from-step: diff --git a/workflow/decoder.py b/workflow/decoder.py index 91c9464..674d1ad 100644 --- a/workflow/decoder.py +++ b/workflow/decoder.py @@ -5,7 +5,6 @@ import os from dataclasses import dataclass -from enum import Enum from typing import Any import jsonschema @@ -33,26 +32,6 @@ class Translation: out: str -class ReplicationOrigin(Enum): - """Oirgin of a replication variable.""" - - STEP_VARIABLE = 1 - WORKFLOW_VARIABLE = 2 - - -@dataclass -class ReplicationDriver: - """A step's replication driver. - The 'variable' is the variable for the step-to-be-executed - whose value is 'driven' by the values of the 'source_variable'. - The source variable is either from a step (or a workflow).""" - - origin: ReplicationOrigin - variable: str - source_variable: str - source_step_name: str | None = None - - def validate_schema(workflow: dict[str, Any]) -> str | None: """Checks the Workflow Definition against the built-in schema. 
If there's an error the error text is returned, otherwise None. @@ -173,38 +152,3 @@ def get_step_prior_step_variable_mapping( Translation(in_=step_variable, out=v_map["variable"]) ] return variable_mapping - - -def get_step_replication_driver(*, step: dict[str, Any]) -> ReplicationDriver | None: - """If the step is expected to replicate we return its replication driver, - which consists of a (prior) step name and an (output) variable name. - Otherwise it returns nothing.""" - if replicator := step.get("replicate"): - # We need the variable we replicate against, - # and the step that owns the variable. - # - # 'using' is a dict but there can be only single value for now - variable: str = replicator["using"]["variable"] - source_variable: str | None = None - # Is the variable from a prior step? - step_name: str | None = None - step_v_map = get_step_prior_step_variable_mapping(step=step) - for step_name_candidate, mappings in step_v_map.items(): - for mapping in mappings: - if mapping.out == variable: - step_name = step_name_candidate - source_variable = mapping.in_ - break - if step_name: - break - assert step_name - assert source_variable - - return ReplicationDriver( - origin=ReplicationOrigin.STEP_VARIABLE, - variable=variable, - source_step_name=step_name, - source_variable=source_variable, - ) - - return None diff --git a/workflow/workflow-schema.yaml b/workflow/workflow-schema.yaml index 27c726c..90acb29 100644 --- a/workflow/workflow-schema.yaml +++ b/workflow/workflow-schema.yaml @@ -59,17 +59,6 @@ definitions: type: string pattern: ^[a-zA-Z_][a-zA-Z0-9_]*$ - # A step replication control variable - # that is based on a step variable - replicate-using-variable: - type: object - additionalProperties: false - properties: - variable: - $ref: '#/definitions/variable-name' - required: - - variable - # A Step variable # (whose value is derived from a variable used in a prior step) step-variable-from-step: @@ -167,14 +156,6 @@ definitions: # The format of this is essentially idenical to the specification # used when a Job is launched via the DM API. $ref: '#/definitions/step-specification' - replicate: - # Used to indicate one input variable that is used to replicate/spawn - # step instances based on the number of values generated for the variable. - type: object - additionalProperties: false - properties: - using: - $ref: '#/definitions/replicate-using-variable' variable-mapping: # The map of the source of the step's variables. # all variables the step needs (that aren;t already in the specification) diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index 988829a..d33751f 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -39,11 +39,8 @@ ) from .decoder import ( - ReplicationDriver, - ReplicationOrigin, Translation, get_step_prior_step_variable_mapping, - get_step_replication_driver, get_step_workflow_variable_mapping, ) @@ -380,41 +377,20 @@ def _launch(self, *, rwf: dict[str, Any], step: dict[str, Any]) -> None: variables: dict[str, Any] = error_or_variables - # A replication number, - # use only for steps expected to replicate (even if just once) + # A step replication number, + # used only for steps expected to run in parallel (even if just once) step_replication_number: int = 0 - # Does this step have a replicating driver? 
- r_driver: ReplicationDriver | None = get_step_replication_driver(step=step) replication_values: list[str] = [] - if r_driver: - if r_driver.origin == ReplicationOrigin.STEP_VARIABLE: - # We need to get the variable values from a prior step - # We need the prior steps running-workflow-step-id - assert r_driver.source_step_name - response, _ = self._wapi_adapter.get_running_workflow_step_by_name( - name=r_driver.source_step_name, - running_workflow_id=rwf_id, - ) - assert "id" in response - o_rwfs_id: str = response["id"] - response, _ = ( - self._wapi_adapter.get_running_workflow_step_output_values_for_output( - running_workflow_step_id=o_rwfs_id, - output_variable=r_driver.source_variable, - ) - ) - assert "output" in response - replication_values = response["output"] - else: - assert False, "Unsupported origin" + source_is_splitter: bool = False + iter_variable: str | None = None num_step_instances: int = max(1, len(replication_values)) for iteration in range(num_step_instances): # If we are replicating this step then we must replace the step's variable # with a value expected for this iteration. - if r_driver: - iter_variable: str = r_driver.variable + if source_is_splitter: + assert iter_variable iter_value: str = replication_values[iteration] _LOGGER.info( "Replicating step: %s iteration=%s variable=%s value=%s", @@ -424,7 +400,7 @@ def _launch(self, *, rwf: dict[str, Any], step: dict[str, Any]) -> None: iter_value, ) # Over-write the replicating variable - # and set the replication numebr to a unique +ve non-zero value... + # and set the replication number to a unique +ve non-zero value... variables[iter_variable] = iter_value step_replication_number = iteration + 1 diff --git a/workflow/workflow_validator.py b/workflow/workflow_validator.py index 1d94973..4a646d3 100644 --- a/workflow/workflow_validator.py +++ b/workflow/workflow_validator.py @@ -6,8 +6,6 @@ from .decoder import ( get_step_output_variable_names, - get_step_prior_step_variable_mapping, - get_step_workflow_variable_mapping, get_steps, get_workflow_variable_names, validate_schema, @@ -113,39 +111,6 @@ def _validate_tag_level( error_num=2, error_msg=[f"Duplicate step names found: {', '.join(duplicate_names)}"], ) - # For each 'replicating' step the replicating variable - # must be declared in the step - which is either a workflow variable - # or a prior step variable. 
- for step in get_steps(workflow_definition):
-        if (
-            replicate_using_input := step.get("replicate", {})
-            .get("using", {})
-            .get("variable")
-        ):
-            found: bool = False
-            for translation in get_step_workflow_variable_mapping(step=step):
-                if replicate_using_input == translation.out:
-                    found = True
-                    break
-            if not found:
-                for (
-                    step_name,
-                    translations,
-                ) in get_step_prior_step_variable_mapping(step=step).items():
-                    for translation in translations:
-                        if replicate_using_input == translation.out:
-                            found = True
-                            break
-                    if found:
-                        break
-            if not found:
-                return ValidationResult(
-                    error_num=7,
-                    error_msg=[
-                        "Replicate input variable is not declared:"
-                        f" {replicate_using_input} (step={step["name"]})"
-                    ],
-                )
 
     return _VALIDATION_SUCCESS

From 6397955b44e564bcf327d7b3c54d9199d2b47f69 Mon Sep 17 00:00:00 2001
From: Alan Christie 
Date: Tue, 2 Sep 2025 09:45:09 +0000
Subject: [PATCH 41/57] refactor: Refactored using decoder 2.5.0

---
 tests/job-definitions/job-definitions.yaml |  7 ++
 tests/test_workflow_engine_examples.py     |  1 -
 workflow/decoder.py                        | 19 ++++-
 workflow/workflow_engine.py                | 87 +++++++++++++++++-----
 4 files changed, 89 insertions(+), 25 deletions(-)

diff --git a/tests/job-definitions/job-definitions.yaml b/tests/job-definitions/job-definitions.yaml
index 7e3e1b7..0c48e84 100644
--- a/tests/job-definitions/job-definitions.yaml
+++ b/tests/job-definitions/job-definitions.yaml
@@ -136,3 +136,10 @@ jobs:
   splitsmiles:
     command: >-
       copyf.py {{ inputFile }}
+    # Simulate multiple output files...
+    variables:
+      outputs:
+        properties:
+          outputBase:
+            creates: '{{ outputBase }}_*.smi'
+            type: files
diff --git a/tests/test_workflow_engine_examples.py b/tests/test_workflow_engine_examples.py
index 40e0573..9d07f13 100644
--- a/tests/test_workflow_engine_examples.py
+++ b/tests/test_workflow_engine_examples.py
@@ -398,7 +398,6 @@ def test_workflow_engine_simple_python_molprops_with_options(basic_engine):
     assert project_file_exists(output_file_2)
 
 
-@pytest.mark.skip(reason="WIP")
 def test_workflow_engine_simple_python_fanout(basic_engine):
     # Arrange
     md, da = basic_engine
diff --git a/workflow/decoder.py b/workflow/decoder.py
index 674d1ad..daf6a3d 100644
--- a/workflow/decoder.py
+++ b/workflow/decoder.py
@@ -61,6 +61,16 @@ def get_steps(definition: dict[str, Any]) -> list[dict[str, Any]]:
     return response
 
 
+def get_step(definition: dict[str, Any], name: str) -> dict[str, Any]:
+    """Given a Workflow definition this function returns a named step
+    (if it exists)."""
+    steps: list[dict[str, Any]] = get_steps(definition)
+    for step in steps:
+        if step["name"] == name:
+            return step
+    return {}
+
+
 def get_name(definition: dict[str, Any]) -> str:
     """Given a Workflow definition this function returns its name."""
     return str(definition.get("name", ""))
@@ -117,8 +127,8 @@ def get_step_input_variable_names(
 
 
 def get_step_workflow_variable_mapping(*, step: dict[str, Any]) -> list[Translation]:
-    """Returns a list of workflow vaiable name to step variable name tuples
-    for the given step."""
+    """Returns a list of workflow variable name to step variable name
+    Translation objects for the given step."""
     variable_mapping: list[Translation] = []
     if "variable-mapping" in step:
         for v_map in step["variable-mapping"]:
@@ -134,8 +144,9 @@ def get_step_workflow_variable_mapping(*, step: dict[str, Any]) -> list[Translat
 def get_step_prior_step_variable_mapping(
     *, step: dict[str, Any]
 ) -> dict[str, list[Translation]]:
-    """Returns list of translate objects, indexed by prior step name,
-    that identify source 
step vaiable name to this step's variable name.""" + """Returns list of Translation objects, indexed by prior step name, + that identify source step (output) variable name to this step's (input) + variable name.""" variable_mapping: dict[str, list[Translation]] = {} if "variable-mapping" in step: for v_map in step["variable-mapping"]: diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index d33751f..01d49a2 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -26,7 +26,8 @@ import sys from typing import Any, Optional -from decoder.decoder import TextEncoding, decode +import decoder.decoder as job_defintion_decoder +from decoder.decoder import TextEncoding from google.protobuf.message import Message from informaticsmatters.protobuf.datamanager.pod_message_pb2 import PodMessage from informaticsmatters.protobuf.datamanager.workflow_message_pb2 import WorkflowMessage @@ -40,6 +41,7 @@ from .decoder import ( Translation, + get_step, get_step_prior_step_variable_mapping, get_step_workflow_variable_mapping, ) @@ -127,7 +129,7 @@ def _handle_workflow_start_message(self, r_wfid: str) -> None: # Launch it. # If there's a launch problem the step (and running workflow) will have # and error, stopping it. There will be no Pod event as the launch has failed. - self._launch(rwf=rwf_response, step=first_step) + self._launch(wf=wf_response, rwf=rwf_response, step=first_step) def _handle_workflow_stop_message(self, r_wfid: str) -> None: """Logic to handle a STOP message.""" @@ -263,7 +265,7 @@ def _handle_pod_message(self, msg: PodMessage) -> None: # There's another step! # For this simple logic it is the next step. next_step = wf_response["steps"][step_index + 1] - self._launch(rwf=rwf_response, step=next_step) + self._launch(wf=wf_response, rwf=rwf_response, step=next_step) # Something was started (or there was a launch error and the step # and running workflow error will have been set). @@ -278,21 +280,13 @@ def _handle_pod_message(self, msg: PodMessage) -> None: success=True, ) - def _validate_step_command( - self, - *, - running_workflow_id: str, - step: dict[str, Any], - running_workflow_variables: dict[str, Any], - ) -> str | dict[str, Any]: - """Returns an error message if the command isn't valid. - Without a message we return all the variables that were (successfully) - applied to the command.""" - + def _get_step_job(self, *, step: dict[str, Any]) -> dict[str, Any]: + """Gets the Job definition for a given Step.""" # We get the Job from the step specification, which must contain # the keys "collection", "job", and "version". Here we assume that # the workflow definition has passed the RUN-level validation # which means we can get these values. + assert "specification" in step step_spec: dict[str, Any] = step["specification"] job_collection: str = step_spec["collection"] job_job: str = step_spec["job"] @@ -300,6 +294,7 @@ def _validate_step_command( job, _ = self._wapi_adapter.get_job( collection=job_collection, job=job_job, version=job_version ) + _LOGGER.debug( "API.get_job(%s, %s, %s) returned: -\n%s", job_collection, @@ -308,6 +303,19 @@ def _validate_step_command( str(job), ) + return job + + def _validate_step_command( + self, + *, + running_workflow_id: str, + step: dict[str, Any], + running_workflow_variables: dict[str, Any], + ) -> str | dict[str, Any]: + """Returns an error message if the command isn't valid. 
+ Without a message we return all the variables that were (successfully) + applied to the command.""" + # Start with any variables provided in the step's specification. # This will be ou t"all variables" map for this step, # whcih we will add to (and maybe even over-write)... @@ -345,12 +353,15 @@ def _validate_step_command( all_variables[tr.out] = prior_step["variables"][tr.in_] # Now ... can the command be compiled!? - message, success = decode( + job: dict[str, Any] = self._get_step_job(step=step) + message, success = job_defintion_decoder.decode( job["command"], all_variables, "command", TextEncoding.JINJA2_3_0 ) return all_variables if success else message - def _launch(self, *, rwf: dict[str, Any], step: dict[str, Any]) -> None: + def _launch( + self, *, wf: dict[str, Any], rwf: dict[str, Any], step: dict[str, Any] + ) -> None: step_name: str = step["name"] rwf_id: str = rwf["id"] project_id = rwf["project"]["id"] @@ -380,17 +391,53 @@ def _launch(self, *, rwf: dict[str, Any], step: dict[str, Any]) -> None: # A step replication number, # used only for steps expected to run in parallel (even if just once) step_replication_number: int = 0 + # Do we replicate this step (run it more than once)? + # We do if a variable in this step's mapping block + # refers to an output of a prior step whose type is 'files'. + # If the prior step is a 'splitter' we populate the 'replication_values' array + # with the list of files the prior step genrated for its output. replication_values: list[str] = [] - source_is_splitter: bool = False iter_variable: str | None = None + tr_map: dict[str, list[Translation]] = get_step_prior_step_variable_mapping( + step=step + ) + for p_step_name, tr_list in tr_map.items(): + # We need to get the Job definition for each step + # and then check whether the (ouptu) variable is of type 'files'... + wf_step: dict[str, Any] = get_step(wf, p_step_name) + assert wf_step + job_definition: dict[str, Any] = self._get_step_job(step=wf_step) + jd_outputs: dict[str, Any] = job_defintion_decoder.get_outputs( + job_definition + ) + for tr in tr_list: + if jd_outputs.get(tr.in_, {}).get("type") == "files": + iter_variable = tr.out + # Get the prior running step's output values + response, _ = self._wapi_adapter.get_running_workflow_step_by_name( + name=p_step_name, + running_workflow_id=rwf_id, + ) + rwfs_id = response["id"] + assert rwfs_id + result, _ = ( + self._wapi_adapter.get_running_workflow_step_output_values_for_output( + running_workflow_step_id=rwfs_id, + output_variable=tr.in_, + ) + ) + replication_values = result["output"].copy() + break + # Stop if we've got an iteration variable + if iter_variable: + break num_step_instances: int = max(1, len(replication_values)) for iteration in range(num_step_instances): # If we are replicating this step then we must replace the step's variable # with a value expected for this iteration. 
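# A standalone sketch (not part of the patch) of the fan-out rule described
# above, using hypothetical values: one instance is launched per realised
# value of the prior step's 'files' output, and the iteration variable is
# overwritten before each launch.
example_values = ["chunk_1.smi", "chunk_2.smi"]   # prior step's realised outputs
example_iter_variable = "inputFile"               # this step's driven variable
for i, value in enumerate(example_values):
    variables = {example_iter_variable: value}    # per-instance variable map
    step_replication_number = i + 1               # unique +ve; 0 = not replicated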
- if source_is_splitter: - assert iter_variable + if iter_variable: iter_value: str = replication_values[iteration] _LOGGER.info( "Replicating step: %s iteration=%s variable=%s value=%s", @@ -427,7 +474,7 @@ def _launch(self, *, rwf: dict[str, Any], step: dict[str, Any]) -> None: step_replication_number=step_replication_number, ) lr: LaunchResult = self._instance_launcher.launch(launch_parameters=lp) - rwfs_id: str | None = lr.running_workflow_step_id + rwfs_id = lr.running_workflow_step_id assert rwfs_id if lr.error_num: From 7f4b0c6e892308c6517e26c6cbdc597b172d971d Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Tue, 2 Sep 2025 09:51:30 +0000 Subject: [PATCH 42/57] docs: Doc tweak --- workflow/workflow_engine.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index 01d49a2..606b45d 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -396,6 +396,9 @@ def _launch( # refers to an output of a prior step whose type is 'files'. # If the prior step is a 'splitter' we populate the 'replication_values' array # with the list of files the prior step genrated for its output. + # + # In this engine we onlhy act on the _first_ match, i.e. there CANNOT + # be more than one prior step variable that is 'files'! replication_values: list[str] = [] iter_variable: str | None = None tr_map: dict[str, list[Translation]] = get_step_prior_step_variable_mapping( From 5e2c8bc7e973acc3bfa7575361cb3db5c9414d20 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Tue, 2 Sep 2025 10:47:26 +0000 Subject: [PATCH 43/57] refactor: variable-map is now 'plumbing' and Translation is a 'Connector' --- tests/test_workflow_engine_examples.py | 6 +-- ...cate-step-input-output-variable-names.yaml | 4 +- .../example-smiles-to-file.yaml | 2 +- .../replicate-using-undeclared-input.yaml | 4 +- .../shortcut-example-1.yaml | 2 +- .../simple-python-molprops-with-options.yaml | 4 +- .../simple-python-molprops.yaml | 4 +- .../simple-python-parallel.yaml | 6 +-- ....yaml => simple-python-split-combine.yaml} | 12 +++--- workflow/decoder.py | 35 +++++++++-------- workflow/workflow-schema.yaml | 6 +-- workflow/workflow_engine.py | 38 +++++++++---------- 12 files changed, 61 insertions(+), 62 deletions(-) rename tests/workflow-definitions/{simple-python-fanout.yaml => simple-python-split-combine.yaml} (75%) diff --git a/tests/test_workflow_engine_examples.py b/tests/test_workflow_engine_examples.py index 9d07f13..dbfd779 100644 --- a/tests/test_workflow_engine_examples.py +++ b/tests/test_workflow_engine_examples.py @@ -398,12 +398,12 @@ def test_workflow_engine_simple_python_molprops_with_options(basic_engine): assert project_file_exists(output_file_2) -def test_workflow_engine_simple_python_fanout(basic_engine): +def test_workflow_engine_simple_python_split_combine(basic_engine): # Arrange md, da = basic_engine da.mock_get_running_workflow_step_output_values_for_output( - step_name="first-step", + step_name="split", output_variable="outputBase", output=["chunk_1.smi", "chunk_2.smi"], ) @@ -427,7 +427,7 @@ def test_workflow_engine_simple_python_fanout(basic_engine): r_wfid = start_workflow( md, da, - "simple-python-fanout", + "simple-python-split-combine", {"candidateMolecules": input_file_1}, ) diff --git a/tests/workflow-definitions/duplicate-step-input-output-variable-names.yaml b/tests/workflow-definitions/duplicate-step-input-output-variable-names.yaml index 3ba3926..deaae85 100644 --- 
a/tests/workflow-definitions/duplicate-step-input-output-variable-names.yaml +++ b/tests/workflow-definitions/duplicate-step-input-output-variable-names.yaml @@ -14,7 +14,7 @@ steps: variables: name: "col1" value: 123 - variable-mapping: + plumbing: - variable: inputFile from-workflow: variable: candidateMolecules @@ -34,7 +34,7 @@ steps: variables: name: "col2" value: "999" - variable-mapping: + plumbing: - variable: inputFile from-step: name: step1 diff --git a/tests/workflow-definitions/example-smiles-to-file.yaml b/tests/workflow-definitions/example-smiles-to-file.yaml index 018d90c..29c3e98 100644 --- a/tests/workflow-definitions/example-smiles-to-file.yaml +++ b/tests/workflow-definitions/example-smiles-to-file.yaml @@ -13,7 +13,7 @@ steps: collection: workflow-engine-unit-test-jobs job: smiles-to-file version: "1.0.0" - variable-mapping: + plumbing: - variable: outputFile from-workflow: variable: outputFile diff --git a/tests/workflow-definitions/replicate-using-undeclared-input.yaml b/tests/workflow-definitions/replicate-using-undeclared-input.yaml index 447521b..0828b48 100644 --- a/tests/workflow-definitions/replicate-using-undeclared-input.yaml +++ b/tests/workflow-definitions/replicate-using-undeclared-input.yaml @@ -14,7 +14,7 @@ steps: variables: name: "col1" value: 123 - variable-mapping: + plumbing: - variable: inputFile from-workflow: variable: candidateMolecules @@ -33,7 +33,7 @@ steps: replicate: using: variable: y - variable-mapping: + plumbing: - variable: inputFile from-step: name: step-1 diff --git a/tests/workflow-definitions/shortcut-example-1.yaml b/tests/workflow-definitions/shortcut-example-1.yaml index 0b6c2c3..b9c4a87 100644 --- a/tests/workflow-definitions/shortcut-example-1.yaml +++ b/tests/workflow-definitions/shortcut-example-1.yaml @@ -19,7 +19,7 @@ steps: collection: workflow-engine-unit-test-jobs job: shortcut-example-1-process-b version: "1.0.0" - variable-mapping: + plumbing: - variable: inputFile from-step: name: example-1-step-1 diff --git a/tests/workflow-definitions/simple-python-molprops-with-options.yaml b/tests/workflow-definitions/simple-python-molprops-with-options.yaml index 9ef80e5..de1ad86 100644 --- a/tests/workflow-definitions/simple-python-molprops-with-options.yaml +++ b/tests/workflow-definitions/simple-python-molprops-with-options.yaml @@ -13,7 +13,7 @@ steps: version: "1.0.0" variables: outputFile: step1.out.smi - variable-mapping: + plumbing: - variable: inputFile from-workflow: variable: candidateMolecules @@ -33,7 +33,7 @@ steps: variables: name: "col2" value: "999" - variable-mapping: + plumbing: - variable: inputFile from-step: name: step1 diff --git a/tests/workflow-definitions/simple-python-molprops.yaml b/tests/workflow-definitions/simple-python-molprops.yaml index ba0d1d0..5639da3 100644 --- a/tests/workflow-definitions/simple-python-molprops.yaml +++ b/tests/workflow-definitions/simple-python-molprops.yaml @@ -15,7 +15,7 @@ steps: name: "col1" value: 123 outputFile: "results.smi" - variable-mapping: + plumbing: - variable: inputFile from-workflow: variable: candidateMolecules @@ -29,7 +29,7 @@ steps: variables: name: "col2" value: "999" - variable-mapping: + plumbing: - variable: inputFile from-step: name: step1 diff --git a/tests/workflow-definitions/simple-python-parallel.yaml b/tests/workflow-definitions/simple-python-parallel.yaml index c1f5c8f..dc8e3f3 100644 --- a/tests/workflow-definitions/simple-python-parallel.yaml +++ b/tests/workflow-definitions/simple-python-parallel.yaml @@ -14,7 +14,7 @@ steps: variables: 
name: "unnecessary" value: "0" - variable-mapping: + plumbing: - variable: inputFile from-workflow: variable: candidateMolecules @@ -28,7 +28,7 @@ steps: variables: name: "desc1" value: "777" - variable-mapping: + plumbing: - variable: inputFile from-step: name: first-step @@ -43,7 +43,7 @@ steps: variables: name: "desc2" value: "999" - variable-mapping: + plumbing: - variable: inputFile from-step: name: first-step diff --git a/tests/workflow-definitions/simple-python-fanout.yaml b/tests/workflow-definitions/simple-python-split-combine.yaml similarity index 75% rename from tests/workflow-definitions/simple-python-fanout.yaml rename to tests/workflow-definitions/simple-python-split-combine.yaml index 0a50216..f39a100 100644 --- a/tests/workflow-definitions/simple-python-fanout.yaml +++ b/tests/workflow-definitions/simple-python-split-combine.yaml @@ -3,10 +3,12 @@ kind: DataManagerWorkflow kind-version: "2025.2" name: python-workflow description: >- - A simple parallel workflow. Input is split into N chunks and N processes of the same job is started + A simple parallel workflow. The input is split into chunks and a number outputFile + parallel steps processes these outputs. Finally a combine step concatenates the + files. steps: -- name: first-step +- name: split description: Split an input file specification: collection: workflow-engine-unit-test-jobs @@ -16,7 +18,7 @@ steps: name: count value: "1" outputBase: chunk - variable-mapping: + plumbing: - variable: inputFile from-workflow: variable: candidateMolecules @@ -31,10 +33,10 @@ steps: name: desc1 value: "777" outputFile: results.smi - variable-mapping: + plumbing: - variable: inputFile from-step: - name: first-step + name: split variable: outputBase out: - outputFile diff --git a/workflow/decoder.py b/workflow/decoder.py index daf6a3d..1c50e96 100644 --- a/workflow/decoder.py +++ b/workflow/decoder.py @@ -25,8 +25,9 @@ @dataclass -class Translation: - """A source ("in_") to destination ("out") variable map.""" +class Connector: + """A connection - connexts a plumbing source variable ("in_") + to destination variable ("out").""" in_: str out: str @@ -83,13 +84,13 @@ def get_description(definition: dict[str, Any]) -> str | None: def get_workflow_variable_names(definition: dict[str, Any]) -> set[str]: """Given a Workflow definition this function returns all the names of the - variables that need to be defined at the workflow level. These are the 'variables' - used in every steps' variabale-mapping block. + variables defined in steps that need to be defined at the workflow level. + These are the 'variables' used in every step's 'plumbing' block. 
""" wf_variable_names: set[str] = set() steps: list[dict[str, Any]] = get_steps(definition) for step in steps: - if v_map := step.get("variable-mapping"): + if v_map := step.get("plumbing"): for v in v_map: if "from-workflow" in v: wf_variable_names.add(v["from-workflow"]["variable"]) @@ -126,40 +127,38 @@ def get_step_input_variable_names( return variable_names -def get_step_workflow_variable_mapping(*, step: dict[str, Any]) -> list[Translation]: +def get_step_workflow_plumbing(*, step: dict[str, Any]) -> list[Connector]: """Returns a list of workflow vaiable name to step variable name Translation objects for the given step.""" - variable_mapping: list[Translation] = [] - if "variable-mapping" in step: - for v_map in step["variable-mapping"]: + variable_mapping: list[Connector] = [] + if "plumbing" in step: + for v_map in step["plumbing"]: if "from-workflow" in v_map: variable_mapping.append( - Translation( + Connector( in_=v_map["from-workflow"]["variable"], out=v_map["variable"] ) ) return variable_mapping -def get_step_prior_step_variable_mapping( - *, step: dict[str, Any] -) -> dict[str, list[Translation]]: +def get_step_prior_step_plumbing(*, step: dict[str, Any]) -> dict[str, list[Connector]]: """Returns list of Translation objects, indexed by prior step name, that identify source step (output) variable name to this step's (input) variable name.""" - variable_mapping: dict[str, list[Translation]] = {} - if "variable-mapping" in step: - for v_map in step["variable-mapping"]: + variable_mapping: dict[str, list[Connector]] = {} + if "plumbing" in step: + for v_map in step["plumbing"]: if "from-step" in v_map: step_name = v_map["from-step"]["name"] step_variable = v_map["from-step"]["variable"] # Tuple is "from" -> "to" if step_name in variable_mapping: variable_mapping[step_name].append( - Translation(in_=step_variable, out=v_map["variable"]) + Connector(in_=step_variable, out=v_map["variable"]) ) else: variable_mapping[step_name] = [ - Translation(in_=step_variable, out=v_map["variable"]) + Connector(in_=step_variable, out=v_map["variable"]) ] return variable_mapping diff --git a/workflow/workflow-schema.yaml b/workflow/workflow-schema.yaml index 90acb29..97a4610 100644 --- a/workflow/workflow-schema.yaml +++ b/workflow/workflow-schema.yaml @@ -156,11 +156,11 @@ definitions: # The format of this is essentially idenical to the specification # used when a Job is launched via the DM API. $ref: '#/definitions/step-specification' - variable-mapping: + plumbing: # The map of the source of the step's variables. - # all variables the step needs (that aren;t already in the specification) + # All variables the step needs (that aren't already in the specification) # need to be declared here. They either come "from" a prior step - # or are expected in th erunning workflow variables. Here we simply + # or are expected to be a workflow variable. Here we simply # associate every required variable to a source. 
type: array items: diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index 606b45d..75f3bd3 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -40,10 +40,10 @@ ) from .decoder import ( - Translation, + Connector, get_step, - get_step_prior_step_variable_mapping, - get_step_workflow_variable_mapping, + get_step_prior_step_plumbing, + get_step_workflow_plumbing, ) _LOGGER: logging.Logger = logging.getLogger(__name__) @@ -330,27 +330,27 @@ def _validate_step_command( # "in" variables are worklfow variables, and "out" variables # are expected Job variables. We use this to add variables # to the "all variables" map. - for tr in get_step_workflow_variable_mapping(step=step): - assert tr.in_ in running_workflow_variables - all_variables[tr.out] = running_workflow_variables[tr.in_] + for connector in get_step_workflow_plumbing(step=step): + assert connector.in_ in running_workflow_variables + all_variables[connector.out] = running_workflow_variables[connector.in_] # Now we apply variables from the "variable mapping" block # related to values used in prior steps. The decoder gives # us a map indexed by prior step name that's a list of "in" "out" # tuples as above. - step_prior_v_map: dict[str, list[Translation]] = ( - get_step_prior_step_variable_mapping(step=step) + prior_step_plumbing: dict[str, list[Connector]] = get_step_prior_step_plumbing( + step=step ) - for prior_step_name, v_map in step_prior_v_map.items(): + for prior_step_name, connections in prior_step_plumbing.items(): # Retrieve the prior "running" step # in order to get the variables that were set there... prior_step, _ = self._wapi_adapter.get_running_workflow_step_by_name( name=prior_step_name, running_workflow_id=running_workflow_id ) # Copy "in" value to "out"... - for tr in v_map: - assert tr.in_ in prior_step["variables"] - all_variables[tr.out] = prior_step["variables"][tr.in_] + for connector in connections: + assert connector.in_ in prior_step["variables"] + all_variables[connector.out] = prior_step["variables"][connector.in_] # Now ... can the command be compiled!? job: dict[str, Any] = self._get_step_job(step=step) @@ -401,10 +401,8 @@ def _launch( # be more than one prior step variable that is 'files'! replication_values: list[str] = [] iter_variable: str | None = None - tr_map: dict[str, list[Translation]] = get_step_prior_step_variable_mapping( - step=step - ) - for p_step_name, tr_list in tr_map.items(): + plumbing: dict[str, list[Connector]] = get_step_prior_step_plumbing(step=step) + for p_step_name, connections in plumbing.items(): # We need to get the Job definition for each step # and then check whether the (ouptu) variable is of type 'files'... 
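# An illustration (hypothetical step, not from the patch) of the 'plumbing'
# shapes the decoder walks: a 'from-workflow' entry and a 'from-step' entry,
# and the Connector objects they are turned into.
example_plumbing = [
    {"variable": "inputFile", "from-workflow": {"variable": "candidateMolecules"}},
    {"variable": "fragments", "from-step": {"name": "step1", "variable": "outputFile"}},
]
# -> workflow connections: [Connector(in_="candidateMolecules", out="inputFile")]
# -> prior-step plumbing:  {"step1": [Connector(in_="outputFile", out="fragments")]}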
wf_step: dict[str, Any] = get_step(wf, p_step_name) @@ -413,9 +411,9 @@ def _launch( jd_outputs: dict[str, Any] = job_defintion_decoder.get_outputs( job_definition ) - for tr in tr_list: - if jd_outputs.get(tr.in_, {}).get("type") == "files": - iter_variable = tr.out + for connector in connections: + if jd_outputs.get(connector.in_, {}).get("type") == "files": + iter_variable = connector.out # Get the prior running step's output values response, _ = self._wapi_adapter.get_running_workflow_step_by_name( name=p_step_name, @@ -426,7 +424,7 @@ def _launch( result, _ = ( self._wapi_adapter.get_running_workflow_step_output_values_for_output( running_workflow_step_id=rwfs_id, - output_variable=tr.in_, + output_variable=connector.in_, ) ) replication_values = result["output"].copy() From c39ddb7aa0968de2573ba798d6fb1ed4342922ad Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Tue, 2 Sep 2025 10:54:00 +0000 Subject: [PATCH 44/57] refactor: Better function and variable naming (plumbing) --- workflow/decoder.py | 46 ++++++++++++++++++++----------------- workflow/workflow_engine.py | 28 ++++++++++++++-------- 2 files changed, 43 insertions(+), 31 deletions(-) diff --git a/workflow/decoder.py b/workflow/decoder.py index 1c50e96..b41552e 100644 --- a/workflow/decoder.py +++ b/workflow/decoder.py @@ -127,38 +127,42 @@ def get_step_input_variable_names( return variable_names -def get_step_workflow_plumbing(*, step: dict[str, Any]) -> list[Connector]: - """Returns a list of workflow vaiable name to step variable name - Translation objects for the given step.""" - variable_mapping: list[Connector] = [] - if "plumbing" in step: - for v_map in step["plumbing"]: +def get_step_workflow_variable_connections( + *, step_definition: dict[str, Any] +) -> list[Connector]: + """Returns a list of connectors that connect a workflow variable name + to a step variable name for the given step definition.""" + connections: list[Connector] = [] + if "plumbing" in step_definition: + for v_map in step_definition["plumbing"]: if "from-workflow" in v_map: - variable_mapping.append( + connections.append( Connector( in_=v_map["from-workflow"]["variable"], out=v_map["variable"] ) ) - return variable_mapping - - -def get_step_prior_step_plumbing(*, step: dict[str, Any]) -> dict[str, list[Connector]]: - """Returns list of Translation objects, indexed by prior step name, - that identify source step (output) variable name to this step's (input) - variable name.""" - variable_mapping: dict[str, list[Connector]] = {} - if "plumbing" in step: - for v_map in step["plumbing"]: + return connections + + +def get_step_prior_step_plumbing( + *, step_definition: dict[str, Any] +) -> dict[str, list[Connector]]: + """Returns list of variable Connections, indexed by prior step name, + that identify a source step variable name (an output) to an input variable in this + step (an input).""" + plumbing: dict[str, list[Connector]] = {} + if "plumbing" in step_definition: + for v_map in step_definition["plumbing"]: if "from-step" in v_map: step_name = v_map["from-step"]["name"] step_variable = v_map["from-step"]["variable"] # Tuple is "from" -> "to" - if step_name in variable_mapping: - variable_mapping[step_name].append( + if step_name in plumbing: + plumbing[step_name].append( Connector(in_=step_variable, out=v_map["variable"]) ) else: - variable_mapping[step_name] = [ + plumbing[step_name] = [ Connector(in_=step_variable, out=v_map["variable"]) ] - return variable_mapping + return plumbing diff --git a/workflow/workflow_engine.py 
b/workflow/workflow_engine.py index 75f3bd3..d295682 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -43,7 +43,7 @@ Connector, get_step, get_step_prior_step_plumbing, - get_step_workflow_plumbing, + get_step_workflow_variable_connections, ) _LOGGER: logging.Logger = logging.getLogger(__name__) @@ -129,7 +129,7 @@ def _handle_workflow_start_message(self, r_wfid: str) -> None: # Launch it. # If there's a launch problem the step (and running workflow) will have # and error, stopping it. There will be no Pod event as the launch has failed. - self._launch(wf=wf_response, rwf=rwf_response, step=first_step) + self._launch(wf=wf_response, rwf=rwf_response, step_definition=first_step) def _handle_workflow_stop_message(self, r_wfid: str) -> None: """Logic to handle a STOP message.""" @@ -265,7 +265,9 @@ def _handle_pod_message(self, msg: PodMessage) -> None: # There's another step! # For this simple logic it is the next step. next_step = wf_response["steps"][step_index + 1] - self._launch(wf=wf_response, rwf=rwf_response, step=next_step) + self._launch( + wf=wf_response, rwf=rwf_response, step_definition=next_step + ) # Something was started (or there was a launch error and the step # and running workflow error will have been set). @@ -330,7 +332,7 @@ def _validate_step_command( # "in" variables are worklfow variables, and "out" variables # are expected Job variables. We use this to add variables # to the "all variables" map. - for connector in get_step_workflow_plumbing(step=step): + for connector in get_step_workflow_variable_connections(step_definition=step): assert connector.in_ in running_workflow_variables all_variables[connector.out] = running_workflow_variables[connector.in_] @@ -339,7 +341,7 @@ def _validate_step_command( # us a map indexed by prior step name that's a list of "in" "out" # tuples as above. prior_step_plumbing: dict[str, list[Connector]] = get_step_prior_step_plumbing( - step=step + step_definition=step ) for prior_step_name, connections in prior_step_plumbing.items(): # Retrieve the prior "running" step @@ -360,9 +362,13 @@ def _validate_step_command( return all_variables if success else message def _launch( - self, *, wf: dict[str, Any], rwf: dict[str, Any], step: dict[str, Any] + self, + *, + wf: dict[str, Any], + rwf: dict[str, Any], + step_definition: dict[str, Any], ) -> None: - step_name: str = step["name"] + step_name: str = step_definition["name"] rwf_id: str = rwf["id"] project_id = rwf["project"]["id"] @@ -376,7 +382,7 @@ def _launch( rwf_variables: dict[str, Any] = rwf.get("variables", {}) error_or_variables: str | dict[str, Any] = self._validate_step_command( running_workflow_id=rwf_id, - step=step, + step=step_definition, running_workflow_variables=rwf_variables, ) if isinstance(error_or_variables, str): @@ -401,7 +407,9 @@ def _launch( # be more than one prior step variable that is 'files'! replication_values: list[str] = [] iter_variable: str | None = None - plumbing: dict[str, list[Connector]] = get_step_prior_step_plumbing(step=step) + plumbing: dict[str, list[Connector]] = get_step_prior_step_plumbing( + step_definition=step_definition + ) for p_step_name, connections in plumbing.items(): # We need to get the Job definition for each step # and then check whether the (ouptu) variable is of type 'files'... 
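# A sketch of the inspection performed above, against a hypothetical Job
# definition laid out like the test job definitions: the prior step's
# declared outputs are checked for a 'files' type.
example_job_definition = {
    "command": "copyf.py {{ inputFile }}",
    "variables": {"outputs": {"properties": {"outputBase": {"type": "files"}}}},
}
example_outputs = example_job_definition["variables"]["outputs"]["properties"]
assert example_outputs.get("outputBase", {}).get("type") == "files"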
@@ -468,7 +476,7 @@ def _launch( debug=rwf.get("debug"), launching_user_name=rwf["running_user"], launching_user_api_token=rwf["running_user_api_token"], - specification=step["specification"], + specification=step_definition["specification"], variables=variables, running_workflow_id=rwf_id, step_name=step_name, From cdd936e6d727134f37c6b94261b79e1c6d7d4e58 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Tue, 2 Sep 2025 13:46:59 +0000 Subject: [PATCH 45/57] feat: new _prepare_step_variables function --- tests/job-definitions/job-definitions.yaml | 17 ++- .../simple-python-split-combine.yaml | 4 +- workflow/workflow_engine.py | 112 ++++++++++++++---- 3 files changed, 106 insertions(+), 27 deletions(-) diff --git a/tests/job-definitions/job-definitions.yaml b/tests/job-definitions/job-definitions.yaml index 0c48e84..03a3b69 100644 --- a/tests/job-definitions/job-definitions.yaml +++ b/tests/job-definitions/job-definitions.yaml @@ -132,12 +132,27 @@ jobs: concatenate: command: >- concatenate.py {% for ifile in inputFile %}{{ ifile }} {% endfor %} --outputFile {{ outputFile }} + # Simulate a multiple input files Job (combiner)... + variables: + inputs: + properties: + inputFile: + type: files + outputs: + properties: + outputBase: + creates: '{{ outputFile }}' + type: file splitsmiles: command: >- copyf.py {{ inputFile }} - # Simulate multiple output files... + # Simulate a multiple output files Job (splitetr)... variables: + inputs: + properties: + inputFile: + type: file outputs: properties: outputBase: diff --git a/tests/workflow-definitions/simple-python-split-combine.yaml b/tests/workflow-definitions/simple-python-split-combine.yaml index f39a100..26fb3d7 100644 --- a/tests/workflow-definitions/simple-python-split-combine.yaml +++ b/tests/workflow-definitions/simple-python-split-combine.yaml @@ -15,15 +15,13 @@ steps: job: splitsmiles version: "1.0.0" variables: - name: count - value: "1" outputBase: chunk plumbing: - variable: inputFile from-workflow: variable: candidateMolecules -- name: parallel-step +- name: parallel description: Add some params specification: collection: workflow-engine-unit-test-jobs diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index d295682..3273350 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -24,6 +24,7 @@ import logging import sys +from dataclasses import dataclass from typing import Any, Optional import decoder.decoder as job_defintion_decoder @@ -51,6 +52,20 @@ _LOGGER.addHandler(logging.StreamHandler(sys.stdout)) +@dataclass +class StepPreparationResponse: + """Step preparation response object. Iterations is +ve (non-zero) if a step + can be launched - it's value indicates how many times. If a step can be launched + 'variables' will not be None. If a parallel set of steps can take place + (even just one) 'iteration_variable' will be set and 'iteration_values' + will be a list containing a value for eacdh step.""" + + iterations: int + variables: dict[str, Any] | None = None + iteration_variable: str | None = None + iteration_values: list[str] | None = None + + class WorkflowEngine: """The workflow engine.""" @@ -126,10 +141,18 @@ def _handle_workflow_start_message(self, r_wfid: str) -> None: # Now find the first step (index 0)... first_step: dict[str, Any] = wf_response["steps"][0] + sp_resp = self._prepare_step_variables( + wf=wf_response, step_definition=first_step, rwf=rwf_response + ) + assert sp_resp.variables is not None # Launch it. 
# If there's a launch problem the step (and running workflow) will have # and error, stopping it. There will be no Pod event as the launch has failed. - self._launch(wf=wf_response, rwf=rwf_response, step_definition=first_step) + self._launch( + rwf=rwf_response, + step_definition=first_step, + step_preparation_response=sp_resp, + ) def _handle_workflow_stop_message(self, r_wfid: str) -> None: """Logic to handle a STOP message.""" @@ -265,8 +288,31 @@ def _handle_pod_message(self, msg: PodMessage) -> None: # There's another step! # For this simple logic it is the next step. next_step = wf_response["steps"][step_index + 1] + + # A mojor piece of work to accomplish is to get ourselves into a position + # that allows us to check the step command can be executed. + # We do this by compiling a map of variables we belive the step needs. + + # If the step about to be launched is based on a prior step + # that generates multiple outputs (files) then we have to + # exit unless all of the step instances have completed. + # + # Do we need a 'prepare variables' function? + # One that returns a map of variables or nothing + # (e.g. 'nothing' when a step launch cannot be attempted) + sp_resp = self._prepare_step_variables( + wf=wf_response, step_definition=next_step, rwf=rwf_response + ) + if sp_resp.iterations == 0: + # Cannot prepare variables for this step, + # we have to leave. + return + assert sp_resp.variables is not None + self._launch( - wf=wf_response, rwf=rwf_response, step_definition=next_step + rwf=rwf_response, + step_definition=next_step, + step_preparation_response=sp_resp, ) # Something was started (or there was a launch error and the step @@ -361,20 +407,18 @@ def _validate_step_command( ) return all_variables if success else message - def _launch( + def _prepare_step_variables( self, *, wf: dict[str, Any], - rwf: dict[str, Any], step_definition: dict[str, Any], - ) -> None: + rwf: dict[str, Any], + ) -> StepPreparationResponse: + """Attempts to prepare a map of step variables. If variables cannot be + presented to the step we return an object with 'iterations' set to zero.""" + step_name: str = step_definition["name"] rwf_id: str = rwf["id"] - project_id = rwf["project"]["id"] - - # A mojor piece of work to accomplish is to get ourselves into a position - # that allows us to check the step command can be executed. - # We do this by compiling a map of variables we belive the step needs. # We start with all the workflow variables that were provided # by the user when they "ran" the workflow. We're given a full set of @@ -390,13 +434,10 @@ def _launch( msg = f"Failed command validation error_msg={error_msg}" _LOGGER.warning(msg) self._set_step_error(step_name, rwf_id, None, 1, msg) - return + return StepPreparationResponse(iterations=0) variables: dict[str, Any] = error_or_variables - # A step replication number, - # used only for steps expected to run in parallel (even if just once) - step_replication_number: int = 0 # Do we replicate this step (run it more than once)? # We do if a variable in this step's mapping block # refers to an output of a prior step whose type is 'files'. @@ -405,7 +446,7 @@ def _launch( # # In this engine we onlhy act on the _first_ match, i.e. there CANNOT # be more than one prior step variable that is 'files'! 
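# The detection rule restated as a runnable sketch (hypothetical data,
# with connectors reduced to plain (in_, out) pairs): a step is replicated
# when one of its connectors draws from a prior-step output of type
# 'files', and only the first such match may exist.
example_outputs = {"outputBase": {"type": "files", "creates": "chunk_*.smi"}}
example_connections = [("outputBase", "inputFile")]  # (in_, out) pairs
iter_variable = None
for in_, out in example_connections:
    if example_outputs.get(in_, {}).get("type") == "files":
        iter_variable = out   # first (and only permitted) match wins
        break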
- replication_values: list[str] = [] + iter_values: list[str] = [] iter_variable: str | None = None plumbing: dict[str, list[Connector]] = get_step_prior_step_plumbing( step_definition=step_definition @@ -435,34 +476,59 @@ def _launch( output_variable=connector.in_, ) ) - replication_values = result["output"].copy() + iter_values = result["output"].copy() break # Stop if we've got an iteration variable if iter_variable: break - num_step_instances: int = max(1, len(replication_values)) - for iteration in range(num_step_instances): + num_step_instances: int = max(1, len(iter_values)) + return StepPreparationResponse( + variables=variables, + iterations=num_step_instances, + iteration_variable=iter_variable, + iteration_values=iter_values, + ) + + def _launch( + self, + *, + rwf: dict[str, Any], + step_definition: dict[str, Any], + step_preparation_response: StepPreparationResponse, + ) -> None: + step_name: str = step_definition["name"] + rwf_id: str = rwf["id"] + project_id = rwf["project"]["id"] + + # A step replication number, + # used only for steps expected to run in parallel (even if just once) + step_replication_number: int = 0 + + variables = step_preparation_response.variables + assert variables is not None + for iteration in range(step_preparation_response.iterations): # If we are replicating this step then we must replace the step's variable # with a value expected for this iteration. - if iter_variable: - iter_value: str = replication_values[iteration] + if step_preparation_response.iteration_variable: + assert step_preparation_response.iteration_values + iter_value: str = step_preparation_response.iteration_values[iteration] _LOGGER.info( "Replicating step: %s iteration=%s variable=%s value=%s", step_name, iteration, - iter_variable, + step_preparation_response.iteration_variable, iter_value, ) # Over-write the replicating variable # and set the replication number to a unique +ve non-zero value... - variables[iter_variable] = iter_value + variables[step_preparation_response.iteration_variable] = iter_value step_replication_number = iteration + 1 _LOGGER.info( "Launching step: %s RunningWorkflow=%s (name=%s)" - " variables=%s project=%s", + " step_variables=%s project=%s", step_name, rwf_id, rwf["name"], From c08ed5c1adf2720a21f2e95d86ac6c4e9645c430 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Tue, 2 Sep 2025 14:57:36 +0000 Subject: [PATCH 46/57] feat: Refactoring --- workflow/workflow_engine.py | 127 ++++++++++++++++++------------------ 1 file changed, 65 insertions(+), 62 deletions(-) diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index 3273350..edf5715 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -141,7 +141,7 @@ def _handle_workflow_start_message(self, r_wfid: str) -> None: # Now find the first step (index 0)... first_step: dict[str, Any] = wf_response["steps"][0] - sp_resp = self._prepare_step_variables( + sp_resp = self._prepare_step( wf=wf_response, step_definition=first_step, rwf=rwf_response ) assert sp_resp.variables is not None @@ -300,7 +300,7 @@ def _handle_pod_message(self, msg: PodMessage) -> None: # Do we need a 'prepare variables' function? # One that returns a map of variables or nothing # (e.g. 
'nothing' when a step launch cannot be attempted) - sp_resp = self._prepare_step_variables( + sp_resp = self._prepare_step( wf=wf_response, step_definition=next_step, rwf=rwf_response ) if sp_resp.iterations == 0: @@ -353,93 +353,96 @@ def _get_step_job(self, *, step: dict[str, Any]) -> dict[str, Any]: return job - def _validate_step_command( + def _prepare_step( self, *, - running_workflow_id: str, - step: dict[str, Any], - running_workflow_variables: dict[str, Any], - ) -> str | dict[str, Any]: - """Returns an error message if the command isn't valid. - Without a message we return all the variables that were (successfully) - applied to the command.""" + wf: dict[str, Any], + step_definition: dict[str, Any], + rwf: dict[str, Any], + ) -> StepPreparationResponse: + """Attempts to prepare a map of step variables. If variables cannot be + presented to the step we return an object with 'iterations' set to zero.""" + + step_name: str = step_definition["name"] + rwf_id: str = rwf["id"] + + # Compile a set of variables for this step. # Start with any variables provided in the step's specification. - # This will be ou t"all variables" map for this step, - # whcih we will add to (and maybe even over-write)... - all_variables: dict[str, Any] = step["specification"].get("variables", {}) + # A map that we will add to (and maybe even over-write)... + variables: dict[str, Any] = step_definition["specification"].get( + "variables", {} + ) - # Next, we iterate through the step's "variable mapping" block. - # This tells us all the variables that are set from either the - # 'workflow' or 'a prior step'. + # All the running workflow variables + rwf_variables: dict[str, Any] = rwf.get("variables", {}) - # Start with any workflow variables in the step. - # This will be a list of Translations of "in" and "out" variable names. + # Process the step's plumbing realting to workflow variables. + # This will be a list of Connectors of "in" and "out" variable names. # "in" variables are worklfow variables, and "out" variables # are expected Job variables. We use this to add variables - # to the "all variables" map. - for connector in get_step_workflow_variable_connections(step_definition=step): - assert connector.in_ in running_workflow_variables - all_variables[connector.out] = running_workflow_variables[connector.in_] + # to the variables map. + for connector in get_step_workflow_variable_connections( + step_definition=step_definition + ): + assert connector.in_ in rwf_variables + variables[connector.out] = rwf_variables[connector.in_] - # Now we apply variables from the "variable mapping" block + # Now we apply variables from the "plumbing" block # related to values used in prior steps. The decoder gives # us a map indexed by prior step name that's a list of "in" "out" # tuples as above. prior_step_plumbing: dict[str, list[Connector]] = get_step_prior_step_plumbing( - step_definition=step + step_definition=step_definition ) for prior_step_name, connections in prior_step_plumbing.items(): # Retrieve the prior "running" step # in order to get the variables that were set there... prior_step, _ = self._wapi_adapter.get_running_workflow_step_by_name( - name=prior_step_name, running_workflow_id=running_workflow_id + name=prior_step_name, running_workflow_id=rwf_id ) # Copy "in" value to "out"... for connector in connections: assert connector.in_ in prior_step["variables"] - all_variables[connector.out] = prior_step["variables"][connector.in_] + variables[connector.out] = prior_step["variables"][connector.in_] # Now ... 
can the command be compiled!? - job: dict[str, Any] = self._get_step_job(step=step) + job: dict[str, Any] = self._get_step_job(step=step_definition) message, success = job_defintion_decoder.decode( - job["command"], all_variables, "command", TextEncoding.JINJA2_3_0 + job["command"], variables, "command", TextEncoding.JINJA2_3_0 ) - return all_variables if success else message - - def _prepare_step_variables( - self, - *, - wf: dict[str, Any], - step_definition: dict[str, Any], - rwf: dict[str, Any], - ) -> StepPreparationResponse: - """Attempts to prepare a map of step variables. If variables cannot be - presented to the step we return an object with 'iterations' set to zero.""" - - step_name: str = step_definition["name"] - rwf_id: str = rwf["id"] - - # We start with all the workflow variables that were provided - # by the user when they "ran" the workflow. We're given a full set of - # variables in response (on success) or an error string (on failure) - rwf_variables: dict[str, Any] = rwf.get("variables", {}) - error_or_variables: str | dict[str, Any] = self._validate_step_command( - running_workflow_id=rwf_id, - step=step_definition, - running_workflow_variables=rwf_variables, - ) - if isinstance(error_or_variables, str): - error_msg = error_or_variables - msg = f"Failed command validation error_msg={error_msg}" + if not success: + msg = f"Failed command validation error_msg={message}" _LOGGER.warning(msg) self._set_step_error(step_name, rwf_id, None, 1, msg) return StepPreparationResponse(iterations=0) - variables: dict[str, Any] = error_or_variables + # Our inputs + our_job_definition: dict[str, Any] = self._get_step_job(step=step_definition) + our_inputs: dict[str, Any] = job_defintion_decoder.get_inputs( + our_job_definition + ) + + # Are we a combiner step? + # + # We are if a variable in our step's plumbing refers to an input that is + # of type 'files'. A combiner's input is required to accept a space-separated + # set of files. + we_are_a_combiner: bool = False + our_plumbing: dict[str, list[Connector]] = get_step_prior_step_plumbing( + step_definition=step_definition + ) + for p_step_name, connections in our_plumbing.items(): + for connector in connections: + if our_inputs.get(connector.out, {}).get("type") == "files": + we_are_a_combiner = True + + assert not we_are_a_combiner + + # We're not a combiner... # Do we replicate this step (run it more than once)? - # We do if a variable in this step's mapping block + # We do if a variable in this step's plumbing # refers to an output of a prior step whose type is 'files'. # If the prior step is a 'splitter' we populate the 'replication_values' array # with the list of files the prior step genrated for its output. @@ -448,12 +451,9 @@ def _prepare_step_variables( # be more than one prior step variable that is 'files'! iter_values: list[str] = [] iter_variable: str | None = None - plumbing: dict[str, list[Connector]] = get_step_prior_step_plumbing( - step_definition=step_definition - ) - for p_step_name, connections in plumbing.items(): + for p_step_name, connections in our_plumbing.items(): # We need to get the Job definition for each step - # and then check whether the (ouptu) variable is of type 'files'... + # and then check whether the (output) variable is of type 'files'... 
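# The combiner test above as a standalone sketch (hypothetical data): a step
# combines when one of its own *inputs* is declared with type 'files', i.e.
# it accepts the whole set of files produced by a replicated prior step.
example_our_inputs = {"inputFile": {"type": "files"}}
example_plumbing = {"parallel": [("outputFile", "inputFile")]}  # (in_, out)
we_are_a_combiner = any(
    example_our_inputs.get(out, {}).get("type") == "files"
    for connections in example_plumbing.values()
    for _, out in connections
)
assert we_are_a_combiner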
wf_step: dict[str, Any] = get_step(wf, p_step_name) assert wf_step job_definition: dict[str, Any] = self._get_step_job(step=wf_step) @@ -497,6 +497,9 @@ def _launch( step_definition: dict[str, Any], step_preparation_response: StepPreparationResponse, ) -> None: + """Given a runningWorkflow record, a step defitnion (from the Workflow), + and the step's variables (in a preparation object) this method launches + one or more instances of the given step.""" step_name: str = step_definition["name"] rwf_id: str = rwf["id"] project_id = rwf["project"]["id"] From 19e58ae8249d600d903de82044a2898f658f8c9d Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Tue, 2 Sep 2025 16:00:36 +0000 Subject: [PATCH 47/57] refactor: More combiner logic --- tests/instance_launcher.py | 6 ++ tests/test_test_wapi_adapter.py | 66 -------------- tests/wapi_adapter.py | 44 ++-------- .../simple-python-split-combine.yaml | 14 +++ workflow/workflow_abc.py | 60 +++++++------ workflow/workflow_engine.py | 88 +++++++++++++------ 6 files changed, 119 insertions(+), 159 deletions(-) diff --git a/tests/instance_launcher.py b/tests/instance_launcher.py index c938e9e..c59d138 100644 --- a/tests/instance_launcher.py +++ b/tests/instance_launcher.py @@ -76,6 +76,12 @@ def launch(self, *, launch_parameters: LaunchParameters) -> LaunchResult: os.makedirs(EXECUTION_DIRECTORY, exist_ok=True) + if launch_parameters.step_replication_number: + assert ( + launch_parameters.step_replication_number + <= launch_parameters.total_number_of_replicas + ) + # Create a running workflow step assert launch_parameters.running_workflow_id assert launch_parameters.step_name diff --git a/tests/test_test_wapi_adapter.py b/tests/test_test_wapi_adapter.py index a8ebc33..f116d69 100644 --- a/tests/test_test_wapi_adapter.py +++ b/tests/test_test_wapi_adapter.py @@ -421,72 +421,6 @@ def test_create_instance_and_get_step_instance_directory_by_name(): assert response["instance_directory"] == f".{i_id}" -def test_get_workflow_steps_driving_this_step_when_1st_step(): - # Arrange - utaa = UnitTestWorkflowAPIAdapter() - response = utaa.create_workflow( - workflow_definition={ - "name": "blah", - "steps": [{"name": "step-1"}, {"name": "step-2"}, {"name": "step-3"}], - } - ) - response = utaa.create_running_workflow( - user_id="dlister", - workflow_id=response["id"], - project_id=TEST_PROJECT_ID, - variables={}, - ) - response, _ = utaa.create_running_workflow_step( - running_workflow_id=response["id"], step="step-1" - ) - rwfs_id = response["id"] - - # Act - response, _ = utaa.get_workflow_steps_driving_this_step( - running_workflow_step_id=rwfs_id - ) - - # Assert - assert response["caller_step_index"] == 0 - assert len(response["steps"]) == 3 - assert response["steps"][0]["name"] == "step-1" - assert response["steps"][1]["name"] == "step-2" - assert response["steps"][2]["name"] == "step-3" - - -def test_get_workflow_steps_driving_this_step_when_2nd_step(): - # Arrange - utaa = UnitTestWorkflowAPIAdapter() - response = utaa.create_workflow( - workflow_definition={ - "name": "blah", - "steps": [{"name": "step-1"}, {"name": "step-2"}, {"name": "step-3"}], - } - ) - response = utaa.create_running_workflow( - user_id="dlister", - workflow_id=response["id"], - project_id=TEST_PROJECT_ID, - variables={}, - ) - response, _ = utaa.create_running_workflow_step( - running_workflow_id=response["id"], step="step-2" - ) - rwfs_id = response["id"] - - # Act - response, _ = utaa.get_workflow_steps_driving_this_step( - running_workflow_step_id=rwfs_id - ) - - # Assert - assert 
response["caller_step_index"] == 1 - assert len(response["steps"]) == 3 - assert response["steps"][0]["name"] == "step-1" - assert response["steps"][1]["name"] == "step-2" - assert response["steps"][2]["name"] == "step-3" - - def test_get_running_workflow_step_by_name(): # Arrange utaa = UnitTestWorkflowAPIAdapter() diff --git a/tests/wapi_adapter.py b/tests/wapi_adapter.py index c283ee1..75bd0e9 100644 --- a/tests/wapi_adapter.py +++ b/tests/wapi_adapter.py @@ -112,6 +112,12 @@ def get_running_steps( # Does nothing at the moment - this is used for the STOP logic. return {"count": 0, "steps": []}, 0 + def get_status_of_all_step_instances_by_name( + self, *, running_workflow_id: str, step_name: str + ) -> tuple[dict[str, Any], int]: + # Need to implement! + return {"count": 0, "status": []}, 0 + def set_running_workflow_done( self, *, @@ -249,44 +255,6 @@ def set_running_workflow_step_done( Pickler(pickle_file).dump(running_workflow_step) UnitTestWorkflowAPIAdapter.lock.release() - def get_workflow_steps_driving_this_step( - self, - *, - running_workflow_step_id: str, - ) -> tuple[dict[str, Any], int]: - # To accomplish this we get the running workflow for the step, - # then the workflow, then the steps from that workflow. - # We return a dictionary and an HTTP response code. - UnitTestWorkflowAPIAdapter.lock.acquire() - with open(_RUNNING_WORKFLOW_STEP_PICKLE_FILE, "rb") as pickle_file: - running_workflow_step = Unpickler(pickle_file).load() - UnitTestWorkflowAPIAdapter.lock.release() - - assert running_workflow_step_id in running_workflow_step - - running_workflow_id: str = running_workflow_step[running_workflow_step_id][ - "running_workflow" - ]["id"] - rwf_response, _ = self.get_running_workflow( - running_workflow_id=running_workflow_id - ) - assert rwf_response - workflow_id: str = rwf_response["workflow"]["id"] - wf_response, _ = self.get_workflow(workflow_id=workflow_id) - assert wf_response - # Find the caller's python in the step sequence (-1 if not found) - caller_step_index: int = -1 - index: int = 0 - for step in wf_response["steps"]: - if step["name"] == running_workflow_step[running_workflow_step_id]["name"]: - caller_step_index = index - break - index += 1 - return { - "caller_step_index": caller_step_index, - "steps": wf_response["steps"].copy(), - }, 0 - def get_instance(self, *, instance_id: str) -> tuple[dict[str, Any], int]: UnitTestWorkflowAPIAdapter.lock.acquire() with open(_INSTANCE_PICKLE_FILE, "rb") as pickle_file: diff --git a/tests/workflow-definitions/simple-python-split-combine.yaml b/tests/workflow-definitions/simple-python-split-combine.yaml index 26fb3d7..739ec15 100644 --- a/tests/workflow-definitions/simple-python-split-combine.yaml +++ b/tests/workflow-definitions/simple-python-split-combine.yaml @@ -36,5 +36,19 @@ steps: from-step: name: split variable: outputBase + +#- name: combine +# description: Add some params +# specification: +# collection: workflow-engine-unit-test-jobs +# job: concatenate +# version: "1.0.0" +# variables: +# outputFile: results.smi +# plumbing: +# - variable: inputFile +# from-step: +# variable: outputFile +# name: parallel out: - outputFile diff --git a/workflow/workflow_abc.py b/workflow/workflow_abc.py index 0a0acc0..ee9c22a 100644 --- a/workflow/workflow_abc.py +++ b/workflow/workflow_abc.py @@ -42,8 +42,12 @@ class LaunchParameters: # If only one instance of the step is expected to run # this value can be left at 0 (zero). 
If this step's launch # is expected to be executed more than once the value should be - # non-zero (and unique for this workflow run). + # 1..'N'. step_replication_number: int = 0 + # The total number of replicas of this instance that are expected to be laucnhed. + # if step_replication_number is set, this has to be set. It is 'N'. + # If step_replication_number is zero this value is ignored. + total_number_of_replicas: int = 0 # The application ID (a custom resource name) # used to identify the 'type' of Instance to create. # For DM Jobs this will be 'datamanagerjobs.squonk.it' @@ -144,7 +148,7 @@ def get_running_workflow( # "running_user": "alan", # "running_user_api_token": "123456789", # "done": False, - # "success": false, + # "success": False, # "error_num": 0, # "error_msg": "", # "workflow": { @@ -171,7 +175,29 @@ def get_running_steps( # "count": 1, # "steps": [ # { - # "name:": "step-1234" + # "name": "step-1234" + # } + # ] + # } + + @abstractmethod + def get_status_of_all_step_instances_by_name( + self, *, running_workflow_id: str, step_name: str + ) -> tuple[dict[str, Any], int]: + """Get a list of step execution statuses for the named step.""" + # Should return: + # { + # "count": 2, + # "status": [ + # { + # "done": True, + # "success": True, + # "running_workflow_step_id": "step-0001" + # }, + # { + # "done": False, + # "success": False, + # "running_workflow_step_id": "step-0002" # } # ] # } @@ -195,9 +221,9 @@ def get_running_workflow_step( """Get a RunningWorkflowStep Record""" # Should return: # { - # "name:": "step-1234", + # "name": "step-1234", # "done": False, - # "success": false, + # "success": False, # "error_num": 0, # "error_msg": "", # "variables": { @@ -234,9 +260,9 @@ def get_running_workflow_step_by_name( # Should return: # { # "id": "r-workflow-step-00000000-0000-0000-0000-000000000001", - # "name:": "step-1234", + # "name": "step-1234", # "done": False, - # "success": false, + # "success": False, # "error_num": 0, # "error_msg": "", # "variables": { @@ -274,26 +300,6 @@ def set_running_workflow_step_done( """Set the success value for a RunningWorkflowStep Record, If not successful an error code and message should be provided.""" - @abstractmethod - def get_workflow_steps_driving_this_step( - self, - *, - running_workflow_step_id: str, - ) -> tuple[dict[str, Any], int]: - """Get all the step records that belong to the Workflow for the given - RunningWorkflowStep record ID. You are also given the caller's position - in the list, which will be -1 if the caller is not present.""" - # It should return: - # { - # "caller_step_index": 0, - # "steps": [ - # { - # "name": "step-name" - # "specification": "{}", - # } - # ] - # } - @abstractmethod def get_instance(self, *, instance_id: str) -> tuple[dict[str, Any], int]: """Get an Instance Record""" diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index edf5715..34d51b7 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -58,12 +58,14 @@ class StepPreparationResponse: can be launched - it's value indicates how many times. If a step can be launched 'variables' will not be None. If a parallel set of steps can take place (even just one) 'iteration_variable' will be set and 'iteration_values' - will be a list containing a value for eacdh step.""" + will be a list containing a value for each step. 
If prparation failed + 'error_msg' chould contain something useful.""" iterations: int variables: dict[str, Any] | None = None iteration_variable: str | None = None iteration_values: list[str] | None = None + error_msg: str | None = None class WorkflowEngine: @@ -145,6 +147,7 @@ def _handle_workflow_start_message(self, r_wfid: str) -> None: wf=wf_response, step_definition=first_step, rwf=rwf_response ) assert sp_resp.variables is not None + assert sp_resp.error_msg is None # Launch it. # If there's a launch problem the step (and running workflow) will have # and error, stopping it. There will be no Pod event as the launch has failed. @@ -303,7 +306,7 @@ def _handle_pod_message(self, msg: PodMessage) -> None: sp_resp = self._prepare_step( wf=wf_response, step_definition=next_step, rwf=rwf_response ) - if sp_resp.iterations == 0: + if sp_resp.iterations == 0 or sp_resp.error_msg: # Cannot prepare variables for this step, # we have to leave. return @@ -366,7 +369,59 @@ def _prepare_step( step_name: str = step_definition["name"] rwf_id: str = rwf["id"] - # Compile a set of variables for this step. + # Before we move on, are we combiner? + # + # We are if a variable in our step's plumbing refers to an input of ours + # that is of type 'files'. If we are a combiner then we use the name of the + # step we are combining (there can only be one) so that we can ensure + # all its step instances have finished (successfully). We cannot + # move on until all the files we depend on are ready. + + our_job_definition: dict[str, Any] = self._get_step_job(step=step_definition) + our_inputs: dict[str, Any] = job_defintion_decoder.get_inputs( + our_job_definition + ) + our_plumbing: dict[str, list[Connector]] = get_step_prior_step_plumbing( + step_definition=step_definition + ) + step_name_being_combined: str | None = None + for p_step_name, connections in our_plumbing.items(): + for connector in connections: + if our_inputs.get(connector.out, {}).get("type") == "files": + step_name_being_combined = p_step_name + break + if step_name_being_combined: + break + if step_name_being_combined: + response, _ = self._wapi_adapter.get_status_of_all_step_instances_by_name( + running_workflow_id=rwf_id, + step_name=step_name_being_combined, + ) + # Assume succes... + all_step_instances_done: bool = True + all_step_instances_successful: bool = True + assert "count" in response + assert response["count"] > 0 + assert "status" in response + for status in response["status"]: + if not status["done"]: + all_step_instances_done = False + break + if not status["success"]: + all_step_instances_successful = False + break + if not all_step_instances_done: + # Can't move on - but other steps need to finish. + return StepPreparationResponse(iterations=0) + elif not all_step_instances_successful: + # Can't move on - all prior steps are done, + # but at least one was in error. + return StepPreparationResponse( + iterations=0, + error_msg="A prior step 'step_name_being_combined' iteration has failed", + ) + + # Now compile a set of variables for this step. # Start with any variables provided in the step's specification. # A map that we will add to (and maybe even over-write)... @@ -417,30 +472,6 @@ def _prepare_step( self._set_step_error(step_name, rwf_id, None, 1, msg) return StepPreparationResponse(iterations=0) - # Our inputs - our_job_definition: dict[str, Any] = self._get_step_job(step=step_definition) - our_inputs: dict[str, Any] = job_defintion_decoder.get_inputs( - our_job_definition - ) - - # Are we a combiner step? 
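# The combiner gate above, reduced to a sketch against the documented
# response shape of get_status_of_all_step_instances_by_name (hypothetical
# data): waiting is signalled by iterations=0, and a failed iteration
# produces an error message instead.
example_response = {
    "count": 2,
    "status": [
        {"done": True, "success": True, "running_workflow_step_id": "step-0001"},
        {"done": False, "success": False, "running_workflow_step_id": "step-0002"},
    ],
}
all_done = all(s["done"] for s in example_response["status"])
all_successful = all(s["success"] for s in example_response["status"])
# not all_done                    -> wait (return iterations=0, no error)
# all_done and not all_successful -> iterations=0 with an error_msg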
- # - # We are if a variable in our step's plumbing refers to an input that is - # of type 'files'. A combiner's input is required to accept a space-separated - # set of files. - we_are_a_combiner: bool = False - our_plumbing: dict[str, list[Connector]] = get_step_prior_step_plumbing( - step_definition=step_definition - ) - for p_step_name, connections in our_plumbing.items(): - for connector in connections: - if our_inputs.get(connector.out, {}).get("type") == "files": - we_are_a_combiner = True - - assert not we_are_a_combiner - - # We're not a combiner... - # Do we replicate this step (run it more than once)? # We do if a variable in this step's plumbing # refers to an output of a prior step whose type is 'files'. @@ -507,7 +538,7 @@ def _launch( # A step replication number, # used only for steps expected to run in parallel (even if just once) step_replication_number: int = 0 - + total_replicas: int = step_preparation_response.iterations variables = step_preparation_response.variables assert variables is not None for iteration in range(step_preparation_response.iterations): @@ -550,6 +581,7 @@ def _launch( running_workflow_id=rwf_id, step_name=step_name, step_replication_number=step_replication_number, + total_number_of_replicas=total_replicas, ) lr: LaunchResult = self._instance_launcher.launch(launch_parameters=lp) rwfs_id = lr.running_workflow_step_id From f6707c8b41852ce7e141ad455c0e8dd39bfbe6c0 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Tue, 2 Sep 2025 16:36:46 +0000 Subject: [PATCH 48/57] feat: refactor definition of an output --- tests/test_decoder.py | 38 ---------------- tests/test_workflow_engine_examples.py | 2 +- .../test_workflow_validator_for_run_level.py | 24 ---------- .../test_workflow_validator_for_tag_level.py | 24 ---------- ...cate-step-input-output-variable-names.yaml | 44 ------------------- .../example-smiles-to-file.yaml | 4 +- .../replicate-using-undeclared-input.yaml | 4 +- .../shortcut-example-1.yaml | 4 +- .../simple-python-molprops-with-options.yaml | 4 +- .../simple-python-molprops.yaml | 4 +- .../simple-python-parallel.yaml | 14 +++--- .../simple-python-split-combine.yaml | 11 +++-- workflow/workflow-schema.yaml | 32 +++++++------- 13 files changed, 40 insertions(+), 169 deletions(-) delete mode 100644 tests/workflow-definitions/duplicate-step-input-output-variable-names.yaml diff --git a/tests/test_decoder.py b/tests/test_decoder.py index 4958731..28136b3 100644 --- a/tests/test_decoder.py +++ b/tests/test_decoder.py @@ -184,41 +184,3 @@ def test_get_workflow_steps(): assert len(steps) == 2 assert steps[0]["name"] == "step1" assert steps[1]["name"] == "step2" - - -def test_get_step_input_variable_names_when_duplicates(): - # Arrange - workflow_filename: str = os.path.join( - os.path.dirname(__file__), - "workflow-definitions", - "duplicate-step-input-output-variable-names.yaml", - ) - with open(workflow_filename, "r", encoding="utf8") as wf_file: - definition: Dict[str, Any] = yaml.safe_load(wf_file) - - # Act - inputs = decoder.get_step_input_variable_names(definition, "step-1") - - # Assert - assert len(inputs) == 2 - assert inputs[0] == "inputFile" - assert inputs[1] == "inputFile" - - -def test_get_step_output_variable_names_when_duplicates(): - # Arrange - workflow_filename: str = os.path.join( - os.path.dirname(__file__), - "workflow-definitions", - "duplicate-step-input-output-variable-names.yaml", - ) - with open(workflow_filename, "r", encoding="utf8") as wf_file: - definition: Dict[str, Any] = yaml.safe_load(wf_file) - - # Act - outputs 
= decoder.get_step_output_variable_names(definition, "step-2") - - # Assert - assert len(outputs) == 2 - assert outputs[0] == "outputFile" - assert outputs[1] == "outputFile" diff --git a/tests/test_workflow_engine_examples.py b/tests/test_workflow_engine_examples.py index dbfd779..720fd4e 100644 --- a/tests/test_workflow_engine_examples.py +++ b/tests/test_workflow_engine_examples.py @@ -428,7 +428,7 @@ def test_workflow_engine_simple_python_split_combine(basic_engine): md, da, "simple-python-split-combine", - {"candidateMolecules": input_file_1}, + {"candidateMolecules": input_file_1, "combination": "combination.smi"}, ) # Assert diff --git a/tests/test_workflow_validator_for_run_level.py b/tests/test_workflow_validator_for_run_level.py index e76239d..6b575a3 100644 --- a/tests/test_workflow_validator_for_run_level.py +++ b/tests/test_workflow_validator_for_run_level.py @@ -215,27 +215,3 @@ def test_validate_simple_python_molprops_with_missing_input(): assert error.error_msg == [ "Missing workflow variable values for: candidateMolecules" ] - - -def test_validate_duplicate_step_output_variable_names(): - # Arrange - workflow_filename: str = os.path.join( - os.path.dirname(__file__), - "workflow-definitions", - "duplicate-step-input-output-variable-names.yaml", - ) - with open(workflow_filename, "r", encoding="utf8") as workflow_file: - workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) - assert workflow - - # Act - error = WorkflowValidator.validate( - level=ValidationLevel.TAG, - workflow_definition=workflow, - ) - - # Assert - assert error.error_num == 3 - assert error.error_msg == [ - "Duplicate step output variable: outputFile (step=step-2)" - ] diff --git a/tests/test_workflow_validator_for_tag_level.py b/tests/test_workflow_validator_for_tag_level.py index 4c1719d..ddf403f 100644 --- a/tests/test_workflow_validator_for_tag_level.py +++ b/tests/test_workflow_validator_for_tag_level.py @@ -149,27 +149,3 @@ def test_validate_simple_python_molprops_with_options(): # Assert assert error.error_num == 0 assert error.error_msg is None - - -def test_validate_duplicate_step_output_variable_names(): - # Arrange - workflow_filename: str = os.path.join( - os.path.dirname(__file__), - "workflow-definitions", - "duplicate-step-input-output-variable-names.yaml", - ) - with open(workflow_filename, "r", encoding="utf8") as workflow_file: - workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) - assert workflow - - # Act - error = WorkflowValidator.validate( - level=ValidationLevel.TAG, - workflow_definition=workflow, - ) - - # Assert - assert error.error_num == 3 - assert error.error_msg == [ - "Duplicate step output variable: outputFile (step=step-2)" - ] diff --git a/tests/workflow-definitions/duplicate-step-input-output-variable-names.yaml b/tests/workflow-definitions/duplicate-step-input-output-variable-names.yaml deleted file mode 100644 index deaae85..0000000 --- a/tests/workflow-definitions/duplicate-step-input-output-variable-names.yaml +++ /dev/null @@ -1,44 +0,0 @@ ---- -kind: DataManagerWorkflow -kind-version: "2025.2" -name: duplicate-step-output-variable-names -description: A workflow where step-1 has duplicate output variable names - -steps: -- name: step-1 - description: Add column 1 - specification: - collection: workflow-engine-unit-test-jobs - job: rdkit-molprops - version: "1.0.0" - variables: - name: "col1" - value: 123 - plumbing: - - variable: inputFile - from-workflow: - variable: candidateMolecules - - variable: inputFile - 
from-workflow: - variable: candidateMolecules - in: - - inputFile - - inputFile - -- name: step-2 - description: Add column 2 - specification: - collection: workflow-engine-unit-test-jobs - job: cluster-butina - version: "1.0.0" - variables: - name: "col2" - value: "999" - plumbing: - - variable: inputFile - from-step: - name: step1 - variable: outputFile - out: - - outputFile - - outputFile diff --git a/tests/workflow-definitions/example-smiles-to-file.yaml b/tests/workflow-definitions/example-smiles-to-file.yaml index 29c3e98..0f0844a 100644 --- a/tests/workflow-definitions/example-smiles-to-file.yaml +++ b/tests/workflow-definitions/example-smiles-to-file.yaml @@ -20,5 +20,5 @@ steps: - variable: smiles from-workflow: variable: smiles - out: - - outputFile + - variable: outputFile + to-project: diff --git a/tests/workflow-definitions/replicate-using-undeclared-input.yaml b/tests/workflow-definitions/replicate-using-undeclared-input.yaml index 0828b48..5b81d9e 100644 --- a/tests/workflow-definitions/replicate-using-undeclared-input.yaml +++ b/tests/workflow-definitions/replicate-using-undeclared-input.yaml @@ -38,5 +38,5 @@ steps: from-step: name: step-1 variable: outputFile - out: - - outputFile + - variable: outputFile + to-project: diff --git a/tests/workflow-definitions/shortcut-example-1.yaml b/tests/workflow-definitions/shortcut-example-1.yaml index b9c4a87..5bb94c3 100644 --- a/tests/workflow-definitions/shortcut-example-1.yaml +++ b/tests/workflow-definitions/shortcut-example-1.yaml @@ -24,5 +24,5 @@ steps: from-step: name: example-1-step-1 variable: outputFile - out: - - outputFile + - variable: outputFile + to-project: diff --git a/tests/workflow-definitions/simple-python-molprops-with-options.yaml b/tests/workflow-definitions/simple-python-molprops-with-options.yaml index de1ad86..30cf64b 100644 --- a/tests/workflow-definitions/simple-python-molprops-with-options.yaml +++ b/tests/workflow-definitions/simple-python-molprops-with-options.yaml @@ -41,5 +41,5 @@ steps: - variable: outputFile from-workflow: variable: clusteredMolecules - out: - - outputFile + - variable: outputFile + to-project: diff --git a/tests/workflow-definitions/simple-python-molprops.yaml b/tests/workflow-definitions/simple-python-molprops.yaml index 5639da3..054bb2b 100644 --- a/tests/workflow-definitions/simple-python-molprops.yaml +++ b/tests/workflow-definitions/simple-python-molprops.yaml @@ -37,5 +37,5 @@ steps: - variable: outputFile from-workflow: variable: clusteredMolecules - out: - - outputFile + - variable: outputFile + to-project: diff --git a/tests/workflow-definitions/simple-python-parallel.yaml b/tests/workflow-definitions/simple-python-parallel.yaml index dc8e3f3..2a0fcb7 100644 --- a/tests/workflow-definitions/simple-python-parallel.yaml +++ b/tests/workflow-definitions/simple-python-parallel.yaml @@ -55,14 +55,14 @@ steps: collection: workflow-engine-unit-test-jobs job: concatenate version: "1.0.0" - inputs: - - input: inputFile + plumbing: + - variable: inputFile from: step: parallel-step-a - output: outputFile - - input: inputFile + variable: outputFile + - variable: inputFile from: step: parallel-step-b - output: outputFile - out: - - outputFile + variable: outputFile + - variable: outputFile + to-project: diff --git a/tests/workflow-definitions/simple-python-split-combine.yaml b/tests/workflow-definitions/simple-python-split-combine.yaml index 739ec15..476d27d 100644 --- a/tests/workflow-definitions/simple-python-split-combine.yaml +++ 
b/tests/workflow-definitions/simple-python-split-combine.yaml @@ -38,7 +38,7 @@ steps: variable: outputBase #- name: combine -# description: Add some params +# description: Combine the parallel files # specification: # collection: workflow-engine-unit-test-jobs # job: concatenate @@ -47,8 +47,11 @@ steps: # outputFile: results.smi # plumbing: # - variable: inputFile -# from-step: # variable: outputFile +# from-step: # name: parallel - out: - - outputFile +# - variable: outputFile +# from-workflow: +# variable: combination +# - variable: outputFile +# to-project: diff --git a/workflow/workflow-schema.yaml b/workflow/workflow-schema.yaml index 97a4610..bd55c27 100644 --- a/workflow/workflow-schema.yaml +++ b/workflow/workflow-schema.yaml @@ -102,6 +102,20 @@ definitions: - variable - from-workflow + # A Step variable + # (whose value is to be copied to the project directory) + step-variable-to-project: + type: object + additionalProperties: false + properties: + variable: + $ref: '#/definitions/variable-name' + to-project: + type: 'null' + required: + - variable + - to-project + # A step specification variable # (there must be at least one if a variables block is defined). # Typical variable syntax based on Python's definition of a variable @@ -167,23 +181,7 @@ definitions: anyOf: - $ref: "#/definitions/step-variable-from-step" - $ref: "#/definitions/step-variable-from-workflow" - minItems: 1 - in: - # An optional list of the step variables that are inputs. - # These are typically files, expected to be present in the Project directory, - # that need to be copied (by the DM) into the step's instance directory. - type: array - items: - $ref: '#/definitions/variable-name' - minItems: 1 - out: - # An optional list of the step variables that are outputs. - # These are typically files, expected to be present in the Step Instance directory, - # when it finished (successfully), that need to be copied (by the DM) - # into the Project directory via "realise_outputs()" - type: array - items: - $ref: '#/definitions/variable-name' + - $ref: "#/definitions/step-variable-to-project" minItems: 1 required: - name From 5834c8c998d004d32839468e1ea1ded25c618626 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Wed, 3 Sep 2025 11:00:21 +0000 Subject: [PATCH 49/57] fix: Add get_status_of_all_step_instances_by_name implementation (and fix step replicas) --- tests/instance_launcher.py | 1 + tests/wapi_adapter.py | 22 +++++++++++++++++++--- workflow/workflow_abc.py | 4 +++- 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/tests/instance_launcher.py b/tests/instance_launcher.py index c59d138..137a77b 100644 --- a/tests/instance_launcher.py +++ b/tests/instance_launcher.py @@ -89,6 +89,7 @@ def launch(self, *, launch_parameters: LaunchParameters) -> LaunchResult: running_workflow_id=launch_parameters.running_workflow_id, step=launch_parameters.step_name, replica=launch_parameters.step_replication_number, + replicas=launch_parameters.total_number_of_replicas, ) assert "id" in response rwfs_id: str = response["id"] diff --git a/tests/wapi_adapter.py b/tests/wapi_adapter.py index 75bd0e9..71e326b 100644 --- a/tests/wapi_adapter.py +++ b/tests/wapi_adapter.py @@ -113,10 +113,24 @@ def get_running_steps( return {"count": 0, "steps": []}, 0 def get_status_of_all_step_instances_by_name( - self, *, running_workflow_id: str, step_name: str + self, *, running_workflow_id: str, name: str ) -> tuple[dict[str, Any], int]: - # Need to implement! 
- return {"count": 0, "status": []}, 0 + UnitTestWorkflowAPIAdapter.lock.acquire() + with open(_RUNNING_WORKFLOW_STEP_PICKLE_FILE, "rb") as pickle_file: + running_workflow_step = Unpickler(pickle_file).load() + UnitTestWorkflowAPIAdapter.lock.release() + + steps: list[dict[str, Any]] = [] + for rwfs_id, record in running_workflow_step.items(): + if record["running_workflow"]["id"] != running_workflow_id: + continue + if record["name"] == name: + response = record + response["id"] = rwfs_id + if record["replica"] == 0: + _ = response.pop("replica") + steps.append(response) + return {"count": len(steps), "status": steps}, 0 def set_running_workflow_done( self, @@ -146,6 +160,7 @@ def create_running_workflow_step( running_workflow_id: str, step: str, replica: int = 0, + replicas: int = 0, prior_running_workflow_step_id: str | None = None, ) -> tuple[dict[str, Any], int]: if replica: @@ -164,6 +179,7 @@ def create_running_workflow_step( "done": False, "success": False, "replica": replica, + "replicas": replicas, "variables": {}, "running_workflow": {"id": running_workflow_id}, } diff --git a/workflow/workflow_abc.py b/workflow/workflow_abc.py index ee9c22a..2648772 100644 --- a/workflow/workflow_abc.py +++ b/workflow/workflow_abc.py @@ -182,7 +182,7 @@ def get_running_steps( @abstractmethod def get_status_of_all_step_instances_by_name( - self, *, running_workflow_id: str, step_name: str + self, *, name: str, running_workflow_id: str ) -> tuple[dict[str, Any], int]: """Get a list of step execution statuses for the named step.""" # Should return: @@ -226,6 +226,8 @@ def get_running_workflow_step( # "success": False, # "error_num": 0, # "error_msg": "", + # "replica": 0, + # "replicas": 0, # "variables": { # "x": 1, # "y": 2, From cfaeaec5c47b781d1cc400380fc5b83e904fed76 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Wed, 3 Sep 2025 11:00:54 +0000 Subject: [PATCH 50/57] docs: Doc tweak --- workflow/workflow-schema.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/workflow-schema.yaml b/workflow/workflow-schema.yaml index bd55c27..2266fca 100644 --- a/workflow/workflow-schema.yaml +++ b/workflow/workflow-schema.yaml @@ -103,7 +103,7 @@ definitions: - from-workflow # A Step variable - # (whose value is to be copied to the project directory) + # (whose value (a file) is to be copied to the project directory) step-variable-to-project: type: object additionalProperties: false From 7d0363efef546b6e49daf75fdf8ebf5a6decb8b6 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Wed, 3 Sep 2025 11:01:39 +0000 Subject: [PATCH 51/57] fix: Typo in YAML --- tests/workflow-definitions/simple-python-split-combine.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/workflow-definitions/simple-python-split-combine.yaml b/tests/workflow-definitions/simple-python-split-combine.yaml index 476d27d..da306ae 100644 --- a/tests/workflow-definitions/simple-python-split-combine.yaml +++ b/tests/workflow-definitions/simple-python-split-combine.yaml @@ -47,9 +47,9 @@ steps: # outputFile: results.smi # plumbing: # - variable: inputFile -# variable: outputFile # from-step: # name: parallel +# variable: outputFile # - variable: outputFile # from-workflow: # variable: combination From f3361199abbafd73ab59ae880b7dd09c6e9c6860 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Wed, 3 Sep 2025 11:04:03 +0000 Subject: [PATCH 52/57] feat: Minor work on combiner logic --- workflow/workflow_engine.py | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 
deletions(-) diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index 34d51b7..ce0f1fe 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -393,16 +393,19 @@ def _prepare_step( if step_name_being_combined: break if step_name_being_combined: + print("*** COMBINER") response, _ = self._wapi_adapter.get_status_of_all_step_instances_by_name( + name=step_name_being_combined, running_workflow_id=rwf_id, - step_name=step_name_being_combined, ) # Assume succes... - all_step_instances_done: bool = True - all_step_instances_successful: bool = True assert "count" in response - assert response["count"] > 0 + num_being_combined: int = response["count"] + assert num_being_combined > 0 assert "status" in response + + all_step_instances_done: bool = True + all_step_instances_successful: bool = True for status in response["status"]: if not status["done"]: all_step_instances_done = False @@ -412,15 +415,30 @@ def _prepare_step( break if not all_step_instances_done: # Can't move on - but other steps need to finish. + _LOGGER.debug( + "Assessing start of combiner step (%s)" + " but not all steps (%s) to be combined are done", + step_name, + step_name_being_combined, + ) return StepPreparationResponse(iterations=0) elif not all_step_instances_successful: # Can't move on - all prior steps are done, # but at least one was in error. + _LOGGER.debug( + "Assessing start of combiner step (%s)" + " but at least one step (%s) to be combined failed", + step_name, + step_name_being_combined, + ) return StepPreparationResponse( iterations=0, - error_msg="A prior step 'step_name_being_combined' iteration has failed", + error_msg=f"Prior instance of step '{step_name_being_combined}' has failed", ) + if step_name_being_combined: + print("*** COMBINER : Able to start") + # Now compile a set of variables for this step. # Start with any variables provided in the step's specification. @@ -447,6 +465,9 @@ def _prepare_step( # related to values used in prior steps. The decoder gives # us a map indexed by prior step name that's a list of "in" "out" # tuples as above. + # + # If this is a combiner step remember that we need to inspect + # variables from all the prior steps. 
prior_step_plumbing: dict[str, list[Connector]] = get_step_prior_step_plumbing( step_definition=step_definition ) From bea2cdb963dd7878e05e9ac23bf6f274790cfd6d Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Wed, 3 Sep 2025 12:51:49 +0000 Subject: [PATCH 53/57] fix: First very basic combiner run --- tests/test_workflow_engine_examples.py | 12 +- .../simple-python-split-combine.yaml | 36 +++--- workflow/workflow_engine.py | 120 +++++++++++------- 3 files changed, 98 insertions(+), 70 deletions(-) diff --git a/tests/test_workflow_engine_examples.py b/tests/test_workflow_engine_examples.py index 720fd4e..056c787 100644 --- a/tests/test_workflow_engine_examples.py +++ b/tests/test_workflow_engine_examples.py @@ -439,10 +439,8 @@ def test_workflow_engine_simple_python_split_combine(basic_engine): print("response") pprint(response) - assert response["count"] == 3 - assert response["running_workflow_steps"][0]["done"] - assert response["running_workflow_steps"][0]["success"] - assert response["running_workflow_steps"][1]["done"] - assert response["running_workflow_steps"][1]["success"] - assert response["running_workflow_steps"][2]["done"] - assert response["running_workflow_steps"][2]["success"] + assert response["count"] == 4 + rwf_steps = response["running_workflow_steps"] + for rwf_step in rwf_steps: + assert rwf_step["done"] + assert rwf_step["success"] diff --git a/tests/workflow-definitions/simple-python-split-combine.yaml b/tests/workflow-definitions/simple-python-split-combine.yaml index da306ae..2dcc68e 100644 --- a/tests/workflow-definitions/simple-python-split-combine.yaml +++ b/tests/workflow-definitions/simple-python-split-combine.yaml @@ -37,21 +37,21 @@ steps: name: split variable: outputBase -#- name: combine -# description: Combine the parallel files -# specification: -# collection: workflow-engine-unit-test-jobs -# job: concatenate -# version: "1.0.0" -# variables: -# outputFile: results.smi -# plumbing: -# - variable: inputFile -# from-step: -# name: parallel -# variable: outputFile -# - variable: outputFile -# from-workflow: -# variable: combination -# - variable: outputFile -# to-project: +- name: combine + description: Combine the parallel files + specification: + collection: workflow-engine-unit-test-jobs + job: concatenate + version: "1.0.0" + variables: + outputFile: results.smi + plumbing: + - variable: inputFile + from-step: + name: parallel + variable: outputFile + - variable: outputFile + from-workflow: + variable: combination + - variable: outputFile + to-project: diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index ce0f1fe..37aecfe 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -384,24 +384,28 @@ def _prepare_step( our_plumbing: dict[str, list[Connector]] = get_step_prior_step_plumbing( step_definition=step_definition ) + step_is_combiner: bool = False step_name_being_combined: str | None = None + combiner_input_variable: str | None = None + num_step_recplicas_being_combined: int = 0 for p_step_name, connections in our_plumbing.items(): for connector in connections: if our_inputs.get(connector.out, {}).get("type") == "files": step_name_being_combined = p_step_name + combiner_input_variable = connector.out + step_is_combiner = True break if step_name_being_combined: break if step_name_being_combined: - print("*** COMBINER") response, _ = self._wapi_adapter.get_status_of_all_step_instances_by_name( name=step_name_being_combined, running_workflow_id=rwf_id, ) # Assume succes... 
assert "count" in response - num_being_combined: int = response["count"] - assert num_being_combined > 0 + num_step_recplicas_being_combined = response["count"] + assert num_step_recplicas_being_combined > 0 assert "status" in response all_step_instances_done: bool = True @@ -436,10 +440,8 @@ def _prepare_step( error_msg=f"Prior instance of step '{step_name_being_combined}' has failed", ) - if step_name_being_combined: - print("*** COMBINER : Able to start") - - # Now compile a set of variables for this step. + # I think we can start this step, + # so compile a set of variables for it. # Start with any variables provided in the step's specification. # A map that we will add to (and maybe even over-write)... @@ -472,15 +474,39 @@ def _prepare_step( step_definition=step_definition ) for prior_step_name, connections in prior_step_plumbing.items(): - # Retrieve the prior "running" step - # in order to get the variables that were set there... - prior_step, _ = self._wapi_adapter.get_running_workflow_step_by_name( - name=prior_step_name, running_workflow_id=rwf_id - ) - # Copy "in" value to "out"... - for connector in connections: - assert connector.in_ in prior_step["variables"] - variables[connector.out] = prior_step["variables"][connector.in_] + if step_is_combiner and prior_step_name == step_name_being_combined: + assert combiner_input_variable + input_source_list: list[str] = [] + for replica in range(1, num_step_recplicas_being_combined + 1): + prior_step, _ = ( + self._wapi_adapter.get_running_workflow_step_by_name( + name=prior_step_name, + replica=replica, + running_workflow_id=rwf_id, + ) + ) + # Copy "in" value to "out"... + for connector in connections: + assert connector.in_ in prior_step["variables"] + if connector.out == combiner_input_variable: + input_source_list.append( + prior_step["variables"][connector.in_] + ) + else: + variables[connector.out] = prior_step["variables"][ + connector.in_ + ] + variables[combiner_input_variable] = input_source_list + else: + # Retrieve the prior "running" step + # in order to get the variables that were set there... + prior_step, _ = self._wapi_adapter.get_running_workflow_step_by_name( + name=prior_step_name, running_workflow_id=rwf_id + ) + # Copy "in" value to "out"... + for connector in connections: + assert connector.in_ in prior_step["variables"] + variables[connector.out] = prior_step["variables"][connector.in_] # Now ... can the command be compiled!? job: dict[str, Any] = self._get_step_job(step=step_definition) @@ -494,7 +520,8 @@ def _prepare_step( return StepPreparationResponse(iterations=0) # Do we replicate this step (run it more than once)? - # We do if a variable in this step's plumbing + # + # We do if this is not a combiner step and a variable in this step's plumbing # refers to an output of a prior step whose type is 'files'. # If the prior step is a 'splitter' we populate the 'replication_values' array # with the list of files the prior step genrated for its output. @@ -503,36 +530,39 @@ def _prepare_step( # be more than one prior step variable that is 'files'! iter_values: list[str] = [] iter_variable: str | None = None - for p_step_name, connections in our_plumbing.items(): - # We need to get the Job definition for each step - # and then check whether the (output) variable is of type 'files'... 
- wf_step: dict[str, Any] = get_step(wf, p_step_name) - assert wf_step - job_definition: dict[str, Any] = self._get_step_job(step=wf_step) - jd_outputs: dict[str, Any] = job_defintion_decoder.get_outputs( - job_definition - ) - for connector in connections: - if jd_outputs.get(connector.in_, {}).get("type") == "files": - iter_variable = connector.out - # Get the prior running step's output values - response, _ = self._wapi_adapter.get_running_workflow_step_by_name( - name=p_step_name, - running_workflow_id=rwf_id, - ) - rwfs_id = response["id"] - assert rwfs_id - result, _ = ( - self._wapi_adapter.get_running_workflow_step_output_values_for_output( - running_workflow_step_id=rwfs_id, - output_variable=connector.in_, + if not step_is_combiner: + for p_step_name, connections in our_plumbing.items(): + # We need to get the Job definition for each step + # and then check whether the (output) variable is of type 'files'... + wf_step: dict[str, Any] = get_step(wf, p_step_name) + assert wf_step + job_definition: dict[str, Any] = self._get_step_job(step=wf_step) + jd_outputs: dict[str, Any] = job_defintion_decoder.get_outputs( + job_definition + ) + for connector in connections: + if jd_outputs.get(connector.in_, {}).get("type") == "files": + iter_variable = connector.out + # Get the prior running step's output values + response, _ = ( + self._wapi_adapter.get_running_workflow_step_by_name( + name=p_step_name, + running_workflow_id=rwf_id, + ) ) - ) - iter_values = result["output"].copy() + rwfs_id = response["id"] + assert rwfs_id + result, _ = ( + self._wapi_adapter.get_running_workflow_step_output_values_for_output( + running_workflow_step_id=rwfs_id, + output_variable=connector.in_, + ) + ) + iter_values = result["output"].copy() + break + # Stop if we've got an iteration variable + if iter_variable: break - # Stop if we've got an iteration variable - if iter_variable: - break num_step_instances: int = max(1, len(iter_values)) return StepPreparationResponse( From 94fd202c2e2a9850b37b25d59b3fe8cee7167d62 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Wed, 3 Sep 2025 14:04:13 +0000 Subject: [PATCH 54/57] docs: Doc tweak --- workflow/workflow_engine.py | 69 ++++++++++++++++++++++++------------- 1 file changed, 45 insertions(+), 24 deletions(-) diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index 37aecfe..f24a16f 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -65,6 +65,7 @@ class StepPreparationResponse: variables: dict[str, Any] | None = None iteration_variable: str | None = None iteration_values: list[str] | None = None + error_num: int = 0 error_msg: str | None = None @@ -306,12 +307,20 @@ def _handle_pod_message(self, msg: PodMessage) -> None: sp_resp = self._prepare_step( wf=wf_response, step_definition=next_step, rwf=rwf_response ) - if sp_resp.iterations == 0 or sp_resp.error_msg: + if sp_resp.iterations == 0: # Cannot prepare variables for this step, - # we have to leave. + # it might be a combiner step and some prior steps may still + # be running ... or something's gone wrong. 
+ if sp_resp.error_num: + self._wapi_adapter.set_running_workflow_done( + running_workflow_id=r_wfid, + success=False, + error_num=sp_resp.error_num, + error_msg=sp_resp.error_msg, + ) return - assert sp_resp.variables is not None + assert sp_resp.variables is not None self._launch( rwf=rwf_response, step_definition=next_step, @@ -359,17 +368,19 @@ def _get_step_job(self, *, step: dict[str, Any]) -> dict[str, Any]: def _prepare_step( self, *, - wf: dict[str, Any], step_definition: dict[str, Any], + wf: dict[str, Any], rwf: dict[str, Any], ) -> StepPreparationResponse: """Attempts to prepare a map of step variables. If variables cannot be - presented to the step we return an object with 'iterations' set to zero.""" + presented to the step we return an object with 'iterations' set to zero. + If there's a problem that means we should be able to proceed but cannot, + we set 'error_num' and 'error_msg'.""" step_name: str = step_definition["name"] rwf_id: str = rwf["id"] - # Before we move on, are we combiner? + # Before we move on, are we a combiner? # # We are if a variable in our step's plumbing refers to an input of ours # that is of type 'files'. If we are a combiner then we use the name of the @@ -397,17 +408,24 @@ def _prepare_step( break if step_name_being_combined: break - if step_name_being_combined: + + if step_is_combiner: + assert step_name_being_combined + assert combiner_input_variable + + # Are all the step instances we're combining done? + response, _ = self._wapi_adapter.get_status_of_all_step_instances_by_name( name=step_name_being_combined, running_workflow_id=rwf_id, ) - # Assume succes... assert "count" in response num_step_recplicas_being_combined = response["count"] assert num_step_recplicas_being_combined > 0 assert "status" in response + # Assume they're all done + # and undo our assumption if not... all_step_instances_done: bool = True all_step_instances_successful: bool = True for status in response["status"]: @@ -418,7 +436,7 @@ def _prepare_step( all_step_instances_successful = False break if not all_step_instances_done: - # Can't move on - but other steps need to finish. + # Can't move on - other steps need to finish. _LOGGER.debug( "Assessing start of combiner step (%s)" " but not all steps (%s) to be combined are done", @@ -428,8 +446,8 @@ def _prepare_step( return StepPreparationResponse(iterations=0) elif not all_step_instances_successful: # Can't move on - all prior steps are done, - # but at least one was in error. - _LOGGER.debug( + # but at least one was not successful. + _LOGGER.warning( "Assessing start of combiner step (%s)" " but at least one step (%s) to be combined failed", step_name, @@ -437,6 +455,7 @@ def _prepare_step( ) return StepPreparationResponse( iterations=0, + error_num=1, error_msg=f"Prior instance of step '{step_name_being_combined}' has failed", ) @@ -448,11 +467,11 @@ def _prepare_step( variables: dict[str, Any] = step_definition["specification"].get( "variables", {} ) - - # All the running workflow variables + # ...and the running workflow variables rwf_variables: dict[str, Any] = rwf.get("variables", {}) - # Process the step's plumbing realting to workflow variables. + # Process the step's "plumbing" relating to workflow variables. + # # This will be a list of Connectors of "in" and "out" variable names. # "in" variables are worklfow variables, and "out" variables # are expected Job variables. 
We use this to add variables @@ -463,13 +482,12 @@ def _prepare_step( assert connector.in_ in rwf_variables variables[connector.out] = rwf_variables[connector.in_] - # Now we apply variables from the "plumbing" block - # related to values used in prior steps. The decoder gives - # us a map indexed by prior step name that's a list of "in" "out" - # tuples as above. + # Now process variables (from the "plumbing" block) + # that relate to values used in prior steps. # - # If this is a combiner step remember that we need to inspect - # variables from all the prior steps. + # The decoder gives us a map indexed by prior step name that's a list of + # "in" "out" connectors as above. If this is a combiner step remember + # that we need to inspect variables from all the prior steps. prior_step_plumbing: dict[str, list[Connector]] = get_step_prior_step_plumbing( step_definition=step_definition ) @@ -486,6 +504,8 @@ def _prepare_step( ) ) # Copy "in" value to "out"... + # accumulating thiose for the 'combining' variable, + # which will be set as a list when we're done. for connector in connections: assert connector.in_ in prior_step["variables"] if connector.out == combiner_input_variable: @@ -508,16 +528,17 @@ def _prepare_step( assert connector.in_ in prior_step["variables"] variables[connector.out] = prior_step["variables"][connector.in_] - # Now ... can the command be compiled!? + # All variables are set ... + # is this enough to satisfy the setp's Job command? + job: dict[str, Any] = self._get_step_job(step=step_definition) message, success = job_defintion_decoder.decode( job["command"], variables, "command", TextEncoding.JINJA2_3_0 ) if not success: - msg = f"Failed command validation error_msg={message}" + msg = f"Failed command validation for step {step_name} error_msg={message}" _LOGGER.warning(msg) - self._set_step_error(step_name, rwf_id, None, 1, msg) - return StepPreparationResponse(iterations=0) + return StepPreparationResponse(iterations=0, error_num=2, error_msg=msg) # Do we replicate this step (run it more than once)? # From 8dd930852939bdc7686a3931d2f543fa1d33ec8b Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Wed, 3 Sep 2025 14:26:45 +0000 Subject: [PATCH 55/57] fix: replica always starts at 0 --- tests/wapi_adapter.py | 6 ++-- workflow/workflow_abc.py | 6 ++-- workflow/workflow_engine.py | 68 +++++++++++++++++++------------------ 3 files changed, 41 insertions(+), 39 deletions(-) diff --git a/tests/wapi_adapter.py b/tests/wapi_adapter.py index 71e326b..4c42850 100644 --- a/tests/wapi_adapter.py +++ b/tests/wapi_adapter.py @@ -160,11 +160,11 @@ def create_running_workflow_step( running_workflow_id: str, step: str, replica: int = 0, - replicas: int = 0, + replicas: int = 1, prior_running_workflow_step_id: str | None = None, ) -> tuple[dict[str, Any], int]: - if replica: - assert replica > 0 + assert replica >= 0 + assert replicas > replica UnitTestWorkflowAPIAdapter.lock.acquire() with open(_RUNNING_WORKFLOW_STEP_PICKLE_FILE, "rb") as pickle_file: diff --git a/workflow/workflow_abc.py b/workflow/workflow_abc.py index 2648772..ae88898 100644 --- a/workflow/workflow_abc.py +++ b/workflow/workflow_abc.py @@ -45,9 +45,9 @@ class LaunchParameters: # 1..'N'. step_replication_number: int = 0 # The total number of replicas of this instance that are expected to be laucnhed. - # if step_replication_number is set, this has to be set. It is 'N'. - # If step_replication_number is zero this value is ignored. 
-    total_number_of_replicas: int = 0
+    # This cannot be less than 1 and must be greater than any value of
+    # 'step_replication_number' that will be used for the same step.
+    total_number_of_replicas: int = 1
     # The application ID (a custom resource name)
     # used to identify the 'type' of Instance to create.
     # For DM Jobs this will be 'datamanagerjobs.squonk.it'
diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py
index f24a16f..b173bf4 100644
--- a/workflow/workflow_engine.py
+++ b/workflow/workflow_engine.py
@@ -54,17 +54,17 @@

 @dataclass
 class StepPreparationResponse:
-    """Step preparation response object. Iterations is +ve (non-zero) if a step
+    """Step preparation response object. 'replicas' is +ve (non-zero) if a step
     can be launched - its value indicates how many times. If a step can be
     launched 'variables' will not be None. If a parallel set of steps can take place
-    (even just one) 'iteration_variable' will be set and 'iteration_values'
-    will be a list containing a value for each step. If prparation failed
-    'error_msg' chould contain something useful."""
+    (even just one) 'replica_variable' will be set and 'replica_values'
+    will be a list containing a value for each step instance. If preparation fails
+    'error_num' will be set, and 'error_msg' should contain something useful."""

-    iterations: int
+    replicas: int
     variables: dict[str, Any] | None = None
-    iteration_variable: str | None = None
-    iteration_values: list[str] | None = None
+    replica_variable: str | None = None
+    replica_values: list[str] | None = None
     error_num: int = 0
     error_msg: str | None = None

@@ -307,9 +307,10 @@ def _handle_pod_message(self, msg: PodMessage) -> None:
             sp_resp = self._prepare_step(
                 wf=wf_response, step_definition=next_step, rwf=rwf_response
             )
-            if sp_resp.iterations == 0:
+            if sp_resp.replicas == 0:
                 # Cannot prepare variables for this step,
-                # it might be a combiner step and some prior steps may still
+                # it might be a step dependent on more than one prior step
+                # (like a 'combiner') and some prior steps may still
                 # be running ... or something's gone wrong.
                 if sp_resp.error_num:
@@ -443,7 +444,7 @@ def _prepare_step(
                     step_name,
                     step_name_being_combined,
                 )
-                return StepPreparationResponse(iterations=0)
+                return StepPreparationResponse(replicas=0)
             elif not all_step_instances_successful:
                 # Can't move on - all prior steps are done,
                 # but at least one was not successful.
@@ -457,7 +458,7 @@ def _prepare_step(
                     step_name_being_combined,
                 )
                 return StepPreparationResponse(
-                    iterations=0,
+                    replicas=0,
                     error_num=1,
                     error_msg=f"Prior instance of step '{step_name_being_combined}' has failed",
                 )
@@ -482,12 +483,12 @@ def _prepare_step(
             assert connector.in_ in rwf_variables
             variables[connector.out] = rwf_variables[connector.in_]

-        # Now process variables (from the "plumbing" block)
+        # Now process variables (in the "plumbing" block)
         # that relate to values used in prior steps.
         #
         # The decoder gives us a map indexed by prior step name that's a list of
-        # "in" "out" connectors as above. If this is a combiner step remember
-        # that we need to inspect variables from all the prior steps.
+        # "in"/"out" connectors as above. If this is a combiner step remember
+        # that the combiner_input_variable is used as a list.
prior_step_plumbing: dict[str, list[Connector]] = get_step_prior_step_plumbing( step_definition=step_definition ) @@ -495,7 +496,7 @@ def _prepare_step( if step_is_combiner and prior_step_name == step_name_being_combined: assert combiner_input_variable input_source_list: list[str] = [] - for replica in range(1, num_step_recplicas_being_combined + 1): + for replica in range(num_step_recplicas_being_combined): prior_step, _ = ( self._wapi_adapter.get_running_workflow_step_by_name( name=prior_step_name, @@ -538,7 +539,7 @@ def _prepare_step( if not success: msg = f"Failed command validation for step {step_name} error_msg={message}" _LOGGER.warning(msg) - return StepPreparationResponse(iterations=0, error_num=2, error_msg=msg) + return StepPreparationResponse(replicas=0, error_num=2, error_msg=msg) # Do we replicate this step (run it more than once)? # @@ -588,9 +589,9 @@ def _prepare_step( num_step_instances: int = max(1, len(iter_values)) return StepPreparationResponse( variables=variables, - iterations=num_step_instances, - iteration_variable=iter_variable, - iteration_values=iter_values, + replicas=num_step_instances, + replica_variable=iter_variable, + replica_values=iter_values, ) def _launch( @@ -607,30 +608,31 @@ def _launch( rwf_id: str = rwf["id"] project_id = rwf["project"]["id"] - # A step replication number, - # used only for steps expected to run in parallel (even if just once) - step_replication_number: int = 0 - total_replicas: int = step_preparation_response.iterations + # Total replicas must be 1 or more + total_replicas: int = step_preparation_response.replicas + assert total_replicas >= 1 + variables = step_preparation_response.variables assert variables is not None - for iteration in range(step_preparation_response.iterations): + for replica in range(step_preparation_response.replicas): - # If we are replicating this step then we must replace the step's variable + # If we are replicating this step more than once + # the 'replica_variable' will be set. + # We must replace the step's variable # with a value expected for this iteration. - if step_preparation_response.iteration_variable: - assert step_preparation_response.iteration_values - iter_value: str = step_preparation_response.iteration_values[iteration] + if step_preparation_response.replica_variable: + assert step_preparation_response.replica_values + iter_value: str = step_preparation_response.replica_values[replica] _LOGGER.info( - "Replicating step: %s iteration=%s variable=%s value=%s", + "Replicating step: %s replica=%s variable=%s value=%s", step_name, - iteration, - step_preparation_response.iteration_variable, + replica, + step_preparation_response.replica_variable, iter_value, ) # Over-write the replicating variable # and set the replication number to a unique +ve non-zero value... 
- variables[step_preparation_response.iteration_variable] = iter_value - step_replication_number = iteration + 1 + variables[step_preparation_response.replica_variable] = iter_value _LOGGER.info( "Launching step: %s RunningWorkflow=%s (name=%s)" @@ -652,7 +654,7 @@ def _launch( variables=variables, running_workflow_id=rwf_id, step_name=step_name, - step_replication_number=step_replication_number, + step_replication_number=replica, total_number_of_replicas=total_replicas, ) lr: LaunchResult = self._instance_launcher.launch(launch_parameters=lp) From 179135aa4d66820ba2c80aa9c528086adf5c7cae Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Wed, 3 Sep 2025 14:32:34 +0000 Subject: [PATCH 56/57] docs: Doc tweak --- workflow/workflow_engine.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index b173bf4..6897c19 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -510,15 +510,24 @@ def _prepare_step( for connector in connections: assert connector.in_ in prior_step["variables"] if connector.out == combiner_input_variable: + # Each instance may have a different value input_source_list.append( prior_step["variables"][connector.in_] ) - else: + elif replica == 0: + # Only the first instance value are of interest, + # the rest wil be the same - only one variable + # is a list of different values. variables[connector.out] = prior_step["variables"][ connector.in_ ] + # Now we have accumulated the prior steps values (files) + # set the combiner's corresponding input variable... variables[combiner_input_variable] = input_source_list else: + # Not a preior step for a combiner, + # or not a step being combined in a combiner. + # # Retrieve the prior "running" step # in order to get the variables that were set there... prior_step, _ = self._wapi_adapter.get_running_workflow_step_by_name( From cdddc35d39d517ebf520d2f43a93ccbde6f772d7 Mon Sep 17 00:00:00 2001 From: Alan Christie Date: Thu, 4 Sep 2025 11:56:44 +0000 Subject: [PATCH 57/57] feat: Add from-link-prefix variables --- tests/job-definitions/job-definitions.yaml | 8 +- tests/jobs/concatenate.py | 11 ++- .../simple-python-split-combine.yaml | 8 +- workflow/decoder.py | 13 ++- workflow/workflow-schema.yaml | 24 ++++++ workflow/workflow_engine.py | 84 ++++++++----------- 6 files changed, 90 insertions(+), 58 deletions(-) diff --git a/tests/job-definitions/job-definitions.yaml b/tests/job-definitions/job-definitions.yaml index 03a3b69..749c68e 100644 --- a/tests/job-definitions/job-definitions.yaml +++ b/tests/job-definitions/job-definitions.yaml @@ -131,13 +131,19 @@ jobs: concatenate: command: >- - concatenate.py {% for ifile in inputFile %}{{ ifile }} {% endfor %} --outputFile {{ outputFile }} + concatenate.py --inputFile {{ inputFile }} --outputFile {{ outputFile }} # Simulate a multiple input files Job (combiner)... 
variables:
      inputs:
        properties:
          inputFile:
            type: files
+    options:
+      type: object
+      properties:
+        inputDirPrefix:
+          title: Optional input directory prefix
+          type: string
     outputs:
       properties:
         outputBase:
diff --git a/tests/jobs/concatenate.py b/tests/jobs/concatenate.py
index 2f6b22b..3fb6834 100644
--- a/tests/jobs/concatenate.py
+++ b/tests/jobs/concatenate.py
@@ -2,13 +2,16 @@

 parser = argparse.ArgumentParser(
     prog="addcol",
-    description="Takes a list of files and writes them into single outputfile",
+    description="Takes an optional directory prefix and a file,"
+    " and combines all the input files that are found"
+    " into a single output file",
 )
-parser.add_argument("inputFile", nargs="+", type=argparse.FileType("r"))
+parser.add_argument("--inputDirPrefix")
+parser.add_argument("--inputFile", required=True)
 parser.add_argument("-o", "--outputFile", required=True)
 args = parser.parse_args()

 with open(args.outputFile, "wt", encoding="utf8") as ofile:
-    for f in args.inputFile:
-        ofile.write(f.read())
+    with open(args.inputFile, "rt", encoding="utf8") as ifile:
+        ofile.write(ifile.read())
diff --git a/tests/workflow-definitions/simple-python-split-combine.yaml b/tests/workflow-definitions/simple-python-split-combine.yaml
index 2dcc68e..bcbea06 100644
--- a/tests/workflow-definitions/simple-python-split-combine.yaml
+++ b/tests/workflow-definitions/simple-python-split-combine.yaml
@@ -46,12 +46,14 @@
     variables:
       outputFile: results.smi
     plumbing:
+    - variable: outputFile
+      from-workflow:
+        variable: combination
     - variable: inputFile
       from-step:
         name: parallel
         variable: outputFile
-    - variable: outputFile
-      from-workflow:
-        variable: combination
+    - variable: inputDirPrefix
+      from-link-prefix:
     - variable: outputFile
       to-project:
diff --git a/workflow/decoder.py b/workflow/decoder.py
index b41552e..aac874b 100644
--- a/workflow/decoder.py
+++ b/workflow/decoder.py
@@ -144,7 +144,7 @@ def get_step_workflow_variable_connections(
     return connections


-def get_step_prior_step_plumbing(
+def get_step_prior_step_connections(
     *, step_definition: dict[str, Any]
 ) -> dict[str, list[Connector]]:
     """Returns list of variable Connections, indexed by prior step name,
@@ -166,3 +166,14 @@
             Connector(in_=step_variable, out=v_map["variable"])
         ]
     return plumbing
+
+
+def get_step_link_prefix_variables(*, step_definition: dict[str, Any]) -> set[str]:
+    """Returns the set of variables expected to be set to the value
+    of the instance directory prefix."""
+    variables: set[str] = set()
+    if "plumbing" in step_definition:
+        for v_map in step_definition["plumbing"]:
+            if "from-link-prefix" in v_map:
+                variables.add(v_map["variable"])
+    return variables
diff --git a/workflow/workflow-schema.yaml b/workflow/workflow-schema.yaml
index 2266fca..ba5343c 100644
--- a/workflow/workflow-schema.yaml
+++ b/workflow/workflow-schema.yaml
@@ -102,6 +102,29 @@ definitions:
       - variable
       - from-workflow

+  # A Step variable
+  # (whose value is set to the directory prefix used when the DM
+  # links the instance directories of prior step instances into this
+  # step's instance directory)
+  #
+  # This _must_ be treated by the step's job as a directory prefix,
+  # typically '.instance-', that can be used to identify directories in this step's
+  # execution directory where the execution directories of prior steps
+  # are hard-linked by the DM. A job can find all the prior step directory names
+  # using the selected variable (e.g. inspect any directory name
+  # that starts with "{variable}").
+  step-variable-from-link-prefix:
+    type: object
+    additionalProperties: false
+    properties:
+      variable:
+        $ref: '#/definitions/variable-name'
+      from-link-prefix:
+        type: 'null'
+    required:
+    - variable
+    - from-link-prefix
+
   # A Step variable
   # (whose value (a file) is to be copied to the project directory)
   step-variable-to-project:
@@ -204,6 +227,7 @@ definitions:
         anyOf:
           - $ref: "#/definitions/step-variable-from-step"
           - $ref: "#/definitions/step-variable-from-workflow"
+          - $ref: "#/definitions/step-variable-from-link-prefix"
           - $ref: "#/definitions/step-variable-to-project"
         minItems: 1
       required:
diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py
index 6897c19..997abe0 100644
--- a/workflow/workflow_engine.py
+++ b/workflow/workflow_engine.py
@@ -43,7 +43,8 @@
 from .decoder import (
     Connector,
     get_step,
-    get_step_prior_step_plumbing,
+    get_step_link_prefix_variables,
+    get_step_prior_step_connections,
     get_step_workflow_variable_connections,
 )
@@ -77,10 +78,15 @@ def __init__(
         self,
         *,
         wapi_adapter: WorkflowAPIAdapter,
         instance_launcher: InstanceLauncher,
+        step_link_prefix: str = ".instance-",
     ):
+        """Initialiser, given a Workflow API adapter, Instance launcher,
+        and a step (directory) link prefix (the directory prefix the DM uses to hard-link
+        prior step instances into the next step, typically '.instance-')"""
         # Keep the dependent objects
         self._wapi_adapter = wapi_adapter
         self._instance_launcher = instance_launcher
+        self._step_link_prefix = step_link_prefix

     def handle_message(self, msg: Message) -> None:
         """Expect Workflow and Pod messages.
@@ -393,7 +399,7 @@ def _prepare_step(
         our_inputs: dict[str, Any] = job_defintion_decoder.get_inputs(
             our_job_definition
         )
-        our_plumbing: dict[str, list[Connector]] = get_step_prior_step_plumbing(
+        our_plumbing: dict[str, list[Connector]] = get_step_prior_step_connections(
             step_definition=step_definition
         )
@@ -489,45 +495,36 @@ def _prepare_step(
             assert connector.in_ in rwf_variables
             variables[connector.out] = rwf_variables[connector.in_]

+        # Process the step's "plumbing" relating to link-prefix variables.
+        #
+        # This will be a set of variable names. We just set each one
+        # to the built-in step link prefix.
+        for link_variable in get_step_link_prefix_variables(
+            step_definition=step_definition
+        ):
+            variables[link_variable] = self._step_link_prefix
+
         # Now process variables (in the "plumbing" block)
         # that relate to values used in prior steps.
         #
         # The decoder gives us a map indexed by prior step name that's a list of
         # "in"/"out" connectors as above. If this is a combiner step remember
         # that the combiner_input_variable is used as a list.
-        prior_step_plumbing: dict[str, list[Connector]] = get_step_prior_step_plumbing(
-            step_definition=step_definition
+        prior_step_plumbing: dict[str, list[Connector]] = (
+            get_step_prior_step_connections(step_definition=step_definition)
         )
         for prior_step_name, connections in prior_step_plumbing.items():
-            if step_is_combiner and prior_step_name == step_name_being_combined:
-                assert combiner_input_variable
-                input_source_list: list[str] = []
-                for replica in range(num_step_recplicas_being_combined):
-                    prior_step, _ = (
-                        self._wapi_adapter.get_running_workflow_step_by_name(
-                            name=prior_step_name,
-                            replica=replica,
-                            running_workflow_id=rwf_id,
-                        )
-                    )
-                    # Copy "in" value to "out"...
-                    # accumulating thiose for the 'combining' variable,
-                    # which will be set as a list when we're done.
-                    for connector in connections:
-                        assert connector.in_ in prior_step["variables"]
-                        if connector.out == combiner_input_variable:
-                            # Each instance may have a different value
-                            input_source_list.append(
-                                prior_step["variables"][connector.in_]
-                            )
-                        elif replica == 0:
-                            # Only the first instance value are of interest,
-                            # the rest wil be the same - only one variable
-                            # is a list of different values.
-                            variables[connector.out] = prior_step["variables"][
-                                connector.in_
-                            ]
-                # Now we have accumulated the prior steps values (files)
-                # set the combiner's corresponding input variable...
-                variables[combiner_input_variable] = input_source_list
-            else:
-                # Not a preior step for a combiner,
-                # or not a step being combined in a combiner.
-                #
-                # Retrieve the prior "running" step
-                # in order to get the variables that were set there...
-                prior_step, _ = self._wapi_adapter.get_running_workflow_step_by_name(
-                    name=prior_step_name, running_workflow_id=rwf_id
-                )
-                # Copy "in" value to "out"...
-                for connector in connections:
-                    assert connector.in_ in prior_step["variables"]
-                    variables[connector.out] = prior_step["variables"][connector.in_]
+            # Retrieve the first prior "running" step in order to get the variables
+            # that were used for it.
+            #
+            # For a combiner step we only need to inspect the first instance of
+            # the prior step (the default replica value is '0').
+            # We assume all the combiner's prior (parallel) instances
+            # have the same variables and values.
+            prior_step, _ = self._wapi_adapter.get_running_workflow_step_by_name(
+                name=prior_step_name,
+                running_workflow_id=rwf_id,
+            )
+            # Copy "in" value to "out"...
+            for connector in connections:
+                assert connector.in_ in prior_step["variables"]
+                variables[connector.out] = prior_step["variables"][connector.in_]

         # All variables are set ...
         # is this enough to satisfy the step's Job command?
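
Two short Python illustrations of the 'from-link-prefix' feature introduced by the final patch. Both are sketches added for clarity and are not part of the patch series. The first shows what the new decoder helper returns for a hypothetical step definition (the step and variable names are invented for the example):

    from workflow.decoder import get_step_link_prefix_variables

    # A hypothetical step definition that uses the new 'from-link-prefix' plumbing.
    step = {
        "name": "combine",
        "plumbing": [
            {"variable": "inputDirPrefix", "from-link-prefix": None},
            {"variable": "outputFile", "to-project": None},
        ],
    }
    # Only 'from-link-prefix' entries contribute to the returned set...
    assert get_step_link_prefix_variables(step_definition=step) == {"inputDirPrefix"}

The second is a minimal sketch of how a combiner job might consume the prefix at run-time. It assumes, as the schema comment describes, that the DM hard-links each prior step instance directory into this step's execution directory under a name that starts with the prefix (typically '.instance-'), and that each linked directory contains the per-instance file named by --inputFile:

    import argparse
    import glob
    import os

    parser = argparse.ArgumentParser(
        description="Combine per-instance outputs found in linked instance directories"
    )
    # The engine is expected to set this from the step's 'from-link-prefix' plumbing.
    parser.add_argument("--inputDirPrefix", default=".instance-")
    parser.add_argument("--inputFile", required=True)
    parser.add_argument("-o", "--outputFile", required=True)
    args = parser.parse_args()

    with open(args.outputFile, "wt", encoding="utf8") as ofile:
        # Inspect every directory whose name starts with the link prefix
        # and concatenate the expected file from each one...
        for instance_dir in sorted(glob.glob(f"{args.inputDirPrefix}*")):
            candidate = os.path.join(instance_dir, args.inputFile)
            if os.path.isfile(candidate):
                with open(candidate, "rt", encoding="utf8") as ifile:
                    ofile.write(ifile.read())

The simplified concatenate.py test job deliberately reads only the single --inputFile value; the sketch above is one way the inputDirPrefix option could be exercised once the DM's hard-linking behaviour is in place.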