diff --git a/tests/test_decoder.py b/tests/test_decoder.py index 8ec0c46..f808fc5 100644 --- a/tests/test_decoder.py +++ b/tests/test_decoder.py @@ -43,6 +43,19 @@ ) assert _DUPLICATE_WORKFLOW_VARIABLE_NAMES_WORKFLOW +_STEP_SPECIFICATION_VARIABLE_NAMES_WORKFLOW_FILE: str = os.path.join( + os.path.dirname(__file__), + "workflow-definitions", + "step-specification-variable-names.yaml", +) +with open( + _STEP_SPECIFICATION_VARIABLE_NAMES_WORKFLOW_FILE, "r", encoding="utf8" +) as workflow_file: + _STEP_SPECIFICATION_VARIABLE_NAMES_WORKFLOW: Dict[str, Any] = yaml.safe_load( + workflow_file + ) +assert _STEP_SPECIFICATION_VARIABLE_NAMES_WORKFLOW + def test_validate_schema_for_minimal(): # Arrange @@ -110,6 +123,16 @@ def test_validate_schema_for_python_simple_molprops(): assert error is None +def test_validate_schema_for_step_specification_variable_names(): + # Arrange + + # Act + error = decoder.validate_schema(_STEP_SPECIFICATION_VARIABLE_NAMES_WORKFLOW) + + # Assert + assert error is None + + def test_get_workflow_variables_for_smiple_python_molprops(): # Arrange diff --git a/tests/workflow-definitions/duplicate-step-names.yaml b/tests/workflow-definitions/duplicate-step-names.yaml index 426c6fc..f89d610 100644 --- a/tests/workflow-definitions/duplicate-step-names.yaml +++ b/tests/workflow-definitions/duplicate-step-names.yaml @@ -4,8 +4,12 @@ kind-version: "2024.1" name: duplicate-step-names steps: - name: step-1 - specification: >- - {} + specification: + collection: a + job: b + version: '1.0.0' - name: step-1 - specification: >- - {} + specification: + collection: a + job: b + version: '2.0.0' diff --git a/tests/workflow-definitions/duplicate-workflow-variable-names.yaml b/tests/workflow-definitions/duplicate-workflow-variable-names.yaml index bfca24a..7649404 100644 --- a/tests/workflow-definitions/duplicate-workflow-variable-names.yaml +++ b/tests/workflow-definitions/duplicate-workflow-variable-names.yaml @@ -17,16 +17,13 @@ variables: steps: - name: step1 
description: Add column 1 - specification: >- - { - "collection": "workflow-engine-unit-test-jobs", - "job": "rdkit-molprops", - "version": "1.0.0", - "variables": { - "name": "col1", - "value": 123 - } - } + specification: + collection: workflow-engine-unit-test-jobs + job: rdkit-molprops + version: "1.0.0" + variables: + name: "col1" + value: 123 inputs: - input: inputFile from: @@ -37,16 +34,13 @@ steps: - name: step2 description: Add column 2 - specification: >- - { - "collection": "workflow-engine-unit-test-jobs", - "job": "cluster-butina", - "version":"1.0.0", - "variables": { - "name":"col2", - "value":"999" - } - } + specification: + collection: workflow-engine-unit-test-jobs + job: cluster-butina + version: "1.0.0" + variables: + name: "col2" + value: "999" inputs: - input: inputFile from: diff --git a/tests/workflow-definitions/example-nop-fail.yaml b/tests/workflow-definitions/example-nop-fail.yaml index b3bd36f..f265988 100644 --- a/tests/workflow-definitions/example-nop-fail.yaml +++ b/tests/workflow-definitions/example-nop-fail.yaml @@ -6,9 +6,7 @@ description: >- A workflow with one step that fails steps: - name: step-1 - specification: >- - { - "collection": "workflow-engine-unit-test-jobs", - "job": "nop-fail", - "version": "1.0.0" - } + specification: + collection: workflow-engine-unit-test-jobs + job: nop-fail + version: "1.0.0" diff --git a/tests/workflow-definitions/example-smiles-to-file.yaml b/tests/workflow-definitions/example-smiles-to-file.yaml index fea20c5..e543050 100644 --- a/tests/workflow-definitions/example-smiles-to-file.yaml +++ b/tests/workflow-definitions/example-smiles-to-file.yaml @@ -7,9 +7,7 @@ description: >- The step takes an input string and the Job creates a file from it. 
steps: - name: step-1 - specification: >- - { - "collection": "workflow-engine-unit-test-jobs", - "job": "smiles-to-file", - "version": "1.0.0" - } + specification: + collection: workflow-engine-unit-test-jobs + job: smiles-to-file + version: "1.0.0" diff --git a/tests/workflow-definitions/example-two-step-nop.yaml b/tests/workflow-definitions/example-two-step-nop.yaml index 1462e34..df64b9a 100644 --- a/tests/workflow-definitions/example-two-step-nop.yaml +++ b/tests/workflow-definitions/example-two-step-nop.yaml @@ -7,16 +7,12 @@ description: >- The steps do nothing, take no arguments, and simply return success. steps: - name: step-1 - specification: >- - { - "collection": "workflow-engine-unit-test-jobs", - "job": "nop", - "version": "1.0.0" - } + specification: + collection: workflow-engine-unit-test-jobs + job: nop + version: "1.0.0" - name: step-2 - specification: >- - { - "collection": "workflow-engine-unit-test-jobs", - "job": "nop", - "version": "1.0.0" - } + specification: + collection: workflow-engine-unit-test-jobs + job: nop + version: "1.0.0" diff --git a/tests/workflow-definitions/minimal.yaml b/tests/workflow-definitions/minimal.yaml index e8dcc53..d2dd2cb 100644 --- a/tests/workflow-definitions/minimal.yaml +++ b/tests/workflow-definitions/minimal.yaml @@ -4,5 +4,7 @@ kind-version: "2024.1" name: workflow-minimal steps: - name: step-1 - specification: >- - {} + specification: + collection: a + job: b + version: "1.0.0" diff --git a/tests/workflow-definitions/shortcut-example-1.yaml b/tests/workflow-definitions/shortcut-example-1.yaml index 390ac58..a9446e7 100644 --- a/tests/workflow-definitions/shortcut-example-1.yaml +++ b/tests/workflow-definitions/shortcut-example-1.yaml @@ -6,23 +6,19 @@ description: The shortcut example 1 workflow steps: - name: example-1-step-1 description: The first step - specification: >- - { - "collection": "workflow-engine-unit-test-jobs", - "job": "shortcut-example-1-process-a", - "version": "1.0.0" - } + 
specification: + collection: workflow-engine-unit-test-jobs + job: shortcut-example-1-process-a + version: "1.0.0" outputs: - output: 'outputFile' as: 'a.sdf' - name: example-1-step-2 description: The first step - specification: >- - { - "collection": "workflow-engine-unit-test-jobs", - "job": "shortcut-example-1-process-b", - "version": "1.0.0" - } + specification: + collection: workflow-engine-unit-test-jobs + job: shortcut-example-1-process-b + version: "1.0.0" inputs: - input: 'inputFile' from: diff --git a/tests/workflow-definitions/simple-python-molprops.yaml b/tests/workflow-definitions/simple-python-molprops.yaml index 049a010..aeeeafe 100644 --- a/tests/workflow-definitions/simple-python-molprops.yaml +++ b/tests/workflow-definitions/simple-python-molprops.yaml @@ -17,16 +17,13 @@ variables: steps: - name: step1 description: Add column 1 - specification: >- - { - "collection": "workflow-engine-unit-test-jobs", - "job": "rdkit-molprops", - "version": "1.0.0", - "variables": { - "name": "col1", - "value": 123 - } - } + specification: + collection: workflow-engine-unit-test-jobs + job: rdkit-molprops + version: "1.0.0" + variables: + name: "col1" + value: 123 inputs: - input: inputFile from: @@ -37,16 +34,13 @@ steps: - name: step2 description: Add column 2 - specification: >- - { - "collection": "workflow-engine-unit-test-jobs", - "job": "cluster-butina", - "version":"1.0.0", - "variables": { - "name":"col2", - "value":"999" - } - } + specification: + collection: workflow-engine-unit-test-jobs + job: cluster-butina + version: "1.0.0" + variables: + name: "col2" + value: "999" inputs: - input: inputFile from: diff --git a/tests/workflow-definitions/step-specification-variable-names.yaml b/tests/workflow-definitions/step-specification-variable-names.yaml new file mode 100644 index 0000000..0a9bbca --- /dev/null +++ b/tests/workflow-definitions/step-specification-variable-names.yaml @@ -0,0 +1,16 @@ +--- +kind: DataManagerWorkflow +kind-version: "2024.1" +name: 
step-variables +description: Test a lot of variables whose format is supported +steps: +- name: step-1 + specification: + collection: a + job: b + version: '1.0.0' + variables: + _a: 1 + A-1: 2.0 + a_14_: A string + A-Long_boolean_variable_Name: true diff --git a/workflow/workflow-schema.yaml b/workflow/workflow-schema.yaml index 68a62d2..07f018f 100644 --- a/workflow/workflow-schema.yaml +++ b/workflow/workflow-schema.yaml @@ -161,6 +161,42 @@ definitions: - output - as + + # A step specification variable + # (there must be at least one if a variables block is defined). + # Typical variable syntax based on Python's definition of a variable + # but with the inclusion of 'hyphen' within the variable. + # The value of the variable (the object itself) is quite relaxed... + # it simply needs to be a string, number (integer or float), or boolean. + step-specification-variable: + type: object + additionalProperties: false + patternProperties: + '^[a-zA-Z_]{1}[a-zA-Z0-9_-]{0,79}$': + oneOf: + - type: string + - type: number + - type: boolean + minProperties: 1 + + # Step specification + step-specification: + type: object + additionalProperties: false + properties: + collection: + type: string + job: + type: string + version: + type: string + variables: + $ref: "#/definitions/step-specification-variable" + required: + - collection + - job + - version + # Steps (in a workflow) step: type: object @@ -172,8 +208,7 @@ definitions: type: string description: A description of the step specification: - type: string - description: The Data Manager Job Specification, a JSON string + $ref: '#/definitions/step-specification' inputs: type: array items: diff --git a/workflow/workflow_engine.py b/workflow/workflow_engine.py index edff2ec..8d76bdd 100644 --- a/workflow/workflow_engine.py +++ b/workflow/workflow_engine.py @@ -22,7 +22,6 @@ is executed, and it uses thew InstanceLauncher to launch the Job (a Pod) for each step. 
""" -import json import logging import sys from typing import Any, Dict, Optional @@ -295,7 +294,7 @@ def _validate_step_command( # the keys "collection", "job", and "version". Here we assume that # the workflow definition has passed the RUN-level validation # which means we can get these values. - step_spec: dict[str, Any] = json.loads(step["specification"]) + step_spec: dict[str, Any] = step["specification"] job_collection: str = step_spec["collection"] job_job: str = step_spec["job"] job_version: str = step_spec["version"] @@ -466,7 +465,7 @@ def _launch( debug=rwf.get("debug"), launching_user_name=rwf["running_user"], launching_user_api_token=rwf["running_user_api_token"], - specification=json.loads(step["specification"]), + specification=step["specification"], specification_variables=variables, running_workflow_id=rwf_id, running_workflow_step_id=rwfs_id, diff --git a/workflow/workflow_validator.py b/workflow/workflow_validator.py index 60b668a..78198b8 100644 --- a/workflow/workflow_validator.py +++ b/workflow/workflow_validator.py @@ -1,6 +1,5 @@ """The WorkflowEngine validation logic.""" -import json from dataclasses import dataclass from enum import Enum from typing import Any @@ -96,40 +95,6 @@ def _validate_tag_level( error_num=2, error_msg=[f"Duplicate step names found: {', '.join(duplicate_names)}"], ) - # Each step specification must be a valid JSON string. - # and contain properties for 'collection', 'job', and 'version'. 
- for step in workflow_definition["steps"]: - step_name = step["name"] - try: - specification = json.loads(step["specification"]) - except json.decoder.JSONDecodeError as e: - return ValidationResult( - error_num=3, - error_msg=[ - f"Got JSONDecodeError decoding Step '{step_name}' specification: {e}" - ], - ) - except TypeError as e: - return ValidationResult( - error_num=4, - error_msg=[ - f"Got ValidationResult decoding Step '{step_name}' specification: {e}" - ], - ) - expected_keys: set[str] = {"collection", "job", "version"} - missing_keys: list[str] = [] - missing_keys.extend( - expected_key - for expected_key in expected_keys - if expected_key not in specification - ) - if missing_keys: - return ValidationResult( - error_num=5, - error_msg=[ - f"Step '{step_name}' specification is missing: {', '.join(missing_keys)}" - ], - ) # Workflow variables must be unique. duplicate_names = set() variable_names: set[str] = set()