diff --git a/tests/test_decoder.py b/tests/test_decoder.py index f808fc5..7ab7576 100644 --- a/tests/test_decoder.py +++ b/tests/test_decoder.py @@ -56,6 +56,15 @@ ) assert _STEP_SPECIFICATION_VARIABLE_NAMES_WORKFLOW +_WORKFLOW_OPTIONS_WORKFLOW_FILE: str = os.path.join( + os.path.dirname(__file__), + "workflow-definitions", + "workflow-options.yaml", +) +with open(_WORKFLOW_OPTIONS_WORKFLOW_FILE, "r", encoding="utf8") as workflow_file: + _WORKFLOW_OPTIONS: Dict[str, Any] = yaml.safe_load(workflow_file) +assert _WORKFLOW_OPTIONS + def test_validate_schema_for_minimal(): # Arrange @@ -133,6 +142,16 @@ def test_validate_schema_for_step_specification_variable_names(): assert error is None +def test_validate_schema_for_workflow_options(): + # Arrange + + # Act + error = decoder.validate_schema(_WORKFLOW_OPTIONS) + + # Assert + assert error is None + + def test_get_workflow_variables_for_smiple_python_molprops(): # Arrange diff --git a/tests/test_workflow_validator_for_run_level.py b/tests/test_workflow_validator_for_run_level.py index 725994d..e5fdfb6 100644 --- a/tests/test_workflow_validator_for_run_level.py +++ b/tests/test_workflow_validator_for_run_level.py @@ -131,6 +131,88 @@ def test_validate_simple_python_molprops(): assert error.error_msg is None +def test_validate_simple_python_molprops_with_options_when_missing_required(): + # Arrange + workflow_file: str = os.path.join( + os.path.dirname(__file__), + "workflow-definitions", + "simple-python-molprops-with-options.yaml", + ) + with open(workflow_file, "r", encoding="utf8") as workflow_file: + workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) + assert workflow + variables = { + "candidateMolecules": "input.sdf", + "clusteredMolecules": "output.sdf", + } + + # Act + error = WorkflowValidator.validate( + level=ValidationLevel.RUN, + workflow_definition=workflow, + variables=variables, + ) + + # Assert + assert error.error_num == 7 + assert error.error_msg == [ + "Missing workflow 
variable values for: rdkitPropertyValue" + ] + + +def test_validate_simple_python_molprops_with_options(): + # Arrange + workflow_file: str = os.path.join( + os.path.dirname(__file__), + "workflow-definitions", + "simple-python-molprops-with-options.yaml", + ) + with open(workflow_file, "r", encoding="utf8") as workflow_file: + workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) + assert workflow + variables = { + "candidateMolecules": "input.sdf", + "clusteredMolecules": "output.sdf", + "rdkitPropertyName": "col1", + "rdkitPropertyValue": 123, + } + + # Act + error = WorkflowValidator.validate( + level=ValidationLevel.RUN, + workflow_definition=workflow, + variables=variables, + ) + + # Assert + assert error.error_num == 0 + assert error.error_msg is None + + +def test_validate_simple_python_molprops_with_missing_input(): + # Arrange + workflow_file: str = os.path.join( + os.path.dirname(__file__), "workflow-definitions", "simple-python-molprops.yaml" + ) + with open(workflow_file, "r", encoding="utf8") as workflow_file: + workflow: dict[str, Any] = yaml.load(workflow_file, Loader=yaml.FullLoader) + assert workflow + variables = {"clusteredMolecules": "output.sdf"} + + # Act + error = WorkflowValidator.validate( + level=ValidationLevel.RUN, + workflow_definition=workflow, + variables=variables, + ) + + # Assert + assert error.error_num == 7 + assert error.error_msg == [ + "Missing workflow variable values for: candidateMolecules" + ] + + def test_validate_duplicate_workflow_variable_names(): # Arrange workflow_file: str = os.path.join( diff --git a/tests/workflow-definitions/duplicate-step-names.yaml b/tests/workflow-definitions/duplicate-step-names.yaml index f89d610..78fe11a 100644 --- a/tests/workflow-definitions/duplicate-step-names.yaml +++ b/tests/workflow-definitions/duplicate-step-names.yaml @@ -2,12 +2,15 @@ kind: DataManagerWorkflow kind-version: "2024.1" name: duplicate-step-names + steps: + - name: step-1 specification: 
collection: a job: b version: '1.0.0' + - name: step-1 specification: collection: a diff --git a/tests/workflow-definitions/duplicate-workflow-variable-names.yaml b/tests/workflow-definitions/duplicate-workflow-variable-names.yaml index 7649404..3f8c6f1 100644 --- a/tests/workflow-definitions/duplicate-workflow-variable-names.yaml +++ b/tests/workflow-definitions/duplicate-workflow-variable-names.yaml @@ -15,6 +15,7 @@ variables: as: clustered-molecules.smi steps: + - name: step1 description: Add column 1 specification: diff --git a/tests/workflow-definitions/example-nop-fail.yaml b/tests/workflow-definitions/example-nop-fail.yaml index f265988..ed1a172 100644 --- a/tests/workflow-definitions/example-nop-fail.yaml +++ b/tests/workflow-definitions/example-nop-fail.yaml @@ -4,7 +4,9 @@ kind-version: "2024.1" name: nop-fail description: >- A workflow with one step that fails + steps: + - name: step-1 specification: collection: workflow-engine-unit-test-jobs diff --git a/tests/workflow-definitions/example-smiles-to-file.yaml b/tests/workflow-definitions/example-smiles-to-file.yaml index e543050..246e410 100644 --- a/tests/workflow-definitions/example-smiles-to-file.yaml +++ b/tests/workflow-definitions/example-smiles-to-file.yaml @@ -5,7 +5,9 @@ name: smiles-to-file description: >- A workflow with one step that uses variables. The step takes an input string and the Job creates a file from it. + steps: + - name: step-1 specification: collection: workflow-engine-unit-test-jobs diff --git a/tests/workflow-definitions/example-two-step-nop.yaml b/tests/workflow-definitions/example-two-step-nop.yaml index df64b9a..6fc2a7a 100644 --- a/tests/workflow-definitions/example-two-step-nop.yaml +++ b/tests/workflow-definitions/example-two-step-nop.yaml @@ -5,12 +5,15 @@ name: two-step-nop description: >- A workflow with two steps. The steps do nothing, take no arguments, and simply return success. 
+ steps: + - name: step-1 specification: collection: workflow-engine-unit-test-jobs job: nop version: "1.0.0" + - name: step-2 specification: collection: workflow-engine-unit-test-jobs diff --git a/tests/workflow-definitions/minimal.yaml b/tests/workflow-definitions/minimal.yaml index d2dd2cb..4e6045d 100644 --- a/tests/workflow-definitions/minimal.yaml +++ b/tests/workflow-definitions/minimal.yaml @@ -2,7 +2,9 @@ kind: DataManagerWorkflow kind-version: "2024.1" name: workflow-minimal + steps: + - name: step-1 specification: collection: a diff --git a/tests/workflow-definitions/shortcut-example-1.yaml b/tests/workflow-definitions/shortcut-example-1.yaml index a9446e7..494af9b 100644 --- a/tests/workflow-definitions/shortcut-example-1.yaml +++ b/tests/workflow-definitions/shortcut-example-1.yaml @@ -3,7 +3,9 @@ kind: DataManagerWorkflow kind-version: "2024.1" name: shortcut-example-1 description: The shortcut example 1 workflow + steps: + - name: example-1-step-1 description: The first step specification: @@ -13,6 +15,7 @@ steps: outputs: - output: 'outputFile' as: 'a.sdf' + - name: example-1-step-2 description: The first step specification: diff --git a/tests/workflow-definitions/simple-python-molprops-with-options.yaml b/tests/workflow-definitions/simple-python-molprops-with-options.yaml new file mode 100644 index 0000000..61f7ad8 --- /dev/null +++ b/tests/workflow-definitions/simple-python-molprops-with-options.yaml @@ -0,0 +1,59 @@ +--- +kind: DataManagerWorkflow +kind-version: "2024.1" +name: python-workflow +description: A simple python experimental workflow +variables: + inputs: + - name: candidateMolecules + type: squonk/x-smiles + outputs: + - name: clusteredMolecules + from: + step: step2 + output: outputFile + as: clustered-molecules.smi + options: + - name: rdkitPropertyName + default: name + as: + - option: name + step: step1 + - name: rdkitPropertyValue + as: + - option: value + step: step1 + +steps: + +- name: step1 + description: Add column 1 + 
specification: + collection: workflow-engine-unit-test-jobs + job: rdkit-molprops + version: "1.0.0" + inputs: + - input: inputFile + from: + workflow-input: candidateMolecules + outputs: + - output: outputFile + as: step1.out.smi + +- name: step2 + description: Add column 2 + specification: + collection: workflow-engine-unit-test-jobs + job: cluster-butina + version: "1.0.0" + variables: + name: "col2" + value: "999" + inputs: + - input: inputFile + from: + step: step1 + output: outputFile + outputs: + - output: outputFile + as: step2.out.smi diff --git a/tests/workflow-definitions/simple-python-molprops.yaml b/tests/workflow-definitions/simple-python-molprops.yaml index aeeeafe..aa761a7 100644 --- a/tests/workflow-definitions/simple-python-molprops.yaml +++ b/tests/workflow-definitions/simple-python-molprops.yaml @@ -15,6 +15,7 @@ variables: as: clustered-molecules.smi steps: + - name: step1 description: Add column 1 specification: diff --git a/tests/workflow-definitions/step-specification-variable-names.yaml b/tests/workflow-definitions/step-specification-variable-names.yaml index 0a9bbca..a7db65b 100644 --- a/tests/workflow-definitions/step-specification-variable-names.yaml +++ b/tests/workflow-definitions/step-specification-variable-names.yaml @@ -3,14 +3,16 @@ kind: DataManagerWorkflow kind-version: "2024.1" name: step-variables description: Test a lot of variables whose format is supported + steps: + - name: step-1 specification: collection: a job: b version: '1.0.0' variables: - _a: 1 - A-1: 2.0 + a: 1 + A_1: 2.0 a_14_: A string - A-Long_boolean_variable_Name: true + A_Long_boolean_variableName: true diff --git a/tests/workflow-definitions/workflow-options.yaml b/tests/workflow-definitions/workflow-options.yaml new file mode 100644 index 0000000..a0392c0 --- /dev/null +++ b/tests/workflow-definitions/workflow-options.yaml @@ -0,0 +1,54 @@ +--- +kind: DataManagerWorkflow +kind-version: "2024.1" +name: workflow-options +description: Illustrate the use of 
workflow options +variables: + options: + - name: variableWithoutDefault + as: + - option: variable1 + step: step-1 + - option: variable2 + step: step-2 + - name: variableWithIntegerDefault + default: 7 + as: + - option: variable3 + step: step-1 + - name: variableWithIntegerDefaultAndRange + default: 7 + minimum: 1 + maximum: 8 + as: + - option: variable4 + step: step-1 + - name: variableWithFloatDefault + default: 1.0 + as: + - option: variable5 + step: step-1 + - name: variableWithBooleanDefault + default: true + as: + - option: variable6 + step: step-1 + - name: variableWithStringDefault + default: Hello, World! + as: + - option: variable7 + step: step-1 + +steps: + +- name: step-1 + specification: + collection: a + job: b + version: '1.0.0' + +- name: step-2 + specification: + collection: a + job: b + version: '1.0.0' diff --git a/workflow/decoder.py b/workflow/decoder.py index 5a0fb8c..7b976ed 100644 --- a/workflow/decoder.py +++ b/workflow/decoder.py @@ -64,7 +64,8 @@ def get_description(definition: dict[str, Any]) -> str | None: def get_variable_names(definition: dict[str, Any]) -> list[str]: """Given a Workflow definition this function returns all the names of the - variables defined at the workflow level. This function DOES NOT deduplicate names, + variables defined at the workflow level. These are the 'names' for inputs, + outputs and options. 
This function DOES NOT de-duplicate names, that is the role of the validator.""" wf_variable_names: list[str] = [] variables: dict[str, Any] | None = definition.get("variables") @@ -75,6 +76,9 @@ def get_variable_names(definition: dict[str, Any]) -> list[str]: wf_variable_names.extend( output_variable["name"] for output_variable in variables.get("outputs", []) ) + wf_variable_names.extend( + option_variable["name"] for option_variable in variables.get("options", []) + ) return wf_variable_names @@ -85,8 +89,16 @@ def get_required_variable_names(definition: dict[str, Any]) -> list[str]: required_variables: list[str] = [] variables: dict[str, Any] | None = definition.get("variables") if variables: - # For now, all inputs are required... + # All inputs are required (inputs have no defaults at the moment)... required_variables.extend( input_variable["name"] for input_variable in variables.get("inputs", []) ) + # Options without defaults are required... + # It is the role of the engine to provide the actual default for those + # that have defaults but no user-defined value. 
+ required_variables.extend( + option_variable["name"] + for option_variable in variables.get("options", []) + if "default" not in option_variable + ) return required_variables diff --git a/workflow/workflow-schema.yaml b/workflow/workflow-schema.yaml index 07f018f..0481405 100644 --- a/workflow/workflow-schema.yaml +++ b/workflow/workflow-schema.yaml @@ -29,6 +29,7 @@ properties: $ref: "#/definitions/step" variables: type: object + additionalProperties: false properties: inputs: type: array @@ -38,6 +39,10 @@ properties: type: array items: $ref: "#/definitions/workflow-output-parameter" + options: + type: array + items: + $ref: "#/definitions/workflow-option-parameter" required: - kind - kind-version @@ -100,6 +105,20 @@ definitions: - name - as + # Declaration of the step option (and its step) that a workflow option is used as + as-step-option: + type: object + additionalProperties: false + properties: + option: + $ref: '#/definitions/template-variable-name' + step: + $ref: '#/definitions/rfc1035-label-name' + required: + - option + - step + + # Declaration of a value from a workflow input (variable) from-workflow-input: type: object @@ -123,6 +142,34 @@ definitions: - step - output + # A workflow option used as a step option + workflow-option-parameter: + type: object + additionalProperties: false + properties: + name: + $ref: '#/definitions/template-variable-name' + description: + type: string + default: + oneOf: + - type: string + - type: number + - type: boolean + minimum: + oneOf: + - type: number + maximum: + oneOf: + - type: number + as: + type: array + items: + $ref: '#/definitions/as-step-option' + required: + - name + - as + # A Step input (from an output of a prior step) step-input-from-step: type: object @@ -172,7 +219,7 @@ definitions: type: object additionalProperties: false patternProperties: - '^[a-zA-Z_]{1}[a-zA-Z0-9_-]{0,79}$': + '^[a-zA-Z]{1}[a-zA-Z0-9_]{0,79}$': oneOf: - type: string - type: integer