Skip to content

Commit d955a19

Browse files
author
Alan Christie
committed
feat: Initial schema for replicate step declaration
1 parent a74ff91 commit d955a19

12 files changed

+391
-13
lines changed

tests/test_decoder.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,15 @@
5656
)
5757
assert _DUPLICATE_WORKFLOW_VARIABLE_NAMES_WORKFLOW
5858

59+
# Parse the "simple-python-parallel" workflow definition once, at import time,
# so that the tests in this module can share the resulting dictionary.
_SIMPLE_PYTHON_PARALLEL_FILE: str = os.path.join(
    os.path.dirname(__file__), "workflow-definitions", "simple-python-parallel.yaml"
)
with open(_SIMPLE_PYTHON_PARALLEL_FILE, encoding="utf8") as workflow_file:
    _SIMPLE_PYTHON_PARALLEL_WORKFLOW: Dict[str, Any] = yaml.safe_load(workflow_file)
# Fail fast if the definition parsed to nothing.
assert _SIMPLE_PYTHON_PARALLEL_WORKFLOW
67+
5968
_STEP_SPECIFICATION_VARIABLE_NAMES_WORKFLOW_FILE: str = os.path.join(
6069
os.path.dirname(__file__),
6170
"workflow-definitions",
@@ -165,6 +174,16 @@ def test_validate_schema_for_workflow_options():
165174
assert error is None
166175

167176

177+
def test_validate_schema_for_simple_python_parallel():
    """The simple-python-parallel definition is expected to pass schema validation."""
    # Act / Assert: the test expects no error object (None) from the decoder.
    assert decoder.validate_schema(_SIMPLE_PYTHON_PARALLEL_WORKFLOW) is None
185+
186+
168187
def test_get_workflow_variables_for_smiple_python_molprops():
169188
# Arrange
170189

@@ -329,3 +348,36 @@ def test_get_workflow_outputs_for_step_with_unkown_step_name():
329348

330349
# Assert
331350
assert not outputs
351+
352+
353+
def test_get_step_input_variable_names_when_duplicates():
    """Both inputs of "final-step" are returned, even though they share a name."""
    # Act
    names = decoder.get_step_input_variable_names(
        _SIMPLE_PYTHON_PARALLEL_WORKFLOW, "final-step"
    )

    # Assert: exactly two entries, both called "inputFile"
    assert list(names) == ["inputFile", "inputFile"]
365+
366+
367+
def test_get_step_output_variable_names_when_duplicates():
    """Both outputs of "step-1" are returned, even though they share a name."""
    # Arrange: load the fixture whose first step repeats an output name
    definition_path: str = os.path.join(
        os.path.dirname(__file__),
        "workflow-definitions",
        "duplicate-step-output-variable-names.yaml",
    )
    with open(definition_path, "r", encoding="utf8") as stream:
        workflow: Dict[str, Any] = yaml.safe_load(stream)

    # Act
    names = decoder.get_step_output_variable_names(workflow, "step-1")

    # Assert: exactly two entries, both called "outputFile"
    assert list(names) == ["outputFile", "outputFile"]

tests/test_workflow_validator_for_run_level.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,3 +233,72 @@ def test_validate_duplicate_workflow_variable_names():
233233
# Assert
234234
assert error.error_num == 6
235235
assert error.error_msg == ["Duplicate workflow variable names found: x"]
236+
237+
238+
def test_validate_simple_python_parallel():
    """The simple-python-parallel workflow is expected to validate with error 0."""
    # Arrange
    workflow_path: str = os.path.join(
        os.path.dirname(__file__),
        "workflow-definitions",
        "simple-python-parallel.yaml",
    )
    # Keep the stream under its own name so the `str` path is not rebound
    # to a file object.
    with open(workflow_path, "r", encoding="utf8") as workflow_stream:
        workflow: dict[str, Any] = yaml.safe_load(workflow_stream)
    assert workflow

    # Act
    # NOTE(review): this is the run-level test module, yet the level passed
    # here is TAG - confirm whether ValidationLevel.RUN was intended.
    error = WorkflowValidator.validate(
        level=ValidationLevel.TAG,
        workflow_definition=workflow,
    )

    # Assert
    assert error.error_num == 0
257+
258+
259+
def test_validate_replicate_using_undeclared_input():
    """A step replicating on an undeclared input is expected to yield error 7."""
    # Arrange
    workflow_path: str = os.path.join(
        os.path.dirname(__file__),
        "workflow-definitions",
        "replicate-using-undeclared-input.yaml",
    )
    # Keep the stream under its own name so the `str` path is not rebound
    # to a file object.
    with open(workflow_path, "r", encoding="utf8") as workflow_stream:
        workflow: dict[str, Any] = yaml.safe_load(workflow_stream)
    assert workflow

    # Act
    # NOTE(review): this is the run-level test module, yet the level passed
    # here is TAG - confirm whether ValidationLevel.RUN was intended.
    error = WorkflowValidator.validate(
        level=ValidationLevel.TAG,
        workflow_definition=workflow,
    )

    # Assert
    assert error.error_num == 7
    assert error.error_msg == [
        "Replicate input variable is not declared: y (step=step-2)"
    ]
281+
282+
283+
def test_validate_duplicate_step_output_variable_names():
    """A step that repeats an output variable name is expected to yield error 3."""
    # Arrange
    workflow_path: str = os.path.join(
        os.path.dirname(__file__),
        "workflow-definitions",
        "duplicate-step-output-variable-names.yaml",
    )
    # Keep the stream under its own name so the `str` path is not rebound
    # to a file object.
    with open(workflow_path, "r", encoding="utf8") as workflow_stream:
        workflow: dict[str, Any] = yaml.safe_load(workflow_stream)
    assert workflow

    # Act
    # NOTE(review): this is the run-level test module, yet the level passed
    # here is TAG - confirm whether ValidationLevel.RUN was intended.
    error = WorkflowValidator.validate(
        level=ValidationLevel.TAG,
        workflow_definition=workflow,
    )

    # Assert
    assert error.error_num == 3
    assert error.error_msg == [
        "Duplicate step output variable: outputFile (step=step-1)"
    ]

tests/test_workflow_validator_for_tag_level.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,27 @@ def test_validate_shortcut_example_1():
109109
assert error.error_msg is None
110110

111111

112+
def test_validate_simple_python_parallel():
    """The simple-python-parallel workflow is expected to pass tag-level validation."""
    # Arrange
    workflow_path: str = os.path.join(
        os.path.dirname(__file__),
        "workflow-definitions",
        "simple-python-parallel.yaml",
    )
    # Keep the stream under its own name so the `str` path is not rebound
    # to a file object.
    with open(workflow_path, "r", encoding="utf8") as workflow_stream:
        workflow: dict[str, Any] = yaml.safe_load(workflow_stream)
    assert workflow

    # Act
    error = WorkflowValidator.validate(
        level=ValidationLevel.TAG,
        workflow_definition=workflow,
    )

    # Assert
    assert error.error_num == 0
131+
132+
112133
def test_validate_simple_python_molprops():
113134
# Arrange
114135
workflow_file: str = os.path.join(
@@ -171,3 +192,51 @@ def test_validate_duplicate_workflow_variable_names():
171192
# Assert
172193
assert error.error_num == 6
173194
assert error.error_msg == ["Duplicate workflow variable names found: x"]
195+
196+
197+
def test_validate_replicate_using_undeclared_input():
    """A step replicating on an undeclared input is expected to yield error 7."""
    # Arrange
    workflow_path: str = os.path.join(
        os.path.dirname(__file__),
        "workflow-definitions",
        "replicate-using-undeclared-input.yaml",
    )
    # Keep the stream under its own name so the `str` path is not rebound
    # to a file object.
    with open(workflow_path, "r", encoding="utf8") as workflow_stream:
        workflow: dict[str, Any] = yaml.safe_load(workflow_stream)
    assert workflow

    # Act
    error = WorkflowValidator.validate(
        level=ValidationLevel.TAG,
        workflow_definition=workflow,
    )

    # Assert
    assert error.error_num == 7
    assert error.error_msg == [
        "Replicate input variable is not declared: y (step=step-2)"
    ]
219+
220+
221+
def test_validate_duplicate_step_output_variable_names():
    """A step that repeats an output variable name is expected to yield error 3."""
    # Arrange
    workflow_path: str = os.path.join(
        os.path.dirname(__file__),
        "workflow-definitions",
        "duplicate-step-output-variable-names.yaml",
    )
    # Keep the stream under its own name so the `str` path is not rebound
    # to a file object.
    with open(workflow_path, "r", encoding="utf8") as workflow_stream:
        workflow: dict[str, Any] = yaml.safe_load(workflow_stream)
    assert workflow

    # Act
    error = WorkflowValidator.validate(
        level=ValidationLevel.TAG,
        workflow_definition=workflow,
    )

    # Assert
    assert error.error_num == 3
    assert error.error_msg == [
        "Duplicate step output variable: outputFile (step=step-1)"
    ]
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
---
2+
kind: DataManagerWorkflow
3+
kind-version: "2025.2"
4+
name: duplicate-step-output-variable-names
5+
description: A workflow where step-1 has duplicate output variable names
6+
variable-mapping:
7+
inputs:
8+
- name: x
9+
outputs:
10+
- name: y
11+
from:
12+
step: step-2
13+
output: outputFile
14+
as: clustered-molecules.smi
15+
16+
steps:
17+
18+
- name: step-1
19+
description: Add column 1
20+
specification:
21+
collection: workflow-engine-unit-test-jobs
22+
job: rdkit-molprops
23+
version: "1.0.0"
24+
variables:
25+
name: "col1"
26+
value: 123
27+
inputs:
28+
- input: inputFile
29+
from:
30+
workflow-input: candidateMolecules
31+
- input: inputFile
32+
from:
33+
workflow-input: candidateMolecules
34+
outputs:
35+
- output: outputFile
36+
as: __step1__out.smi
37+
- output: outputFile
38+
as: __step1__out.smi
39+
40+
- name: step-2
  description: Add column 2
  specification:
    collection: workflow-engine-unit-test-jobs
    job: cluster-butina
    version: "1.0.0"
    variables:
      name: "col2"
      value: "999"
  inputs:
  - input: inputFile
    from:
      # Refer to the preceding step by its declared name ("step-1") so the
      # only defect in this fixture is the duplicated output of step-1.
      step: step-1
      output: outputFile
  outputs:
  - output: outputFile
    as: __step2__out.smi

tests/workflow-definitions/duplicate-workflow-variable-names.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ description: A workflow with a duplicate variable name in the input and output
66
variable-mapping:
77
inputs:
88
- name: x
9-
type: squonk/x-smiles
109
outputs:
1110
- name: x
1211
from:
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
---
2+
kind: DataManagerWorkflow
3+
kind-version: "2025.2"
4+
name: replicate-using-undeclared-input
5+
description: A workflow that replicates from a variable that's not declared
6+
variable-mapping:
  inputs:
  - name: x
  outputs:
  - name: y
    from:
      # Refer to the final step by its declared name ("step-2") so the only
      # defect in this fixture is the undeclared replicate input.
      step: step-2
      output: outputFile
    as: clustered-molecules.smi
15+
16+
steps:
17+
18+
- name: step-1
19+
description: Add column 1
20+
specification:
21+
collection: workflow-engine-unit-test-jobs
22+
job: rdkit-molprops
23+
version: "1.0.0"
24+
variables:
25+
name: "col1"
26+
value: 123
27+
inputs:
28+
- input: inputFile
29+
from:
30+
workflow-input: candidateMolecules
31+
outputs:
32+
- output: outputFile
33+
as: __step-1__out.smi
34+
35+
- name: step-2
36+
description: Add column 2
37+
specification:
38+
collection: workflow-engine-unit-test-jobs
39+
job: cluster-butina
40+
version: "1.0.0"
41+
variables:
42+
name: "col2"
43+
value: "999"
44+
replicate:
45+
using:
46+
input: y
47+
inputs:
48+
- input: inputFile
49+
from:
50+
step: step-1
51+
output: outputFile
52+
outputs:
53+
- output: outputFile
54+
as: __step-2__out.smi

tests/workflow-definitions/simple-python-molprops-with-options.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,6 @@ variables:
5656
variable-mapping:
5757
inputs:
5858
- name: candidateMolecules
59-
type: squonk/x-smiles
6059
outputs:
6160
- name: clusteredMolecules
6261
from:

tests/workflow-definitions/simple-python-molprops.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ description: A simple python experimental workflow
66
variable-mapping:
77
inputs:
88
- name: candidateMolecules
9-
type: squonk/x-smiles
109
outputs:
1110
- name: clusteredMolecules
1211
from:

tests/workflow-definitions/simple-python-parallel.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ description: A simple branching workflow
66
variable-mapping:
77
inputs:
88
- name: candidateMolecules
9-
type: squonk/x-smiles
109
outputs:
1110
- name: clusteredMolecules
1211
from:

0 commit comments

Comments
 (0)