Skip to content

Commit 802aec1

Browse files
authored
Fix inline runtag with no io and duplicate outs in compiled cwl (PolusAI#341)
1 parent bf60ee4 commit 802aec1

File tree

10 files changed

+42
-102
lines changed

10 files changed

+42
-102
lines changed

.github/workflows/run_workflows.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -172,11 +172,6 @@ jobs:
172172
run: cd sophios/ && pytest tests/test_examples.py -k test_cwl_docker_extract
173173
# For self-hosted runners, make sure the docker cache is up-to-date.
174174

175-
- name: PyTest Run Workflows
176-
if: always()
177-
# NOTE: Do NOT add coverage to PYPY CI runs https://github.com/tox-dev/tox/issues/2252
178-
run: cd sophios/ && pytest tests/test_examples.py -k test_run_workflows_on_push --workers 8 --cwl_runner cwltool # --cov
179-
180175
- name: PyTest Run REST Core Tests
181176
if: always()
182177
# NOTE: Do NOT add coverage to PYPY CI runs https://github.com/tox-dev/tox/issues/2252
@@ -197,6 +192,11 @@ jobs:
197192
# NOTE: Do NOT add coverage to PYPY CI runs https://github.com/tox-dev/tox/issues/2252
198193
run: cd sophios/ && pytest tests/test_fix_payload.py -k test_fix
199194

195+
- name: PyTest Run Workflows
196+
if: always()
197+
# NOTE: Do NOT add coverage to PYPY CI runs https://github.com/tox-dev/tox/issues/2252
198+
run: cd sophios/ && pytest tests/test_examples.py -k test_run_workflows_on_push --workers 8 --cwl_runner cwltool # --cov
199+
200200
# NOTE: The steps below are for repository_dispatch only. For all other steps, please insert above
201201
# this comment.
202202

src/sophios/api/http/restapi.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,6 @@ async def compile_wf(request: Request) -> Json:
137137
tools_cwl, True, relative_run_path=True, testing=False)
138138

139139
rose_tree = compiler_info.rose
140-
input_output.write_to_disk(rose_tree, Path('autogenerated/'), True, args.inputs_file)
141140
# generating cwl inline within the 'run' tag is post compile
142141
# and always on when compiling and preparing REST return payload
143142
rose_tree = cwl_inline_runtag(rose_tree)
@@ -162,14 +161,6 @@ async def compile_wf(request: Request) -> Json:
162161

163162
cwl_tree_run.pop('steps', None)
164163
cwl_tree_run['steps'] = cwl_tree_run.pop('steps_dict', None)
165-
166-
# currently there is a compiler bug where the output variables are duplicated
167-
# this is a workaround to remove the duplicates till the compiler is fixed
168-
for step in cwl_tree_run['steps']:
169-
170-
out_vars = cwl_tree_run['steps'][step]['out']
171-
out_vars_unique = list(set(out_vars))
172-
cwl_tree_run['steps'][step]['out'] = out_vars_unique
173164
compute_workflow: Json = {}
174165
compute_workflow = {
175166
"name": yaml_stem,

src/sophios/api/pythonapi.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -778,22 +778,10 @@ def get_cwl_workflow(self, args_dict: Dict[str, str] = {}) -> Json:
778778
compiler_info = self.compile(args_dict=args_dict, write_to_disk=False)
779779
rose_tree = compiler_info.rose
780780

781-
# this is unfortunately necessary for now
782-
# TODO: uncouple dumping to file and getting cwl steps
783-
# then remove this write_to_disk call here
784-
input_output.write_to_disk(rose_tree, Path('autogenerated/'), True, args.inputs_file)
785-
786781
rose_tree = post_compile.cwl_inline_runtag(rose_tree)
787782
sub_node_data = rose_tree.data
788783
cwl_ast = sub_node_data.compiled_cwl
789784

790-
# Copy samee's workaround for duplicate outs
791-
for step in cwl_ast['steps']:
792-
out_vars = step['out']
793-
out_vars_unique = list(set(out_vars))
794-
out_vars_unique.sort()
795-
step['out'] = out_vars_unique
796-
797785
yaml_inputs = sub_node_data.workflow_inputs_file
798786
workflow_json: Json = {}
799787
workflow_json = {

src/sophios/main.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -166,11 +166,8 @@ def main() -> None:
166166
if args.partial_failure_enable:
167167
rose_tree = plugins.cwl_update_outputs_optional_rosetree(rose_tree)
168168

169-
io.write_to_disk(rose_tree, Path('autogenerated/'), True, args.inputs_file)
170-
171169
if args.cwl_inline_runtag:
172170
rose_tree = pc.cwl_inline_runtag(rose_tree)
173-
io.write_to_disk(rose_tree, Path('autogenerated/'), True, args.inputs_file)
174171

175172
if args.graphviz:
176173
if shutil.which('dot'):
@@ -191,11 +188,14 @@ def main() -> None:
191188
print("but not the graphviz system package.)")
192189

193190
if args.run_local or args.generate_run_script:
191+
# Only now we need to write the final cwl for docker-extract
192+
# and then for actually running using a cwl_runner
193+
io.write_to_disk(rose_tree, Path('autogenerated/'), True, args.inputs_file)
194194
pc.cwl_docker_extract(args.container_engine, args.pull_dir, yaml_stem)
195195
if args.docker_remove_entrypoints:
196196
rose_tree = pc.remove_entrypoints(args.container_engine, rose_tree)
197-
io.write_to_disk(rose_tree, Path('autogenerated/'), True, args.inputs_file)
198197
pc.find_and_create_output_dirs(rose_tree)
198+
# No need to re-write to disk as nothing of the cwl or yaml_inputs has changed!
199199
if args.toil_passthrough_flags == 'yes':
200200
run_local.run_local(args, rose_tree, args.cachedir, args.cwl_runner, False, passthrough_args=unknown_args)
201201
else:
@@ -205,6 +205,11 @@ def main() -> None:
205205
# we need to copy the output files manually. See comment above.
206206
if args.cwl_runner == 'cwltool' and args.copy_output_files:
207207
run_local.copy_output_files(yaml_stem)
208+
elif args.generate_cwl_workflow:
209+
io.write_to_disk(rose_tree, Path('autogenerated/'), True, args.inputs_file)
210+
else:
211+
print('Please specify either --generate_cwl_workflow (compile) or --run_local (run)')
212+
sys.exit(1)
208213

209214

210215
if __name__ == '__main__':

src/sophios/plugins.py

Lines changed: 0 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -165,37 +165,6 @@ def cwl_update_outputs_optional(cwl: Cwl) -> Cwl:
165165
return cwl_mod
166166

167167

168-
def cwl_update_inline_runtag(cwl: Cwl, path: Path, relative_run_path: bool) -> Cwl:
169-
"""Updates 'run' tag with inline content
170-
171-
Args:
172-
cwl (Cwl): A CWL Workflow
173-
path (Path): The directory in which to read files from
174-
relative_run_path (bool): Controls whether to use subdirectories or just one directory
175-
Returns:
176-
Cwl: A CWL Workflow with inline run (if any)
177-
"""
178-
cwl_mod = copy.deepcopy(cwl)
179-
for step in cwl_mod['steps']:
180-
runtag_orig = step.get('run', '')
181-
match runtag_orig:
182-
case str():
183-
if runtag_orig.endswith('.cwl'):
184-
if relative_run_path:
185-
yml_path = Path.cwd() / path / runtag_orig
186-
else:
187-
yml_path = Path(runtag_orig) # Assume absolute path in the runtag
188-
with open(yml_path, mode='r', encoding='utf-8') as f:
189-
runtag_raw = yaml.safe_load(f.read())
190-
# local $namespace and $schema tag shouldn't be in inline cwl steps
191-
runtag_raw.pop('$namespaces', None)
192-
runtag_raw.pop('$schemas', None)
193-
step['run'] = runtag_raw
194-
case _:
195-
pass # We only care if the runtag is a cwl filepath
196-
return cwl_mod
197-
198-
199168
Client = Union[docker.DockerClient, podman.PodmanClient] # type: ignore
200169

201170

@@ -265,30 +234,6 @@ def remove_entrypoints_podman() -> None:
265234
remove_entrypoints(client, podman.domain.images_build.BuildMixin())
266235

267236

268-
def cwl_update_inline_runtag_rosetree(rose_tree: RoseTree, path: Path, relative_run_path: bool) -> RoseTree:
269-
"""Inlines the compiled CWL files runtag
270-
271-
Args:
272-
rose_tree (RoseTree): The data associated with compiled subworkflows
273-
path (Path): The directory in which to read files from
274-
relative_run_path (bool): Controls whether to use subdirectories or just one directory.
275-
Returns:
276-
RoseTree: rose_tree with inline cwl runtag
277-
"""
278-
n_d: NodeData = rose_tree.data
279-
if n_d.compiled_cwl['class'] == 'Workflow':
280-
outputs_cwl_inline_runtag = cwl_update_inline_runtag(n_d.compiled_cwl, path, relative_run_path)
281-
else:
282-
outputs_cwl_inline_runtag = n_d.compiled_cwl
283-
284-
sub_trees_path = [cwl_update_inline_runtag_rosetree(sub_rose_tree, path, relative_run_path) for
285-
sub_rose_tree in rose_tree.sub_trees]
286-
node_data_path = NodeData(n_d.namespaces, n_d.name, n_d.yml, outputs_cwl_inline_runtag, n_d.tool,
287-
n_d.workflow_inputs_file, n_d.explicit_edge_defs, n_d.explicit_edge_calls,
288-
n_d.graph, n_d.inputs_workflow, n_d.step_name_1)
289-
return RoseTree(node_data_path, sub_trees_path)
290-
291-
292237
def cwl_update_outputs_optional_rosetree(rose_tree: RoseTree) -> RoseTree:
293238
"""Updates outputs optional for every CWL CommandLineTool
294239

src/sophios/post_compile.py

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
from pathlib import Path
2+
import copy
23
import subprocess as sub
34
from typing import Dict, Union
45
from . import plugins
5-
from .wic_types import RoseTree
6+
from .wic_types import RoseTree, NodeData
67

78

89
def find_output_dirs(data: Union[RoseTree, Dict, list]) -> list:
@@ -73,10 +74,24 @@ def cwl_docker_extract(container_engine: str, pull_dir: str, file_name: str) ->
7374

7475

7576
def cwl_inline_runtag(rose_tree: RoseTree) -> RoseTree:
    """Return a copy of rose_tree whose workflow steps carry inline CWL in their 'run' tag

    The input rose_tree is deep-copied exactly once and left untouched. In the
    copy, every step of a 'Workflow' node that corresponds to a sub-tree gets
    its 'run' tag replaced by that sub-tree's compiled CWL. Sub-trees are
    processed before their parent, so the CWL inlined into a parent already
    contains the inlined CWL of its own nested steps.

    Args:
        rose_tree (RoseTree): The data associated with compiled subworkflows

    Returns:
        RoseTree: The updated rose_tree with inline cwl in runtag

    Raises:
        StopIteration: If a sub-tree has no step with a matching 'id' in its
            parent's compiled CWL.
    """
    # Copy once at the top; the helper then mutates only this private copy.
    rose_tree_mod = copy.deepcopy(rose_tree)
    _inline_runtag_in_place(rose_tree_mod)
    return rose_tree_mod


def _inline_runtag_in_place(rose_tree: RoseTree) -> None:
    """Inline the compiled CWL of every sub-tree into rose_tree's steps, bottom-up, in place

    Args:
        rose_tree (RoseTree): An already-copied tree that is safe to mutate
    """
    cwl_tree = rose_tree.data.compiled_cwl
    if cwl_tree.get('class', '') != 'Workflow':
        # Only workflows have steps whose 'run' tag can be inlined.
        return

    for sub_rose_tree in rose_tree.sub_trees:
        # Recurse first: the sub-tree's compiled CWL must already be fully
        # inlined before it is embedded into the parent step. Embedding first
        # (and discarding the recursion result) would leave nested steps
        # pointing at their original 'run' values.
        _inline_runtag_in_place(sub_rose_tree)

        sub_node_data = sub_rose_tree.data
        # The last namespace entry is matched against the 'id' of the parent's steps.
        sub_step_name = sub_node_data.namespaces[-1]
        step_to_update = next(item for item in cwl_tree['steps'] if item.get('id') == sub_step_name)
        step_to_update['run'] = sub_node_data.compiled_cwl
8095

8196

8297
def remove_entrypoints(container_engine: str, rose_tree: RoseTree) -> RoseTree:

src/sophios/utils_cwl.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ def add_yamldict_keyval_out(steps_i: Yaml, step_key: str, strs: List[str]) -> Ya
9292
if steps_i:
9393
if 'out' in steps_i:
9494
new_strs = steps_i['out'] + strs
95+
new_strs = list(set(new_strs))
9596
new_keyvals = dict([(k, v) if k != 'out' else (k, new_strs) for k, v in steps_i.items()])
9697
else:
9798
new_keyvals = dict(list(steps_i.items()) + [('out', strs)])

tests/test_cli_flags.py

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,7 @@
11
import pathlib
22
import subprocess
3-
import yaml
43
import shutil
5-
6-
from sophios.main import main
7-
from sophios.cli import get_args
4+
import yaml
85

96

107
def test_generate_cwl_workflow() -> None:
@@ -19,17 +16,16 @@ def test_generate_cwl_workflow() -> None:
1916

2017
yaml_path = str(pathlib.Path(__file__).parent.parent.resolve() / "docs/tutorials/helloworld.wic")
2118

22-
args = ["sophios", "--yaml", yaml_path, "--generate_cwl_workflow"]
19+
cmd = ["sophios", "--yaml", yaml_path, "--generate_cwl_workflow"]
2320

2421
# run sophios with args
25-
subprocess.run(args)
22+
subprocess.run(cmd, check=False)
2623

27-
with open("autogenerated/helloworld.cwl", "r") as cwl_file:
24+
with open("autogenerated/helloworld.cwl", "r", encoding='utf-8') as cwl_file:
2825
result_dict = yaml.safe_load(cwl_file)
2926

30-
with open(
31-
str(pathlib.Path(__file__).parent.resolve() / "data/cwl/helloworld.cwl"), "r"
32-
) as cwl_file:
27+
with open(str(pathlib.Path(__file__).parent.resolve() / "data/cwl/helloworld.cwl"), "r",
28+
encoding='utf-8') as cwl_file:
3329
actual_dict = yaml.safe_load(cwl_file)
3430

3531
assert result_dict == actual_dict

tests/test_rest_core.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,6 @@ def prepare_call_rest_api(inp_path: Path) -> Json:
118118
"""prepare payload and call rest api"""
119119
with open(inp_path, 'r', encoding='utf-8') as f:
120120
inp = json.load(f)
121-
print('----------- from rest api ----------- \n\n')
122121
scope = {}
123122
scope['type'] = 'http'
124123

tests/test_scattering_scaling.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
def test_scattering_scaling(minval: int = 100, maxval: int = 500, step: int = 100) -> None:
1414
"""Tests that execution time scales linearly with input array size"""
1515
# First compile the workflow
16-
cmd = ['sophios', '--yaml', 'examples/scattering_scaling.wic']
16+
cmd = ['sophios', '--yaml', 'examples/scattering_scaling.wic', '--generate_run_script']
1717
sub.run(cmd, check=True)
1818

1919
basedir = 'autogenerated'

0 commit comments

Comments
 (0)