Skip to content

Commit aa898e5

Browse files
authored
Workflow step source and scatter (#20)
Made possible by workflows donated by the 4D Nucleome Data Coordination and Integration Center (http://dcic.4dnucleome.org/).
* Improve the key order of the output; fix the remaining `description` → `doc` renames.
1 parent 8338ce4 commit aa898e5

File tree

3 files changed

+88
-40
lines changed

3 files changed

+88
-40
lines changed

cwlupgrader/main.py

Lines changed: 72 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from typing import (Any, Dict, List, Optional, # pylint:disable=unused-import
99
Text, Union)
1010
import ruamel.yaml
11-
11+
from ruamel.yaml.comments import CommentedMap # for consistent sort order
1212

1313
def main(args=None): # type: (Optional[List[str]]) -> int
1414
"""Main function."""
@@ -21,14 +21,15 @@ def main(args=None): # type: (Optional[List[str]]) -> int
2121
if ('cwlVersion' in document
2222
and (document['cwlVersion'] == 'cwl:draft-3'
2323
or document['cwlVersion'] == 'draft-3')):
24-
draft3_to_v1_0(document)
24+
document = draft3_to_v1_0(document)
2525
else:
2626
print("Skipping non draft-3 CWL document", file=sys.stderr)
27-
print(ruamel.yaml.dump(document, default_flow_style=False))
27+
print(ruamel.yaml.round_trip_dump(
28+
document, default_flow_style=False))
2829
return 0
2930

3031

31-
def draft3_to_v1_0(document): # type: (Dict[Text, Any]) -> None
32+
def draft3_to_v1_0(document): # type: (Dict[Text, Any]) -> Dict
3233
"""Transformation loop."""
3334
_draft3_to_v1_0(document)
3435
if isinstance(document, MutableMapping):
@@ -40,6 +41,7 @@ def draft3_to_v1_0(document): # type: (Dict[Text, Any]) -> None
4041
if isinstance(entry, MutableMapping):
4142
value[index] = _draft3_to_v1_0(entry)
4243
document['cwlVersion'] = 'v1.0'
44+
return sort_v1_0(document)
4345

4446

4547
def _draft3_to_v1_0(document):
@@ -53,6 +55,11 @@ def _draft3_to_v1_0(document):
5355
elif document["class"] == "CommandLineTool":
5456
input_output_clean(document)
5557
hints_and_requirements_clean(document)
58+
if isinstance(document["baseCommand"], list) and \
59+
len(document["baseCommand"]) == 1:
60+
document["baseCommand"] = document["baseCommand"][0]
61+
if "arguments" in document and not document["arguments"]:
62+
del document["arguments"]
5663
clean_secondary_files(document)
5764

5865
if "description" in document:
@@ -62,38 +69,46 @@ def _draft3_to_v1_0(document):
6269

6370

6471
def workflow_clean(document):  # type: (MutableMapping[Text, Any]) -> None
    """Transform a draft-3 style Workflow into more idiomatic v1.0, in place.

    The following rewrites are applied to ``document``:

    * inputs/outputs and hints/requirements are normalised by the shared
      helpers ``input_output_clean`` and ``hints_and_requirements_clean``;
    * every workflow output's ``source`` becomes ``outputSource``, with the
      leading ``#`` removed and ``.`` replaced by ``/``;
    * the ``steps`` list becomes a mapping keyed by step id (leading ``#``
      removed), where each step's ``inputs``/``outputs`` lists are replaced
      by the compact ``in`` mapping and ``out`` list;
    * ``scatter`` targets lose their ``<step id>.`` prefix;
    * a step level ``description`` is renamed to ``doc``.

    ``CommentedMap`` is used for every mapping created here so that the
    insertion order survives the round-trip dump.
    """
    input_output_clean(document)
    hints_and_requirements_clean(document)
    outputs = document['outputs']
    for output_id in outputs:
        outputs[output_id]["outputSource"] = \
            outputs[output_id].pop("source").lstrip('#').replace(".", "/")
    new_steps = CommentedMap()
    for step in document["steps"]:
        # Work on a shallow, order-preserving copy of the step.
        new_step = CommentedMap()
        new_step.update(step)
        step = new_step
        step_id = step.pop("id")
        # Length of the "<step id>." (or "<step id>/") prefix carried by the
        # ids of the step's inputs, outputs and scatter targets.
        step_id_len = len(step_id)+1
        step["out"] = [outp["id"][step_id_len:] for outp in
                       step["outputs"]]
        del step["outputs"]
        ins = CommentedMap()
        for inp in step["inputs"]:
            ident = inp["id"][step_id_len:]  # remove step id prefix
            if 'source' in inp:
                inp["source"] = inp["source"].lstrip('#').replace(".", "/")
            del inp["id"]
            if len(inp) > 1:
                ins[ident] = inp
            elif len(inp) == 1:
                # A single remaining field is written in short form.
                ins[ident] = inp.popitem()[1]
            else:
                ins[ident] = {}
        step["in"] = ins
        del step["inputs"]
        if "scatter" in step:
            scatter = step["scatter"]
            if isinstance(scatter, (str, Text)):
                # A single scatter target given as a plain string: strip the
                # step id prefix from the string itself.  (Testing
                # ``len(scatter) == 1`` instead would send a string into the
                # list branch and iterate over its characters.)
                step["scatter"] = scatter[step_id_len:]
            else:
                targets = [source[step_id_len:] for source in scatter]
                # A one-element list is written as a scalar; longer lists
                # stay lists.
                step["scatter"] = \
                    targets[0] if len(targets) == 1 else targets
        if "description" in step:
            step["doc"] = step.pop("description")
        new_steps[step_id.lstrip('#')] = step
    document["steps"] = new_steps
98113

99114

@@ -102,13 +117,15 @@ def input_output_clean(document): # type: (MutableMapping[Text, Any]) -> None
102117
for param_type in ['inputs', 'outputs']:
103118
if param_type not in document:
104119
break
105-
new_section = {}
120+
new_section = CommentedMap()
106121
for param in document[param_type]:
107122
param_id = param.pop('id').lstrip('#')
108123
if 'type' in param:
109124
param['type'] = shorten_type(param['type'])
125+
if 'description' in param:
126+
param['doc'] = param.pop('description')
110127
if len(param) > 1:
111-
new_section[param_id] = param
128+
new_section[param_id] = sort_input_or_output(param)
112129
else:
113130
new_section[param_id] = param.popitem()[1]
114131
document[param_type] = new_section
@@ -143,15 +160,19 @@ def shorten_type(type_obj): # type: (List[Any]) -> Union[Text, List[Any]]
143160
for entry in type_obj: # find arrays that we can shorten and do so
144161
if isinstance(entry, Mapping):
145162
if (entry['type'] == 'array' and
146-
isinstance(entry['items'], Text)):
163+
isinstance(entry['items'], (str, Text))):
147164
entry = entry['items'] + '[]'
165+
elif entry['type'] == 'enum':
166+
entry = sort_enum(entry)
148167
new_type.extend([entry])
149168
if len(new_type) == 2:
150169
if 'null' in new_type:
151170
type_copy = copy.deepcopy(new_type)
152171
type_copy.remove('null')
153172
if isinstance(type_copy[0], (str, Text)):
154173
return type_copy[0] + '?'
174+
if len(new_type) == 1:
175+
return new_type[0]
155176
return new_type
156177

157178

@@ -165,5 +186,33 @@ def clean_secondary_files(document):
165186
'"path"', '"location"').replace(".path", ".location")
166187

167188

189+
def sort_v1_0(document):  # type: (Dict) -> Dict
    """Return a new CommentedMap holding the document's top-level sections
    in a more meaningful order.

    Keys listed in ``keyorder`` come first, in that order.  Any other key
    is ranked after all of them; because the sort is stable, such keys keep
    the relative order they had in ``document``.
    """
    keyorder = ['cwlVersion', 'class', 'id', 'label', 'doc', 'requirements',
                'hints', 'inputs', 'stdin', 'baseCommand', 'steps',
                'expression', 'arguments', 'stderr', 'stdout', 'outputs',
                'successCodes', 'temporaryFailCodes', 'permanentFailCodes']

    def rank(item):
        """Sort position of a (key, value) pair; unknown keys go last."""
        key = item[0]
        if key in keyorder:
            return keyorder.index(key)
        return 100

    ordered_items = sorted(document.items(), key=rank)
    return CommentedMap(ordered_items)
198+
199+
200+
def sort_enum(enum):  # type: (Mapping) -> Dict
    """Return a new CommentedMap holding the fields of an enum type
    definition in a more meaningful order.

    Fields named in ``keyorder`` come first, in that order; all other
    fields follow in their original relative order (the sort is stable).
    """
    keyorder = ['type', 'name', 'label', 'symbols', 'inputBinding']

    def rank(item):
        """Sort position of a (key, value) pair; unknown keys go last."""
        key = item[0]
        if key in keyorder:
            return keyorder.index(key)
        return 100

    ordered_items = sorted(enum.items(), key=rank)
    return CommentedMap(ordered_items)
206+
207+
208+
def sort_input_or_output(io_def):  # type: (Dict) -> Dict
    """Return a new CommentedMap holding the fields of an input or output
    parameter definition in a more meaningful order.

    Fields named in ``keyorder`` come first, in that order; all other
    fields follow in their original relative order (the sort is stable).
    """
    keyorder = ['label', 'doc', 'type', 'format', 'secondaryFiles',
                'default', 'inputBinding', 'outputBinding', 'streamable']

    def rank(item):
        """Sort position of a (key, value) pair; unknown keys go last."""
        key = item[0]
        if key in keyorder:
            return keyorder.index(key)
        return 100

    ordered_items = sorted(io_def.items(), key=rank)
    return CommentedMap(ordered_items)
215+
216+
168217
if __name__ == "__main__":
169-
sys.exit(main(sys.argv[:1]))
218+
sys.exit(main())

tests/draft-3-wf-v1.0.cwl

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,28 @@
1-
class: Workflow
21
cwlVersion: v1.0
3-
inputs:
4-
input_file: File?
5-
outputs:
6-
md5_report:
7-
outputSource: md5/report
8-
type: File?
9-
validatefiles_report:
10-
outputSource: validatefiles/report
11-
type: File?
2+
class: Workflow
123
requirements:
134
InlineJavascriptRequirement: {}
5+
inputs:
6+
input_file: File?
147
steps:
158
md5:
9+
run: md5.cwl
10+
out:
11+
- report
1612
in:
1713
input_file: input_file
14+
validatefiles:
15+
run: validate.cwl
1816
out:
1917
- report
20-
run: md5.cwl
21-
validatefiles:
2218
in:
2319
input_file: input_file
2420
type: {}
25-
out:
26-
- report
27-
run: validate.cwl
21+
outputs:
22+
validatefiles_report:
23+
type: File?
24+
outputSource: validatefiles/report
25+
md5_report:
26+
type: File?
27+
outputSource: md5/report
2828

tests/test_complete.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import filecmp
2-
import os
32
from cwlupgrader.main import main
43
from .util import get_data
54

@@ -11,6 +10,6 @@ def test_draft3_workflow(tmpdir, capsys):
1110
outfile.write(capsys.readouterr().out)
1211
outfile.flush()
1312
outfile.close()
14-
result = filecmp.cmp('tests/draft-3-wf-v1.0.cwl', str(test_path),
13+
result = filecmp.cmp(get_data('tests/draft-3-wf-v1.0.cwl'), str(test_path),
1514
shallow=False)
1615
assert result

0 commit comments

Comments
 (0)