Skip to content

Commit 954314c

Browse files
committed
Add $graph splitter.
1 parent 7bacebf commit 954314c

File tree

1 file changed

+133
-0
lines changed

1 file changed

+133
-0
lines changed

cwl_utils/graph_split.py

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
#!/usr/bin/env python
2+
"""
3+
Unpacks the result of `cwltool --pack`.
4+
5+
Only tested with a single v1.0 workflow.
6+
"""
7+
8+
import sys
9+
import os
10+
from typing import Text
11+
12+
from ruamel import yaml
13+
14+
15+
def run(args):
    """Split a packed ``$graph`` document into one file per graph entry.

    ``args[0]`` is the path of the YAML document to read. When the document
    has no ``$graph`` key a message is printed and nothing is written.
    Otherwise every graph entry gets the document's ``cwlVersion``, has its
    references rewritten, and is dumped to a file in the current working
    directory named after the entry's id (minus its first character). The
    entry whose id is ``main`` is written to ``unpacked_<input basename>``.
    """
    packed_path = args[0]
    with open(packed_path, "r") as packed_handle:
        packed = yaml.load(packed_handle, yaml.SafeLoader)

    if '$graph' not in packed:
        print("No $graph, so not for us.")
        return

    cwl_version = packed.pop('cwlVersion')

    for process in packed['$graph']:
        # Drop the first character of the id (the '#' of a packed reference).
        process_id = process.pop("id")[1:]
        process['cwlVersion'] = cwl_version

        # rewrite() reports the schema files that were split out; types that
        # point into those files still have to be re-targeted.
        for import_name in rewrite(process, process_id) or ():
            rewrite_types(process, "#{}".format(import_name), False)

        if process_id == 'main':
            target_name = "unpacked_{}".format(os.path.basename(packed_path))
        else:
            target_name = process_id

        with open(target_name, "w") as target_handle:
            yaml.dump(process, target_handle, Dumper=yaml.SafeDumper)
36+
37+
def _strip_out_prefix(entry, step_id):
    """Return an ``out`` entry with the leading *step_id* prefix removed.

    Dict entries are updated in place through their ``id`` key; string
    entries are returned shortened. Entries are returned untouched when
    *step_id* is ``None`` or the entry does not start with it.
    """
    if step_id is None:
        return entry
    if isinstance(entry, dict):
        if entry['id'].startswith(step_id):
            # +1 also drops the separator that follows the step id.
            entry['id'] = entry['id'][len(step_id)+1:]
        return entry
    if isinstance(entry, Text) and entry.startswith(step_id):
        return entry[len(step_id)+1:]
    return entry


def rewrite(document, doc_id):
    """Rewrite packed ``#``-style references inside *document*, in place.

    Recursively walks nested lists and dicts. References that start with
    ``'#' + doc_id`` are made relative to the document itself, ``run``
    references lose their leading ``#``, and ``source``/``scatter``/``items``
    references into another entry become ``<entry>#<fragment>``.

    Args:
        document: the (sub)structure to rewrite; values that are neither a
            list nor a dict are ignored.
        doc_id: id of the graph entry being processed, without the leading
            ``#``.

    Returns:
        set: whatever ``rewrite_schemadef`` returned for any
        ``SchemaDefRequirement`` found while walking, merged together;
        an empty set when none was found.
    """
    imports = set()
    if isinstance(document, list):
        for entry in document:
            imports.update(rewrite(entry, doc_id))
    elif isinstance(document, dict):
        # Captured up front: the 'id' branch below overwrites document['id'],
        # but 'out' entries are still prefixed with the original value.
        this_id = document.get('id')
        own_prefix = '#' + doc_id
        # Iterate over a snapshot, because values are reassigned as we go.
        for key, value in list(document.items()):
            # Compare by value, not identity (`is '#'` relied on string
            # interning), and only treat strings as references so that an
            # embedded process dict is recursed into instead of crashing.
            if key == 'run' and isinstance(value, Text) and value.startswith('#'):
                document[key] = value[1:]
            elif key in ('id', 'outputSource') and value.startswith(own_prefix):
                # +2 drops both the leading '#' and the trailing separator.
                document[key] = value[len(doc_id)+2:]
            elif key == 'out':
                value[:] = [_strip_out_prefix(entry, this_id) for entry in value]
            elif key in ('source', 'scatter', 'items'):
                if isinstance(value, Text) and value.startswith('#') and '/' in value:
                    referrant_file, sub = value[1:].split('/', 1)
                    if referrant_file == doc_id:
                        document[key] = sub
                    else:
                        document[key] = '{}#{}'.format(referrant_file, sub)
                elif isinstance(value, list):
                    document[key] = [
                        entry[len(doc_id)+2:] if entry.startswith(own_prefix) else entry
                        for entry in value
                    ]
            elif key == '$import':
                rewrite_import(document)
            elif key == 'class' and value == 'SchemaDefRequirement':
                # The schema definition handler takes over for this mapping.
                return rewrite_schemadef(document)
            else:
                imports.update(rewrite(value, doc_id))
    return imports
77+
78+
def rewrite_import(document):
    """Shorten ``document['$import']`` in place.

    Keeps only the text before the first ``/`` and drops that text's first
    character (the leading ``#`` of a packed reference).
    """
    first_segment, _, _ = document['$import'].partition("/")
    document['$import'] = first_segment[1:]
81+
82+
def rewrite_types(field, entry_file, sameself):
    """Re-target ``type``/``items`` references that start with *entry_file*.

    Works in place on *field*. Lists are processed element by element; in
    dicts only the ``type`` and ``items`` keys are examined (and recursed
    into when they hold a dict or list). Any other value is ignored.

    Args:
        field: structure to rewrite.
        entry_file: reference prefix to look for, including its leading ``#``.
        sameself: when true the prefix and the character after it are
            removed; otherwise the reference is turned into
            ``<file>#<fragment>``.
    """
    if isinstance(field, list):
        for member in field:
            rewrite_types(member, entry_file, sameself)
        return
    if not isinstance(field, dict):
        return

    for slot, current in field.items():
        if slot not in ('type', 'items'):
            continue
        if isinstance(current, Text):
            if not current.startswith(entry_file):
                continue
            if sameself:
                field[slot] = current[len(entry_file)+1:]
            else:
                # Drop the leading '#', then split file from fragment.
                pieces = current[1:].split('/', 1)
                field[slot] = "{d[0]}#{d[1]}".format(d=pieces)
        elif isinstance(current, (dict, list)):
            rewrite_types(current, entry_file, sameself)
101+
102+
def rewrite_schemadef(document):
    """Split inline schema definitions out of *document* into ``$import``s.

    For each entry of ``document['types']``:

    * an entry that already has ``$import`` is passed to ``rewrite_import``;
    * an entry whose ``name`` contains ``/`` is split into a file part and a
      short name, each of its fields is renamed to the third ``/``-separated
      segment of its name and run through ``rewrite_types``, the entry is
      appended as a one-element YAML list to the file named after the file
      part (minus its first character), and the entry is then reduced to an
      ``$import`` of that file.

    Afterwards repeated ``$import`` entries are dropped, keeping the first
    occurrence of each target.

    Returns:
        set: the distinct ``$import`` targets left in ``document['types']``.
    """
    type_defs = document['types']

    for type_def in type_defs:
        if '$import' in type_def:
            rewrite_import(type_def)
            continue
        if 'name' not in type_def or '/' not in type_def['name']:
            continue

        entry_file, short_name = type_def['name'].split('/')
        type_def['name'] = short_name
        for member in type_def['fields']:
            member['name'] = member['name'].split('/')[2]
            rewrite_types(member, entry_file, True)

        schema_path = entry_file[1:]
        # Append mode: several types may be written to the same schema file.
        with open(schema_path, "a") as schema_handle:
            yaml.dump([type_def], schema_handle, Dumper=yaml.RoundTripDumper)

        # The definition now lives in its own file; leave only the import.
        type_def['$import'] = schema_path
        for stale_key in ('name', 'type', 'fields'):
            del type_def[stale_key]

    seen_imports = set()
    kept = []
    for type_def in type_defs:
        if '$import' in type_def:
            target = type_def['$import']
            if target in seen_imports:
                continue
            seen_imports.add(target)
        kept.append(type_def)
    type_defs[:] = kept
    return seen_imports
129+
130+
if __name__ == "__main__":
    # Script entry point: hand the command-line arguments (program name
    # excluded) to run().
    cli_args = sys.argv[1:]
    run(cli_args)
132+
133+

0 commit comments

Comments
 (0)