Skip to content

Commit 0de7798

Browse files
authored
Update linking of the outputs and inputs (PolusAI#301)
1 parent c2b3664 commit 0de7798

File tree

12 files changed

+237
-56
lines changed

12 files changed

+237
-56
lines changed

src/sophios/api/http/restapi.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,8 +101,8 @@ async def compile_wf(request: Request) -> Json:
101101
req = converter.update_payload_missing_inputs_outputs(req)
102102
wfb_payload = converter.raw_wfb_to_lean_wfb(req)
103103
# schema non-preserving
104-
workflow_temp = converter.wfb_to_wic(wfb_payload)
105-
wkflw_name = "generic_workflow_" + str(uuid.uuid4())
104+
workflow_temp = converter.wfb_to_wic(wfb_payload, req["plugins"])
105+
wkflw_name = "workflow_"
106106
args = get_args(wkflw_name, suppliedargs)
107107

108108
# Build canonical workflow object
@@ -161,6 +161,13 @@ async def compile_wf(request: Request) -> Json:
161161
cwl_tree_run.pop('steps', None)
162162
cwl_tree_run['steps'] = cwl_tree_run.pop('steps_dict', None)
163163

164+
# currently there is a compiler bug where the output variables are duplicated
165+
# this is a workaround to remove the duplicates till the compiler is fixed
166+
for step in cwl_tree_run['steps']:
167+
168+
out_vars = cwl_tree_run['steps'][step]['out']
169+
out_vars_unique = list(set(out_vars))
170+
cwl_tree_run['steps'][step]['out'] = out_vars_unique
164171
compute_workflow: Json = {}
165172
compute_workflow = {
166173
"name": yaml_stem,

src/sophios/api/utils/converter.py

Lines changed: 96 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from sophios.wic_types import Json, Cwl
1111
from sophios.api.utils.ict.ict_spec.model import ICT
1212
from sophios.api.utils.ict.ict_spec.cast import cast_to_ict
13+
from sophios.api.utils.wfb_util import get_node_config
1314

1415
SCHEMA_FILE = Path(__file__).parent / "input_object_schema.json"
1516
SCHEMA: Json = {}
@@ -147,10 +148,16 @@ def get_topological_order(links: list[dict[str, str]]) -> list[str]:
147148
return result
148149

149150

150-
def wfb_to_wic(inp: Json) -> Cwl:
151+
def wfb_to_wic(inp: Json, plugins: List[dict[str, Any]]) -> Cwl:
151152
"""Convert lean wfb json to compliant wic"""
152153
# non-schema preserving changes
153154
inp_restrict = copy.deepcopy(inp)
155+
plugin_config_map: dict[str, dict] = {}
156+
for plugin in plugins:
157+
pid: str = plugin.get("pid", "")
158+
if pid == "":
159+
continue
160+
plugin_config_map[pid] = get_node_config(plugin)
154161

155162
for node in inp_restrict['nodes']:
156163
if node.get('settings'):
@@ -159,7 +166,6 @@ def wfb_to_wic(inp: Json) -> Cwl:
159166
node['out'] = list({k: yaml.load('!& ' + v, Loader=wic_loader())} for k, v in node['settings']
160167
['outputs'].items()) # outputs always have to be list
161168
# remove these (now) superfluous keys
162-
node.pop('settings', None)
163169
node.pop('name', None)
164170
node.pop('internal', None)
165171

@@ -168,39 +174,101 @@ def wfb_to_wic(inp: Json) -> Cwl:
168174
target_node_ids = []
169175
for edg in inp_restrict['links']:
170176
target_node_ids.append(edg['targetId'])
177+
# keep track of all the args that processed
178+
node_arg_map: dict[int, set] = {}
171179
# now set inputs on non-sink nodes as inline input '!ii '
172180
# if inputs exist
173181
non_sink_nodes = [node for node in inp_restrict['nodes'] if node['id'] not in target_node_ids]
174182
for node in non_sink_nodes:
183+
if node["id"] not in node_arg_map:
184+
node_arg_map[node['id']] = set()
175185
if node.get('in'):
176186
for nkey in node['in']:
177-
node['in'][nkey] = yaml.load('!ii ' + str(node['in'][nkey]), Loader=wic_loader())
187+
if str(node['in'][nkey]) != "":
188+
node['in'][nkey] = yaml.load('!ii ' + str(node['in'][nkey]), Loader=wic_loader())
189+
node_arg_map[node['id']].add(nkey)
190+
191+
if plugins != []: # use the look up logic similar to WFB
192+
for edg in inp_restrict['links']:
193+
# links = edge. nodes and edges is the correct terminology!
194+
src_id = edg['sourceId']
195+
tgt_id = edg['targetId']
196+
src_node = next((node for node in inp_restrict['nodes'] if node['id'] == src_id), None)
197+
tgt_node = next((node for node in inp_restrict['nodes'] if node['id'] == tgt_id), None)
198+
assert src_node, f'output(s) of source node of edge{edg} must exist!'
199+
assert tgt_node, f'input(s) of target node of edge{edg} must exist!'
200+
if src_id not in node_arg_map:
201+
node_arg_map[src_id] = set()
202+
203+
if tgt_id not in node_arg_map:
204+
node_arg_map[tgt_id] = set()
205+
206+
src_node_ui_config = plugin_config_map.get(src_node['pluginId'], None)
207+
tgt_node_ui_config = plugin_config_map.get(tgt_node['pluginId'], None)
208+
if src_node_ui_config and tgt_node_ui_config:
209+
inlet_index = edg['inletIndex']
210+
outlet_index = edg['outletIndex']
211+
212+
src_node_out_arg = src_node_ui_config['outputs'][outlet_index]["name"]
213+
tgt_node_in_arg = tgt_node_ui_config['inputs'][inlet_index]["name"]
214+
215+
if tgt_node.get('in'):
216+
source_output = src_node['out'][0][src_node_out_arg]
217+
if isinstance(source_output, dict) and 'wic_anchor' in source_output:
218+
source_output = source_output["wic_anchor"]
219+
tgt_node['in'][tgt_node_in_arg] = yaml.load('!* ' + str(source_output), Loader=wic_loader())
220+
node_arg_map[tgt_id].add(tgt_node_in_arg)
221+
222+
for node in inp_restrict['nodes']:
223+
output_dict = node['settings'].get('outputs', {})
224+
for key in output_dict:
225+
if str(output_dict[key]) != "":
226+
node['in'][key] = yaml.load('!ii ' + str(output_dict[key]), Loader=wic_loader())
227+
node_arg_map[node['id']].add(key)
228+
node.pop('settings', None)
178229

179-
# After outs are set
180-
for edg in inp_restrict['links']:
181-
# links = edge. nodes and edges is the correct terminology!
182-
src_id = edg['sourceId']
183-
tgt_id = edg['targetId']
184-
src_node = next((node for node in inp_restrict['nodes'] if node['id'] == src_id), None)
185-
tgt_node = next((node for node in inp_restrict['nodes'] if node['id'] == tgt_id), None)
186-
assert src_node, f'output(s) of source node of edge{edg} must exist!'
187-
assert tgt_node, f'input(s) of target node of edge{edg} must exist!'
188-
# flattened list of keys
189-
if src_node.get('out') and tgt_node.get('in'):
190-
src_out_keys = [sk for sout in src_node['out'] for sk in sout.keys()]
191-
tgt_in_keys = tgt_node['in'].keys()
192-
# we match the source output tag type to target input tag type
193-
# and connect them through '!* ' for input, all outputs are '!& ' before this
194-
for sk in src_out_keys:
195-
# It maybe possible that (explicit) outputs of src nodes might not have corresponding
196-
# (explicit) inputs in target node
197-
if tgt_node['in'].get(sk):
198-
tgt_node['in'][sk] = yaml.load('!* ' + tgt_node['in'][sk], Loader=wic_loader())
199-
# the inputs which aren't dependent on previous/other steps
200-
# they are by default inline input
201-
diff_keys = set(tgt_in_keys) - set(src_out_keys)
202-
for dfk in diff_keys:
203-
tgt_node['in'][dfk] = yaml.load('!ii ' + str(tgt_node['in'][dfk]), Loader=wic_loader())
230+
if "in" in node:
231+
unprocessed_args = set(node['in'].keys())
232+
if node['id'] in node_arg_map:
233+
unprocessed_args = unprocessed_args.difference(node_arg_map[node['id']])
234+
for arg in unprocessed_args:
235+
node['in'][arg] = yaml.load('!ii ' + str(node['in'][arg]), Loader=wic_loader())
236+
else: # No plugins, use the old logic
237+
# this logic is most likely not correct and need to be scrubbed
238+
# along with updating the non_wfb dummy tests
239+
for node in inp_restrict['nodes']:
240+
node.pop('settings', None)
241+
242+
for edg in inp_restrict['links']:
243+
# links = edge. nodes and edges is the correct terminology!
244+
src_id = edg['sourceId']
245+
tgt_id = edg['targetId']
246+
src_node = next((node for node in inp_restrict['nodes'] if node['id'] == src_id), None)
247+
tgt_node = next((node for node in inp_restrict['nodes'] if node['id'] == tgt_id), None)
248+
assert src_node, f'output(s) of source node of edge{edg} must exist!'
249+
assert tgt_node, f'input(s) of target node of edge{edg} must exist!'
250+
# flattened list of keys
251+
if src_node.get('out') and tgt_node.get('in'):
252+
src_out_keys = [sk for sout in src_node['out'] for sk in sout.keys()]
253+
tgt_in_keys = tgt_node['in'].keys()
254+
# we match the source output tag type to target input tag type
255+
# and connect them through '!* ' for input, all outputs are '!& ' before this
256+
for sk in src_out_keys:
257+
# It maybe possible that (explicit) outputs of src nodes might not have corresponding
258+
# (explicit) inputs in target node
259+
if tgt_node['in'].get(sk):
260+
# if the output is a dict, it is a wic_anchor, so we need to get the anchor
261+
# and use that as the input
262+
src_node_out_arg = src_node['out'][0][sk]
263+
source_output = src_node['out'][0][sk]
264+
if isinstance(source_output, dict) and 'wic_anchor' in source_output:
265+
source_output = source_output["wic_anchor"]
266+
tgt_node['in'][sk] = yaml.load('!* ' + str(source_output), Loader=wic_loader())
267+
# the inputs which aren't dependent on previous/other steps
268+
# they are by default inline input
269+
diff_keys = set(tgt_in_keys) - set(src_out_keys)
270+
for dfk in diff_keys:
271+
tgt_node['in'][dfk] = yaml.load('!ii ' + str(tgt_node['in'][dfk]), Loader=wic_loader())
204272

205273
workflow_temp: Cwl = {}
206274
if inp_restrict["links"] != []:

src/sophios/api/utils/ict/ict_spec/io/objects.py

Lines changed: 26 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from typing import Optional, Union, Any
55

66
from pydantic import BaseModel, Field
7+
from sophios.api.utils.wfb_util import is_directory
78

89

910
CWL_IO_DICT: dict[str, str] = {
@@ -116,27 +117,35 @@ def _output_to_cwl(self, inputs: Any) -> dict:
116117
"""Convert outputs to CWL."""
117118
if self.io_type == "path":
118119
if self.name in inputs:
119-
if (
120-
not isinstance(self.io_format, list)
121-
and self.io_format["term"].lower()
122-
== "directory" # pylint: disable=unsubscriptable-object
123-
):
124-
cwl_type = "Directory"
125-
elif (
126-
not isinstance(self.io_format, list)
127-
and self.io_format["term"].lower()
128-
== "file" # pylint: disable=unsubscriptable-object
129-
):
130-
cwl_type = "File"
131-
elif (
132-
isinstance(self.io_format, list)
133-
and len(self.io_format) == 1
134-
and self.io_format[0].lower() == 'directory'
135-
):
120+
if is_directory(dict(self)):
136121
cwl_type = "Directory"
137122
else:
138123
cwl_type = "File"
139124

125+
# the logic here is probably wrong
126+
# let's not go here until we have a better idea of io_format in ICT Spec
127+
128+
# if (
129+
# not isinstance(self.io_format, list)
130+
# and self.io_format["term"].lower()
131+
# == "directory" # pylint: disable=unsubscriptable-object
132+
# ):
133+
# cwl_type = "Directory"
134+
# elif (
135+
# not isinstance(self.io_format, list)
136+
# and self.io_format["term"].lower()
137+
# == "file" # pylint: disable=unsubscriptable-object
138+
# ):
139+
# cwl_type = "File"
140+
# elif (
141+
# isinstance(self.io_format, list)
142+
# and len(self.io_format) == 1
143+
# and self.io_format[0].lower() == 'directory'
144+
# ):
145+
# cwl_type = "Directory"
146+
# else:
147+
# cwl_type = "File"
148+
140149
cwl_dict_ = {
141150
"outputBinding": {"glob": f"$(inputs.{self.name}.basename)"},
142151
"type": cwl_type,

src/sophios/api/utils/ict/ict_spec/tools/cwl_ict.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def clt_dict(ict_: "ICT", network_access: bool) -> dict:
4343
for io in ict_.outputs
4444
},
4545
"requirements": requirements(ict_, network_access),
46-
"baseCommand": ict_.entrypoint,
46+
"baseCommand": [],
4747
"label": ict_.title,
4848
"doc": str(ict_.documentation),
4949
}

src/sophios/api/utils/input_object_schema.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,9 @@
4545
"required": [
4646
"id",
4747
"sourceId",
48-
"targetId"
48+
"targetId",
49+
"inletIndex",
50+
"outletIndex"
4951
],
5052
"type": "object"
5153
},

src/sophios/api/utils/wfb_util.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
def is_directory(input_dict: dict) -> bool:
2+
"""Check if the given input dictionary represents a directory.
3+
4+
Args:
5+
input_dict (dict): The input dictionary containing type and name.
6+
7+
Returns:
8+
bool: True if the input represents a directory, False otherwise.
9+
"""
10+
11+
is_dir: bool = input_dict.get("type", "") == "directory" \
12+
or input_dict.get("type", "") == "file" \
13+
or input_dict.get("type", "") == "path" \
14+
or input_dict.get("type", "") == "collection" \
15+
or input_dict.get("type", "") == "csvCollection" \
16+
or input_dict.get("name", "").lower() == "file" \
17+
or input_dict.get("name", "").lower().endswith("path") \
18+
or input_dict.get("name", "").lower().endswith("dir")
19+
20+
return is_dir
21+
22+
23+
def get_node_config(plugin: dict) -> dict:
24+
"""Get the UI configuration for a specific plugin.
25+
26+
Args:
27+
plugin (dict): The plugin dictionary containing UI and inputs.
28+
29+
Returns:
30+
dict: A dictionary containing UI inputs, non-UI inputs, and outputs.
31+
"""
32+
uis = plugin.get("ui", [])
33+
plugin_inputs = plugin.get("inputs", [])
34+
35+
# split inputs into UI (form) and non-UI (circle inlets)
36+
non_ui_inputs = [] # circle inlets on the left side of the node
37+
ui_inputs = [] # UI inputs such as text fields, checkboxes, etc.
38+
39+
for i in range(len(plugin_inputs) - 1, -1, -1):
40+
input = plugin_inputs[i]
41+
42+
# find the UI element that corresponds to this input
43+
ui_input = next(
44+
(x for x in uis if "key" in x and x["key"] == "inputs." + input["name"]),
45+
None,
46+
)
47+
is_dir = is_directory(input)
48+
49+
# if input is a directory - move it to the non-UI section
50+
if is_dir:
51+
non_ui_inputs.append(input)
52+
53+
# in some cases UI is missing for the input, so we need to create it
54+
# but only if it's not a directory
55+
if not ui_input and not is_dir:
56+
calculated_ui_input = {
57+
"key": "inputs." + input["name"],
58+
"type": input["type"],
59+
"title": input["name"],
60+
"required": input["required"],
61+
"format": input["format"],
62+
}
63+
64+
ui_inputs.append(calculated_ui_input)
65+
66+
if ui_input and not is_dir:
67+
ui_input["required"] = input["required"]
68+
ui_input["format"] = input["format"]
69+
ui_inputs.append(ui_input)
70+
71+
outputs = plugin.get("outputs", [])
72+
73+
# if output has UI - move it to the UI section
74+
# this is mostly for internal nodes such as Input Data Directory
75+
for output in outputs:
76+
ui_output = next(
77+
(x for x in uis if "key" in x and x["key"] == "outputs." + output["name"]),
78+
None,
79+
)
80+
if ui_output:
81+
ui_inputs.append(ui_output)
82+
83+
result = {"ui": ui_inputs, "inputs": non_ui_inputs, "outputs": outputs}
84+
return result

src/sophios/utils_yaml.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,10 @@ def inlineinput_constructor(loader: yaml.SafeLoader, node: yaml.nodes.Node) -> D
2525
if isinstance(node, yaml.nodes.ScalarNode):
2626
try:
2727
# loader.construct_scalar always returns a string, whereas
28-
val = yaml.safe_load(node.value)
28+
if node.value == "":
29+
val = ""
30+
else:
31+
val = yaml.safe_load(node.value)
2932
# yaml.safe_load returns the correct primitive types
3033
except Exception:
3134
# but fallback to a string if it is not actually a primitive type.

tests/data/ict_data/czi_extract/czi_extract_clt.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"baseCommand": "[python3, main.py]",
2+
"baseCommand": [],
33
"class": "CommandLineTool",
44
"cwlVersion": "v1.2",
55
"doc": "None",
@@ -23,7 +23,7 @@
2323
"outputBinding": {
2424
"glob": "$(inputs.outDir.basename)"
2525
},
26-
"type": "File"
26+
"type": "Directory"
2727
}
2828
},
2929
"requirements": {

tests/data/ict_data/label_to_vector/label_to_vector_clt.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"baseCommand": "[python3, main.py]",
2+
"baseCommand": [],
33
"class": "CommandLineTool",
44
"cwlVersion": "v1.2",
55
"doc": "None",
@@ -29,7 +29,7 @@
2929
"outputBinding": {
3030
"glob": "$(inputs.outDir.basename)"
3131
},
32-
"type": "File"
32+
"type": "Directory"
3333
}
3434
},
3535
"requirements": {

0 commit comments

Comments
 (0)