common-workflow-language
diff --git a/Makefile b/Makefile
Lines changed: 3 additions & 1 deletion
diff --git a/cwl_utils/cwl_v1_0_expression_refactor.py b/cwl_utils/cwl_v1_0_expression_refactor.py
Lines changed: 1 addition & 1 deletion
diff --git a/cwl_utils/cwl_v1_1_expression_refactor.py b/cwl_utils/cwl_v1_1_expression_refactor.py
Lines changed: 1 addition & 1 deletion
diff --git a/cwl_utils/cwl_v1_2_expression_refactor.py b/cwl_utils/cwl_v1_2_expression_refactor.py
Lines changed: 13 additions & 9 deletions
diff --git a/cwl_utils/parser/cwl_v1_0_utils.py b/cwl_utils/parser/cwl_v1_0_utils.py
Lines changed: 156 additions & 30 deletions
@@ -26,7 +26,9 @@ EXTRAS=
 
 # `SHELL=bash` doesn't work for some, so don't use BASH-isms like
 # `[[` conditional expressions.
-PYSOURCES=$(filter-out $(MODULE)/parser/cwl_v%,$(shell find $(MODULE) -name "*.py")) $(wildcard tests/*.py) create_cwl_from_objects.py load_cwl_by_path.py setup.py
+PYSOURCES=$(filter-out $(MODULE)/parser/cwl_v%,$(shell find $(MODULE) -name "*.py")) \
+	  $(wildcard tests/*.py) create_cwl_from_objects.py load_cwl_by_path.py \
+	  setup.py ${MODULE}/parser/cwl_v1_?_utils.py
 DEVPKGS=diff_cover black pylint pep257 pydocstyle flake8 tox tox-pyenv \
 	isort wheel autoflake flake8-bugbear pyupgrade bandit \
 	-rtest-requirements.txt -rmypy-requirements.txt
 
@@ -1939,7 +1939,7 @@ def replace_step_valueFrom_expr_with_etool(
     step_inp: cwl.WorkflowStepInput,
     original_process: Union[cwl.CommandLineTool, cwl.ExpressionTool],
     original_step_ins: List[cwl.WorkflowStepInput],
-    source: Union[str, List[str]],
+    source: Optional[Union[str, List[str]]],
     replace_etool: bool,
     source_type: Optional[Union[cwl.InputParameter, List[cwl.InputParameter]]] = None,
 ) -> None:
 
@@ -1931,7 +1931,7 @@ def replace_step_valueFrom_expr_with_etool(
     step_inp: cwl.WorkflowStepInput,
     original_process: Union[cwl.CommandLineTool, cwl.ExpressionTool],
     original_step_ins: List[cwl.WorkflowStepInput],
-    source: Union[str, List[str]],
+    source: Optional[Union[str, List[str]]],
     replace_etool: bool,
     source_type: Optional[
         Union[cwl.WorkflowInputParameter, List[cwl.WorkflowInputParameter]]
 
@@ -702,18 +702,22 @@ def process_workflow_inputs_and_outputs(
                     target_type.name = None
                 target = cwl.WorkflowInputParameter(id=None, type=target_type)
                 if not isinstance(param2.outputSource, list):
-                    sources: Union[List[str], str] = param2.outputSource.split("#")[-1]
+                    sources = param2.outputSource.split("#")[-1]
                 else:
                     sources = [s.split("#")[-1] for s in param2.outputSource]
                 source_type_items = utils.type_for_source(workflow, sources)
-                if "null" not in source_type_items:
-                    if isinstance(source_type_items, list):
+                if isinstance(source_type_items, cwl.ArraySchema):
+                    if isinstance(source_type_items.items, list):
+                        if "null" not in source_type_items.items:
+                            source_type_items.items.append("null")
+                    elif source_type_items.items != "null":
+                        source_type_items.items = ["null", source_type_items.items]
+                elif isinstance(source_type_items, list):
+                    if "null" not in source_type_items:
                         source_type_items.append("null")
-                    else:
-                        source_type_items = ["null", source_type_items]
-                source_type = cwl.CommandInputParameter(
-                    type=cwl.ArraySchema(type="array", items=source_type_items)
-                )
+                elif source_type_items != "null":
+                    source_type_items = ["null", source_type_items]
+                source_type = cwl.CommandInputParameter(type=source_type_items)
                 replace_expr_with_etool(
                     expression,
                     etool_id,
@@ -2030,7 +2034,7 @@ def replace_step_valueFrom_expr_with_etool(
     step_inp: cwl.WorkflowStepInput,
     original_process: Union[cwl.CommandLineTool, cwl.ExpressionTool],
     original_step_ins: List[cwl.WorkflowStepInput],
-    source: Union[str, List[str]],
+    source: Optional[Union[str, List[str]]],
     replace_etool: bool,
     source_type: Optional[
         Union[cwl.WorkflowInputParameter, List[cwl.WorkflowInputParameter]]
 
@@ -1,18 +1,43 @@
 # SPDX-License-Identifier: Apache-2.0
 import hashlib
-from typing import Any, IO, List, Optional, Union
+from typing import Any, IO, List, MutableSequence, Optional, Tuple, Union, cast
 
 from ruamel import yaml
 from schema_salad.exceptions import ValidationException
 from schema_salad.utils import json_dumps
 
+import cwl_utils.parser
 import cwl_utils.parser.cwl_v1_0 as cwl
+import cwl_utils.parser.utils
 from cwl_utils.errors import WorkflowException
 
-
 CONTENT_LIMIT: int = 64 * 1024
 
 
+def _compare_type(type1: Any, type2: Any) -> bool:  # structural type equality
+    if isinstance(type1, cwl.ArraySchema) and isinstance(type2, cwl.ArraySchema):
+        return _compare_type(type1.items, type2.items)  # arrays: compare item types
+    elif isinstance(type1, cwl.RecordSchema) and isinstance(type2, cwl.RecordSchema):
+        fields1 = {
+            cwl.shortname(field.name): field.type for field in (type1.fields or {})
+        }
+        fields2 = {
+            cwl.shortname(field.name): field.type for field in (type2.fields or {})
+        }
+        if fields1.keys() != fields2.keys():  # records: key sets must be equal
+            return False
+        return all((_compare_type(fields1[k], fields2[k]) for k in fields1.keys()))
+    elif isinstance(type1, MutableSequence) and isinstance(type2, MutableSequence):
+        if len(type1) != len(type2):
+            return False
+        for t1 in type1:  # order-insensitive: each member needs a match in type2
+            if not any((_compare_type(t1, t2) for t2 in type2)):
+                return False
+        return True
+    else:
+        return bool(type1 == type2)  # anything else: plain equality
+
+
 def content_limit_respected_read_bytes(f: IO[bytes]) -> bytes:
     """
     Read file content up to 64 kB as a byte array.
@@ -32,49 +57,102 @@ def content_limit_respected_read(f: IO[bytes]) -> str:
 
 
 def convert_stdstreams_to_files(clt: cwl.CommandLineTool) -> None:
+    """Convert stdout and stderr type shortcuts to files."""
     for out in clt.outputs:
-        if out.type == 'stdout':
+        if out.type == "stdout":
             if out.outputBinding is not None:
                 raise ValidationException(
-                    "Not allowed to specify outputBinding when using stdout shortcut.")
+                    "Not allowed to specify outputBinding when using stdout shortcut."
+                )
             if clt.stdout is None:
-                clt.stdout = str(hashlib.sha1(json_dumps(  # nosec
-                    clt.save(), sort_keys=True).encode('utf-8')).hexdigest())
-            out.type = 'File'
+                clt.stdout = str(  # no name given: use a hash of the saved tool
+                    hashlib.sha1(  # nosec
+                        json_dumps(clt.save(), sort_keys=True).encode("utf-8")
+                    ).hexdigest()
+                )
+            out.type = "File"  # the shortcut becomes a File globbed by name
             out.outputBinding = cwl.CommandOutputBinding(glob=clt.stdout)
-        elif out.type == 'stderr':
+        elif out.type == "stderr":
             if out.outputBinding is not None:
                 raise ValidationException(
-                    "Not allowed to specify outputBinding when using stderr shortcut.")
+                    "Not allowed to specify outputBinding when using stderr shortcut."
+                )
             if clt.stderr is None:
-                clt.stderr = str(hashlib.sha1(json_dumps(  # nosec
-                    clt.save(), sort_keys=True).encode('utf-8')).hexdigest())
-            out.type = 'File'
+                clt.stderr = str(  # no name given: use a hash of the saved tool
+                    hashlib.sha1(  # nosec
+                        json_dumps(clt.save(), sort_keys=True).encode("utf-8")
+                    ).hexdigest()
+                )
+            out.type = "File"  # the shortcut becomes a File globbed by name
             out.outputBinding = cwl.CommandOutputBinding(glob=clt.stderr)
 
 
+def merge_flatten_type(src: Any) -> Any:
+    """Return the merge flattened type of the source type."""
+    if isinstance(src, MutableSequence):
+        return [merge_flatten_type(t) for t in src]  # apply to each member type
+    if isinstance(src, cwl.ArraySchema):
+        return src  # already an array: returned unchanged
+    return cwl.ArraySchema(type="array", items=src)  # otherwise wrap in an array
+
+
 def type_for_source(
     process: Union[cwl.CommandLineTool, cwl.Workflow, cwl.ExpressionTool],
     sourcenames: Union[str, List[str]],
     parent: Optional[cwl.Workflow] = None,
-) -> Union[List[Any], Any]:
+    linkMerge: Optional[str] = None,
+) -> Any:
     """Determine the type for the given sourcenames."""
-    params = param_for_source_id(process, sourcenames, parent)
+    scatter_context: List[Optional[Tuple[int, str]]] = []  # filled by lookup
+    params = param_for_source_id(process, sourcenames, parent, scatter_context)
     if not isinstance(params, list):
-        return params.type
-    new_type: List[Any] = []
-    for p in params:
-        if isinstance(p, str) and p not in new_type:
-            new_type.append(p)
-        elif hasattr(p, "type") and p.type not in new_type:
-            new_type.append(p.type)
-    return new_type
+        new_type = params.type
+        if scatter_context[0] is not None:  # source is a scattered step output
+            if scatter_context[0][1] == "nested_crossproduct":
+                for _ in range(scatter_context[0][0]):  # one level per scatter entry
+                    new_type = cwl.ArraySchema(items=new_type, type="array")
+            else:  # other scatter methods: a single array level
+                new_type = cwl.ArraySchema(items=new_type, type="array")
+        if linkMerge == "merge_nested":
+            new_type = cwl.ArraySchema(items=new_type, type="array")
+        elif linkMerge == "merge_flattened":
+            new_type = merge_flatten_type(new_type)
+        return new_type
+    new_type = []
+    for p, sc in zip(params, scatter_context):  # sc: scatter info for p, or None
+        if isinstance(p, str) and not any((_compare_type(t, p) for t in new_type)):
+            cur_type = p
+        elif hasattr(p, "type") and not any(
+            (_compare_type(t, p.type) for t in new_type)
+        ):
+            cur_type = p.type
+        else:  # equal to an already collected type, or p carries no type
+            cur_type = None
+        if cur_type is not None:
+            if sc is not None:
+                if sc[1] == "nested_crossproduct":
+                    for _ in range(sc[0]):  # one level per scatter entry
+                        cur_type = cwl.ArraySchema(items=cur_type, type="array")
+                else:  # other scatter methods: a single array level
+                    cur_type = cwl.ArraySchema(items=cur_type, type="array")
+            new_type.append(cur_type)
+    if len(new_type) == 1:  # unwrap a single-entry list
+        new_type = new_type[0]
+    if linkMerge == "merge_nested":
+        return cwl.ArraySchema(items=new_type, type="array")
+    elif linkMerge == "merge_flattened":
+        return merge_flatten_type(new_type)
+    elif isinstance(sourcenames, List):  # a list of sources yields an array type
+        return cwl.ArraySchema(items=new_type, type="array")
+    else:
+        return new_type
 
 
 def param_for_source_id(
     process: Union[cwl.CommandLineTool, cwl.Workflow, cwl.ExpressionTool],
     sourcenames: Union[str, List[str]],
     parent: Optional[cwl.Workflow] = None,
+    scatter_context: Optional[List[Optional[Tuple[int, str]]]] = None,
 ) -> Union[List[cwl.InputParameter], cwl.InputParameter]:
     """Find the process input parameter that matches one of the given sourcenames."""
     if isinstance(sourcenames, str):
@@ -85,6 +163,8 @@ def param_for_source_id(
             for param in process.inputs:
                 if param.id.split("#")[-1] == sourcename.split("#")[-1]:
                     params.append(param)
+                    if scatter_context is not None:
+                        scatter_context.append(None)
         targets = [process]
         if parent:
             targets.append(parent)
@@ -93,26 +173,72 @@ def param_for_source_id(
                 for inp in target.inputs:
                     if inp.id.split("#")[-1] == sourcename.split("#")[-1]:
                         params.append(inp)
+                        if scatter_context is not None:
+                            scatter_context.append(None)
                 for step in target.steps:
-                    if sourcename.split("#")[-1].split("/")[0] == step.id.split("#")[-1] and step.out:
+                    if (
+                        "/".join(sourcename.split("#")[-1].split("/")[:-1])
+                        == step.id.split("#")[-1]
+                        and step.out
+                    ):
                         for outp in step.out:
                             outp_id = outp if isinstance(outp, str) else outp.id
-                            if outp_id.split("#")[-1].split("/")[-1] == sourcename.split("#")[-1].split("/", 1)[1]:
-                                if step.run and step.run.outputs:
-                                    for output in step.run.outputs:
+                            if (
+                                outp_id.split("#")[-1].split("/")[-1]
+                                == sourcename.split("#")[-1].split("/")[-1]
+                            ):
+                                step_run = step.run
+                                if isinstance(step.run, str):
+                                    step_run = cwl_utils.parser.load_document_by_uri(
+                                        path=target.loadingOptions.fetcher.urljoin(
+                                            base_url=cast(
+                                                str, target.loadingOptions.fileuri
+                                            ),
+                                            url=step.run,
+                                        ),
+                                        loadingOptions=target.loadingOptions,
+                                    )
+                                    cwl_utils.parser.utils.convert_stdstreams_to_files(
+                                        step_run
+                                    )
+                                if step_run and step_run.outputs:
+                                    for output in step_run.outputs:
                                         if (
-                                            output.id.split("#")[-1].split('/')[-1]
-                                            == sourcename.split('#')[-1].split("/", 1)[1]
+                                            output.id.split("#")[-1].split("/")[-1]
+                                            == sourcename.split("#")[-1].split("/")[-1]
                                         ):
                                             params.append(output)
+                                            if scatter_context is not None:
+                                                if isinstance(step.scatter, str):
+                                                    scatter_context.append(
+                                                        (
+                                                            1,
+                                                            step.scatterMethod
+                                                            or "dotproduct",
+                                                        )
+                                                    )
+                                                elif isinstance(
+                                                    step.scatter, MutableSequence
+                                                ):
+                                                    scatter_context.append(
+                                                        (
+                                                            len(step.scatter),
+                                                            step.scatterMethod
+                                                            or "dotproduct",
+                                                        )
+                                                    )
+                                                else:
+                                                    scatter_context.append(None)
     if len(params) == 1:
         return params[0]
     elif len(params) > 1:
         return params
     raise WorkflowException(
-        "param {} not found in {}\n or\n {}.".format(
+        "param {} not found in {}\n{}.".format(
             sourcename,
             yaml.main.round_trip_dump(cwl.save(process)),
-            yaml.main.round_trip_dump(cwl.save(parent)),
+            " or\n {}".format(yaml.main.round_trip_dump(cwl.save(parent)))
+            if parent is not None
+            else "",
         )
     )