From 7ff3e04fa2093af77f040c46a358cee92bac43a3 Mon Sep 17 00:00:00 2001 From: "Michael R. Crusoe" Date: Wed, 22 Jan 2025 14:09:57 +0100 Subject: [PATCH 1/2] Various cleanups to make the docs better. --- cwltool/checker.py | 74 +++++++++++++++++++++--------------- cwltool/command_line_tool.py | 2 +- cwltool/errors.py | 7 ++++ cwltool/main.py | 2 +- cwltool/mutation.py | 24 +++++++----- cwltool/pathmapper.py | 50 +++++++++++------------- cwltool/process.py | 4 +- cwltool/subgraph.py | 41 +++++++++++--------- cwltool/utils.py | 3 -- cwltool/validate_js.py | 11 ++++-- docs/conf.py | 3 +- tests/test_mpi.py | 2 +- tests/util.py | 2 +- 13 files changed, 125 insertions(+), 100 deletions(-) diff --git a/cwltool/checker.py b/cwltool/checker.py index 17cba77ba..a3b8ba5df 100644 --- a/cwltool/checker.py +++ b/cwltool/checker.py @@ -1,8 +1,7 @@ """Static checking of CWL workflow connectivity.""" -from collections import namedtuple from collections.abc import Iterator, MutableMapping, MutableSequence, Sized -from typing import Any, Literal, Optional, Union, cast +from typing import Any, Literal, NamedTuple, Optional, Union, cast from schema_salad.exceptions import ValidationException from schema_salad.sourceline import SourceLine, bullets, strip_dup_lineno @@ -184,8 +183,8 @@ def static_checker( for param in workflow_inputs + step_outputs: src_dict[cast(str, param["id"])] = param - step_inputs_val = check_all_types(src_dict, step_inputs, "source", param_to_step) - workflow_outputs_val = check_all_types( + step_inputs_val = _check_all_types(src_dict, step_inputs, "source", param_to_step) + workflow_outputs_val = _check_all_types( src_dict, workflow_outputs, "outputSource", param_to_step ) @@ -199,27 +198,34 @@ def static_checker( sink = warning.sink linkMerge = warning.linkMerge sinksf = sorted( - p["pattern"] for p in sink.get("secondaryFiles", []) if p.get("required", True) + cast(str, p["pattern"]) + for p in cast(MutableSequence[CWLObjectType], sink.get("secondaryFiles", [])) + if p.get("required", True) + ) + srcsf = sorted( + cast(str, p["pattern"]) + for p in cast(MutableSequence[CWLObjectType], src.get("secondaryFiles", [])) ) - srcsf = sorted(p["pattern"] for p in src.get("secondaryFiles", [])) # Every secondaryFile required by the sink, should be declared # by the source missing = missing_subset(srcsf, sinksf) + src_name = shortname(cast(str, src["id"])) + sink_id = cast(str, sink["id"]) + sink_name = shortname(sink_id) if missing: msg1 = "Parameter '{}' requires secondaryFiles {} but".format( - shortname(sink["id"]), + sink_name, missing, ) msg3 = SourceLine(src, "id").makeError( - "source '%s' does not provide those secondaryFiles." % (shortname(src["id"])) + "source '%s' does not provide those secondaryFiles." % (src_name) ) msg4 = SourceLine(src.get("_tool_entry", src), "secondaryFiles").makeError( "To resolve, add missing secondaryFiles patterns to definition of '%s' or" - % (shortname(src["id"])) + % (src_name) ) msg5 = SourceLine(sink.get("_tool_entry", sink), "secondaryFiles").makeError( - "mark missing secondaryFiles in definition of '%s' as optional." - % shortname(sink["id"]) + "mark missing secondaryFiles in definition of '%s' as optional." % (sink_name) ) msg = SourceLine(sink).makeError( "{}\n{}".format(msg1, bullets([msg3, msg4, msg5], " ")) @@ -229,13 +235,13 @@ def static_checker( msg = SourceLine(sink, "type").makeError( "'%s' is not an input parameter of %s, expected %s" % ( - shortname(sink["id"]), - param_to_step[sink["id"]]["run"], + sink_name, + param_to_step[sink_id]["run"], ", ".join( shortname(cast(str, s["id"])) for s in cast( list[dict[str, Union[str, bool]]], - param_to_step[sink["id"]]["inputs"], + param_to_step[sink_id]["inputs"], ) if not s.get("not_connected") ), @@ -247,12 +253,11 @@ def static_checker( msg = ( SourceLine(src, "type").makeError( "Source '%s' of type %s may be incompatible" - % (shortname(src["id"]), json_dumps(src["type"])) + % (src_name, json_dumps(src["type"])) ) + "\n" + SourceLine(sink, "type").makeError( - " with sink '%s' of type %s" - % (shortname(sink["id"]), json_dumps(sink["type"])) + " with sink '%s' of type %s" % (sink_name, json_dumps(sink["type"])) ) ) if linkMerge is not None: @@ -274,12 +279,12 @@ def static_checker( msg = ( SourceLine(src, "type").makeError( "Source '%s' of type %s is incompatible" - % (shortname(src["id"]), json_dumps(src["type"])) + % (shortname(cast(str, src["id"])), json_dumps(src["type"])) ) + "\n" + SourceLine(sink, "type").makeError( " with sink '{}' of type {}".format( - shortname(sink["id"]), json_dumps(sink["type"]) + shortname(cast(str, sink["id"])), json_dumps(sink["type"]) ) ) ) @@ -291,16 +296,17 @@ def static_checker( exception_msgs.append(msg) for sink in step_inputs: + sink_type = cast(Union[str, list[str], list[CWLObjectType], CWLObjectType], sink["type"]) if ( - "null" != sink["type"] - and "null" not in sink["type"] + "null" != sink_type + and "null" not in sink_type and "source" not in sink and "default" not in sink and "valueFrom" not in sink ): msg = SourceLine(sink).makeError( "Required parameter '%s' does not have source, default, or valueFrom expression" - % shortname(sink["id"]) + % shortname(cast(str, sink["id"])) ) exception_msgs.append(msg) @@ -313,15 +319,21 @@ def static_checker( raise ValidationException(all_exception_msg) -SrcSink = namedtuple("SrcSink", ["src", "sink", "linkMerge", "message"]) +class _SrcSink(NamedTuple): + """An error or warning message about a connection between two points of the workflow graph.""" + + src: CWLObjectType + sink: CWLObjectType + linkMerge: Optional[str] + message: Optional[str] -def check_all_types( +def _check_all_types( src_dict: dict[str, CWLObjectType], sinks: MutableSequence[CWLObjectType], sourceField: Union[Literal["source"], Literal["outputSource"]], param_to_step: dict[str, CWLObjectType], -) -> dict[str, list[SrcSink]]: +) -> dict[str, list[_SrcSink]]: """ Given a list of sinks, check if their types match with the types of their sources. @@ -329,7 +341,7 @@ def check_all_types( (from :py:func:`check_types`) :raises ValidationException: if a sourceField is missing """ - validation: dict[str, list[SrcSink]] = {"warning": [], "exception": []} + validation: dict[str, list[_SrcSink]] = {"warning": [], "exception": []} for sink in sinks: if sourceField in sink: valueFrom = cast(Optional[str], sink.get("valueFrom")) @@ -356,7 +368,7 @@ def check_all_types( srcs_of_sink += [src_dict[parm_id]] if is_conditional_step(param_to_step, parm_id) and pickValue is None: validation["warning"].append( - SrcSink( + _SrcSink( src_dict[parm_id], sink, linkMerge, @@ -380,7 +392,7 @@ def check_all_types( if pickValue is not None: validation["warning"].append( - SrcSink( + _SrcSink( src_dict[parm_id], sink, linkMerge, @@ -399,7 +411,7 @@ def check_all_types( Union[list[str], CWLObjectType], snk_typ ): # Given our type names this works even if not a list validation["warning"].append( - SrcSink( + _SrcSink( src_dict[parm_id], sink, linkMerge, @@ -419,11 +431,11 @@ def check_all_types( check_result = check_types(src, sink, linkMerge, valueFrom) if check_result == "warning": validation["warning"].append( - SrcSink(src, sink, linkMerge, message=extra_message) + _SrcSink(src, sink, linkMerge, message=extra_message) ) elif check_result == "exception": validation["exception"].append( - SrcSink(src, sink, linkMerge, message=extra_message) + _SrcSink(src, sink, linkMerge, message=extra_message) ) return validation diff --git a/cwltool/command_line_tool.py b/cwltool/command_line_tool.py index 775360016..1fe1a7044 100644 --- a/cwltool/command_line_tool.py +++ b/cwltool/command_line_tool.py @@ -276,7 +276,7 @@ def revmap_file(builder: Builder, outdir: str, f: CWLObjectType) -> Optional[CWL ) revmap_f = builder.pathmapper.reversemap(path) - if revmap_f and not builder.pathmapper.mapper(revmap_f[0]).type.startswith("Writable"): + if revmap_f and not builder.pathmapper.mapper(revmap_f[0]).type.startswith("Writable"): # type: ignore[union-attr] f["location"] = revmap_f[1] elif ( uripath == outdir diff --git a/cwltool/errors.py b/cwltool/errors.py index 045b9b383..2b7e50aed 100644 --- a/cwltool/errors.py +++ b/cwltool/errors.py @@ -11,6 +11,13 @@ from cwl_utils.errors import GraphTargetMissingException as GraphTargetMissingException from cwl_utils.errors import WorkflowException as WorkflowException +__all__ = ( + "GraphTargetMissingException", + "WorkflowException", + "UnsupportedRequirement", + "ArgumentException", +) + class UnsupportedRequirement(WorkflowException): pass diff --git a/cwltool/main.py b/cwltool/main.py index a137d8a4f..c658c3685 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -16,6 +16,7 @@ import warnings from codecs import getwriter from collections.abc import Mapping, MutableMapping, MutableSequence, Sized +from importlib.resources import files from typing import IO, Any, Callable, Optional, Union, cast import argcomplete @@ -96,7 +97,6 @@ CWLOutputType, HasReqsHints, adjustDirObjs, - files, normalizeFilesDirs, processes_to_kill, trim_listing, diff --git a/cwltool/mutation.py b/cwltool/mutation.py index 9f58a86cf..622807ec6 100644 --- a/cwltool/mutation.py +++ b/cwltool/mutation.py @@ -1,10 +1,16 @@ -from collections import namedtuple -from typing import cast +"""Support for InplaceUpdateRequirement.""" + +from typing import NamedTuple, cast from .errors import WorkflowException from .utils import CWLObjectType -MutationState = namedtuple("MutationState", ["generation", "readers", "stepname"]) + +class _MutationState(NamedTuple): + generation: int + readers: list[str] + stepname: str + _generation = "http://commonwl.org/cwltool#generation" @@ -20,11 +26,11 @@ class MutationManager: def __init__(self) -> None: """Initialize.""" - self.generations: dict[str, MutationState] = {} + self.generations: dict[str, _MutationState] = {} def register_reader(self, stepname: str, obj: CWLObjectType) -> None: loc = cast(str, obj["location"]) - current = self.generations.get(loc, MutationState(0, [], "")) + current = self.generations.get(loc, _MutationState(0, [], "")) obj_generation = obj.get(_generation, 0) if obj_generation != current.generation: @@ -40,7 +46,7 @@ def register_reader(self, stepname: str, obj: CWLObjectType) -> None: def release_reader(self, stepname: str, obj: CWLObjectType) -> None: loc = cast(str, obj["location"]) - current = self.generations.get(loc, MutationState(0, [], "")) + current = self.generations.get(loc, _MutationState(0, [], "")) obj_generation = obj.get(_generation, 0) if obj_generation != current.generation: @@ -55,7 +61,7 @@ def release_reader(self, stepname: str, obj: CWLObjectType) -> None: def register_mutation(self, stepname: str, obj: CWLObjectType) -> None: loc = cast(str, obj["location"]) - current = self.generations.get(loc, MutationState(0, [], "")) + current = self.generations.get(loc, _MutationState(0, [], "")) obj_generation = obj.get(_generation, 0) if len(current.readers) > 0: @@ -73,11 +79,11 @@ def register_mutation(self, stepname: str, obj: CWLObjectType) -> None: ) ) - self.generations[loc] = MutationState(current.generation + 1, current.readers, stepname) + self.generations[loc] = _MutationState(current.generation + 1, current.readers, stepname) def set_generation(self, obj: CWLObjectType) -> None: loc = cast(str, obj["location"]) - current = self.generations.get(loc, MutationState(0, [], "")) + current = self.generations.get(loc, _MutationState(0, [], "")) obj[_generation] = current.generation def unset_generation(self, obj: CWLObjectType) -> None: diff --git a/cwltool/pathmapper.py b/cwltool/pathmapper.py index 10cb7a733..774414f0a 100644 --- a/cwltool/pathmapper.py +++ b/cwltool/pathmapper.py @@ -1,11 +1,10 @@ -import collections import logging import os import stat import urllib import uuid from collections.abc import ItemsView, Iterable, Iterator, KeysView -from typing import Optional, cast +from typing import NamedTuple, Optional, cast from mypy_extensions import mypyc_attr from schema_salad.exceptions import ValidationException @@ -16,31 +15,24 @@ from .stdfsaccess import abspath from .utils import CWLObjectType, dedup, downloadHttpFile -MapperEnt = collections.namedtuple("MapperEnt", ["resolved", "target", "type", "staged"]) -""" Mapper entries. -.. py:attribute:: resolved - :type: str +class MapperEnt(NamedTuple): + """Mapper entries.""" - The "real" path on the local file system (after resolving relative paths - and traversing symlinks - -.. py:attribute:: target - :type: str - - The path on the target file system (under stagedir) - -.. py:attribute:: type - :type: str - - The object type. One of "File", "Directory", "CreateFile", "WritableFile", - or "CreateWritableFile". - -.. py:attribute:: staged - :type: bool - - If the File has been staged yet -""" + resolved: str + """ + The "real" path on the local file system (after resolving relative paths + and traversing symlinks + """ + target: str + """The path on the target file system (under stagedir)""" + type: Optional[str] + """ + The object type. One of "File", "Directory", "CreateFile", "WritableFile", + or "CreateWritableFile". + """ + staged: Optional[bool] + """If the File has been staged yet.""" @mypyc_attr(allow_interpreted_subclasses=True) @@ -149,7 +141,7 @@ def visit( ab = abspath(path, basedir) if "contents" in obj and path.startswith("_:"): self._pathmap[path] = MapperEnt( - obj["contents"], + cast(str, obj["contents"]), tgt, "CreateWritableFile" if copy else "CreateFile", staged, @@ -247,8 +239,10 @@ def reversemap( return (k, v[0]) return None - def update(self, key: str, resolved: str, target: str, ctype: str, stage: bool) -> MapperEnt: - """Update an existine entry.""" + def update( + self, key: str, resolved: str, target: str, ctype: Optional[str], stage: Optional[bool] + ) -> MapperEnt: + """Update an existing entry.""" m = MapperEnt(resolved, target, ctype, stage) self._pathmap[key] = m return m diff --git a/cwltool/process.py b/cwltool/process.py index fe5f84764..9f985a5d5 100644 --- a/cwltool/process.py +++ b/cwltool/process.py @@ -14,6 +14,7 @@ import urllib.parse import uuid from collections.abc import Iterable, Iterator, MutableMapping, MutableSequence, Sized +from importlib.resources import files from os import scandir from typing import TYPE_CHECKING, Any, Callable, Optional, Union, cast @@ -54,7 +55,6 @@ aslist, cmp_like_py2, ensure_writable, - files, get_listing, normalizeFilesDirs, random_outdir, @@ -230,7 +230,7 @@ def stage_files( items = pathmapper.items() if not symlink else pathmapper.items_exclude_children() targets: dict[str, MapperEnt] = {} for key, entry in list(items): - if "File" not in entry.type: + if entry.type is None or "File" not in entry.type: continue if entry.target not in targets: targets[entry.target] = entry diff --git a/cwltool/subgraph.py b/cwltool/subgraph.py index 204e987a8..0a0289cc3 100644 --- a/cwltool/subgraph.py +++ b/cwltool/subgraph.py @@ -1,7 +1,6 @@ import urllib -from collections import namedtuple from collections.abc import Mapping, MutableMapping, MutableSequence -from typing import Any, Optional, Union, cast +from typing import Any, NamedTuple, Optional, Union, cast from ruamel.yaml.comments import CommentedMap, CommentedSeq @@ -11,7 +10,13 @@ from .utils import CWLObjectType, aslist from .workflow import Workflow, WorkflowStep -Node = namedtuple("Node", ("up", "down", "type")) + +class _Node(NamedTuple): + up: list[str] + down: list[str] + type: Optional[str] + + UP = "up" DOWN = "down" INPUT = "input" @@ -19,9 +24,9 @@ STEP = "step" -def subgraph_visit( +def _subgraph_visit( current: str, - nodes: MutableMapping[str, Node], + nodes: MutableMapping[str, _Node], visited: set[str], direction: str, ) -> None: @@ -34,10 +39,10 @@ def subgraph_visit( if direction == UP: d = nodes[current].up for c in d: - subgraph_visit(c, nodes, visited, direction) + _subgraph_visit(c, nodes, visited, direction) -def declare_node(nodes: dict[str, Node], nodeid: str, tp: Optional[str]) -> Node: +def _declare_node(nodes: dict[str, _Node], nodeid: str, tp: Optional[str]) -> _Node: """ Record the given nodeid in the graph. @@ -47,9 +52,9 @@ def declare_node(nodes: dict[str, Node], nodeid: str, tp: Optional[str]) -> Node if nodeid in nodes: n = nodes[nodeid] if n.type is None: - nodes[nodeid] = Node(n.up, n.down, tp) + nodes[nodeid] = _Node(n.up, n.down, tp) else: - nodes[nodeid] = Node([], [], tp) + nodes[nodeid] = _Node([], [], tp) return nodes[nodeid] @@ -109,22 +114,22 @@ def get_subgraph( if tool.tool["class"] != "Workflow": raise Exception("Can only extract subgraph from workflow") - nodes: dict[str, Node] = {} + nodes: dict[str, _Node] = {} for inp in tool.tool["inputs"]: - declare_node(nodes, inp["id"], INPUT) + _declare_node(nodes, inp["id"], INPUT) for out in tool.tool["outputs"]: - declare_node(nodes, out["id"], OUTPUT) + _declare_node(nodes, out["id"], OUTPUT) for i in aslist(out.get("outputSource", CommentedSeq)): # source is upstream from output (dependency) nodes[out["id"]].up.append(i) # output is downstream from source - declare_node(nodes, i, None) + _declare_node(nodes, i, None) nodes[i].down.append(out["id"]) for st in tool.tool["steps"]: - step = declare_node(nodes, st["id"], STEP) + step = _declare_node(nodes, st["id"], STEP) for i in st["in"]: if "source" not in i: continue @@ -132,7 +137,7 @@ def get_subgraph( # source is upstream from step (dependency) step.up.append(src) # step is downstream from source - declare_node(nodes, src, None) + _declare_node(nodes, src, None) nodes[src].down.append(st["id"]) for out in st["out"]: if isinstance(out, Mapping) and "id" in out: @@ -140,16 +145,16 @@ def get_subgraph( # output is downstream from step step.down.append(out) # step is upstream from output - declare_node(nodes, out, None) + _declare_node(nodes, out, None) nodes[out].up.append(st["id"]) # Find all the downstream nodes from the starting points visited_down: set[str] = set() for r in roots: if nodes[r].type == OUTPUT: - subgraph_visit(r, nodes, visited_down, UP) + _subgraph_visit(r, nodes, visited_down, UP) else: - subgraph_visit(r, nodes, visited_down, DOWN) + _subgraph_visit(r, nodes, visited_down, DOWN) # Now make sure all the nodes are connected to upstream inputs visited: set[str] = set() diff --git a/cwltool/utils.py b/cwltool/utils.py index e460842a9..2c7f6ba12 100644 --- a/cwltool/utils.py +++ b/cwltool/utils.py @@ -23,7 +23,6 @@ from datetime import datetime from email.utils import parsedate_to_datetime from functools import partial -from importlib.resources import as_file, files from itertools import zip_longest from pathlib import Path, PurePosixPath from tempfile import NamedTemporaryFile @@ -54,8 +53,6 @@ from .stdfsaccess import StdFsAccess from .workflow_job import WorkflowJob -__all__ = ["files", "as_file"] - __random_outdir: Optional[str] = None CONTENT_LIMIT = 64 * 1024 diff --git a/cwltool/validate_js.py b/cwltool/validate_js.py index b43b7ef0d..3a490b68d 100644 --- a/cwltool/validate_js.py +++ b/cwltool/validate_js.py @@ -2,9 +2,9 @@ import itertools import json import logging -from collections import namedtuple from collections.abc import MutableMapping, MutableSequence -from typing import Any, Optional, Union, cast +from importlib.resources import files +from typing import Any, NamedTuple, Optional, Union, cast from cwl_utils.errors import SubstitutionError from cwl_utils.expression import scanner as scan_expression @@ -23,7 +23,6 @@ from .errors import WorkflowException from .loghandler import _logger -from .utils import files def is_expression(tool: Any, schema: Optional[Schema]) -> bool: @@ -110,7 +109,11 @@ def get_expressions( return [] -JSHintJSReturn = namedtuple("JSHintJSReturn", ["errors", "globals"]) +class JSHintJSReturn(NamedTuple): + """List of errors and the final values of the globals from running javascript.""" + + errors: list[str] + globals: list[str] def jshint_js( diff --git a/docs/conf.py b/docs/conf.py index 6e04b5d64..e476f4191 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -51,7 +51,8 @@ intersphinx_mapping = { "python": ("https://docs.python.org/3", None), "schema_salad": ("https://schema-salad.readthedocs.io/en/stable/", None), - "rdflib": ("https://rdflib.readthedocs.io/en/6.2.0/", None), + "rdflib": ("https://rdflib.readthedocs.io/en/stable/", None), + "cwl_utils": ("https://cwl-utils.readthedocs.io/en/stable/", None), # "ruamel.yaml": ("https://yaml.readthedocs.io/en/stable/", None), } diff --git a/tests/test_mpi.py b/tests/test_mpi.py index 92f0e353c..9da32fd05 100644 --- a/tests/test_mpi.py +++ b/tests/test_mpi.py @@ -4,6 +4,7 @@ import os.path import sys from collections.abc import Generator, MutableMapping +from importlib.resources import files from io import StringIO from pathlib import Path from typing import Any, Optional @@ -20,7 +21,6 @@ from cwltool.context import LoadingContext, RuntimeContext from cwltool.main import main from cwltool.mpi import MpiConfig, MPIRequirementName -from cwltool.utils import files from .util import get_data, working_directory diff --git a/tests/util.py b/tests/util.py index 8dd0bf74e..d7624bc5e 100644 --- a/tests/util.py +++ b/tests/util.py @@ -10,6 +10,7 @@ import sys from collections.abc import Generator, Mapping from contextlib import ExitStack +from importlib.resources import as_file, files from pathlib import Path from typing import Any, Optional, Union @@ -18,7 +19,6 @@ from cwltool.env_to_stdout import deserialize_env from cwltool.main import main from cwltool.singularity import is_version_2_6, is_version_3_or_newer -from cwltool.utils import as_file, files def force_default_container(default_container_id: str, _: str) -> str: From 895ce71550f1f2c38e47a81dcffd82288ac587a1 Mon Sep 17 00:00:00 2001 From: "Michael R. Crusoe" Date: Wed, 22 Jan 2025 15:16:06 +0100 Subject: [PATCH 2/2] typo --- cwltool/cwlprov/ro.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cwltool/cwlprov/ro.py b/cwltool/cwlprov/ro.py index f58919a6b..be10d3d64 100644 --- a/cwltool/cwlprov/ro.py +++ b/cwltool/cwlprov/ro.py @@ -668,7 +668,7 @@ def _relativise_files( del structure["path"] if structure.get("class") == "Directory": - # TODO: Generate anonymoys Directory with a "listing" + # TODO: Generate anonymous Directory with a "listing" # pointing to the hashed files del structure["location"]