From 647713b54c394cf7ae633cb33486a1b84864767c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 5 Feb 2025 07:51:11 +0000 Subject: [PATCH 1/5] Bump mypy from 1.14.1 to 1.15.0 Bumps [mypy](https://github.com/python/mypy) from 1.14.1 to 1.15.0. - [Changelog](https://github.com/python/mypy/blob/master/CHANGELOG.md) - [Commits](https://github.com/python/mypy/compare/v1.14.1...v1.15.0) --- updated-dependencies: - dependency-name: mypy dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- mypy-requirements.txt | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mypy-requirements.txt b/mypy-requirements.txt index 760428998..a29e35bb2 100644 --- a/mypy-requirements.txt +++ b/mypy-requirements.txt @@ -1,4 +1,4 @@ -mypy==1.14.1 # also update pyproject.toml +mypy==1.15.0 # also update pyproject.toml ruamel.yaml>=0.16.0,<0.19 cwl-utils>=0.32 cwltest diff --git a/pyproject.toml b/pyproject.toml index b243171fa..cb7d837a7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ requires = [ "setuptools>=45", "setuptools_scm[toml]>=8.0.4,<9", - "mypy==1.14.1", # also update mypy-requirements.txt + "mypy==1.15.0", # also update mypy-requirements.txt "types-requests", "types-psutil", "importlib_resources>=1.4;python_version<'3.9'", From 0624959f100ffa35b39cf5f662faf8e7a556483f Mon Sep 17 00:00:00 2001 From: "Michael R. 
Crusoe" Date: Wed, 5 Feb 2025 09:33:17 +0100 Subject: [PATCH 2/5] setup.py: modernize use of mypycify; switch to skiplist --- setup.py | 111 ++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 68 insertions(+), 43 deletions(-) diff --git a/setup.py b/setup.py index d3fef7b26..181154a0e 100644 --- a/setup.py +++ b/setup.py @@ -1,10 +1,15 @@ #!/usr/bin/env python3 """Setup for the reference implementation of the CWL standards.""" +import glob import os import sys import warnings +from typing import TYPE_CHECKING, Any -from setuptools import setup +from setuptools import Extension, setup + +if TYPE_CHECKING: + from typing_extensions import TypeGuard if os.name == "nt": warnings.warn( @@ -20,6 +25,31 @@ stacklevel=1, ) + +def _is_list_of_setuptools_extension(items: list[Any]) -> "TypeGuard[list[Extension]]": + return all(isinstance(item, Extension) for item in items) + + +def _find_package_data(base: str, globs: list[str], root: str = "cwltool") -> list[str]: + """ + Find all interesting data files, for setup(package_data=). + + Arguments: + base: The directory to search in; returned paths are relative to root. + globs: A list of glob patterns to accept files. 
+ """ + rv_dirs = [root for root, dirs, files in os.walk(base)] + rv = [] + for rv_dir in rv_dirs: + files = [] + for pat in globs: + files += glob.glob(os.path.join(rv_dir, pat)) + if not files: + continue + rv.extend([os.path.relpath(f, root) for f in files]) + return rv + + SETUP_DIR = os.path.dirname(__file__) README = os.path.join(SETUP_DIR, "README.rst") @@ -34,55 +64,50 @@ USE_MYPYC = True if USE_MYPYC: - mypyc_targets = [ - "cwltool/argparser.py", - "cwltool/builder.py", - "cwltool/checker.py", - "cwltool/command_line_tool.py", - # "cwltool/context.py", # monkeypatching - "cwltool/cwlrdf.py", - "cwltool/docker_id.py", - "cwltool/docker.py", - "cwltool/udocker.py", - "cwltool/errors.py", - "cwltool/executors.py", - "cwltool/factory.py", - "cwltool/flatten.py", - # "cwltool/__init__.py", - "cwltool/job.py", - "cwltool/load_tool.py", - # "cwltool/loghandler.py", # so we can monkeypatch the logger from tests - # "cwltool/__main__.py", - "cwltool/main.py", - "cwltool/mutation.py", - "cwltool/pack.py", - "cwltool/pathmapper.py", - "cwltool/process.py", - "cwltool/procgenerator.py", - # "cwltool/cwlprov/__init__.py", - "cwltool/cwlprov/provenance_constants.py", - "cwltool/cwlprov/provenance_profile.py", - "cwltool/cwlprov/ro.py", - # "cwltool/cwlprov/writablebagfile.py", # WritableBag is having issues - "cwltool/resolver.py", - "cwltool/secrets.py", - "cwltool/singularity.py", - "cwltool/software_requirements.py", - # "cwltool/stdfsaccess.py", # StdFsAccess needs to be subclassable - "cwltool/subgraph.py", - "cwltool/update.py", - "cwltool/utils.py", - "cwltool/validate_js.py", - "cwltool/workflow.py", + mypyc_skiplist = tuple( + os.path.join("cwltool", x) + for x in ( + "context.py", # monkeypatching + "__init__.py", + "loghandler.py", # so we can monkeypatch the logger from tests + "__main__.py", + "cwlprov/__init__.py", + "cuda.py", # for monkeypatch + "run_job.py", + "cwlprov/writablebagfile.py", # WritableBag is having issues + "stdfsaccess.py", # StdFsAccess 
needs to be subclassable + ) + ) + + everything = [os.path.join("cwltool", x) for x in _find_package_data("cwltool", ["*.py"])] + # Start with all the .py files + all_real_pys = [ + x for x in everything if not x.startswith(os.path.join("mypy", "typeshed") + os.sep) ] + # Strip out anything in our skiplist + mypyc_targets = [x for x in all_real_pys if x not in mypyc_skiplist] + + # Strip out any test code + mypyc_targets = [x for x in mypyc_targets if not x.startswith(("tests" + os.sep))] - from mypyc.build import mypycify # type: ignore[import-untyped] + mypyc_targets.sort() + + from mypyc.build import mypycify opt_level = os.getenv("MYPYC_OPT_LEVEL", "3") - ext_modules = mypycify(mypyc_targets, opt_level=opt_level) + debug_level = os.getenv("MYPYC_DEBUG_LEVEL", "1") + force_multifile = os.getenv("MYPYC_MULTI_FILE", "") == "1" + ext_modules = mypycify( + mypyc_targets, + opt_level=opt_level, + debug_level=debug_level, + multi_file=force_multifile, + ) else: ext_modules = [] +assert _is_list_of_setuptools_extension(ext_modules), "Expected mypycify to use setuptools" + setup( name="cwltool", description="Common workflow language reference implementation", From 9cbda99f4ffec89b6eb7115ab9932642309dd306 Mon Sep 17 00:00:00 2001 From: "Michael R. Crusoe" Date: Wed, 5 Feb 2025 10:18:40 +0100 Subject: [PATCH 3/5] cwlviewer: use importlib instead of __file__. 
--- cwltool/cwlviewer.py | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/cwltool/cwlviewer.py b/cwltool/cwlviewer.py index 769343964..36166c485 100644 --- a/cwltool/cwlviewer.py +++ b/cwltool/cwlviewer.py @@ -1,18 +1,28 @@ """Visualize a CWL workflow.""" from collections.abc import Iterator -from pathlib import Path +from importlib.resources import files from typing import cast from urllib.parse import urlparse import pydot import rdflib -_queries_dir = (Path(__file__).parent / "rdfqueries").resolve() -_get_inner_edges_query_path = _queries_dir / "get_inner_edges.sparql" -_get_input_edges_query_path = _queries_dir / "get_input_edges.sparql" -_get_output_edges_query_path = _queries_dir / "get_output_edges.sparql" -_get_root_query_path = _queries_dir / "get_root.sparql" + +def _get_inner_edges_query() -> str: + return files("cwltool").joinpath("rdfqueries/get_inner_edges.sparql").read_text() + + +def _get_input_edges_query() -> str: + return files("cwltool").joinpath("rdfqueries/get_input_edges.sparql").read_text() + + +def _get_output_edges_query() -> str: + return files("cwltool").joinpath("rdfqueries/get_output_edges.sparql").read_text() + + +def _get_root_query() -> str: + return files("cwltool").joinpath("rdfqueries/get_root.sparql").read_text() class CWLViewer: @@ -33,8 +43,7 @@ def _load_cwl_graph(self, rdf_description: str) -> rdflib.graph.Graph: return rdf_graph def _set_inner_edges(self) -> None: - with open(_get_inner_edges_query_path) as f: - get_inner_edges_query = f.read() + get_inner_edges_query = _get_inner_edges_query() inner_edges = cast( Iterator[rdflib.query.ResultRow], self._rdf_graph.query( @@ -96,8 +105,7 @@ def _set_inner_edges(self) -> None: ) def _set_input_edges(self) -> None: - with open(_get_input_edges_query_path) as f: - get_input_edges_query = f.read() + get_input_edges_query = _get_input_edges_query() inputs_subgraph = pydot.Subgraph(graph_name="cluster_inputs") 
self._dot_graph.add_subgraph(inputs_subgraph) inputs_subgraph.set("rank", "same") @@ -124,8 +132,7 @@ def _set_input_edges(self) -> None: self._dot_graph.add_edge(pydot.Edge(str(input_row["input"]), str(input_row["step"]))) def _set_output_edges(self) -> None: - with open(_get_output_edges_query_path) as f: - get_output_edges = f.read() + get_output_edges = _get_output_edges_query() outputs_graph = pydot.Subgraph(graph_name="cluster_outputs") self._dot_graph.add_subgraph(outputs_graph) outputs_graph.set("rank", "same") @@ -152,8 +159,7 @@ def _set_output_edges(self) -> None: self._dot_graph.add_edge(pydot.Edge(output_edge_row["step"], output_edge_row["output"])) def _get_root_graph_uri(self) -> rdflib.term.Identifier: - with open(_get_root_query_path) as f: - get_root_query = f.read() + get_root_query = _get_root_query() root = cast( list[rdflib.query.ResultRow], list( From b27aca4a0558977046e914c9f7395c232a54c4e2 Mon Sep 17 00:00:00 2001 From: "Michael R. Crusoe" Date: Wed, 5 Feb 2025 10:48:16 +0100 Subject: [PATCH 4/5] fix type errors discovered by mypyc --- cwltool/cuda.py | 7 ++++--- cwltool/singularity_utils.py | 14 +++++++------- cwltool/workflow_job.py | 2 +- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/cwltool/cuda.py b/cwltool/cuda.py index 1394ec239..d607b83da 100644 --- a/cwltool/cuda.py +++ b/cwltool/cuda.py @@ -2,6 +2,7 @@ import subprocess # nosec import xml.dom.minidom # nosec +from typing import Union from .loghandler import _logger from .utils import CWLObjectType @@ -10,9 +11,9 @@ def cuda_version_and_device_count() -> tuple[str, int]: """Determine the CUDA version and number of attached CUDA GPUs.""" try: - out = subprocess.check_output(["nvidia-smi", "-q", "-x"]) # nosec + out: Union[str, bytes] = subprocess.check_output(["nvidia-smi", "-q", "-x"]) # nosec except Exception as e: - _logger.warning("Error checking CUDA version with nvidia-smi: %s", e) + _logger.warning("Error checking CUDA version with nvidia-smi: %s", e, 
exc_info=e) return ("", 0) dm = xml.dom.minidom.parseString(out) # nosec @@ -62,5 +63,5 @@ def cuda_check(cuda_req: CWLObjectType, requestCount: int) -> int: return 0 return requestCount except Exception as e: - _logger.warning("Error checking CUDA requirements: %s", e) + _logger.warning("Error checking CUDA requirements: %s", e, exc_info=e) return 0 diff --git a/cwltool/singularity_utils.py b/cwltool/singularity_utils.py index e4cc88918..13f7ed3f6 100644 --- a/cwltool/singularity_utils.py +++ b/cwltool/singularity_utils.py @@ -2,7 +2,7 @@ import os import os.path -from subprocess import DEVNULL, PIPE, Popen, TimeoutExpired # nosec +import subprocess # nosec from typing import Optional _USERNS: Optional[bool] = None @@ -14,17 +14,17 @@ def singularity_supports_userns() -> bool: if _USERNS is None: try: hello_image = os.path.join(os.path.dirname(__file__), "hello.simg") - result = Popen( # nosec + result = subprocess.run( # nosec ["singularity", "exec", "--userns", hello_image, "true"], - stderr=PIPE, - stdout=DEVNULL, - universal_newlines=True, - ).communicate(timeout=60)[1] + capture_output=True, + timeout=60, + text=True, + ).stderr _USERNS = ( "No valid /bin/sh" in result or "/bin/sh doesn't exist in container" in result or "executable file not found in" in result ) - except TimeoutExpired: + except subprocess.TimeoutExpired: _USERNS = False return _USERNS diff --git a/cwltool/workflow_job.py b/cwltool/workflow_job.py index 6cd0b2e7c..b552641e1 100644 --- a/cwltool/workflow_job.py +++ b/cwltool/workflow_job.py @@ -406,7 +406,7 @@ def object_from_state( ("merge_nested" if len(connections) > 1 else None), ), ), - valueFrom=cast(str, inp.get("valueFrom")), + valueFrom=cast(Optional[str], inp.get("valueFrom")), ): raise WorkflowException( "Type mismatch between source '%s' (%s) and " From 5636b14b6e84d7b10e3c8bfc0d754690720b311d Mon Sep 17 00:00:00 2001 From: "Michael R. 
Crusoe" Date: Wed, 5 Feb 2025 19:25:30 +0100 Subject: [PATCH 5/5] more exception tracebacks, when in debug mode --- cwltool/command_line_tool.py | 2 +- cwltool/cwlprov/provenance_profile.py | 2 +- cwltool/executors.py | 5 ++++- cwltool/job.py | 27 ++++++++++++++++++++------- cwltool/main.py | 2 +- cwltool/procgenerator.py | 6 +++--- cwltool/resolver.py | 4 ++-- cwltool/workflow.py | 4 +++- 8 files changed, 35 insertions(+), 17 deletions(-) diff --git a/cwltool/command_line_tool.py b/cwltool/command_line_tool.py index 1fe1a7044..2319e6211 100644 --- a/cwltool/command_line_tool.py +++ b/cwltool/command_line_tool.py @@ -1342,7 +1342,7 @@ def collect_output( ] ) except OSError as e: - _logger.warning(str(e)) + _logger.warning(str(e), exc_info=builder.debug) except Exception: _logger.error("Unexpected error from fs_access", exc_info=True) raise diff --git a/cwltool/cwlprov/provenance_profile.py b/cwltool/cwlprov/provenance_profile.py index e8538e51b..e2208378f 100644 --- a/cwltool/cwlprov/provenance_profile.py +++ b/cwltool/cwlprov/provenance_profile.py @@ -546,7 +546,7 @@ def declare_artefact(self, value: Any) -> ProvEntity: # FIXME: list value does not support adding "@id" return coll except TypeError: - _logger.warning("Unrecognized type %s of %r", type(value), value) + _logger.warning("Unrecognized type %s of %r", type(value), value, exc_info=True) # Let's just fall back to Python repr() entity = self.document.entity(uuid.uuid4().urn, {PROV_LABEL: repr(value)}) self.research_object.add_uri(entity.identifier.uri) diff --git a/cwltool/executors.py b/cwltool/executors.py index 33198d854..9d0559726 100644 --- a/cwltool/executors.py +++ b/cwltool/executors.py @@ -326,7 +326,10 @@ def _runner( self.exceptions.append(err) except Exception as err: # pylint: disable=broad-except _logger.exception(f"Got workflow error: {err}") - self.exceptions.append(WorkflowException(str(err))) + wf_exc = WorkflowException(str(err)) + wf_exc.__cause__ = err + wf_exc.__suppress_context__ = 
True + self.exceptions.append(wf_exc) finally: if runtime_context.workflow_eval_lock: with runtime_context.workflow_eval_lock: diff --git a/cwltool/job.py b/cwltool/job.py index b360be25f..2c6bb9f77 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -376,17 +376,30 @@ def stderr_stdout_log_path( except OSError as e: if e.errno == 2: if runtime: - _logger.error("'%s' not found: %s", runtime[0], str(e)) + _logger.error( + "'%s' not found: %s", runtime[0], str(e), exc_info=runtimeContext.debug + ) else: - _logger.error("'%s' not found: %s", self.command_line[0], str(e)) + _logger.error( + "'%s' not found: %s", + self.command_line[0], + str(e), + exc_info=runtimeContext.debug, + ) else: - _logger.exception("Exception while running job") + _logger.exception( + "Exception while running job: %s", str(e), exc_info=runtimeContext.debug + ) processStatus = "permanentFail" except WorkflowException as err: - _logger.error("[job %s] Job error:\n%s", self.name, str(err)) + _logger.error( + "[job %s] Job error:\n%s", self.name, str(err), exc_info=runtimeContext.debug + ) processStatus = "permanentFail" - except Exception: - _logger.exception("Exception while running job") + except Exception as err: + _logger.exception( + "Exception while running job: %s.", str(err), exc_info=runtimeContext.debug + ) processStatus = "permanentFail" if ( runtimeContext.research_obj is not None @@ -795,7 +808,7 @@ def run( ) except Exception as err: container = "Singularity" if runtimeContext.singularity else "Docker" - _logger.debug("%s error", container, exc_info=True) + _logger.debug("%s error", container, exc_info=runtimeContext.debug) if docker_is_req: raise UnsupportedRequirement( f"{container} is required to run this tool: {str(err)}" diff --git a/cwltool/main.py b/cwltool/main.py index c658c3685..90cb2e2c8 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -1289,7 +1289,7 @@ def main( if isinstance(err.code, int): return err.code else: - _logger.debug("Non-integer SystemExit: %s", 
err.code) + _logger.debug("Non-integer SystemExit: %s", err.code, exc_info=args.debug) return 1 del args.workflow diff --git a/cwltool/procgenerator.py b/cwltool/procgenerator.py index 9839ce5d4..07123f906 100644 --- a/cwltool/procgenerator.py +++ b/cwltool/procgenerator.py @@ -57,7 +57,7 @@ def job( except WorkflowException: raise except Exception as exc: - _logger.exception("Unexpected exception") + _logger.exception("Unexpected exception", exc_info=runtimeContext.debug) raise WorkflowException(str(exc)) from exc @@ -80,7 +80,7 @@ def __init__( self.embedded_tool = load_tool(toolpath_object["run"], loadingContext) except ValidationException as vexc: if loadingContext.debug: - _logger.exception("Validation exception") + _logger.exception("Validation exception", exc_info=loadingContext.debug) raise WorkflowException( "Tool definition %s failed validation:\n%s" % (toolpath_object["run"], indent(str(vexc))) @@ -108,7 +108,7 @@ def result( ) except ValidationException as vexc: if runtimeContext.debug: - _logger.exception("Validation exception") + _logger.exception("Validation exception", exc_info=runtimeContext.debug) raise WorkflowException( "Tool definition %s failed validation:\n%s" % (jobout["runProcess"], indent(str(vexc))) diff --git a/cwltool/resolver.py b/cwltool/resolver.py index e48957f26..918a9b24e 100644 --- a/cwltool/resolver.py +++ b/cwltool/resolver.py @@ -15,8 +15,8 @@ def resolve_local(document_loader: Optional[Loader], uri: str) -> Optional[str]: try: pathobj = Path(pathpart).resolve() - except OSError: - _logger.debug("local resolver could not resolve %s", uri) + except OSError as exc: + _logger.debug("local resolver could not resolve %s due to %s", uri, str(exc)) return None if pathobj.is_file(): diff --git a/cwltool/workflow.py b/cwltool/workflow.py index 899ac4643..a6e2ba189 100644 --- a/cwltool/workflow.py +++ b/cwltool/workflow.py @@ -442,7 +442,9 @@ def job( runtimeContext, ) except WorkflowException: - _logger.error("Exception on step '%s'", 
runtimeContext.name) + _logger.error( + "Exception on step '%s'", runtimeContext.name, exc_info=runtimeContext.debug + ) raise except Exception as exc: _logger.exception("Unexpected exception")