Skip to content

Commit 39496f3

Browse files
committed
Detect some types of bad URLs for the job/cwl inputs
1 parent ea6273d commit 39496f3

14 files changed

+54
-29
lines changed

cwltool/__init__.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
11
"""Reference implementation of the CWL standards."""
22

33
__author__ = "[email protected]"
4+
5+
CWL_CONTENT_TYPES = [
6+
"text/plain",
7+
"application/json",
8+
"text/vnd.yaml",
9+
"text/yaml",
10+
"text/x-yaml",
11+
"application/x-yaml",
12+
]

cwltool/load_tool.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
json_dumps,
3232
)
3333

34-
from . import process, update
34+
from . import CWL_CONTENT_TYPES, process, update
3535
from .context import LoadingContext
3636
from .errors import WorkflowException
3737
from .loghandler import _logger
@@ -127,7 +127,10 @@ def fetch_document(
127127
resolver=loadingContext.resolver,
128128
document_loader=loadingContext.loader,
129129
)
130-
workflowobj = cast(CommentedMap, loadingContext.loader.fetch(fileuri))
130+
workflowobj = cast(
131+
CommentedMap,
132+
loadingContext.loader.fetch(fileuri, content_types=CWL_CONTENT_TYPES),
133+
)
131134
return loadingContext, workflowobj, uri
132135
if isinstance(argsworkflow, MutableMapping):
133136
uri = (

cwltool/main.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -36,15 +36,11 @@
3636
from ruamel import yaml
3737
from ruamel.yaml.comments import CommentedMap, CommentedSeq
3838
from schema_salad.exceptions import ValidationException
39-
from schema_salad.ref_resolver import (
40-
Loader,
41-
file_uri,
42-
uri_file_path,
43-
)
39+
from schema_salad.ref_resolver import Loader, file_uri, uri_file_path
4440
from schema_salad.sourceline import strip_dup_lineno
45-
from schema_salad.utils import json_dumps, ContextType, FetcherCallableType
41+
from schema_salad.utils import ContextType, FetcherCallableType, json_dumps
4642

47-
from . import command_line_tool, workflow
43+
from . import CWL_CONTENT_TYPES, command_line_tool, workflow
4844
from .argparser import arg_parser, generate_parser, get_default_args
4945
from .builder import HasReqsHints
5046
from .context import LoadingContext, RuntimeContext, getdefault
@@ -332,7 +328,11 @@ def load_job_order(
332328
if args.basedir
333329
else os.path.abspath(os.path.dirname(job_order_file))
334330
)
335-
job_order_object, _ = loader.resolve_ref(job_order_file, checklinks=False)
331+
job_order_object, _ = loader.resolve_ref(
332+
job_order_file,
333+
checklinks=False,
334+
content_types=CWL_CONTENT_TYPES,
335+
)
336336

337337
if (
338338
job_order_object is not None

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ requests>=2.4.3
22
ruamel.yaml>=0.12.4,<=0.16.5
33
rdflib>=4.2.2,<5.1
44
shellescape>=3.4.1,<3.5
5-
schema-salad>=7.0.20210124093443,<8
5+
schema-salad>=7.1,<8
66
prov==1.5.1
77
bagit==1.7.0
88
mypy-extensions

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@
9797
"ruamel.yaml >= 0.12.4, <= 0.16.5",
9898
"rdflib >= 4.2.2, < 5.1.0",
9999
"shellescape >= 3.4.1, < 3.5",
100-
"schema-salad >= 7.0.20210124093443, < 8",
100+
"schema-salad >= 7.1, < 8",
101101
"mypy-extensions",
102102
"psutil",
103103
"prov == 1.5.1",

tests/test_content_type.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import pydot # type: ignore
2+
3+
4+
from .util import (
5+
get_main_output,
6+
)
7+
8+
9+
def test_content_types() -> None:
10+
for test_file in ("js_output.cwl", "js_output_workflow.cwl"):
11+
commands = [
12+
"https://raw.githubusercontent.com/common-workflow-language/common-workflow-language/main/v1.0/v1.0/test-cwl-out2.cwl",
13+
"https://github.com/common-workflow-language/common-workflow-language/blob/main/v1.0/v1.0/empty.json",
14+
]
15+
error_code, _, stderr = get_main_output(commands)
16+
17+
assert "got content-type of 'text/html'" in stderr
18+
assert error_code == 1, stderr

tests/test_fetch.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import os
22
from pathlib import Path
3-
from typing import Any, Optional
3+
from typing import Any, List, Optional
44
from urllib.parse import urljoin, urlsplit
55

66
import pytest
@@ -26,7 +26,7 @@ def __init__(
2626
) -> None:
2727
"""Create a Fetcher that provides a fixed result for testing purposes."""
2828

29-
def fetch_text(self, url): # type: (str) -> str
29+
def fetch_text(self, url: str, content_types: Optional[List[str]] = None) -> str:
3030
if url == "baz:bar/foo.cwl":
3131
return """
3232
cwlVersion: v1.0

tests/test_iwdr.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
import os
2-
import tempfile
31
from pathlib import Path
42
from typing import Any
53

tests/test_provenance.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
import json
22
import os
33
import pickle
4-
import shutil
54
import sys
65
import urllib
76
from pathlib import Path
8-
from typing import Any, Generator, cast
7+
from typing import Any, Generator
98

109
import arcp
1110
import bagit
@@ -14,8 +13,7 @@
1413
from rdflib.namespace import DC, DCTERMS, RDF
1514
from rdflib.term import Literal
1615

17-
from cwltool import provenance
18-
from cwltool import provenance_constants
16+
from cwltool import provenance, provenance_constants
1917
from cwltool.main import main
2018
from cwltool.provenance import ResearchObject
2119
from cwltool.stdfsaccess import StdFsAccess
@@ -407,7 +405,11 @@ def check_ro(base_path: Path, nested: bool = False) -> None:
407405
# Check all prov elements are listed
408406
formats = set()
409407
for prov in g.objects(p, OA.hasBody):
410-
assert (prov, DCTERMS.conformsTo, URIRef(provenance_constants.CWLPROV_VERSION)) in g
408+
assert (
409+
prov,
410+
DCTERMS.conformsTo,
411+
URIRef(provenance_constants.CWLPROV_VERSION),
412+
) in g
411413
# NOTE: DC.format is a Namespace method and does not resolve like other terms
412414
formats.update(set(g.objects(prov, DC["format"])))
413415
assert formats, "Could not find media types"

tests/test_relax_path_checks.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import os
21
from pathlib import Path
32

43
from cwltool.main import main

0 commit comments

Comments
 (0)