Skip to content

Commit ca785f8

Browse files
RenskeW and mr-c authored
Set last modification date of remote file as local file timestamp (#1676)
Co-authored-by: Michael R. Crusoe <[email protected]>
1 parent d4d4515 commit ca785f8

File tree

6 files changed: 72 additions and 6 deletions

Makefile

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -97,13 +97,14 @@ remove_unused_imports: $(PYSOURCES)
9797
autoflake --in-place --remove-all-unused-imports $^
9898

9999
pep257: pydocstyle
100-
## pydocstyle : check Python code style
100+
## pydocstyle : check Python docstring style
101101
pydocstyle: $(PYSOURCES)
102102
pydocstyle --add-ignore=D100,D101,D102,D103 $^ || true
103103

104104
pydocstyle_report.txt: $(PYSOURCES)
105105
pydocstyle setup.py $^ > $@ 2>&1 || true
106106

107+
## diff_pydocstyle_report : check Python docstring style for changed files only
107108
diff_pydocstyle_report: pydocstyle_report.txt
108109
diff-quality --compare-branch=main --violations=pydocstyle --fail-under=100 $^
109110

cwltool/pathmapper.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -139,7 +139,7 @@ def visit(
139139
):
140140
deref = ab
141141
if urllib.parse.urlsplit(deref).scheme in ["http", "https"]:
142-
deref = downloadHttpFile(path)
142+
deref, _last_modified = downloadHttpFile(path)
143143
else:
144144
# Dereference symbolic links
145145
st = os.lstat(deref)

cwltool/utils.py

Lines changed: 18 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
"""Shared functions and other definitions."""
2-
32
import collections
43
import os
54
import random
@@ -11,6 +10,8 @@
1110
import tempfile
1211
import urllib
1312
import uuid
13+
from datetime import datetime
14+
from email.utils import parsedate_to_datetime
1415
from functools import partial
1516
from itertools import zip_longest
1617
from pathlib import Path, PurePosixPath
@@ -344,8 +345,14 @@ def trim_listing(obj): # type: (Dict[str, Any]) -> None
344345
del obj["listing"]
345346

346347

347-
def downloadHttpFile(httpurl):
348-
# type: (str) -> str
348+
def downloadHttpFile(httpurl: str) -> Tuple[str, Optional[datetime]]:
349+
"""
350+
Download a remote file, possibly using a locally cached copy.
351+
352+
Returns a tuple:
353+
- the local path for the downloaded file
354+
- the Last-Modified timestamp if received from the remote server.
355+
"""
349356
cache_session = None
350357
if "XDG_CACHE_HOME" in os.environ:
351358
directory = os.environ["XDG_CACHE_HOME"]
@@ -365,7 +372,14 @@ def downloadHttpFile(httpurl):
365372
if chunk: # filter out keep-alive new chunks
366373
f.write(chunk)
367374
r.close()
368-
return str(f.name)
375+
376+
date_raw: Optional[str] = r.headers.get("Last-Modified", None)
377+
date: Optional[datetime] = parsedate_to_datetime(date_raw) if date_raw else None
378+
if date:
379+
date_epoch = date.timestamp()
380+
os.utime(f.name, (date_epoch, date_epoch))
381+
382+
return str(f.name), date
369383

370384

371385
def ensure_writable(path: str, include_root: bool = False) -> None:

setup.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -134,6 +134,7 @@
134134
"pytest >= 6.2, < 7.2",
135135
"mock >= 2.0.0",
136136
"pytest-mock >= 1.10.0",
137+
"pytest-httpserver",
137138
"arcp >= 0.2.0",
138139
"rdflib-jsonld>=0.4.0, <= 0.6.1;python_version<='3.6'",
139140
],

test-requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
pytest >= 6.2, < 7.2
22
pytest-xdist
3+
pytest-httpserver
34
mock >= 2.0.0
45
pytest-mock >= 1.10.0
56
pytest-cov

tests/test_http_input.py

Lines changed: 49 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,12 @@
11
import os
2+
import sys
3+
from datetime import datetime
24
from pathlib import Path
35
from typing import List
46

7+
import pytest
8+
from pytest_httpserver import HTTPServer
9+
510
from cwltool.pathmapper import PathMapper
611
from cwltool.utils import CWLObjectType
712

@@ -25,3 +30,47 @@ def test_http_path_mapping(tmp_path: Path) -> None:
2530
contents = file.read()
2631

2732
assert ">Sequence 561 BP; 135 A; 106 C; 98 G; 222 T; 0 other;" in contents
33+
34+
35+
@pytest.mark.skipif(sys.version_info < (3, 7), reason="timesout on CI")
36+
def test_modification_date(tmp_path: Path) -> None:
37+
"""Local copies of remote files should preserve last modification date."""
38+
# Initialize the server
39+
headers = {
40+
"Server": "nginx",
41+
"Date": "Mon, 27 Jun 2022 14:26:17 GMT",
42+
"Content-Type": "application/zip",
43+
"Content-Length": "123906",
44+
"Connection": "keep-alive",
45+
"Last-Modified": "Tue, 14 Dec 2021 14:23:30 GMT",
46+
"ETag": '"1e402-5d31beef49671"',
47+
"Accept-Ranges": "bytes",
48+
"Strict-Transport-Security": "max-age=31536000; includeSubDomains",
49+
}
50+
51+
remote_file_name = "testfile.txt"
52+
53+
with HTTPServer() as httpserver:
54+
httpserver.expect_request(f"/{remote_file_name}").respond_with_data(
55+
response_data="Hello World", headers=headers
56+
)
57+
location = httpserver.url_for(f"/{remote_file_name}")
58+
59+
base_file: List[CWLObjectType] = [
60+
{
61+
"class": "File",
62+
"location": location,
63+
"basename": remote_file_name,
64+
}
65+
]
66+
67+
date_now = datetime.now()
68+
69+
pathmap = PathMapper(base_file, os.getcwd(), str(tmp_path))._pathmap
70+
71+
assert location in pathmap
72+
assert os.path.exists(pathmap[location].resolved)
73+
74+
last_modified = os.path.getmtime(pathmap[location].resolved)
75+
76+
assert date_now.timestamp() > last_modified

0 commit comments

Comments
 (0)