Skip to content

Commit 75e9db7

Browse files
authored
Fix the handling of bad Last-Modified headers (#277)
Originally found via an OverflowError on some Windows usage where the default Last-Modified header was used, there are several issues which were not handled correctly by the downloader. 1. OverflowError: a last mod date outside of `mktime`'s supported range causes this 2. ValueError: a malformed header causes `strptime` to fail 3. LookupError/KeyError: the header is missing (3) was previously handled by falling back to a default time which was then strptime parsed. This is now caught as a LookupError and handled with the fixed default of `0.0` used for all of these bad-parse cases. The semantics are the same, but the implementation is simpler and more uniform.
1 parent 45af60b commit 75e9db7

File tree

3 files changed

+73
-6
lines changed

3 files changed

+73
-6
lines changed

CHANGELOG.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ Unreleased
99
----------
1010

1111
.. vendor-insert-here
12+
- Fix the handling of malformed and missing ``Last-Modified`` headers in the
13+
caching downloader. Thanks :user:`balihb`! (:issue:`275`)
1214

1315
0.23.1
1416
------

src/check_jsonschema/cachedownloader.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ class FailedDownloadError(Exception):
1717

1818

1919
class CacheDownloader:
20-
_LASTMOD_DEFAULT = "Sun, 01 Jan 1970 00:00:01 GMT"
2120
_LASTMOD_FMT = "%a, %d %b %Y %H:%M:%S %Z"
2221

2322
# changed in v0.5.0
@@ -83,12 +82,15 @@ def _get_request(self) -> requests.Response:
8382
raise FailedDownloadError("encountered error during download") from e
8483

8584
def _lastmod_from_response(self, response: requests.Response) -> float:
86-
return time.mktime(
87-
time.strptime(
88-
response.headers.get("last-modified", self._LASTMOD_DEFAULT),
89-
self._LASTMOD_FMT,
85+
try:
86+
return time.mktime(
87+
time.strptime(response.headers["last-modified"], self._LASTMOD_FMT)
9088
)
91-
)
89+
# OverflowError: time outside of platform-specific bounds
90+
# ValueError: malformed/unparseable
91+
# LookupError: no such header
92+
except (OverflowError, ValueError, LookupError):
93+
return 0.0
9294

9395
def _cache_hit(self, cachefile: str, response: requests.Response) -> bool:
9496
# no file? miss

tests/unit/test_cachedownloader.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,3 +205,66 @@ def test_cachedownloader_retries_on_bad_data(tmp_path, disable_cache):
205205
assert not f.exists()
206206
else:
207207
assert f.exists()
208+
209+
210+
@pytest.mark.parametrize("file_exists", (True, False))
211+
@pytest.mark.parametrize(
212+
"failure_mode", ("header_missing", "header_malformed", "time_overflow")
213+
)
214+
def test_cachedownloader_handles_bad_lastmod_header(
215+
monkeypatch, tmp_path, file_exists, failure_mode
216+
):
217+
if failure_mode == "header_missing":
218+
responses.add(
219+
"GET",
220+
"https://example.com/schema1.json",
221+
headers={},
222+
json={},
223+
match_querystring=None,
224+
)
225+
elif failure_mode == "header_malformed":
226+
responses.add(
227+
"GET",
228+
"https://example.com/schema1.json",
229+
headers={"Last-Modified": "Jan 2000 00:00:01"},
230+
json={},
231+
match_querystring=None,
232+
)
233+
elif failure_mode == "time_overflow":
234+
add_default_response()
235+
236+
def fake_mktime(*args):
237+
raise OverflowError("uh-oh")
238+
239+
monkeypatch.setattr("time.mktime", fake_mktime)
240+
else:
241+
raise NotImplementedError
242+
243+
original_file_contents = b'{"foo": "bar"}'
244+
f = tmp_path / "schema1.json"
245+
246+
if file_exists:
247+
f.write_bytes(original_file_contents)
248+
else:
249+
assert not f.exists()
250+
251+
cd = CacheDownloader(
252+
"https://example.com/schema1.json", filename=str(f), cache_dir=str(tmp_path)
253+
)
254+
255+
# if the file already existed, it will not be overwritten by the cachedownloader
256+
# so the returned value for both the downloader and a direct file read should be the
257+
# original contents
258+
if file_exists:
259+
with cd.open() as fp:
260+
assert fp.read() == original_file_contents
261+
assert f.read_bytes() == original_file_contents
262+
# otherwise, the file will have been created with new content
263+
# both reads will show that new content
264+
else:
265+
with cd.open() as fp:
266+
assert fp.read() == b"{}"
267+
assert f.read_bytes() == b"{}"
268+
269+
# at the end, the file always exists on disk
270+
assert f.exists()

0 commit comments

Comments
 (0)