Skip to content

Commit c774f39

Browse files
authored
Merge pull request #223 from Flameeyes/permanent-redirect
Properly support modern 308 Permanent Redirect responses.
2 parents ca9fded + add2e52 commit c774f39

File tree

5 files changed

+35
-43
lines changed

5 files changed

+35
-43
lines changed

cachecontrol/adapter.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
from requests.adapters import HTTPAdapter
1010

11-
from .controller import CacheController
11+
from .controller import CacheController, PERMANENT_REDIRECT_STATUSES
1212
from .cache import DictCache
1313
from .filewrapper import CallbackFileWrapper
1414

@@ -97,7 +97,7 @@ def build_response(
9797
response = cached_response
9898

9999
# We always cache the 301 responses
100-
elif response.status == 301:
100+
elif int(response.status) in PERMANENT_REDIRECT_STATUSES:
101101
self.controller.cache_response(request, response)
102102
else:
103103
# Wrap the response file with a wrapper that will cache the

cachecontrol/controller.py

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121

2222
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")
2323

24+
PERMANENT_REDIRECT_STATUSES = (301, 308)
25+
2426

2527
def parse_uri(uri):
2628
"""Parses a URI using the regex given in Appendix B of RFC 3986.
@@ -41,7 +43,7 @@ def __init__(
4143
self.cache = DictCache() if cache is None else cache
4244
self.cache_etags = cache_etags
4345
self.serializer = serializer or Serializer()
44-
self.cacheable_status_codes = status_codes or (200, 203, 300, 301)
46+
self.cacheable_status_codes = status_codes or (200, 203, 300, 301, 308)
4547

4648
@classmethod
4749
def _urlnorm(cls, uri):
@@ -151,17 +153,18 @@ def cached_request(self, request):
151153
logger.warning("Cache entry deserialization failed, entry ignored")
152154
return False
153155

154-
# If we have a cached 301, return it immediately. We don't
155-
# need to test our response for other headers b/c it is
156+
# If we have a cached permanent redirect, return it immediately. We
157+
# don't need to test our response for other headers b/c it is
156158
# intrinsically "cacheable" as it is Permanent.
159+
#
157160
# See:
158161
# https://tools.ietf.org/html/rfc7231#section-6.4.2
159162
#
160163
# Client can try to refresh the value by repeating the request
161164
# with cache busting headers as usual (ie no-cache).
162-
if resp.status == 301:
165+
if int(resp.status) in PERMANENT_REDIRECT_STATUSES:
163166
msg = (
164-
'Returning cached "301 Moved Permanently" response '
167+
'Returning cached permanent redirect response '
165168
"(ignoring date and etag information)"
166169
)
167170
logger.debug(msg)
@@ -310,14 +313,14 @@ def cache_response(self, request, response, body=None, status_codes=None):
310313
if self.cache_etags and "etag" in response_headers:
311314
logger.debug("Caching due to etag")
312315
self.cache.set(
313-
cache_url, self.serializer.dumps(request, response, body=body)
316+
cache_url, self.serializer.dumps(request, response, body)
314317
)
315318

316-
# Add to the cache any 301s. We do this before looking that
317-
# the Date headers.
318-
elif response.status == 301:
319-
logger.debug("Caching permanant redirect")
320-
self.cache.set(cache_url, self.serializer.dumps(request, response))
319+
# Add to the cache any permanent redirects. We do this before looking
320+
# that the Date headers.
321+
elif int(response.status) in PERMANENT_REDIRECT_STATUSES:
322+
logger.debug("Caching permanent redirect")
323+
self.cache.set(cache_url, self.serializer.dumps(request, response, b''))
321324

322325
# Add to the cache if the response headers demand it. If there
323326
# is no date header then we can't do anything about expiring
@@ -327,7 +330,7 @@ def cache_response(self, request, response, body=None, status_codes=None):
327330
if "max-age" in cc and cc["max-age"] > 0:
328331
logger.debug("Caching b/c date exists and max-age > 0")
329332
self.cache.set(
330-
cache_url, self.serializer.dumps(request, response, body=body)
333+
cache_url, self.serializer.dumps(request, response, body)
331334
)
332335

333336
# If the request can expire, it means we should cache it
@@ -336,7 +339,7 @@ def cache_response(self, request, response, body=None, status_codes=None):
336339
if response_headers["expires"]:
337340
logger.debug("Caching b/c of expires header")
338341
self.cache.set(
339-
cache_url, self.serializer.dumps(request, response, body=body)
342+
cache_url, self.serializer.dumps(request, response, body)
340343
)
341344

342345
def update_cached_response(self, request, response):
@@ -375,6 +378,7 @@ def update_cached_response(self, request, response):
375378
cached_response.status = 200
376379

377380
# update our cache
378-
self.cache.set(cache_url, self.serializer.dumps(request, cached_response))
381+
body = cached_response.read(decode_content=False)
382+
self.cache.set(cache_url, self.serializer.dumps(request, cached_response, body))
379383

380384
return cached_response

cachecontrol/serialize.py

Lines changed: 4 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -21,26 +21,14 @@ def _b64_decode_str(s):
2121
return _b64_decode_bytes(s).decode("utf8")
2222

2323

24+
_default_body_read = object()
25+
26+
2427
class Serializer(object):
2528

26-
def dumps(self, request, response, body=None):
29+
def dumps(self, request, response, body):
2730
response_headers = CaseInsensitiveDict(response.headers)
2831

29-
if body is None:
30-
body = response.read(decode_content=False)
31-
32-
# NOTE: 99% sure this is dead code. I'm only leaving it
33-
# here b/c I don't have a test yet to prove
34-
# it. Basically, before using
35-
# `cachecontrol.filewrapper.CallbackFileWrapper`,
36-
# this made an effort to reset the file handle. The
37-
# `CallbackFileWrapper` short circuits this code by
38-
# setting the body as the content is consumed, the
39-
# result being a `body` argument is *always* passed
40-
# into cache_response, and in turn,
41-
# `Serializer.dump`.
42-
response._fp = io.BytesIO(body)
43-
4432
# NOTE: This is all a bit weird, but it's really important that on
4533
# Python 2.x these objects are unicode and not str, even when
4634
# they contain only ascii. The problem here is that msgpack

tests/test_cache_control.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ def test_no_cache_with_wrong_sized_body(self, cc):
7171
# When the body is the wrong size, then we don't want to cache it
7272
# because it is obviously broken.
7373
resp = self.resp({"cache-control": "max-age=3600", "Content-Length": "5"})
74-
cc.cache_response(self.req(), resp, body=b"0" * 10)
74+
cc.cache_response(self.req(), resp, b"0" * 10)
7575

7676
assert not cc.cache.set.called
7777

@@ -86,7 +86,7 @@ def test_cache_response_cache_max_age(self, cc):
8686
resp = self.resp({"cache-control": "max-age=3600", "date": now})
8787
req = self.req()
8888
cc.cache_response(req, resp)
89-
cc.serializer.dumps.assert_called_with(req, resp, body=None)
89+
cc.serializer.dumps.assert_called_with(req, resp, None)
9090
cc.cache.set.assert_called_with(self.url, ANY)
9191

9292
def test_cache_response_cache_max_age_with_invalid_value_not_cached(self, cc):
@@ -156,7 +156,7 @@ def req(self, headers):
156156
return self.c.cached_request(mock_request)
157157

158158
def test_cache_request_no_headers(self):
159-
cached_resp = Mock(headers={"ETag": "jfd9094r808", "Content-Length": 100})
159+
cached_resp = Mock(headers={"ETag": "jfd9094r808", "Content-Length": 100}, status=200)
160160
self.c.cache = DictCache({self.url: cached_resp})
161161
resp = self.req({})
162162
assert not resp
@@ -183,7 +183,7 @@ def test_cache_request_not_in_cache(self):
183183

184184
def test_cache_request_fresh_max_age(self):
185185
now = time.strftime(TIME_FMT, time.gmtime())
186-
resp = Mock(headers={"cache-control": "max-age=3600", "date": now})
186+
resp = Mock(headers={"cache-control": "max-age=3600", "date": now}, status=200)
187187

188188
cache = DictCache({self.url: resp})
189189
self.c.cache = cache
@@ -193,7 +193,7 @@ def test_cache_request_fresh_max_age(self):
193193
def test_cache_request_unfresh_max_age(self):
194194
earlier = time.time() - 3700 # epoch - 1h01m40s
195195
now = time.strftime(TIME_FMT, time.gmtime(earlier))
196-
resp = Mock(headers={"cache-control": "max-age=3600", "date": now})
196+
resp = Mock(headers={"cache-control": "max-age=3600", "date": now}, status=200)
197197
self.c.cache = DictCache({self.url: resp})
198198
r = self.req({})
199199
assert not r
@@ -202,7 +202,7 @@ def test_cache_request_fresh_expires(self):
202202
later = time.time() + 86400 # GMT + 1 day
203203
expires = time.strftime(TIME_FMT, time.gmtime(later))
204204
now = time.strftime(TIME_FMT, time.gmtime())
205-
resp = Mock(headers={"expires": expires, "date": now})
205+
resp = Mock(headers={"expires": expires, "date": now}, status=200)
206206
cache = DictCache({self.url: resp})
207207
self.c.cache = cache
208208
r = self.req({})
@@ -212,7 +212,7 @@ def test_cache_request_unfresh_expires(self):
212212
sooner = time.time() - 86400 # GMT - 1 day
213213
expires = time.strftime(TIME_FMT, time.gmtime(sooner))
214214
now = time.strftime(TIME_FMT, time.gmtime())
215-
resp = Mock(headers={"expires": expires, "date": now})
215+
resp = Mock(headers={"expires": expires, "date": now}, status=200)
216216
cache = DictCache({self.url: resp})
217217
self.c.cache = cache
218218
r = self.req({})
@@ -221,7 +221,7 @@ def test_cache_request_unfresh_expires(self):
221221
def test_cached_request_with_bad_max_age_headers_not_returned(self):
222222
now = time.strftime(TIME_FMT, time.gmtime())
223223
# Not a valid header; this would be from a misconfigured server
224-
resp = Mock(headers={"cache-control": "max-age=xxx", "date": now})
224+
resp = Mock(headers={"cache-control": "max-age=xxx", "date": now}, status=200)
225225

226226
self.c.cache = DictCache({self.url: resp})
227227

tests/test_serialization.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ def test_read_latest_version_streamable(self, url):
9393
original_resp = requests.get(url, stream=True)
9494
req = original_resp.request
9595

96-
resp = self.serializer.loads(req, self.serializer.dumps(req, original_resp.raw))
96+
resp = self.serializer.loads(req, self.serializer.dumps(req, original_resp.raw, original_resp.content))
9797

9898
assert resp.read()
9999

@@ -103,7 +103,7 @@ def test_read_latest_version(self, url):
103103
req = original_resp.request
104104

105105
resp = self.serializer.loads(
106-
req, self.serializer.dumps(req, original_resp.raw, body=data)
106+
req, self.serializer.dumps(req, original_resp.raw, data)
107107
)
108108

109109
assert resp.read() == data
@@ -118,5 +118,5 @@ def test_no_vary_header(self, url):
118118
original_resp.raw.headers["vary"] = "Foo"
119119

120120
assert self.serializer.loads(
121-
req, self.serializer.dumps(req, original_resp.raw, body=data)
121+
req, self.serializer.dumps(req, original_resp.raw, data)
122122
)

0 commit comments

Comments
 (0)