Skip to content

Commit 2ae417d

Browse files
committed
Use msgpack for cache serialization.
Msgpack is fast, supports all major Python versions, and does not add overhead for the serialization of large binary values (as commonly handled by pip).
1 parent 3b3b776 commit 2ae417d

File tree

4 files changed

+44
-74
lines changed

4 files changed

+44
-74
lines changed

cachecontrol/serialize.py

Lines changed: 14 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,10 @@
33
import json
44
import zlib
55

6+
import msgpack
67
from requests.structures import CaseInsensitiveDict
78

8-
from .compat import HTTPResponse, pickle, text_type
9-
10-
11-
def _b64_encode_bytes(b):
12-
return base64.b64encode(b).decode("ascii")
13-
14-
15-
def _b64_encode_str(s):
16-
return _b64_encode_bytes(s.encode("utf8"))
17-
18-
19-
def _b64_encode(s):
20-
if isinstance(s, text_type):
21-
return _b64_encode_str(s)
22-
return _b64_encode_bytes(s)
9+
from .compat import HTTPResponse, pickle
2310

2411

2512
def _b64_decode_bytes(b):
@@ -52,14 +39,11 @@ def dumps(self, request, response, body=None):
5239

5340
data = {
5441
"response": {
55-
"body": _b64_encode_bytes(body),
56-
"headers": dict(
57-
(_b64_encode(k), _b64_encode(v))
58-
for k, v in response.headers.items()
59-
),
42+
"body": body,
43+
"headers": dict(response.headers),
6044
"status": response.status,
6145
"version": response.version,
62-
"reason": _b64_encode_str(response.reason),
46+
"reason": response.reason,
6347
"strict": response.strict,
6448
"decode_content": response.decode_content,
6549
},
@@ -73,20 +57,7 @@ def dumps(self, request, response, body=None):
7357
header = header.strip()
7458
data["vary"][header] = request.headers.get(header, None)
7559

76-
# Encode our Vary headers to ensure they can be serialized as JSON
77-
data["vary"] = dict(
78-
(_b64_encode(k), _b64_encode(v) if v is not None else v)
79-
for k, v in data["vary"].items()
80-
)
81-
82-
return b",".join([
83-
b"cc=2",
84-
zlib.compress(
85-
json.dumps(
86-
data, separators=(",", ":"), sort_keys=True,
87-
).encode("utf8"),
88-
),
89-
])
60+
return b",".join([b"cc=3", msgpack.dumps(data, use_bin_type=True)])
9061

9162
def loads(self, request, data):
9263
# Short circuit if we've been given an empty set of data
@@ -194,3 +165,11 @@ def _loads_v2(self, request, data):
194165
)
195166

196167
return self.prepare_response(request, cached)
168+
169+
def _loads_v3(self, request, data):
170+
try:
171+
cached = msgpack.loads(data, encoding='utf-8')
172+
except ValueError:
173+
return
174+
175+
return self.prepare_response(request, cached)

docs/release_notes.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,15 @@
22
Release Notes
33
===============
44

5+
0.12.0
6+
======
7+
8+
Rather than using compressed JSON for caching values, we are now using
9+
MessagePack (http://msgpack.org/). MessagePack has the advantage that
10+
serialization and deserialization are faster, especially for
11+
caching large binary payloads.
12+
13+
514
0.11.2
615
======
716

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
long_description=long_description,
2020
install_requires=[
2121
'requests',
22+
'msgpack-python',
2223
],
2324
extras_require={
2425
'filecache': ['lockfile>=0.9'],

tests/test_serialization.py

Lines changed: 20 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
1+
import msgpack
12
import requests
23

3-
from mock import Mock, patch
4+
from mock import Mock
45

56
from cachecontrol.compat import pickle
67
from cachecontrol.serialize import Serializer
7-
from cachecontrol.serialize import _b64_encode
8-
from cachecontrol.serialize import _b64_decode_str
98

109

1110
class TestSerializer(object):
@@ -30,17 +29,29 @@ def setup(self):
3029
},
3130
}
3231

33-
def test_load_by_version_one(self):
32+
def test_load_by_version_v0(self):
3433
data = b'cc=0,somedata'
3534
req = Mock()
3635
resp = self.serializer.loads(req, data)
3736
assert resp is None
3837

39-
def test_read_version_two(self):
38+
def test_read_version_v1(self):
4039
req = Mock()
4140
resp = self.serializer._loads_v1(req, pickle.dumps(self.response_data))
42-
# We have to decode our urllib3 data back into a unicode
43-
# string.
41+
# We have to decode our urllib3 data back into a unicode string.
42+
assert resp.data == 'Hello World'.encode('utf-8')
43+
44+
def test_read_version_v2(self):
45+
req = Mock()
46+
compressed_base64_json = b"x\x9c%O\xb9\n\x83@\x10\xfd\x97\xa9-\x92%E\x14R\xe4 +\x16\t\xe6\x10\xbb\xb0\xc7\xe0\x81\xb8\xb2\xbb*A\xfc\xf7\x8c\xa6|\xe7\xbc\x99\xc0\xa2\xebL\xeb\x10\xa2\t\xa4\xd1_\x88\xe0\xc93'\xf9\xbe\xc8X\xf8\x95<=@\x00\x1a\x95\xd1\xf8Q\xa6\xf5\xd8z\x88\xbc\xed1\x80\x12\x85F\xeb\x96h\xca\xc2^\xf3\xac\xd7\xe7\xed\x1b\xf3SC5\x04w\xfa\x1c\x8e\x92_;Y\x1c\x96\x9a\x94]k\xc1\xdf~u\xc7\xc9 \x8fDG\xa0\xe2\xac\x92\xbc\xa9\xc9\xf1\xc8\xcbQ\xe4I\xa3\xc6U\xb9_\x14\xbb\xbdh\xc2\x1c\xd0R\xe1LK$\xd9\x9c\x17\xbe\xa7\xc3l\xb3Y\x80\xad\x94\xff\x0b\x03\xed\xa9V\x17[2\x83\xb0\xf4\xd14\xcf?E\x03Im"
47+
resp = self.serializer._loads_v2(req, compressed_base64_json)
48+
# We have to decode our urllib3 data back into a unicode string.
49+
assert resp.data == 'Hello World'.encode('utf-8')
50+
51+
def test_read_version_v3(self):
52+
req = Mock()
53+
resp = self.serializer._loads_v3(req, msgpack.dumps(self.response_data))
54+
# We have to decode our urllib3 data back into a unicode string.
4455
assert resp.data == 'Hello World'.encode('utf-8')
4556

4657
def test_read_v1_serialized_with_py2_TypeError(self):
@@ -65,7 +76,7 @@ def test_read_v2_corrupted_cache(self):
6576
req = Mock()
6677
assert self.serializer._loads_v2(req, b'') is None
6778

68-
def test_read_version_three_streamable(self, url):
79+
def test_read_latest_version_streamable(self, url):
6980
original_resp = requests.get(url, stream=True)
7081
req = original_resp.request
7182

@@ -78,7 +89,7 @@ def test_read_version_three_streamable(self, url):
7889

7990
assert resp.read()
8091

81-
def test_read_version_three(self, url):
92+
def test_read_latest_version(self, url):
8293
original_resp = requests.get(url)
8394
data = original_resp.content
8495
req = original_resp.request
@@ -110,33 +121,3 @@ def test_no_vary_header(self, url):
110121
)
111122
)
112123

113-
114-
class TestEncoding(object):
115-
116-
unicode_string = b'\u201cmax-age=31536000\u2033'.decode('utf-8')
117-
b64_result = '4oCcbWF4LWFnZT0zMTUzNjAwMOKAsw=='
118-
119-
@patch('cachecontrol.serialize._b64_encode_bytes')
120-
def test_b64_encode_with_bytes(self, encode_bytes):
121-
_b64_encode(self.unicode_string.encode('utf-8'))
122-
assert encode_bytes.called
123-
124-
@patch('cachecontrol.serialize._b64_encode_str')
125-
def test_b64_encode_with_str(self, encode_str):
126-
_b64_encode(self.unicode_string)
127-
assert encode_str.called
128-
129-
def test_b64_encode_with_unicode_encoded_as_unicode(self):
130-
"""Some servers will respond with unicode encoded strings. The
131-
test below uses unicode open and close quotes around the max
132-
age setting, which raises an exception if we treat it as a
133-
string.
134-
135-
This test ensures we recognize the unicode encoded string act
136-
accordingly.
137-
"""
138-
unicode_result = _b64_encode(self.unicode_string.encode('utf-8'))
139-
assert _b64_decode_str(unicode_result) == self.unicode_string
140-
141-
bytes_result = _b64_encode(self.unicode_string)
142-
assert _b64_decode_str(bytes_result) == self.unicode_string

0 commit comments

Comments
 (0)