Skip to content

Commit 5cf2852

Browse files
authored
Merge pull request #115 from StephanErb/msgpack
Use msgpack for cache serialization
2 parents 3b3b776 + 2ae417d commit 5cf2852

File tree

4 files changed

+44
-74
lines changed

4 files changed

+44
-74
lines changed

cachecontrol/serialize.py

Lines changed: 14 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,10 @@
33
import json
44
import zlib
55

6+
import msgpack
67
from requests.structures import CaseInsensitiveDict
78

8-
from .compat import HTTPResponse, pickle, text_type
9-
10-
11-
def _b64_encode_bytes(b):
12-
return base64.b64encode(b).decode("ascii")
13-
14-
15-
def _b64_encode_str(s):
16-
return _b64_encode_bytes(s.encode("utf8"))
17-
18-
19-
def _b64_encode(s):
20-
if isinstance(s, text_type):
21-
return _b64_encode_str(s)
22-
return _b64_encode_bytes(s)
9+
from .compat import HTTPResponse, pickle
2310

2411

2512
def _b64_decode_bytes(b):
@@ -52,14 +39,11 @@ def dumps(self, request, response, body=None):
5239

5340
data = {
5441
"response": {
55-
"body": _b64_encode_bytes(body),
56-
"headers": dict(
57-
(_b64_encode(k), _b64_encode(v))
58-
for k, v in response.headers.items()
59-
),
42+
"body": body,
43+
"headers": dict(response.headers),
6044
"status": response.status,
6145
"version": response.version,
62-
"reason": _b64_encode_str(response.reason),
46+
"reason": response.reason,
6347
"strict": response.strict,
6448
"decode_content": response.decode_content,
6549
},
@@ -73,20 +57,7 @@ def dumps(self, request, response, body=None):
7357
header = header.strip()
7458
data["vary"][header] = request.headers.get(header, None)
7559

76-
# Encode our Vary headers to ensure they can be serialized as JSON
77-
data["vary"] = dict(
78-
(_b64_encode(k), _b64_encode(v) if v is not None else v)
79-
for k, v in data["vary"].items()
80-
)
81-
82-
return b",".join([
83-
b"cc=2",
84-
zlib.compress(
85-
json.dumps(
86-
data, separators=(",", ":"), sort_keys=True,
87-
).encode("utf8"),
88-
),
89-
])
60+
return b",".join([b"cc=3", msgpack.dumps(data, use_bin_type=True)])
9061

9162
def loads(self, request, data):
9263
# Short circuit if we've been given an empty set of data
@@ -194,3 +165,11 @@ def _loads_v2(self, request, data):
194165
)
195166

196167
return self.prepare_response(request, cached)
168+
169+
def _loads_v3(self, request, data):
170+
try:
171+
cached = msgpack.loads(data, encoding='utf-8')
172+
except ValueError:
173+
return
174+
175+
return self.prepare_response(request, cached)

docs/release_notes.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,15 @@
22
Release Notes
33
===============
44

5+
0.12.0
6+
======
7+
8+
Rather than using compressed JSON for caching values, we are now using
9+
MessagePack (http://msgpack.org/). MessagePack has the advantage that
10+
serialization and deserialization are faster, especially for
11+
caching large binary payloads.
12+
13+
514
0.11.2
615
======
716

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
long_description=long_description,
2020
install_requires=[
2121
'requests',
22+
'msgpack-python',
2223
],
2324
extras_require={
2425
'filecache': ['lockfile>=0.9'],

tests/test_serialization.py

Lines changed: 20 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
1+
import msgpack
12
import requests
23

3-
from mock import Mock, patch
4+
from mock import Mock
45

56
from cachecontrol.compat import pickle
67
from cachecontrol.serialize import Serializer
7-
from cachecontrol.serialize import _b64_encode
8-
from cachecontrol.serialize import _b64_decode_str
98

109

1110
class TestSerializer(object):
@@ -30,17 +29,29 @@ def setup(self):
3029
},
3130
}
3231

33-
def test_load_by_version_one(self):
32+
def test_load_by_version_v0(self):
3433
data = b'cc=0,somedata'
3534
req = Mock()
3635
resp = self.serializer.loads(req, data)
3736
assert resp is None
3837

39-
def test_read_version_two(self):
38+
def test_read_version_v1(self):
4039
req = Mock()
4140
resp = self.serializer._loads_v1(req, pickle.dumps(self.response_data))
42-
# We have to decode our urllib3 data back into a unicode
43-
# string.
41+
# We have to decode our urllib3 data back into a unicode string.
42+
assert resp.data == 'Hello World'.encode('utf-8')
43+
44+
def test_read_version_v2(self):
45+
req = Mock()
46+
compressed_base64_json = b"x\x9c%O\xb9\n\x83@\x10\xfd\x97\xa9-\x92%E\x14R\xe4 +\x16\t\xe6\x10\xbb\xb0\xc7\xe0\x81\xb8\xb2\xbb*A\xfc\xf7\x8c\xa6|\xe7\xbc\x99\xc0\xa2\xebL\xeb\x10\xa2\t\xa4\xd1_\x88\xe0\xc93'\xf9\xbe\xc8X\xf8\x95<=@\x00\x1a\x95\xd1\xf8Q\xa6\xf5\xd8z\x88\xbc\xed1\x80\x12\x85F\xeb\x96h\xca\xc2^\xf3\xac\xd7\xe7\xed\x1b\xf3SC5\x04w\xfa\x1c\x8e\x92_;Y\x1c\x96\x9a\x94]k\xc1\xdf~u\xc7\xc9 \x8fDG\xa0\xe2\xac\x92\xbc\xa9\xc9\xf1\xc8\xcbQ\xe4I\xa3\xc6U\xb9_\x14\xbb\xbdh\xc2\x1c\xd0R\xe1LK$\xd9\x9c\x17\xbe\xa7\xc3l\xb3Y\x80\xad\x94\xff\x0b\x03\xed\xa9V\x17[2\x83\xb0\xf4\xd14\xcf?E\x03Im"
47+
resp = self.serializer._loads_v2(req, compressed_base64_json)
48+
# We have to decode our urllib3 data back into a unicode string.
49+
assert resp.data == 'Hello World'.encode('utf-8')
50+
51+
def test_read_version_v3(self):
52+
req = Mock()
53+
resp = self.serializer._loads_v3(req, msgpack.dumps(self.response_data))
54+
# We have to decode our urllib3 data back into a unicode string.
4455
assert resp.data == 'Hello World'.encode('utf-8')
4556

4657
def test_read_v1_serialized_with_py2_TypeError(self):
@@ -65,7 +76,7 @@ def test_read_v2_corrupted_cache(self):
6576
req = Mock()
6677
assert self.serializer._loads_v2(req, b'') is None
6778

68-
def test_read_version_three_streamable(self, url):
79+
def test_read_latest_version_streamable(self, url):
6980
original_resp = requests.get(url, stream=True)
7081
req = original_resp.request
7182

@@ -78,7 +89,7 @@ def test_read_version_three_streamable(self, url):
7889

7990
assert resp.read()
8091

81-
def test_read_version_three(self, url):
92+
def test_read_latest_version(self, url):
8293
original_resp = requests.get(url)
8394
data = original_resp.content
8495
req = original_resp.request
@@ -110,33 +121,3 @@ def test_no_vary_header(self, url):
110121
)
111122
)
112123

113-
114-
class TestEncoding(object):
115-
116-
unicode_string = b'\u201cmax-age=31536000\u2033'.decode('utf-8')
117-
b64_result = '4oCcbWF4LWFnZT0zMTUzNjAwMOKAsw=='
118-
119-
@patch('cachecontrol.serialize._b64_encode_bytes')
120-
def test_b64_encode_with_bytes(self, encode_bytes):
121-
_b64_encode(self.unicode_string.encode('utf-8'))
122-
assert encode_bytes.called
123-
124-
@patch('cachecontrol.serialize._b64_encode_str')
125-
def test_b64_encode_with_str(self, encode_str):
126-
_b64_encode(self.unicode_string)
127-
assert encode_str.called
128-
129-
def test_b64_encode_with_unicode_encoded_as_unicode(self):
130-
"""Some servers will respond with unicode encoded strings. The
131-
test below uses unicode open and close quotes around the max
132-
age setting, which raises an exception if we treat it as a
133-
string.
134-
135-
This test ensures we recognize the unicode encoded string and act
136-
accordingly.
137-
"""
138-
unicode_result = _b64_encode(self.unicode_string.encode('utf-8'))
139-
assert _b64_decode_str(unicode_result) == self.unicode_string
140-
141-
bytes_result = _b64_encode(self.unicode_string)
142-
assert _b64_decode_str(bytes_result) == self.unicode_string

0 commit comments

Comments
 (0)