Skip to content
This repository was archived by the owner on Jan 13, 2021. It is now read-only.

Commit 2384ce0

Browse files
committed
Move to bytestrings everywhere for headers.
1 parent 36e0ee9 commit 2384ce0

File tree

3 files changed

+93
-39
lines changed

3 files changed

+93
-39
lines changed

hyper/common/headers.py

Lines changed: 49 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
"""
88
import collections
99

10+
from hyper.compat import unicode, bytes, imap
11+
1012

1113
class HTTPHeaderMap(collections.MutableMapping):
1214
"""
@@ -24,6 +26,24 @@ class HTTPHeaderMap(collections.MutableMapping):
2426
2527
This data structure is an attempt to preserve all of that information
2628
while being as user-friendly as possible.
29+
30+
When iterated over, this structure returns headers in 'canonical form'.
31+
This form is a tuple, where the first entry is the header name (in
32+
lower-case), and the second entry is a list of header values (in original
33+
case).
34+
35+
The mapping always emits both names and values in the form of bytestrings:
36+
never unicode strings. It can accept names and values in unicode form, and
37+
will automatically encode them to bytestrings using UTF-8. The reason for
38+
what appears to be a user-unfriendly decision here is primarily to allow
39+
the broadest-possible compatibility (to make it possible to send headers in
40+
unusual encodings) while ensuring that users are never confused about what
41+
type of data they will receive.
42+
43+
.. warning:: Note that this data structure makes none of the performance
44+
guarantees of a dictionary. Lookup and deletion are not O(1)
45+
operations. Inserting a new value *is* O(1); all other
46+
operations are O(n), including *replacing* a header entirely.
2747
"""
2848
def __init__(self, *args, **kwargs):
2949
# The meat of the structure. In practice, headers are an ordered list
@@ -42,17 +62,18 @@ def __init__(self, *args, **kwargs):
4262
self._items = []
4363

4464
for arg in args:
45-
self._items.extend(arg)
65+
self._items.extend(map(lambda x: _to_bytestring_tuple(*x), arg))
4666

4767
for k, v in kwargs.items():
48-
self._items.append((k, v))
68+
self._items.append(_to_bytestring_tuple(k, v))
4969

5070
def __getitem__(self, key):
5171
"""
5272
Unlike the dict __getitem__, this returns a list of items in the order
5373
they were added. These items are returned in 'canonical form', meaning
5474
that comma-separated values are split into multiple values.
5575
"""
76+
key = _to_bytestring(key)
5677
values = []
5778

5879
for k, v in self._items:
@@ -68,14 +89,15 @@ def __setitem__(self, key, value):
6889
"""
6990
Unlike the dict __setitem__, this appends to the list of items.
7091
"""
71-
self._items.append((key, value))
92+
self._items.append(_to_bytestring_tuple(key, value))
7293

7394
def __delitem__(self, key):
7495
"""
7596
Sadly, __delitem__ is kind of stupid here, but the best we can do is
7697
delete all headers with a given key. To correctly achieve the 'KeyError
7798
on missing key' logic from dictionaries, we need to do this slowly.
7899
"""
100+
key = _to_bytestring(key)
79101
indices = []
80102
for (i, (k, v)) in enumerate(self._items):
81103
if _keys_equal(k, key):
@@ -111,6 +133,7 @@ def __contains__(self, key):
111133
"""
112134
If any header is present with this key, returns True.
113135
"""
136+
key = _to_bytestring(key)
114137
return any(_keys_equal(key, k) for k, _ in self._items)
115138

116139
def keys(self):
@@ -169,16 +192,37 @@ def canonical_form(k, v):
169192
canonical form. This means that the header is split on commas unless for
170193
any reason it's a super-special snowflake (I'm looking at you Set-Cookie).
171194
"""
172-
SPECIAL_SNOWFLAKES = set(['set-cookie', 'set-cookie2'])
195+
SPECIAL_SNOWFLAKES = set([b'set-cookie', b'set-cookie2'])
173196

174197
k = k.lower()
175198

176199
if k in SPECIAL_SNOWFLAKES:
177200
yield k, v
178201
else:
179-
for sub_val in v.split(','):
202+
for sub_val in v.split(b','):
180203
yield k, sub_val.strip()
181204

205+
206+
def _to_bytestring(element):
    """
    Return ``element`` as a bytestring.

    Bytestrings are handed back untouched, and unicode strings are encoded
    using UTF-8. Any other type is rejected with a ``ValueError``.
    """
    # ``bytes`` and ``unicode`` are the names imported from hyper.compat, so
    # the same checks work on both Python 2 and Python 3.
    if isinstance(element, bytes):
        return element

    if isinstance(element, unicode):
        return element.encode('utf-8')

    raise ValueError("Non string type.")
216+
217+
218+
def _to_bytestring_tuple(*x):
    """
    Build a tuple holding the bytestring form of every positional argument.

    Each argument is passed through ``_to_bytestring`` and the results are
    returned in the order the arguments were given.
    """
    return tuple(_to_bytestring(item) for item in x)
224+
225+
182226
def _keys_equal(x, y):
183227
"""
184228
Returns 'True' if the two keys are equal by the laws of HTTP headers.

hyper/compat.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ def ignore_missing():
3636

3737
from urllib import urlencode
3838
from urlparse import urlparse, urlsplit
39+
from itertools import imap
3940

4041
def to_byte(char):
4142
return ord(char)
@@ -52,9 +53,14 @@ def zlib_compressobj(level=6, method=zlib.DEFLATED, wbits=15, memlevel=8,
5253
strategy=zlib.Z_DEFAULT_STRATEGY):
5354
return zlib.compressobj(level, method, wbits, memlevel, strategy)
5455

56+
unicode = unicode
57+
bytes = str
58+
5559
elif is_py3:
5660
from urllib.parse import urlencode, urlparse, urlsplit
5761

62+
imap = map
63+
5864
def to_byte(char):
5965
return char
6066

@@ -71,3 +77,6 @@ def write_to_stdout(data):
7177
ssl = ssl_compat
7278
else:
7379
import ssl
80+
81+
unicode = str
82+
bytes = bytes

test/test_headers.py

Lines changed: 35 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,19 @@ class TestHTTPHeaderMap(object):
66
def test_header_map_can_insert_single_header(self):
77
h = HTTPHeaderMap()
88
h['key'] = 'value'
9-
assert h['key'] == ['value']
9+
assert h['key'] == [b'value']
1010

1111
def test_header_map_insensitive_key(self):
1212
h = HTTPHeaderMap()
1313
h['KEY'] = 'value'
14-
assert h['key'] == ['value']
14+
assert h['key'] == [b'value']
1515

1616
def test_header_map_is_iterable_in_order(self):
1717
h = HTTPHeaderMap()
1818
items = [
19-
('k1', 'v2'),
20-
('k2', 'v2'),
21-
('k2', 'v3'),
19+
(b'k1', b'v2'),
20+
(b'k2', b'v2'),
21+
(b'k2', b'v3'),
2222
]
2323

2424
for k, v in items:
@@ -29,18 +29,18 @@ def test_header_map_is_iterable_in_order(self):
2929

3030
def test_header_map_allows_multiple_values(self):
3131
h = HTTPHeaderMap()
32-
h['key'] = 'v1'
33-
h['Key'] = 'v2'
32+
h['key'] = b'v1'
33+
h[b'Key'] = b'v2'
3434

35-
assert h['key'] == ['v1', 'v2']
35+
assert h['key'] == [b'v1', b'v2']
3636

3737
def test_header_map_can_delete_value(self):
3838
h = HTTPHeaderMap()
39-
h['key'] = 'v1'
40-
del h['key']
39+
h['key'] = b'v1'
40+
del h[b'key']
4141

4242
with pytest.raises(KeyError):
43-
h['key']
43+
h[b'key']
4444

4545
def test_header_map_deletes_all_values(self):
4646
h = HTTPHeaderMap()
@@ -55,13 +55,14 @@ def test_setting_comma_separated_header(self):
5555
h = HTTPHeaderMap()
5656
h['key'] = 'v1, v2'
5757

58-
assert h['key'] == ['v1', 'v2']
58+
assert h[b'key'] == [b'v1', b'v2']
5959

6060
def test_containment(self):
6161
h = HTTPHeaderMap()
6262
h['key'] = 'val'
6363

6464
assert 'key' in h
65+
assert b'key' in h
6566
assert 'nonkey' not in h
6667

6768
def test_length_counts_lines_separately(self):
@@ -79,7 +80,7 @@ def test_keys(self):
7980
h['k1'] = 'v4'
8081

8182
assert len(list(h.keys())) == 4
82-
assert list(h.keys()) == ['k1', 'k1', 'k2', 'k1']
83+
assert list(h.keys()) == [b'k1', b'k1', b'k2', b'k1']
8384

8485
def test_values(self):
8586
h = HTTPHeaderMap()
@@ -88,14 +89,14 @@ def test_values(self):
8889
h['k1'] = 'v4'
8990

9091
assert len(list(h.values())) == 4
91-
assert list(h.values()) == ['v1', 'v2', 'v3', 'v4']
92+
assert list(h.values()) == [b'v1', b'v2', b'v3', b'v4']
9293

9394
def test_items(self):
9495
h = HTTPHeaderMap()
9596
items = [
96-
('k1', 'v2'),
97-
('k2', 'v2'),
98-
('k2', 'v3'),
97+
(b'k1', b'v2'),
98+
(b'k2', b'v2'),
99+
(b'k2', b'v3'),
99100
]
100101

101102
for k, v in items:
@@ -114,13 +115,13 @@ def test_actual_get(self):
114115
h['k2'] = 'v3'
115116
h['k1'] = 'v4'
116117

117-
assert h.get('k1') == ['v1', 'v2', 'v4']
118+
assert h.get('k1') == [b'v1', b'v2', b'v4']
118119

119120
def test_doesnt_split_set_cookie(self):
120121
h = HTTPHeaderMap()
121122
h['Set-Cookie'] = 'v1, v2'
122-
assert h['set-cookie'] == ['v1, v2']
123-
assert h.get('set-cookie') == ['v1, v2']
123+
assert h['set-cookie'] == [b'v1, v2']
124+
assert h.get(b'set-cookie') == [b'v1, v2']
124125

125126
def test_equality(self):
126127
h1 = HTTPHeaderMap()
@@ -171,44 +172,44 @@ def test_deleting_nonexistent(self):
171172

172173
def test_can_create_from_iterable(self):
173174
items = [
174-
('k1', 'v2'),
175-
('k2', 'v2'),
176-
('k2', 'v3'),
175+
(b'k1', b'v2'),
176+
(b'k2', b'v2'),
177+
(b'k2', b'v3'),
177178
]
178179
h = HTTPHeaderMap(items)
179180

180181
assert list(h) == items
181182

182183
def test_can_create_from_multiple_iterables(self):
183184
items = [
184-
('k1', 'v2'),
185-
('k2', 'v2'),
186-
('k2', 'v3'),
185+
(b'k1', b'v2'),
186+
(b'k2', b'v2'),
187+
(b'k2', b'v3'),
187188
]
188189
h = HTTPHeaderMap(items, items, items)
189190

190191
assert list(h) == items + items + items
191192

192193
def test_create_from_iterables_and_kwargs(self):
193194
items = [
194-
('k1', 'v2'),
195-
('k2', 'v2'),
196-
('k2', 'v3'),
195+
(b'k1', b'v2'),
196+
(b'k2', b'v2'),
197+
(b'k2', b'v3'),
197198
]
198199
h = list(HTTPHeaderMap(items, k3='v4', k4='v5'))
199200

200201
# kwargs are an unordered dictionary, so allow for both possible
201202
# iteration orders.
202203
assert (
203-
h == items + [('k3', 'v4'), ('k4', 'v5')] or
204-
h == items + [('k4', 'v5'), ('k3', 'v4')]
204+
h == items + [(b'k3', b'v4'), (b'k4', b'v5')] or
205+
h == items + [(b'k4', b'v5'), (b'k3', b'v4')]
205206
)
206207

207208
def test_raw_iteration(self):
208209
items = [
209-
('k1', 'v2'),
210-
('k2', 'v2, v3, v4'),
211-
('k2', 'v3'),
210+
(b'k1', b'v2'),
211+
(b'k2', b'v2, v3, v4'),
212+
(b'k2', b'v3'),
212213
]
213214
h = HTTPHeaderMap(items)
214215

0 commit comments

Comments
 (0)