Skip to content

Commit ec0ff35

Browse files
cdeler authored and pgjones committed
Added ability to use LF, not only CRLF, as delimiter
1 parent 522b004 commit ec0ff35

File tree

3 files changed

+77
-19
lines changed

3 files changed

+77
-19
lines changed

h11/_readers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ def __call__(self, buf):
153153
assert self._bytes_to_discard == 0
154154
if self._bytes_in_chunk == 0:
155155
# We need to refill our chunk count
156-
chunk_header = buf.maybe_extract_until_next(b"\r\n")
156+
chunk_header = buf.maybe_extract_until_delimiter(b"\r?\n")
157157
if chunk_header is None:
158158
return None
159159
matches = validate(

h11/_receivebuffer.py

Lines changed: 43 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import re
12
import sys
23

34
__all__ = ["ReceiveBuffer"]
@@ -38,6 +39,12 @@
3839
# slightly clever thing where we delay calling compress() until we've
3940
# processed a whole event, which could in theory be slightly more efficient
4041
# than the internal bytearray support.)
42+
43+
default_delimiter = b"\n\r?\n"
44+
delimiter_regex = re.compile(b"\n\r?\n", re.MULTILINE)
45+
line_delimiter_regex = re.compile(b"\r?\n", re.MULTILINE)
46+
47+
4148
class ReceiveBuffer(object):
4249
def __init__(self):
4350
self._data = bytearray()
@@ -46,6 +53,9 @@ def __init__(self):
4653
self._looked_at = 0
4754
self._looked_for = b""
4855

56+
self._delimiter = b"\n\r?\n"
57+
self._delimiter_regex = delimiter_regex
58+
4959
def __bool__(self):
5060
return bool(len(self))
5161

@@ -79,21 +89,34 @@ def maybe_extract_at_most(self, count):
7989
self._start += len(out)
8090
return out
8191

82-
def maybe_extract_until_next(self, needle):
92+
def maybe_extract_until_delimiter(self, delimiter=b"\n\r?\n"):
8393
# Returns extracted bytes on success (advancing offset), or None on
8494
# failure
85-
if self._looked_for == needle:
86-
search_start = max(self._start, self._looked_at - len(needle) + 1)
95+
if delimiter == self._delimiter:
96+
looked_at = max(self._start, self._looked_at - len(delimiter) + 1)
8797
else:
88-
search_start = self._start
89-
offset = self._data.find(needle, search_start)
90-
if offset == -1:
98+
looked_at = self._start
99+
self._delimiter = delimiter
100+
# re.compile operation is more expensive than just byte compare
101+
if delimiter == default_delimiter:
102+
self._delimiter_regex = delimiter_regex
103+
else:
104+
self._delimiter_regex = re.compile(delimiter, re.MULTILINE)
105+
106+
delimiter_match = next(
107+
self._delimiter_regex.finditer(self._data, looked_at), None
108+
)
109+
110+
if delimiter_match is None:
91111
self._looked_at = len(self._data)
92-
self._looked_for = needle
93112
return None
94-
new_start = offset + len(needle)
95-
out = self._data[self._start : new_start]
96-
self._start = new_start
113+
114+
_, end = delimiter_match.span(0)
115+
116+
out = self._data[self._start : end]
117+
118+
self._start = end
119+
97120
return out
98121

99122
# HTTP/1.1 has a number of constructs where you keep reading lines until
@@ -102,11 +125,19 @@ def maybe_extract_lines(self):
102125
if self._data[self._start : self._start + 2] == b"\r\n":
103126
self._start += 2
104127
return []
128+
elif self._start < len(self._data) and self._data[self._start] == b"\n":
129+
self._start += 1
130+
return []
105131
else:
106-
data = self.maybe_extract_until_next(b"\r\n\r\n")
132+
data = self.maybe_extract_until_delimiter(b"\n\r?\n")
133+
107134
if data is None:
108135
return None
109-
lines = data.split(b"\r\n")
136+
137+
lines = line_delimiter_regex.split(data)
138+
110139
assert lines[-2] == lines[-1] == b""
140+
111141
del lines[-2:]
142+
112143
return lines

h11/tests/test_receivebuffer.py

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import pytest
2+
13
from .._receivebuffer import ReceiveBuffer
24

35

@@ -30,28 +32,28 @@ def test_receivebuffer():
3032
assert not b
3133

3234
################################################################
33-
# maybe_extract_until_next
35+
# maybe_extract_until_delimiter
3436
################################################################
3537

3638
b += b"12345a6789aa"
3739

38-
assert b.maybe_extract_until_next(b"a") == b"12345a"
40+
assert b.maybe_extract_until_delimiter(b"a") == b"12345a"
3941
assert bytes(b) == b"6789aa"
4042

41-
assert b.maybe_extract_until_next(b"aaa") is None
43+
assert b.maybe_extract_until_delimiter(b"aaa") is None
4244
assert bytes(b) == b"6789aa"
4345

4446
b += b"a12"
45-
assert b.maybe_extract_until_next(b"aaa") == b"6789aaa"
47+
assert b.maybe_extract_until_delimiter(b"aaa") == b"6789aaa"
4648
assert bytes(b) == b"12"
4749

4850
# check repeated searches for the same needle, triggering the
4951
# pickup-where-we-left-off logic
5052
b += b"345"
51-
assert b.maybe_extract_until_next(b"aaa") is None
53+
assert b.maybe_extract_until_delimiter(b"aaa") is None
5254

5355
b += b"6789aaa123"
54-
assert b.maybe_extract_until_next(b"aaa") == b"123456789aaa"
56+
assert b.maybe_extract_until_delimiter(b"aaa") == b"123456789aaa"
5557
assert bytes(b) == b"123"
5658

5759
################################################################
@@ -76,3 +78,28 @@ def test_receivebuffer():
7678
b += b"\r\ntrailing"
7779
assert b.maybe_extract_lines() == []
7880
assert bytes(b) == b"trailing"
81+
82+
83+
@pytest.mark.parametrize(
84+
"data",
85+
[
86+
(
87+
b"HTTP/1.1 200 OK\r\n",
88+
b"Content-type: text/plain\r\n",
89+
b"\r\n",
90+
b"Some body",
91+
),
92+
(b"HTTP/1.1 200 OK\n", b"Content-type: text/plain\n", b"\n", b"Some body"),
93+
(b"HTTP/1.1 200 OK\r\n", b"Content-type: text/plain\n", b"\n", b"Some body"),
94+
],
95+
)
96+
def test_receivebuffer_for_invalid_delimiter(data):
97+
b = ReceiveBuffer()
98+
99+
for line in data:
100+
b += line
101+
102+
lines = b.maybe_extract_lines()
103+
104+
assert lines == [b"HTTP/1.1 200 OK", b"Content-type: text/plain"]
105+
assert bytes(b) == b"Some body"

0 commit comments

Comments
 (0)