1+ import re
12import sys
23
34__all__ = ["ReceiveBuffer" ]
3839# slightly clever thing where we delay calling compress() until we've
3940# processed a whole event, which could in theory be slightly more efficient
4041# than the internal bytearray support.)
42+
# Regular-expression pattern (as bytes) for the blank line that ends a block
# of lines: a line feed, an optional carriage return, then another line feed.
# This matches both "\r\n\r\n" and the lenient bare "\n\n" form.
default_delimiter = b"\n\r?\n"

# Compiled from the constant above so the pattern text lives in one place.
# re.MULTILINE has no effect on these patterns (they contain no ^ or $); it
# is kept so the flags match the regexes compiled for caller-supplied
# delimiters.
delimiter_regex = re.compile(default_delimiter, re.MULTILINE)

# Matches a single line terminator, either "\r\n" or a bare "\n".
line_delimiter_regex = re.compile(b"\r?\n", re.MULTILINE)
46+
47+
4148class ReceiveBuffer (object ):
4249 def __init__ (self ):
4350 self ._data = bytearray ()
@@ -46,6 +53,9 @@ def __init__(self):
4653 self ._looked_at = 0
4754 self ._looked_for = b""
4855
56+ self ._delimiter = b"\n \r ?\n "
57+ self ._delimiter_regex = delimiter_regex
58+
4959 def __bool__ (self ):
5060 return bool (len (self ))
5161
@@ -79,21 +89,34 @@ def maybe_extract_at_most(self, count):
7989 self ._start += len (out )
8090 return out
8191
82- def maybe_extract_until_next (self , needle ):
92+ def maybe_extract_until_delimiter (self , delimiter = b" \n \r ? \n " ):
8393 # Returns extracted bytes on success (advancing offset), or None on
8494 # failure
85- if self . _looked_for == needle :
86- search_start = max (self ._start , self ._looked_at - len (needle ) + 1 )
95+ if delimiter == self . _delimiter :
96+ looked_at = max (self ._start , self ._looked_at - len (delimiter ) + 1 )
8797 else :
88- search_start = self ._start
89- offset = self ._data .find (needle , search_start )
90- if offset == - 1 :
98+ looked_at = self ._start
99+ self ._delimiter = delimiter
100+ # re.compile operation is more expensive than just byte compare
101+ if delimiter == default_delimiter :
102+ self ._delimiter_regex = delimiter_regex
103+ else :
104+ self ._delimiter_regex = re .compile (delimiter , re .MULTILINE )
105+
106+ delimiter_match = next (
107+ self ._delimiter_regex .finditer (self ._data , looked_at ), None
108+ )
109+
110+ if delimiter_match is None :
91111 self ._looked_at = len (self ._data )
92- self ._looked_for = needle
93112 return None
94- new_start = offset + len (needle )
95- out = self ._data [self ._start : new_start ]
96- self ._start = new_start
113+
114+ _ , end = delimiter_match .span (0 )
115+
116+ out = self ._data [self ._start : end ]
117+
118+ self ._start = end
119+
97120 return out
98121
99122 # HTTP/1.1 has a number of constructs where you keep reading lines until
@@ -102,11 +125,19 @@ def maybe_extract_lines(self):
102125 if self ._data [self ._start : self ._start + 2 ] == b"\r \n " :
103126 self ._start += 2
104127 return []
128+ elif self ._start < len (self ._data ) and self ._data [self ._start ] == b"\n " :
129+ self ._start += 1
130+ return []
105131 else :
106- data = self .maybe_extract_until_next (b"\r \n \r \n " )
132+ data = self .maybe_extract_until_delimiter (b"\n \r ?\n " )
133+
107134 if data is None :
108135 return None
109- lines = data .split (b"\r \n " )
136+
137+ lines = line_delimiter_regex .split (data )
138+
110139 assert lines [- 2 ] == lines [- 1 ] == b""
140+
111141 del lines [- 2 :]
142+
112143 return lines
0 commit comments