1
+ import re
1
2
import sys
2
3
3
4
__all__ = ["ReceiveBuffer" ]
38
39
# slightly clever thing where we delay calling compress() until we've
39
40
# processed a whole event, which could in theory be slightly more efficient
40
41
# than the internal bytearray support.)
42
+
43
# Bytes regex pattern for the blank line that terminates a block of lines.
# It matches a line ending followed by an empty line, with either CRLF or
# bare-LF endings. Kept as a plain (non-raw) literal because callers compare
# delimiters against it by value.
default_delimiter = b"\n\r?\n"
# Compiled once at import time so the default case never goes through
# re.compile again.
delimiter_regex = re.compile(default_delimiter, re.MULTILINE)
# Matches a single line ending (CRLF or bare LF); used to split a block into
# individual lines.
line_delimiter_regex = re.compile(b"\r?\n", re.MULTILINE)
46
+
47
+
41
48
class ReceiveBuffer (object ):
42
49
def __init__ (self ):
43
50
self ._data = bytearray ()
@@ -46,6 +53,9 @@ def __init__(self):
46
53
self ._looked_at = 0
47
54
self ._looked_for = b""
48
55
56
+ self ._delimiter = b"\n \r ?\n "
57
+ self ._delimiter_regex = delimiter_regex
58
+
49
59
def __bool__ (self ):
50
60
return bool (len (self ))
51
61
@@ -79,21 +89,34 @@ def maybe_extract_at_most(self, count):
79
89
self ._start += len (out )
80
90
return out
81
91
82
def maybe_extract_until_delimiter(self, delimiter=b"\n\r?\n"):
    """Extract everything up to and including the next delimiter match.

    ``delimiter`` is a bytes regular-expression pattern. On success, the
    bytes from the current read offset through the end of the first match
    are returned and the read offset is advanced past them. When no match
    is present, ``None`` is returned and the position searched so far is
    remembered, so a later call with the same delimiter resumes from there
    instead of rescanning from the read offset.
    """
    if delimiter == self._delimiter:
        # Same pattern as the previous call: resume slightly before where
        # that search stopped, so a match straddling old and newly appended
        # data is still found.
        # NOTE(review): the back-off uses the pattern's length, which bounds
        # the match length only for patterns without repetition such as
        # ``+``, ``*`` or ``{n}`` -- confirm callers never pass those.
        search_start = max(self._start, self._looked_at - len(delimiter) + 1)
    else:
        search_start = self._start
        self._delimiter = delimiter
        # Reuse the precompiled module-level regex for the default pattern;
        # a byte comparison is cheaper than going through re.compile.
        if delimiter == default_delimiter:
            self._delimiter_regex = delimiter_regex
        else:
            self._delimiter_regex = re.compile(delimiter, re.MULTILINE)

    match = self._delimiter_regex.search(self._data, search_start)
    if match is None:
        # Remember how far we got so the next call can skip this data.
        self._looked_at = len(self._data)
        return None

    end = match.end()
    out = self._data[self._start:end]
    self._start = end
    return out
98
121
99
122
# HTTP/1.1 has a number of constructs where you keep reading lines until
def maybe_extract_lines(self):
    """Extract a run of lines that is terminated by a blank line.

    Returns ``[]`` (after consuming the line ending) when the buffer starts
    with a blank line, ``None`` when no terminating blank line is in the
    buffer, and otherwise the list of lines with their line endings
    removed. Both CRLF and bare-LF line endings are accepted.
    """
    head = self._data[self._start:self._start + 2]
    if head == b"\r\n":
        self._start += 2
        return []
    # Compare a one-byte slice rather than an indexed element: indexing a
    # bytearray yields an int, which never compares equal to b"\n".
    if head[:1] == b"\n":
        self._start += 1
        return []

    data = self.maybe_extract_until_delimiter(b"\n\r?\n")
    if data is None:
        return None

    lines = line_delimiter_regex.split(data)
    # The extracted data ends with two consecutive line endings, so the
    # split leaves two empty trailing items; check that and drop them.
    assert lines[-2] == lines[-1] == b""
    del lines[-2:]
    return lines
0 commit comments