@@ -166,10 +166,15 @@ def __init__(self, file_like=None, read_size=0, use_list=True,
166166 self ._fb_buf_o = 0
167167 self ._fb_buf_i = 0
168168 self ._fb_buf_n = 0
169+ # When Unpacker is used as an iterable, between the calls to next(),
170+ # the buffer is not "consumed" completely, for efficiency's sake.
171+ # Instead, it is done sloppily. To make sure we raise BufferFull at
172+ # the correct moments, we have to keep track of how sloppy we were.
173+ self ._fb_sloppiness = 0
169174 self ._max_buffer_size = max_buffer_size or 2 ** 31 - 1
170175 if read_size > self ._max_buffer_size :
171176 raise ValueError ("read_size must be smaller than max_buffer_size" )
172- self ._read_size = read_size or min (self ._max_buffer_size , 2048 )
177+ self ._read_size = read_size or min (self ._max_buffer_size , 4096 )
173178 self ._encoding = encoding
174179 self ._unicode_errors = unicode_errors
175180 self ._use_list = use_list
@@ -196,18 +201,38 @@ def feed(self, next_bytes):
196201 elif isinstance (next_bytes , bytearray ):
197202 next_bytes = bytes (next_bytes )
198203 assert self ._fb_feeding
199- if self ._fb_buf_n + len (next_bytes ) > self ._max_buffer_size :
204+ if (self ._fb_buf_n + len (next_bytes ) - self ._fb_sloppiness
205+ > self ._max_buffer_size ):
200206 raise BufferFull
201207 self ._fb_buf_n += len (next_bytes )
202208 self ._fb_buffers .append (next_bytes )
203209
210+ def _fb_sloppy_consume (self ):
211+ """ Gets rid of some of the used parts of the buffer. """
212+ if self ._fb_buf_i :
213+ for i in xrange (self ._fb_buf_i ):
214+ self ._fb_buf_n -= len (self ._fb_buffers [i ])
215+ self ._fb_buffers = self ._fb_buffers [self ._fb_buf_i :]
216+ self ._fb_buf_i = 0
217+ if self ._fb_buffers :
218+ self ._fb_sloppiness = self ._fb_buf_o
219+ else :
220+ self ._fb_sloppiness = 0
221+
204222 def _fb_consume (self ):
205- self ._fb_buffers = self ._fb_buffers [self ._fb_buf_i :]
223+ """ Gets rid of the used parts of the buffer. """
224+ if self ._fb_buf_i :
225+ for i in xrange (self ._fb_buf_i ):
226+ self ._fb_buf_n -= len (self ._fb_buffers [i ])
227+ self ._fb_buffers = self ._fb_buffers [self ._fb_buf_i :]
228+ self ._fb_buf_i = 0
206229 if self ._fb_buffers :
207230 self ._fb_buffers [0 ] = self ._fb_buffers [0 ][self ._fb_buf_o :]
231+ self ._fb_buf_n -= self ._fb_buf_o
232+ else :
233+ self ._fb_buf_n = 0
208234 self ._fb_buf_o = 0
209- self ._fb_buf_i = 0
210- self ._fb_buf_n = sum (map (len , self ._fb_buffers ))
235+ self ._fb_sloppiness = 0
211236
212237 def _fb_got_extradata (self ):
213238 if self ._fb_buf_i != len (self ._fb_buffers ):
@@ -238,22 +263,30 @@ def _fb_get_extradata(self):
238263
239264 def _fb_read (self , n , write_bytes = None ):
240265 buffs = self ._fb_buffers
266+ # We have a redundant codepath for the most common case, such that
267+ # pypy optimizes it properly. This is the case that the read fits
268+ # in the current buffer.
241269 if (write_bytes is None and self ._fb_buf_i < len (buffs ) and
242270 self ._fb_buf_o + n < len (buffs [self ._fb_buf_i ])):
243271 self ._fb_buf_o += n
244272 return buffs [self ._fb_buf_i ][self ._fb_buf_o - n :self ._fb_buf_o ]
245273
274+ # The remaining cases.
246275 ret = b''
247276 while len (ret ) != n :
277+ sliced = n - len (ret )
248278 if self ._fb_buf_i == len (buffs ):
249279 if self ._fb_feeding :
250280 break
251- tmp = self .file_like .read (self ._read_size )
281+ to_read = sliced
282+ if self ._read_size > to_read :
283+ to_read = self ._read_size
284+ tmp = self .file_like .read (to_read )
252285 if not tmp :
253286 break
254287 buffs .append (tmp )
288+ self ._fb_buf_n += len (tmp )
255289 continue
256- sliced = n - len (ret )
257290 ret += buffs [self ._fb_buf_i ][self ._fb_buf_o :self ._fb_buf_o + sliced ]
258291 self ._fb_buf_o += sliced
259292 if self ._fb_buf_o >= len (buffs [self ._fb_buf_i ]):
@@ -440,9 +473,10 @@ def _fb_unpack(self, execute=EX_CONSTRUCT, write_bytes=None):
def next(self):
    """Return the next unpacked object; raise StopIteration when out of data.

    After a successful unpack only the cheap, sloppy buffer clean-up is
    performed.  When the unpacker runs out of data the buffer is consumed
    exactly before StopIteration is raised.
    """
    try:
        obj = self._fb_unpack(EX_CONSTRUCT, None)
    except OutOfData:
        # Iteration is over: tidy the buffer exactly.
        self._fb_consume()
        raise StopIteration
    else:
        self._fb_sloppy_consume()
        return obj

__next__ = next
448482
0 commit comments