@@ -21,7 +21,6 @@
 from .header import Field
 
 MEGABYTE = 1024 * 1024
-BUFFER_SIZE = 1000000
 
 
 def create_empty_header():
@@ -342,8 +341,8 @@ def _read_header(fileobj):
 
         return hdr
 
-    @staticmethod
-    def _read(fileobj, header, buffer_size=4):
+    @classmethod
+    def _read(cls, fileobj, header, buffer_size=4):
         """ Return generator that reads TCK data from `fileobj` given `header`
 
         Parameters
@@ -369,65 +368,60 @@ def _read(fileobj, header, buffer_size=4):
         buffer_size = int(buffer_size * MEGABYTE)
         buffer_size += coordinate_size - (buffer_size % coordinate_size)
 
+        # Markers for streamline end and file end
+        fiber_marker = cls.FIBER_DELIMITER.astype(dtype).tostring()
+        eof_marker = cls.EOF_DELIMITER.astype(dtype).tostring()
+
         with Opener(fileobj) as f:
             start_position = f.tell()
 
             # Set the file position at the beginning of the data.
             f.seek(header["_offset_data"], os.SEEK_SET)
 
             eof = False
-            buff = b""
-            pts = []
-
-            i = 0
-
-            while not eof or not np.all(np.isinf(pts)):
-
-                if not eof:
-                    bytes_read = f.read(buffer_size)
-                    buff += bytes_read
-                    eof = len(bytes_read) == 0
+            buffs = []
+            n_streams = 0
 
-                # Read floats.
-                pts = np.frombuffer(buff, dtype=dtype)
+            while not eof:
 
-                # Convert data to little-endian if needed.
-                if dtype != '<f4':
-                    pts = pts.astype('<f4')
-
-                pts = pts.reshape([-1, 3])
-                idx_nan = np.arange(len(pts))[np.isnan(pts[:, 0])]
+                bytes_read = f.read(buffer_size)
+                buffs.append(bytes_read)
+                eof = len(bytes_read) != buffer_size
 
                 # Make sure we've read enough to find a streamline delimiter.
-                if len(idx_nan) == 0:
+                if fiber_marker not in bytes_read:
                     # If we've read the whole file, then fail.
-                    if eof and not np.all(np.isinf(pts)):
-                        msg = ("Cannot find a streamline delimiter. This file"
-                               " might be corrupted.")
-                        raise DataError(msg)
-
-                    # Otherwise read a bit more.
-                    continue
-
-                nb_pts_total = 0
-                idx_start = 0
-                for idx_end in idx_nan:
-                    nb_pts = len(pts[idx_start:idx_end, :])
-                    nb_pts_total += nb_pts
-
-                    if nb_pts > 0:
-                        yield pts[idx_start:idx_end, :]
-                        i += 1
-
-                    idx_start = idx_end + 1
-
-                # Remove pts plus the first triplet of NaN.
-                nb_tiplets_to_remove = nb_pts_total + len(idx_nan)
-                nb_bytes_to_remove = nb_tiplets_to_remove * 3 * dtype.itemsize
-                buff = buff[nb_bytes_to_remove:]
+                    if eof:
+                        # Could have minimal buffering, and have read only the
+                        # EOF delimiter
+                        buffs = [b''.join(buffs)]
+                        if not buffs[0] == eof_marker:
+                            raise DataError(
+                                "Cannot find a streamline delimiter. This file"
+                                " might be corrupted.")
+                    else:
+                        # Otherwise read a bit more.
+                        continue
+
+                all_parts = b''.join(buffs).split(fiber_marker)
+                point_parts, buffs = all_parts[:-1], all_parts[-1:]
+                point_parts = [p for p in point_parts if p != b'']
+
+                for point_part in point_parts:
+                    # Read floats.
+                    pts = np.frombuffer(point_part, dtype=dtype)
+                    # Enforce ability to write to underlying bytes object
+                    pts.flags.writeable = True
+                    # Convert data to little-endian if needed.
+                    yield pts.astype('<f4', copy=False).reshape([-1, 3])
+
+                n_streams += len(point_parts)
+
+            if not buffs[-1] == eof_marker:
+                raise DataError("Expecting end-of-file marker 'inf inf inf'")
 
             # In case the 'count' field was not provided.
-            header[Field.NB_STREAMLINES] = i
+            header[Field.NB_STREAMLINES] = n_streams
 
             # Set the file position where it was (in case it was already open).
             f.seek(start_position, os.SEEK_CUR)
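
The core of this change is locating streamline boundaries by searching the raw bytes for the delimiter patterns, rather than decoding the whole buffer to floats and scanning for NaN rows. Here is a minimal sketch of that technique on an in-memory buffer; the sample data and variable names are illustrative only, not part of the nibabel API, and `tobytes()` is the modern alias of the `tostring()` call used in the patch:

```python
import numpy as np

dtype = np.dtype('<f4')
# Byte patterns for the streamline delimiter (NaN triplet) and the
# end-of-file marker (inf triplet), mirroring what the patched _read
# builds from cls.FIBER_DELIMITER and cls.EOF_DELIMITER.
fiber_marker = np.full(3, np.nan, dtype=dtype).tobytes()
eof_marker = np.full(3, np.inf, dtype=dtype).tobytes()

# A fake TCK data section: two streamlines, each followed by the
# delimiter, then the end-of-file marker.
s1 = np.array([[0, 0, 0], [1, 1, 1]], dtype=dtype)
s2 = np.array([[2, 2, 2]], dtype=dtype)
data = s1.tobytes() + fiber_marker + s2.tobytes() + fiber_marker + eof_marker

# Split on the delimiter: every part but the last is one streamline's
# points, and the last part should be exactly the EOF marker.
parts = data.split(fiber_marker)
assert parts[-1] == eof_marker
streamlines = [np.frombuffer(p, dtype=dtype).reshape(-1, 3)
               for p in parts[:-1] if p != b'']
assert len(streamlines) == 2
print(streamlines[0])  # [[0. 0. 0.]
                       #  [1. 1. 1.]]
```

In the patch itself, the unsplit tail (`all_parts[-1:]`) is carried over into the next iteration, so a delimiter that straddles two buffer-sized reads is still found once the following chunk arrives, and the streamline count comes from the yielded parts instead of a hand-maintained index.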