Skip to content

Commit 1475594

Browse files
committed
Merge pull request #334 from matthew-brett/zero-terminal-strings
MRG: add routine to read zero-terminal strings Routine to read zero-terminal strings of arbitrary length from binary file stream. Inspired by Thomas Emmerling's readCstring function in the BrainVoyager PR.
2 parents 4cce493 + 1539d8b commit 1475594

File tree

2 files changed

+118
-0
lines changed

2 files changed

+118
-0
lines changed

nibabel/fileutils.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
# emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*-
2+
# vi: set ft=python sts=4 ts=4 sw=4 et:
3+
# ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
4+
#
5+
# See COPYING file distributed along with the NiBabel package for the
6+
# copyright and license terms.
7+
#
8+
# ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
9+
""" Utilities for reading and writing to binary file formats
10+
"""
11+
12+
def read_zt_byte_strings(fobj, n_strings=1, bufsize=1024):
    """Read zero-terminated byte strings from a file object `fobj`

    Returns byte strings with terminal zero stripped.

    Found strings can be of any length.

    The file position of `fobj` on exit will be at the byte after the terminal
    0 of the final read byte string.

    Parameters
    ----------
    fobj : fileobj
        File object to use.  Should implement ``read``, returning byte objects
        (str in Python 2), and ``seek(n, 1)`` to seek from current file
        position.
    n_strings : int, optional
        Number of byte strings to return
    bufsize : int, optional
        Define chunk size to load from file while searching for zero terminals.
        We load this many bytes at a time from the file, but the returned
        strings can be longer than `bufsize`.

    Returns
    -------
    byte_strings : list
        List of byte strings, where strings do not include the terminal 0

    Raises
    ------
    ValueError
        If the end of the file is reached before `n_strings` zero-terminated
        strings have been found.  The file position is not restored in this
        case.
    """
    byte_strings = []
    trailing = b''  # bytes read so far that belong to a not-yet-terminated string
    while True:
        buf = fobj.read(bufsize)
        # A short read signals end of file.  An empty read is checked
        # explicitly as well: for ``bufsize <= 0`` the length comparison can
        # never be true, and the loop would otherwise spin forever.
        eof = not buf or len(buf) < bufsize
        zt_strings = buf.split(b'\x00')
        if len(zt_strings) > 1:  # At least one 0
            # First piece completes whatever was carried over from earlier
            # chunks; the last piece is the start of the next string.
            byte_strings += [trailing + zt_strings[0]] + zt_strings[1:-1]
            trailing = zt_strings[-1]
        else:  # No 0
            trailing += zt_strings[0]
        n_found = len(byte_strings)
        if eof or n_found >= n_strings:
            break
    if n_found < n_strings:
        raise ValueError('Expected {0} strings, found {1}'.format(
            n_strings, n_found))
    n_extra = n_found - n_strings
    leftover_strings = byte_strings[n_strings:] + [trailing]
    # Add number of extra strings to account for lost terminal 0s
    extra_bytes = sum(len(bs) for bs in leftover_strings) + n_extra
    fobj.seek(-extra_bytes, 1)  # seek back from current position
    return byte_strings[:n_strings]

nibabel/tests/test_fileutils.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*-
2+
# vi: set ft=python sts=4 ts=4 sw=4 et:
3+
# ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
4+
#
5+
# See COPYING file distributed along with the NiBabel package for the
6+
# copyright and license terms.
7+
#
8+
# ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
9+
""" Testing fileutils module
10+
"""
11+
12+
import numpy as np
13+
14+
from ..fileutils import read_zt_byte_strings
15+
16+
from numpy.testing import (assert_almost_equal,
17+
assert_array_equal)
18+
19+
from nose.tools import (assert_true, assert_false, assert_raises,
20+
assert_equal, assert_not_equal)
21+
22+
23+
from ..tmpdirs import InTemporaryDirectory
24+
25+
26+
def test_read_zt_byte_strings():
    """Check string readout, resulting file position and error cases."""
    # sample binary block: two terminated strings, then unterminated bytes
    binary = b'test.fmr\x00test.prt\x00something'
    with InTemporaryDirectory():
        # create a tempfile and write the binary block to it
        path = 'test.bin'
        with open(path, 'wb') as fwrite:
            fwrite.write(binary)
        # open it again; the context manager closes the handle before the
        # temporary directory is left, even if an assertion fails
        with open(path, 'rb') as fread:
            # test readout of one string
            assert_equal(read_zt_byte_strings(fread), [b'test.fmr'])
            # test new file position
            assert_equal(fread.tell(), 9)
            # manually rewind
            fread.seek(0)
            # test readout of two strings
            assert_equal(read_zt_byte_strings(fread, 2),
                         [b'test.fmr', b'test.prt'])
            assert_equal(fread.tell(), 18)
            # test readout of more strings than present
            fread.seek(0)
            assert_raises(ValueError, read_zt_byte_strings, fread, 3)
            fread.seek(9)
            assert_raises(ValueError, read_zt_byte_strings, fread, 2)
            # Try with a small bufsize
            fread.seek(0)
            assert_equal(read_zt_byte_strings(fread, 2, 4),
                         [b'test.fmr', b'test.prt'])
            # position must not depend on the chunk size used
            assert_equal(fread.tell(), 18)

0 commit comments

Comments
 (0)