BF: fileslice.fileslice and read_segments functions optionally accept a

pauldmccarthy · pauldmccarthy · commit 32e3b05b119d · 2017-09-07T17:19:17.000+01:00
threading.Lock to protect seek/read pairs. ArrayProxy creates said
lock, and passes it into fileslice calls. Also fixed typo in openers.py
from previous commit
diff --git a/nibabel/arrayproxy.py b/nibabel/arrayproxy.py
@@ -26,6 +26,7 @@
 See :mod:`nibabel.tests.test_proxy_api` for proxy API conformance checks.
 """
 from contextlib import contextmanager
+from threading import Lock
 
 import numpy as np
 
@@ -110,7 +111,6 @@ def __init__(self, file_like, spec, mmap=True, keep_file_open=False):
         if mmap not in (True, False, 'c', 'r'):
             raise ValueError("mmap should be one of {True, False, 'c', 'r'}")
         self.file_like = file_like
-        self._keep_file_open = keep_file_open
         if hasattr(spec, 'get_data_shape'):
             slope, inter = spec.get_slope_inter()
             par = (spec.get_data_shape(),
@@ -131,6 +131,8 @@ def __init__(self, file_like, spec, mmap=True, keep_file_open=False):
         # Permit any specifier that can be interpreted as a numpy dtype
         self._dtype = np.dtype(self._dtype)
         self._mmap = mmap
+        self._keep_file_open = keep_file_open
+        self._lock = Lock()
 
     def __del__(self):
         '''If this ``ArrayProxy`` was created with ``keep_file_open=True``,
@@ -210,7 +212,8 @@ def __getitem__(self, slicer):
                                  self._shape,
                                  self._dtype,
                                  self._offset,
-                                 order=self.order)
+                                 order=self.order,
+                                 lock=self._lock)
         # Upcast as necessary for big slopes, intercepts
         return apply_read_scaling(raw_data, self._slope, self._inter)
 
diff --git a/nibabel/fileslice.py b/nibabel/fileslice.py
@@ -1,6 +1,7 @@
 """ Utilities for getting array slices out of file-like objects
 """
 from __future__ import division
+from contextlib import contextmanager
 
 import operator
 from numbers import Integral
@@ -622,7 +623,7 @@ def slicers2segments(read_slicers, in_shape, offset, itemsize):
     return all_segments
 
 
-def read_segments(fileobj, segments, n_bytes):
+def read_segments(fileobj, segments, n_bytes, lock=None):
     """ Read `n_bytes` byte data implied by `segments` from `fileobj`
 
     Parameters
@@ -634,29 +635,42 @@ def read_segments(fileobj, segments, n_bytes):
         absolute file offset in bytes and number of bytes to read
     n_bytes : int
         total number of bytes that will be read
+    lock : threading.Lock, optional
+        If provided, used to ensure that paired calls to ``seek`` and ``read``
+        cannot be interrupted by another thread accessing the same ``fileobj``.
+
 
     Returns
     -------
     buffer : buffer object
         object implementing buffer protocol, such as byte string or ndarray or
         mmap or ctypes ``c_char_array``
     """
+    # No lock: use a no-op stand-in, re-enterable once per segment below
+    if lock is None:
+        class dummy_lock(object):
+            def __enter__(self): return self
+            def __exit__(self, *exc_info): return False
+        lock = dummy_lock()
+
     if len(segments) == 0:
         if n_bytes != 0:
             raise ValueError("No segments, but non-zero n_bytes")
         return b''
     if len(segments) == 1:
         offset, length = segments[0]
-        fileobj.seek(offset)
-        bytes = fileobj.read(length)
+        with lock:
+            fileobj.seek(offset)
+            bytes = fileobj.read(length)
         if len(bytes) != n_bytes:
             raise ValueError("Whoops, not enough data in file")
         return bytes
     # More than one segment
     bytes = mmap(-1, n_bytes)
     for offset, length in segments:
-        fileobj.seek(offset)
-        bytes.write(fileobj.read(length))
+        with lock:
+            fileobj.seek(offset)
+            bytes.write(fileobj.read(length))
     if bytes.tell() != n_bytes:
         raise ValueError("Oh dear, n_bytes does not look right")
     return bytes
@@ -700,7 +714,7 @@ def _simple_fileslice(fileobj, sliceobj, shape, dtype, offset=0, order='C',
 
 
 def fileslice(fileobj, sliceobj, shape, dtype, offset=0, order='C',
-              heuristic=threshold_heuristic):
+              heuristic=threshold_heuristic, lock=None):
     """ Slice array in `fileobj` using `sliceobj` slicer and array definitions
 
     `fileobj` contains the contiguous binary data for an array ``A`` of shape,
@@ -737,6 +751,9 @@ def fileslice(fileobj, sliceobj, shape, dtype, offset=0, order='C',
         returning one of 'full', 'contiguous', None.  See
         :func:`optimize_slicer` and see :func:`threshold_heuristic` for an
         example.
+    lock : threading.Lock, optional
+        If provided, used to ensure that paired calls to ``seek`` and ``read``
+        cannot be interrupted by another thread accessing the same ``fileobj``.
 
     Returns
     -------
@@ -750,7 +767,7 @@ def fileslice(fileobj, sliceobj, shape, dtype, offset=0, order='C',
     segments, sliced_shape, post_slicers = calc_slicedefs(
         sliceobj, shape, itemsize, offset, order)
     n_bytes = reduce(operator.mul, sliced_shape, 1) * itemsize
-    bytes = read_segments(fileobj, segments, n_bytes)
+    bytes = read_segments(fileobj, segments, n_bytes, lock)
     sliced = np.ndarray(sliced_shape, dtype, buffer=bytes, order=order)
     return sliced[post_slicers]
 
diff --git a/nibabel/openers.py b/nibabel/openers.py
@@ -73,7 +73,7 @@ def _gzip_open(fileish, mode='rb', compresslevel=9):
     is_file = hasattr(fileish, 'read') and hasattr(fileish, 'write') and \
               hasattr(fileish, 'mode')
     if is_file:
-        mode = file.mode
+        mode = fileish.mode
 
     # use indexed_gzip if possible for faster read access
     if mode == 'rb' and have_indexed_gzip: