Properly guard against removal of the object codec when decoding object-dtype chunks

alimanfoo · alimanfoo · commit aee5aceced5e · 2018-12-01T14:09:40.000Z
diff --git a/zarr/core.py b/zarr/core.py
@@ -8,6 +8,7 @@
 
 
 import numpy as np
+from numcodecs.compat import ensure_contiguous_ndarray
 
 
 from zarr.util import (is_total_slice, human_readable_size, normalize_resize_args,
@@ -1743,18 +1744,25 @@ def _decode_chunk(self, cdata):
             for f in self._filters[::-1]:
                 chunk = f.decode(chunk)
 
-        # view as correct dtype
+        # view as numpy array with correct dtype
         if self._dtype == object:
-            if isinstance(chunk, np.ndarray):
-                chunk = chunk.astype(self._dtype)
+            # special case object dtype, because incorrect handling can lead to
+            # segfaults and other bad things happening
+            if isinstance(chunk, np.ndarray) and chunk.dtype == object:
+                # chunk is already of correct dtype, good to carry on
+                # flatten just to be sure we can reshape later
+                chunk = chunk.reshape(-1, order='A')
             else:
+                # If we end up here, someone must have hacked around with the filters.
+                # We cannot deal with object arrays unless there is an object
+                # codec in the filter chain, i.e., a filter that converts from object
+                # array to something else during encoding, and converts back to object
+                # array during decoding.
                 raise RuntimeError('cannot read object array without object codec')
-        elif isinstance(chunk, np.ndarray):
-            chunk = chunk.view(self._dtype)
         else:
-            chunk = np.frombuffer(chunk, dtype=self._dtype)
+            chunk = ensure_contiguous_ndarray(chunk).view(self._dtype)
 
-        # reshape
+        # ensure correct chunk shape
         chunk = chunk.reshape(self._chunks, order=self._order)
 
         return chunk