Support datetime64 and timedelta64 dtypes (explicit units required)

alimanfoo · alimanfoo · commit e59822a3cf0a · 2017-12-07T00:02:06.000Z
diff --git a/zarr/core.py b/zarr/core.py
@@ -1590,7 +1590,10 @@ def _chunk_getitem(self, chunk_coords, chunk_selection, out, out_selection,
                     if self._compressor:
                         self._compressor.decode(cdata, dest)
                     else:
-                        chunk = np.frombuffer(cdata, dtype=self._dtype)
+                        if isinstance(cdata, np.ndarray):
+                            chunk = cdata.view(self._dtype)
+                        else:
+                            chunk = np.frombuffer(cdata, dtype=self._dtype)
                         chunk = chunk.reshape(self._chunks, order=self._order)
                         np.copyto(dest, chunk)
                     return
@@ -1736,7 +1739,7 @@ def _decode_chunk(self, cdata):
         elif isinstance(chunk, np.ndarray):
             chunk = chunk.view(self._dtype)
         else:
-            chunk = np.frombuffer(chunk, self._dtype)
+            chunk = np.frombuffer(chunk, dtype=self._dtype)
 
         # reshape
         chunk = chunk.reshape(self._chunks, order=self._order)
@@ -2087,15 +2090,15 @@ def view(self, shape=None, chunks=None, dtype=None,
             >>> import zarr
             >>> import numpy as np
             >>> np.random.seed(42)
-            >>> labels = [b'female', b'male']
+            >>> labels = ['female', 'male']
             >>> data = np.random.choice(labels, size=10000)
             >>> filters = [zarr.Categorize(labels=labels,
-            ...                                  dtype=data.dtype,
-            ...                                  astype='u1')]
+            ...                            dtype=data.dtype,
+            ...                            astype='u1')]
             >>> a = zarr.array(data, chunks=1000, filters=filters)
             >>> a[:]
-            array([b'female', b'male', b'female', ..., b'male', b'male', b'female'],
-                  dtype='|S6')
+            array(['female', 'male', 'female', ..., 'male', 'male', 'female'],
+                  dtype='<U6')
             >>> v = a.view(dtype='u1', filters=[])
             >>> v.is_view
             True
@@ -2110,10 +2113,10 @@ def view(self, shape=None, chunks=None, dtype=None,
             >>> v[:]
             array([1, 1, 1, ..., 2, 2, 2], dtype=uint8)
             >>> a[:]
-            array([b'female', b'female', b'female', ..., b'male', b'male', b'male'],
-                  dtype='|S6')
+            array(['female', 'female', 'female', ..., 'male', 'male', 'male'],
+                  dtype='<U6')
 
-        View as a different dtype with the same itemsize:
+        View as a different dtype with the same item size:
 
             >>> data = np.random.randint(0, 2, size=10000, dtype='u1')
             >>> a = zarr.array(data, chunks=1000)
@@ -2125,7 +2128,7 @@ def view(self, shape=None, chunks=None, dtype=None,
             >>> np.all(a[:].view(dtype=bool) == v[:])
             True
 
-        An array can be viewed with a dtype with a different itemsize, however
+        An array can be viewed with a dtype with a different item size, however
         some care is needed to adjust the shape and chunk shape so that chunk
         data is interpreted correctly:
 
diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py
@@ -855,27 +855,37 @@ def test_structured_array(self):
     def test_dtypes(self):
 
         # integers
-        for t in 'u1', 'u2', 'u4', 'u8', 'i1', 'i2', 'i4', 'i8':
-            z = self.create_array(shape=10, chunks=3, dtype=t)
-            assert z.dtype == np.dtype(t)
-            a = np.arange(z.shape[0], dtype=t)
+        for dtype in 'u1', 'u2', 'u4', 'u8', 'i1', 'i2', 'i4', 'i8':
+            z = self.create_array(shape=10, chunks=3, dtype=dtype)
+            assert z.dtype == np.dtype(dtype)
+            a = np.arange(z.shape[0], dtype=dtype)
             z[:] = a
             assert_array_equal(a, z[:])
 
         # floats
-        for t in 'f2', 'f4', 'f8':
-            z = self.create_array(shape=10, chunks=3, dtype=t)
-            assert z.dtype == np.dtype(t)
-            a = np.linspace(0, 1, z.shape[0], dtype=t)
+        for dtype in 'f2', 'f4', 'f8':
+            z = self.create_array(shape=10, chunks=3, dtype=dtype)
+            assert z.dtype == np.dtype(dtype)
+            a = np.linspace(0, 1, z.shape[0], dtype=dtype)
             z[:] = a
             assert_array_almost_equal(a, z[:])
 
-        # datetime, timedelta are not supported for the time being
-        for resolution in 'D', 'us', 'ns':
-            with assert_raises(ValueError):
-                self.create_array(shape=10, dtype='datetime64[{}]'.format(resolution))
-            with assert_raises(ValueError):
-                self.create_array(shape=10, dtype='timedelta64[{}]'.format(resolution))
+        # datetime, timedelta
+        for base_type in 'Mm':
+            for resolution in 'D', 'us', 'ns':
+                dtype = '{}8[{}]'.format(base_type, resolution)
+                z = self.create_array(shape=100, dtype=dtype)
+                assert z.dtype == np.dtype(dtype)
+                a = np.random.randint(0, np.iinfo('u8').max, size=z.shape[0],
+                                      dtype='u8').view(dtype)
+                z[:] = a
+                assert_array_equal(a, z[:])
+
+        # check that datetime generic units are not allowed
+        with assert_raises(ValueError):
+            self.create_array(shape=100, dtype='M8')
+        with assert_raises(ValueError):
+            self.create_array(shape=100, dtype='m8')
 
     def test_object_arrays(self):
 
diff --git a/zarr/util.py b/zarr/util.py
@@ -153,9 +153,10 @@ def normalize_dtype(dtype, object_codec):
 
     dtype = np.dtype(dtype)
 
-    if dtype.kind in 'mM':
-        raise ValueError('datetime64 and timedelta64 dtypes are not currently '
-                         'supported; please store the data using int64 instead')
+    # don't allow generic datetime64 or timedelta64, require units to be specified
+    if dtype == np.dtype('M8') or dtype == np.dtype('m8'):
+        raise ValueError('datetime64 and timedelta64 dtypes with generic units '
+                         'are not supported, please specify units (e.g., "M8[ns]")')
 
     return dtype, object_codec