Skip to content

Commit 3354012

Browse files
committed
Find the checksum for data in Zarr
Adds a new `hexdigest` method to Zarr Arrays that computes a checksum over the array's encoded chunk data and its metadata (the array metadata and the attributes). SHA-1 is the default hash algorithm because it is fast and reliable.
1 parent 11fc028 commit 3354012

File tree

1 file changed

+20
-0
lines changed

1 file changed

+20
-0
lines changed

zarr/core.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from __future__ import absolute_import, print_function, division
33
import operator
44
import itertools
5+
import hashlib
56
import re
67

78

@@ -87,6 +88,7 @@ class Array(object):
8788
set_mask_selection
8889
get_coordinate_selection
8990
set_coordinate_selection
91+
hexdigest
9092
resize
9193
append
9294
view
@@ -1835,6 +1837,24 @@ def bytestr(n):
18351837

18361838
return items
18371839

1840+
def hexdigest(self, hashname="sha1"):
    """
    Compute a hexadecimal checksum over the array's stored data.

    The hash is fed, in this order:

    1. the value stored in ``chunk_store`` for every index tuple in the
       product of ranges over ``cdata_shape`` (empty bytes when the chunk
       key is absent);
    2. the value stored under the array metadata key (empty bytes when
       absent);
    3. the value stored under the attributes key (empty bytes when absent).

    Parameters
    ----------
    hashname : str, optional
        Algorithm name handed to ``hashlib.new``. Defaults to ``"sha1"``.

    Returns
    -------
    checksum : str
        Hex digest of everything fed to the hash.
    """
    hasher = hashlib.new(hashname)

    # Visit the chunk indices in itertools.product order so the digest is
    # built from the chunks in a fixed, reproducible sequence.
    axis_ranges = [range(extent) for extent in self.cdata_shape]
    for chunk_index in itertools.product(*axis_ranges):
        encoded_chunk = self.chunk_store.get(self._chunk_key(chunk_index), b"")
        hasher.update(encoded_chunk)

    # Metadata follows the chunk data: array metadata first, then attributes.
    array_metadata = self.store.get(self._key_prefix + array_meta_key, b"")
    hasher.update(array_metadata)

    attributes = self.store.get(self.attrs.key, b"")
    hasher.update(attributes)

    return hasher.hexdigest()
1857+
18381858
def __getstate__(self):
    """Return the tuple of attributes that represents this object when pickled."""
    state = (
        self._store,
        self._path,
        self._read_only,
        self._chunk_store,
        self._synchronizer,
        self._cache_metadata,
    )
    return state

0 commit comments

Comments
 (0)