Skip to content

Commit 36db483

Browse files
authored
Merge pull request #213 from jakirkham/support_digest
Add a `digest` method for Zarr Arrays
2 parents 216b35e + 0b56c47 commit 36db483

File tree

2 files changed

+38
-8
lines changed

2 files changed

+38
-8
lines changed

docs/api/core.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ The Array class (``zarr.core``)
1414
.. automethod:: set_coordinate_selection
1515
.. automethod:: get_orthogonal_selection
1616
.. automethod:: set_orthogonal_selection
17+
.. automethod:: digest
1718
.. automethod:: hexdigest
1819
.. automethod:: resize
1920
.. automethod:: append

zarr/core.py

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# -*- coding: utf-8 -*-
22
from __future__ import absolute_import, print_function, division
3+
import binascii
34
import operator
45
import itertools
56
import hashlib
@@ -88,6 +89,7 @@ class Array(object):
8889
set_mask_selection
8990
get_coordinate_selection
9091
set_coordinate_selection
92+
digest
9193
hexdigest
9294
resize
9395
append
@@ -1855,22 +1857,23 @@ def bytestr(n):
18551857

18561858
return items
18571859

1858-
def hexdigest(self, hashname="sha1"):
1860+
def digest(self, hashname="sha1"):
18591861
"""
18601862
Compute a checksum for the data. Default uses sha1 for speed.
18611863
18621864
Examples
18631865
--------
1866+
>>> import binascii
18641867
>>> import zarr
18651868
>>> z = zarr.empty(shape=(10000, 10000), chunks=(1000, 1000))
1866-
>>> z.hexdigest()
1867-
'041f90bc7a571452af4f850a8ca2c6cddfa8a1ac'
1869+
>>> binascii.hexlify(z.digest())
1870+
b'041f90bc7a571452af4f850a8ca2c6cddfa8a1ac'
18681871
>>> z = zarr.zeros(shape=(10000, 10000), chunks=(1000, 1000))
1869-
>>> z.hexdigest()
1870-
'7162d416d26a68063b66ed1f30e0a866e4abed60'
1872+
>>> binascii.hexlify(z.digest())
1873+
b'7162d416d26a68063b66ed1f30e0a866e4abed60'
18711874
>>> z = zarr.zeros(shape=(10000, 10000), dtype="u1", chunks=(1000, 1000))
1872-
>>> z.hexdigest()
1873-
'cb387af37410ae5a3222e893cf3373e4e4f22816'
1875+
>>> binascii.hexlify(z.digest())
1876+
b'cb387af37410ae5a3222e893cf3373e4e4f22816'
18741877
"""
18751878

18761879
h = hashlib.new(hashname)
@@ -1882,7 +1885,33 @@ def hexdigest(self, hashname="sha1"):
18821885

18831886
h.update(self.store.get(self.attrs.key, b""))
18841887

1885-
checksum = h.hexdigest()
1888+
checksum = h.digest()
1889+
1890+
return checksum
1891+
1892+
def hexdigest(self, hashname="sha1"):
    """
    Compute a checksum for the data and return it as a hexadecimal string.
    Default uses sha1 for speed.

    This is a thin wrapper around ``digest``: the raw digest bytes are
    hex-encoded and returned as a native ``str``.

    Parameters
    ----------
    hashname : str
        Name of the hash, forwarded unchanged to ``digest``.

    Returns
    -------
    checksum : str
        Hex-encoded digest.

    Examples
    --------
    >>> import zarr
    >>> z = zarr.empty(shape=(10000, 10000), chunks=(1000, 1000))
    >>> z.hexdigest()
    '041f90bc7a571452af4f850a8ca2c6cddfa8a1ac'
    >>> z = zarr.zeros(shape=(10000, 10000), chunks=(1000, 1000))
    >>> z.hexdigest()
    '7162d416d26a68063b66ed1f30e0a866e4abed60'
    >>> z = zarr.zeros(shape=(10000, 10000), dtype="u1", chunks=(1000, 1000))
    >>> z.hexdigest()
    'cb387af37410ae5a3222e893cf3373e4e4f22816'
    """

    checksum = binascii.hexlify(self.digest(hashname=hashname))

    # hexlify returns a bytes object on Python 3 and we want a native str.
    # On Python 2 it is already a str, so the decode is skipped there.
    if not isinstance(checksum, str):
        checksum = checksum.decode('utf8')

    return checksum
18881917

0 commit comments

Comments
 (0)