Skip to content

Commit 0b56c47

Browse files
committed
Add a digest method for Zarr Arrays
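This adds a `digest` method to Zarr Arrays, equivalent to the `digest` method of `hashlib` hash objects: it returns the checksum as raw bytes rather than as a hexadecimal string. The new method is also added to the API documentation.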
1 parent 1c7efb8 commit 0b56c47

File tree

2 files changed

+38
-8
lines changed

2 files changed

+38
-8
lines changed

docs/api/core.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ The Array class (``zarr.core``)
1414
.. automethod:: set_coordinate_selection
1515
.. automethod:: get_orthogonal_selection
1616
.. automethod:: set_orthogonal_selection
17+
.. automethod:: digest
1718
.. automethod:: hexdigest
1819
.. automethod:: resize
1920
.. automethod:: append

zarr/core.py

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# -*- coding: utf-8 -*-
22
from __future__ import absolute_import, print_function, division
3+
import binascii
34
import operator
45
import itertools
56
import hashlib
@@ -88,6 +89,7 @@ class Array(object):
8889
set_mask_selection
8990
get_coordinate_selection
9091
set_coordinate_selection
92+
digest
9193
hexdigest
9294
resize
9395
append
@@ -1837,22 +1839,23 @@ def bytestr(n):
18371839

18381840
return items
18391841

1840-
def hexdigest(self, hashname="sha1"):
1842+
def digest(self, hashname="sha1"):
18411843
"""
18421844
Compute a checksum for the data. Default uses sha1 for speed.
18431845
18441846
Examples
18451847
--------
1848+
>>> import binascii
18461849
>>> import zarr
18471850
>>> z = zarr.empty(shape=(10000, 10000), chunks=(1000, 1000))
1848-
>>> z.hexdigest()
1849-
'041f90bc7a571452af4f850a8ca2c6cddfa8a1ac'
1851+
>>> binascii.hexlify(z.digest())
1852+
b'041f90bc7a571452af4f850a8ca2c6cddfa8a1ac'
18501853
>>> z = zarr.zeros(shape=(10000, 10000), chunks=(1000, 1000))
1851-
>>> z.hexdigest()
1852-
'7162d416d26a68063b66ed1f30e0a866e4abed60'
1854+
>>> binascii.hexlify(z.digest())
1855+
b'7162d416d26a68063b66ed1f30e0a866e4abed60'
18531856
>>> z = zarr.zeros(shape=(10000, 10000), dtype="u1", chunks=(1000, 1000))
1854-
>>> z.hexdigest()
1855-
'cb387af37410ae5a3222e893cf3373e4e4f22816'
1857+
>>> binascii.hexlify(z.digest())
1858+
b'cb387af37410ae5a3222e893cf3373e4e4f22816'
18561859
"""
18571860

18581861
h = hashlib.new(hashname)
@@ -1864,7 +1867,33 @@ def hexdigest(self, hashname="sha1"):
18641867

18651868
h.update(self.store.get(self.attrs.key, b""))
18661869

1867-
checksum = h.hexdigest()
1870+
checksum = h.digest()
1871+
1872+
return checksum
1873+
1874+
def hexdigest(self, hashname="sha1"):
    """
    Compute a checksum for the data and return it as a hexadecimal string.
    Default uses sha1 for speed.

    Parameters
    ----------
    hashname : str, optional
        Name of the hash algorithm. It is passed through unchanged to
        ``self.digest``.

    Returns
    -------
    checksum : str
        Hexadecimal representation of the bytes returned by
        ``self.digest(hashname=hashname)``.

    Examples
    --------
    >>> import zarr
    >>> z = zarr.empty(shape=(10000, 10000), chunks=(1000, 1000))
    >>> z.hexdigest()
    '041f90bc7a571452af4f850a8ca2c6cddfa8a1ac'
    >>> z = zarr.zeros(shape=(10000, 10000), chunks=(1000, 1000))
    >>> z.hexdigest()
    '7162d416d26a68063b66ed1f30e0a866e4abed60'
    >>> z = zarr.zeros(shape=(10000, 10000), dtype="u1", chunks=(1000, 1000))
    >>> z.hexdigest()
    'cb387af37410ae5a3222e893cf3373e4e4f22816'
    """

    # Delegate the hashing to ``digest`` so both methods always agree, then
    # render the raw bytes as hexadecimal.
    checksum = binascii.hexlify(self.digest(hashname=hashname))

    # This is a bytes object on Python 3 and we want a str. Use isinstance
    # rather than an exact type comparison for the check.
    if not isinstance(checksum, str):
        checksum = checksum.decode('utf8')

    return checksum
18701899

0 commit comments

Comments
 (0)