Skip to content
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ Unreleased
methods with V3 stores.
By :user:`Ryan Abernathey <rabernat>` :issue:`1228`.

* Add support for setting user-defined attributes at array / group creation time.
By :user:`Davis Bennett <d-v-b>` :issue:`538`.

.. _release_2.13.2:

Maintenance
Expand Down
15 changes: 14 additions & 1 deletion zarr/_storage/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,7 +442,7 @@ def _prefix_to_group_key(store: StoreLike, prefix: str) -> str:
return key


def _prefix_to_attrs_key(store: StoreLike, prefix: str) -> str:
def _prefix_to_array_attrs_key(store: StoreLike, prefix: str) -> str:
if getattr(store, "_store_version", 2) == 3:
# for v3, attributes are stored in the array metadata
sfx = _get_metadata_suffix(store) # type: ignore
Expand All @@ -453,3 +453,16 @@ def _prefix_to_attrs_key(store: StoreLike, prefix: str) -> str:
else:
key = prefix + attrs_key
return key


def _prefix_to_group_attrs_key(store: StoreLike, prefix: str) -> str:
    """Return the store key that holds the user attributes of a group.

    Parameters
    ----------
    store : StoreLike
        Store the group lives in. Its ``_store_version`` attribute selects
        the key layout; a store without that attribute is treated as
        version 2.
    prefix : str
        Key prefix of the group; an empty string selects the key built
        from ``meta_root`` alone.

    Returns
    -------
    str
        For a version 3 store, the key of the group metadata document;
        otherwise ``prefix`` followed by ``attrs_key``.
    """
    is_v3 = getattr(store, "_store_version", 2) == 3
    if not is_v3:
        return prefix + attrs_key

    # for v3, attributes are stored in the group metadata
    suffix = _get_metadata_suffix(store)  # type: ignore
    if prefix:
        return meta_root + prefix.rstrip('/') + ".group" + suffix
    # empty prefix: build the key from meta_root itself, minus its last character
    return meta_root[:-1] + '.group' + suffix
4 changes: 2 additions & 2 deletions zarr/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import numpy as np
from numcodecs.compat import ensure_bytes

from zarr._storage.store import _prefix_to_attrs_key, assert_zarr_v3_api_available
from zarr._storage.store import _prefix_to_array_attrs_key, assert_zarr_v3_api_available
from zarr.attrs import Attributes
from zarr.codecs import AsType, get_codec
from zarr.errors import ArrayNotFoundError, ReadOnlyError, ArrayIndexError
Expand Down Expand Up @@ -215,7 +215,7 @@ def __init__(
self._load_metadata()

# initialize attributes
akey = _prefix_to_attrs_key(self._store, self._key_prefix)
akey = _prefix_to_array_attrs_key(self._store, self._key_prefix)
self._attrs = Attributes(store, key=akey, read_only=read_only,
synchronizer=synchronizer, cache=cache_attrs)

Expand Down
19 changes: 12 additions & 7 deletions zarr/creation.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Optional
from typing import Any, Dict, Optional
from warnings import warn

import numpy as np
Expand All @@ -22,7 +22,7 @@ def create(shape, chunks=True, dtype=None, compressor='default',
overwrite=False, path=None, chunk_store=None, filters=None,
cache_metadata=True, cache_attrs=True, read_only=False,
object_codec=None, dimension_separator=None, write_empty_chunks=True,
*, zarr_version=None, meta_array=None, **kwargs):
attrs: Dict[str, Any] = {}, *, zarr_version=None, meta_array=None, **kwargs):
"""Create an array.

Parameters
Expand Down Expand Up @@ -71,6 +71,8 @@ def create(shape, chunks=True, dtype=None, compressor='default',
A codec to encode object arrays, only needed if dtype=object.
dimension_separator : {'.', '/'}, optional
Separator placed between the dimensions of a chunk.

.. versionadded:: 2.8

attrs : dict, optional
JSON-serializable user attributes for the array. Defaults to {}.

Expand Down Expand Up @@ -170,7 +172,7 @@ def create(shape, chunks=True, dtype=None, compressor='default',
init_array(store, shape=shape, chunks=chunks, dtype=dtype, compressor=compressor,
fill_value=fill_value, order=order, overwrite=overwrite, path=path,
chunk_store=chunk_store, filters=filters, object_codec=object_codec,
dimension_separator=dimension_separator)
dimension_separator=dimension_separator, attrs=attrs)

# instantiate array
z = Array(store, path=path, chunk_store=chunk_store, synchronizer=synchronizer,
Expand Down Expand Up @@ -413,6 +415,7 @@ def open_array(
storage_options=None,
partial_decompress=False,
write_empty_chunks=True,
attrs: Dict[str, Any] = {},
*,
zarr_version=None,
dimension_separator=None,
Expand Down Expand Up @@ -478,6 +481,8 @@ def open_array(
is deleted. This setting enables sparser storage, as only chunks with
non-fill-value data are stored, at the expense of overhead associated
with checking the data of each chunk.

.. versionadded:: 2.11

attrs : dict, optional
JSON-serializable user attributes for the array. Defaults to {}.

Expand Down Expand Up @@ -557,7 +562,7 @@ def open_array(
fill_value = np.array(fill_value, dtype=dtype)[()]

# ensure store is initialized

# TODO: warning when creation kwargs (dtype, shape) are provided but mode is not w
if mode in ['r', 'r+']:
if not contains_array(store, path=path):
if contains_group(store, path=path):
Expand All @@ -569,7 +574,7 @@ def open_array(
compressor=compressor, fill_value=fill_value,
order=order, filters=filters, overwrite=True, path=path,
object_codec=object_codec, chunk_store=chunk_store,
dimension_separator=dimension_separator)
dimension_separator=dimension_separator, attrs=attrs)

elif mode == 'a':
if not contains_array(store, path=path):
Expand All @@ -579,7 +584,7 @@ def open_array(
compressor=compressor, fill_value=fill_value,
order=order, filters=filters, path=path,
object_codec=object_codec, chunk_store=chunk_store,
dimension_separator=dimension_separator)
dimension_separator=dimension_separator, attrs=attrs)

elif mode in ['w-', 'x']:
if contains_group(store, path=path):
Expand All @@ -591,7 +596,7 @@ def open_array(
compressor=compressor, fill_value=fill_value,
order=order, filters=filters, path=path,
object_codec=object_codec, chunk_store=chunk_store,
dimension_separator=dimension_separator)
dimension_separator=dimension_separator, attrs=attrs)

# determine read only status
read_only = mode == 'r'
Expand Down
5 changes: 3 additions & 2 deletions zarr/hierarchy.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from collections.abc import MutableMapping
from itertools import islice
from typing import Any, Dict

import numpy as np

Expand Down Expand Up @@ -920,12 +921,12 @@ def create_group(self, name, overwrite=False):

return self._write_op(self._create_group_nosync, name, overwrite=overwrite)

def _create_group_nosync(self, name, overwrite=False):
def _create_group_nosync(self, name, overwrite: bool = False, attrs: Dict[str, Any] = {}):
path = self._item_path(name)

# create terminal group
init_group(self._store, path=path, chunk_store=self._chunk_store,
overwrite=overwrite)
overwrite=overwrite, attrs=attrs)

return Group(self._store, path=path, read_only=self._read_only,
chunk_store=self._chunk_store, cache_attrs=self.attrs.cache,
Expand Down
37 changes: 35 additions & 2 deletions zarr/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
ensure_contiguous_ndarray_like
)
from numcodecs.registry import codec_registry
from zarr.attrs import Attributes

from zarr.errors import (
MetadataError,
Expand All @@ -62,6 +63,8 @@
from zarr._storage.store import (_get_hierarchy_metadata, # noqa: F401
_get_metadata_suffix,
_listdir_from_keys,
_prefix_to_array_attrs_key,
_prefix_to_group_attrs_key,
_rename_from_keys,
_rename_metadata_v3,
_rmdir_from_keys,
Expand Down Expand Up @@ -300,7 +303,8 @@ def init_array(
chunk_store: Optional[StoreLike] = None,
filters=None,
object_codec=None,
dimension_separator=None,
dimension_separator: Optional[str] = None,
attrs: Dict[str, Any] = {}
):
"""Initialize an array store with the given configuration. Note that this is a low-level
function and there should be no need to call this directly from user code.
Expand Down Expand Up @@ -335,6 +339,8 @@ def init_array(
A codec to encode object arrays, only needed if dtype=object.
dimension_separator : {'.', '/'}, optional
Separator placed between the dimensions of a chunk.
attrs : JSON-serializable dict.
User attributes for the array. Defaults to {}.

Examples
--------
Expand Down Expand Up @@ -430,6 +436,28 @@ def init_array(
object_codec=object_codec,
dimension_separator=dimension_separator)

_init_array_attrs(store, path, attrs)


def _init_array_attrs(
    store: StoreLike,
    path: Optional[str],
    attrs: Optional[Dict[str, Any]],
):
    """Write user attributes for an array into ``store``.

    Parameters
    ----------
    store : StoreLike
        Store that receives the attributes.
    path : str or None
        Path of the array inside the store; a falsy value yields an empty
        key prefix.
    attrs : dict or None
        User attributes to write. ``None`` or an empty mapping means there
        is nothing to write, and the store is left untouched.
    """
    if not attrs:
        # Truthiness instead of len(): also accepts None without raising.
        return

    key_prefix = path + '/' if path else ''
    akey = _prefix_to_array_attrs_key(store, key_prefix)
    # cache=False is passed through to this short-lived Attributes wrapper
    Attributes(store, key=akey, cache=False).update(attrs)


def _init_group_attrs(
    store: StoreLike,
    path: Optional[str],
    attrs: Optional[Dict[str, Any]],
):
    """Write user attributes for a group into ``store``.

    Parameters
    ----------
    store : StoreLike
        Store that receives the attributes.
    path : str or None
        Path of the group inside the store; a falsy value yields an empty
        key prefix.
    attrs : dict or None
        User attributes to write. ``None`` or an empty mapping means there
        is nothing to write, and the store is left untouched.
    """
    if not attrs:
        # Truthiness instead of len(): also accepts None without raising.
        return

    key_prefix = path + '/' if path else ''
    akey = _prefix_to_group_attrs_key(store, key_prefix)
    # cache=False is passed through to this short-lived Attributes wrapper
    Attributes(store, key=akey, cache=False).update(attrs)


def _init_array_metadata(
store: StoreLike,
Expand Down Expand Up @@ -598,6 +626,7 @@ def init_group(
overwrite: bool = False,
path: Path = None,
chunk_store: Optional[StoreLike] = None,
attrs: Dict[str, Any] = {},
):
"""Initialize a group store. Note that this is a low-level function and there should be no
need to call this directly from user code.
Expand All @@ -613,7 +642,8 @@ def init_group(
chunk_store : Store, optional
Separate storage for chunks. If not provided, `store` will be used
for storage of both chunks and metadata.

attrs : JSON-serializable dict.
User attributes for the group. Defaults to {}.
"""

# normalize path
Expand All @@ -633,6 +663,9 @@ def init_group(
_init_group_metadata(store=store, overwrite=overwrite, path=path,
chunk_store=chunk_store)

# initialize attrs
_init_group_attrs(store, path, attrs)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same question


if store_version == 3:
# TODO: Should initializing a v3 group also create a corresponding
# empty folder under data/root/? I think probably not until there
Expand Down
7 changes: 5 additions & 2 deletions zarr/tests/test_creation.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,14 @@ def test_array(zarr_version, at_root):
expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version
kwargs = _init_creation_kwargs(zarr_version, at_root)

# with numpy array
# with numpy array and attrs
a = np.arange(100)
z = array(a, chunks=10, **kwargs)
attrs = {'foo': 10}
z = array(a, chunks=10, **kwargs, attrs=attrs)

assert a.shape == z.shape
assert a.dtype == z.dtype
assert attrs == dict(z.attrs)
assert z._store._store_version == expected_zarr_version
assert_array_equal(a, z[:])

Expand Down
15 changes: 7 additions & 8 deletions zarr/tests/test_hierarchy.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import shutil
import tempfile
import textwrap
from typing import Any, Dict
import unittest

import numpy as np
Expand Down Expand Up @@ -48,18 +49,18 @@ def create_store():
return KVStore(dict()), None

def create_group(self, store=None, path=None, read_only=False,
chunk_store=None, synchronizer=None):
chunk_store=None, synchronizer=None, attrs: Dict[str, Any] = {}):
# can be overridden in sub-classes
if store is None:
store, chunk_store = self.create_store()
init_group(store, path=path, chunk_store=chunk_store)
init_group(store, path=path, chunk_store=chunk_store, attrs=attrs)
g = Group(store, path=path, read_only=read_only,
chunk_store=chunk_store, synchronizer=synchronizer)
return g

def test_group_init_1(self):
store, chunk_store = self.create_store()
g = self.create_group(store, chunk_store=chunk_store)
g = self.create_group(store, chunk_store=chunk_store, attrs={'foo': 'bar'})
assert store is g.store
if chunk_store is None:
assert store is g.chunk_store
Expand All @@ -70,7 +71,6 @@ def test_group_init_1(self):
assert '/' == g.name
assert '' == g.basename
assert isinstance(g.attrs, Attributes)
g.attrs['foo'] = 'bar'
assert g.attrs['foo'] == 'bar'
assert isinstance(g.info, InfoReporter)
assert isinstance(repr(g.info), str)
Expand Down Expand Up @@ -1113,18 +1113,18 @@ def create_store():
return KVStoreV3(dict()), None

def create_group(self, store=None, path='group', read_only=False,
chunk_store=None, synchronizer=None):
chunk_store=None, synchronizer=None, attrs: Dict[str, Any] = {}):
# can be overridden in sub-classes
if store is None:
store, chunk_store = self.create_store()
init_group(store, path=path, chunk_store=chunk_store)
init_group(store, path=path, chunk_store=chunk_store, attrs=attrs)
g = Group(store, path=path, read_only=read_only,
chunk_store=chunk_store, synchronizer=synchronizer)
return g

def test_group_init_1(self):
store, chunk_store = self.create_store()
g = self.create_group(store, chunk_store=chunk_store)
g = self.create_group(store, chunk_store=chunk_store, attrs={'foo': 'bar'})
assert store is g.store
if chunk_store is None:
assert store is g.chunk_store
Expand All @@ -1137,7 +1137,6 @@ def test_group_init_1(self):
assert 'group' == g.basename

assert isinstance(g.attrs, Attributes)
g.attrs['foo'] = 'bar'
assert g.attrs['foo'] == 'bar'

assert isinstance(g.info, InfoReporter)
Expand Down
6 changes: 2 additions & 4 deletions zarr/tests/test_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -1571,8 +1571,7 @@ def test_init_group_overwrite_chunk_store(self):

def test_init_group(self):
store = self.create_store()
init_group(store)
store['.zattrs'] = json_dumps({'foo': 'bar'})
init_group(store, attrs={'foo': 'bar'})
# check metadata
assert group_meta_key in store
assert group_meta_key in store.listdir()
Expand Down Expand Up @@ -1710,8 +1709,7 @@ def test_dimension_separator(self):

def test_init_group(self):
store = self.create_store()
init_group(store)
store['.zattrs'] = json_dumps({'foo': 'bar'})
init_group(store, attrs={'foo': 'bar'})
# check metadata
assert group_meta_key in store
assert group_meta_key in store.listdir()
Expand Down
9 changes: 5 additions & 4 deletions zarr/tests/test_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from multiprocessing import cpu_count
from multiprocessing.pool import ThreadPool
from tempfile import mkdtemp
from typing import Any, Dict

import numpy as np
from numpy.testing import assert_array_equal
Expand Down Expand Up @@ -246,10 +247,10 @@ def test_parallel_require_group(self):
class TestGroupWithThreadSynchronizer(TestGroup, MixinGroupSyncTests):

def create_group(self, store=None, path=None, read_only=False,
chunk_store=None, synchronizer=None):
chunk_store=None, synchronizer=None, attrs: Dict[str, Any] = {}):
if store is None:
store, chunk_store = self.create_store()
init_group(store, path=path, chunk_store=chunk_store)
init_group(store, path=path, chunk_store=chunk_store, attrs=attrs)
synchronizer = ThreadSynchronizer()
g = Group(store, path=path, read_only=read_only,
chunk_store=chunk_store, synchronizer=synchronizer)
Expand All @@ -274,10 +275,10 @@ def create_store(self):
return store, None

def create_group(self, store=None, path=None, read_only=False,
chunk_store=None, synchronizer=None):
chunk_store=None, synchronizer=None, attrs: Dict[str, Any] = {}):
if store is None:
store, chunk_store = self.create_store()
init_group(store, path=path, chunk_store=chunk_store)
init_group(store, path=path, chunk_store=chunk_store, attrs=attrs)
sync_path = tempfile.mkdtemp()
atexit.register(atexit_rmtree, sync_path)
synchronizer = ProcessSynchronizer(sync_path)
Expand Down