1- .. _user-guide-performance:
1+ .. only:: doctest
2+
3+ >>> import shutil
4+ >>> shutil.rmtree('data', ignore_errors=True)
5+
6+ .. _user-guide-performance:
27
38Optimizing performance
49======================
@@ -19,42 +24,41 @@ better performance, at least when using the Blosc compression library.
1924The optimal chunk shape will depend on how you want to access the data. E.g.,
2025for a 2-dimensional array, if you only ever take slices along the first
2126dimension, then chunk across the second dimension. If you know you want to chunk
22- across an entire dimension you can use ``None`` or ``-1`` within the ``chunks``
23- argument, e.g.::
27+ across an entire dimension you can use the full size of that dimension within the
28+ ``chunks`` argument, e.g.::
2429
2530 >>> import zarr
26- >>>
27- >>> z1 = zarr.zeros((10000, 10000), chunks=(100, None), dtype='i4')
31+ >>> z1 = zarr.create_array(store={}, shape=(10000, 10000), chunks=(100, 10000), dtype='int32')
2832 >>> z1.chunks
2933 (100, 10000)
3034
3135Alternatively, if you only ever take slices along the second dimension, then
3236chunk across the first dimension, e.g.::
3337
34- >>> z2 = zarr.zeros((10000, 10000), chunks=(None, 100), dtype='i4')
38+ >>> z2 = zarr.create_array(store={}, shape=(10000, 10000), chunks=(10000, 100), dtype='int32')
3539 >>> z2.chunks
3640 (10000, 100)
3741
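To make the trade-off concrete, a quick back-of-the-envelope count (plain
arithmetic on the chunk grid, not a Zarr API) shows how many chunks a single
row or column read has to touch for the two layouts above::

    >>> import math
    >>> math.ceil(z1.shape[1] / z1.chunks[1])  # chunks touched by z1[0, :]
    1
    >>> math.ceil(z1.shape[0] / z1.chunks[0])  # chunks touched by z1[:, 0]
    100
    >>> math.ceil(z2.shape[0] / z2.chunks[0])  # chunks touched by z2[:, 0]
    1

Reading along the "wrong" axis therefore forces 100 times as many chunk reads
and decompressions for the same amount of data.
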
3842If you require reasonable performance for both access patterns then you need to
3943find a compromise, e.g.::
4044
41- >>> z3 = zarr.zeros((10000, 10000), chunks=(1000, 1000), dtype='i4')
45+ >>> z3 = zarr.create_array(store={}, shape=(10000, 10000), chunks=(1000, 1000), dtype='int32')
4246 >>> z3.chunks
4347 (1000, 1000)
4448
4549If you are feeling lazy, you can let Zarr guess a chunk shape for your data by
46- providing ``chunks=True``, although please note that the algorithm for guessing
50+ providing ``chunks='auto'``, although please note that the algorithm for guessing
4751a chunk shape is based on simple heuristics and may be far from optimal. E.g.::
4852
49- >>> z4 = zarr.zeros((10000, 10000), chunks=True, dtype='i4')
53+ >>> z4 = zarr.create_array(store={}, shape=(10000, 10000), chunks='auto', dtype='int32')
5054 >>> z4.chunks
5155 (625, 625)
5256
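For reference, with the 4-byte ``int32`` items used here the guessed chunk
shape above works out to roughly 1.5 MB of uncompressed data per chunk, which
you can verify with plain arithmetic::

    >>> z4.chunks[0] * z4.chunks[1] * 4  # bytes per chunk for 4-byte items
    1562500
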
5357If you know you are always going to be loading the entire array into memory, you
54- can turn off chunks by providing ``chunks=False``, in which case there will be
55- one single chunk for the array::
58+ can turn off chunks by providing ``chunks`` equal to ``shape``, in which case there
59+ will be one single chunk for the array::
5660
57- >>> z5 = zarr.zeros((10000, 10000), chunks=False, dtype='i4')
61+ >>> z5 = zarr.create_array(store={}, shape=(10000, 10000), chunks=(10000, 10000), dtype='int32')
5862 >>> z5.chunks
5963 (10000, 10000)
6064
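Keep in mind that a single chunk only makes sense if you really will read or
write the whole array in one go: with a compressed chunked store, even a tiny
selection generally requires the entire chunk to be fetched and decompressed,
which for this array is about 400 MB of uncompressed data::

    >>> z5.shape[0] * z5.shape[1] * 4  # uncompressed bytes in the single int32 chunk
    400000000
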
@@ -70,9 +74,9 @@ ratios, depending on the correlation structure within the data. E.g.::
7074
7175 >>> import numpy as np
7276 >>>
73- >>> a = np.arange(100000000, dtype='i4').reshape(10000, 10000).T
74- >>> # TODO: replace with create_array after #2463
75- >>> c = zarr.array(a, chunks=(1000, 1000))
77+ >>> a = np.arange(100000000, dtype='int32').reshape(10000, 10000).T
78+ >>> c = zarr.create_array(store={}, shape=a.shape, chunks=(1000, 1000), dtype=a.dtype, config={'order': 'C'})
79+ >>> c[:] = a
7680 >>> c.info_complete()
7781 Type : Array
7882 Zarr format : 3
@@ -88,7 +92,8 @@ ratios, depending on the correlation structure within the data. E.g.::
8892 Storage ratio : 1.2
8993 Chunks Initialized : 100
9094 >>> with zarr.config.set({'array.order': 'F'}):
91- ... f = zarr.array(a, chunks=(1000, 1000))
95+ ... f = zarr.create_array(store={}, shape=a.shape, chunks=(1000, 1000), dtype=a.dtype)
96+ ... f[:] = a
9297 >>> f.info_complete()
9398 Type : Array
9499 Zarr format : 3
@@ -143,15 +148,14 @@ the time required to write an array with different values.::
143148 ... shape = (chunks[0] * 1024,)
144149 ... data = np.random.randint(0, 255, shape)
145150 ... dtype = 'uint8'
146- ... with zarr.config.set({"array.write_empty_chunks": write_empty_chunks}):
147- ... arr = zarr.open(
148- ... f"data/example-{write_empty_chunks}.zarr",
149- ... shape=shape,
150- ... chunks=chunks,
151- ... dtype=dtype,
152- ... fill_value=0,
153- ... mode='w'
154- ... )
151+ ... arr = zarr.create_array(
152+ ... f'data/example-{write_empty_chunks}.zarr',
153+ ... shape=shape,
154+ ... chunks=chunks,
155+ ... dtype=dtype,
156+ ... fill_value=0,
157+ ... config={'write_empty_chunks': write_empty_chunks}
158+ ... )
155159 ... # initialize all chunks
156160 ... arr[:] = 100
157161 ... result = []
@@ -208,9 +212,9 @@ to re-open any underlying files or databases upon being unpickled.
208212E.g., pickle/unpickle a local store array::
209213
210214 >>> import pickle
211- >>>
212- >>> # TODO: replace with create_array after #2463
213- >>> z1 = zarr.array(store="data/example-2", data=np.arange(100000))
215+ >>> data = np.arange(100000)
216+ >>> z1 = zarr.create_array(store='data/example-2.zarr', shape=data.shape, chunks=data.shape, dtype=data.dtype)
217+ >>> z1[:] = data
214218 >>> s = pickle.dumps(z1)
215219 >>> z2 = pickle.loads(s)
216220 >>> z1 == z2