Commit 5148dd6

Merge branch 'main' into testing-storage

2 parents cc14e07 + 45146ca

9 files changed (+110, -43 lines)


docs/quickstart.rst

Lines changed: 4 additions & 3 deletions

@@ -74,7 +74,7 @@ Zarr supports data compression and filters. For example, to use Blosc compression
     ...     "data/example-3.zarr",
     ...     mode="w", shape=(100, 100),
     ...     chunks=(10, 10), dtype="f4",
-    ...     compressor=zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.SHUFFLE)
+    ...     compressors=zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle)
     ... )
     >>> z[:, :] = np.random.random((100, 100))
     >>>
@@ -101,7 +101,7 @@ Zarr allows you to create hierarchical groups, similar to directories::
     >>> root = zarr.group("data/example-2.zarr")
     >>> foo = root.create_group(name="foo")
     >>> bar = root.create_array(
-    ...     name="bar", shape=(100, 10), chunks=(10, 10)
+    ...     name="bar", shape=(100, 10), chunks=(10, 10), dtype="f4"
     ... )
     >>> spam = foo.create_array(name="spam", shape=(10,), dtype="i4")
     >>>
@@ -112,6 +112,7 @@ Zarr allows you to create hierarchical groups, similar to directories::
     >>> # print the hierarchy
     >>> root.tree()
     /
+    ├── bar (100, 10) float32
     └── foo
         └── spam (10,) int32
     <BLANKLINE>
@@ -130,7 +131,7 @@ using external libraries like `s3fs <https://s3fs.readthedocs.io>`_ or

     >>> import s3fs  # doctest: +SKIP
     >>>
-    >>> z = zarr.create_array("s3://example-bucket/foo", mode="w", shape=(100, 100), chunks=(10, 10))  # doctest: +SKIP
+    >>> z = zarr.create_array("s3://example-bucket/foo", mode="w", shape=(100, 100), chunks=(10, 10), dtype="f4")  # doctest: +SKIP
     >>> z[:, :] = np.random.random((100, 100))  # doctest: +SKIP

 A single-file store can also be created using the :class:`zarr.storage.ZipStore`::
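
For orientation on the keyword rename above, here is a minimal sketch of the new plural spelling; the path and array name are illustrative, and it assumes the zarr.codecs names referenced in the diff are importable at runtime:

    import numpy as np
    import zarr

    root = zarr.group("data/compression-example.zarr")

    # Plural `compressors=` is the current spelling; it accepts a codec
    # (or a sequence of codecs) for Zarr format 3 arrays.
    z = root.create_array(
        name="data",
        shape=(100, 100),
        chunks=(10, 10),
        dtype="f4",
        compressors=zarr.codecs.BloscCodec(
            cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle
        ),
    )
    z[:, :] = np.random.random((100, 100))

    # The singular `compressor=` keyword still works but is deprecated for
    # Zarr format 3 arrays (see `_parse_deprecated_compressor` in this commit).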

docs/release-notes.rst

Lines changed: 11 additions & 2 deletions

@@ -9,12 +9,21 @@ New features

 Bug fixes
 ~~~~~~~~~
-* Fixes ``order`` argument for Zarr format 2 arrays.
-  By :user:`Norman Rzepka <normanrz>` (:issue:`2679`).
+* Fixes ``order`` argument for Zarr format 2 arrays (:issue:`2679`).
+
+* Fixes a bug that prevented reading Zarr format 2 data with consolidated metadata written using ``zarr-python`` version 2 (:issue:`2694`).
+
+* Ensure that compressor=None results in no compression when writing Zarr format 2 data (:issue:`2708`)

 Behaviour changes
 ~~~~~~~~~~~~~~~~~

+Other
+~~~~~
+* Removed some unnecessary files from the source distribution
+  to reduce its size. (:issue:`2686`)
+
+
 .. _release_3.0.0:

 3.0.0
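
A compact sketch of the compressor=None fix noted above (issue 2708), closely following the test added in this commit in tests/test_v2.py; the in-memory store is just for illustration:

    import zarr
    import zarr.storage

    store = zarr.storage.MemoryStore()
    g = zarr.open(store, mode="w", zarr_format=2)

    # compressor=None now means "no compression" for Zarr format 2 data...
    arr = g.create_array("one", dtype="i8", shape=(1,), chunks=(1,), compressor=None)
    assert arr._async_array.compressor is None

    # ...while omitting the argument keeps a default compressor.
    arr2 = g.create_array("two", dtype="i8", shape=(1,), chunks=(1,))
    assert arr2._async_array.compressor is not None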

pyproject.toml

Lines changed: 7 additions & 0 deletions

@@ -2,6 +2,13 @@
 requires = ["hatchling", "hatch-vcs"]
 build-backend = "hatchling.build"

+[tool.hatch.build.targets.sdist]
+exclude = [
+  "/.github",
+  "/bench",
+  "/docs",
+  "/notebooks"
+]

 [project]
 name = "zarr"
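
To sanity-check the slimmer source distribution described above, a standard-library sketch; the archive name is hypothetical and assumes an sdist has already been built locally (e.g. with python -m build --sdist):

    import tarfile

    # Hypothetical sdist path; adjust the version to match your build.
    with tarfile.open("dist/zarr-3.0.1.tar.gz", "r:gz") as sdist:
        names = sdist.getnames()

    # None of the excluded directories should ship in the archive.
    for excluded in (".github", "bench", "docs", "notebooks"):
        assert not any(f"/{excluded}/" in name for name in names), excluded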

src/zarr/core/array.py

Lines changed: 15 additions & 8 deletions

@@ -4131,15 +4131,22 @@ def _parse_chunk_encoding_v3(


 def _parse_deprecated_compressor(
-    compressor: CompressorLike | None, compressors: CompressorsLike
+    compressor: CompressorLike | None, compressors: CompressorsLike, zarr_format: int = 3
 ) -> CompressorsLike | None:
-    if compressor:
+    if compressor != "auto":
         if compressors != "auto":
             raise ValueError("Cannot specify both `compressor` and `compressors`.")
-        warn(
-            "The `compressor` argument is deprecated. Use `compressors` instead.",
-            category=UserWarning,
-            stacklevel=2,
-        )
-        compressors = (compressor,)
+        if zarr_format == 3:
+            warn(
+                "The `compressor` argument is deprecated. Use `compressors` instead.",
+                category=UserWarning,
+                stacklevel=2,
+            )
+        if compressor is None:
+            # "no compression"
+            compressors = ()
+        else:
+            compressors = (compressor,)
+    elif zarr_format == 2 and compressor == compressors == "auto":
+        compressors = ({"id": "blosc"},)
     return compressors
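
The resolution rules above can be hard to read in diff form, so here is a standalone re-implementation for illustration only (not the library function; the CompressorLike/CompressorsLike types are reduced to plain objects):

    from warnings import warn

    def resolve_compressors(compressor="auto", compressors="auto", zarr_format=3):
        # Simplified stand-in for zarr.core.array._parse_deprecated_compressor.
        if compressor != "auto":
            if compressors != "auto":
                raise ValueError("Cannot specify both `compressor` and `compressors`.")
            if zarr_format == 3:
                warn(
                    "The `compressor` argument is deprecated. Use `compressors` instead.",
                    category=UserWarning,
                    stacklevel=2,
                )
            # compressor=None now maps to "no compression" rather than the default.
            compressors = () if compressor is None else (compressor,)
        elif zarr_format == 2 and compressor == compressors == "auto":
            compressors = ({"id": "blosc"},)
        return compressors

    assert resolve_compressors(compressor=None, zarr_format=2) == ()
    assert resolve_compressors(zarr_format=2) == ({"id": "blosc"},)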

src/zarr/core/group.py

Lines changed: 10 additions & 7 deletions

@@ -573,8 +573,8 @@ def _from_bytes_v2(
         v2_consolidated_metadata = json.loads(consolidated_metadata_bytes.to_bytes())
         v2_consolidated_metadata = v2_consolidated_metadata["metadata"]
         # We already read zattrs and zgroup. Should we ignore these?
-        v2_consolidated_metadata.pop(".zattrs")
-        v2_consolidated_metadata.pop(".zgroup")
+        v2_consolidated_metadata.pop(".zattrs", None)
+        v2_consolidated_metadata.pop(".zgroup", None)

         consolidated_metadata: defaultdict[str, dict[str, Any]] = defaultdict(dict)

@@ -1011,7 +1011,7 @@ async def create_array(
         shards: ShardsLike | None = None,
         filters: FiltersLike = "auto",
         compressors: CompressorsLike = "auto",
-        compressor: CompressorLike = None,
+        compressor: CompressorLike = "auto",
         serializer: SerializerLike = "auto",
         fill_value: Any | None = 0,
         order: MemoryOrder | None = None,
@@ -1114,8 +1114,9 @@ async def create_array(
         AsyncArray

         """
-
-        compressors = _parse_deprecated_compressor(compressor, compressors)
+        compressors = _parse_deprecated_compressor(
+            compressor, compressors, zarr_format=self.metadata.zarr_format
+        )
         return await create_array(
             store=self.store_path,
             name=name,
@@ -2244,7 +2245,7 @@ def create_array(
         shards: ShardsLike | None = None,
         filters: FiltersLike = "auto",
         compressors: CompressorsLike = "auto",
-        compressor: CompressorLike = None,
+        compressor: CompressorLike = "auto",
         serializer: SerializerLike = "auto",
         fill_value: Any | None = 0,
         order: MemoryOrder | None = "C",
@@ -2346,7 +2347,7 @@ def create_array(
         -------
         AsyncArray
         """
-        compressors = _parse_deprecated_compressor(compressor, compressors)
+        compressors = _parse_deprecated_compressor(
+            compressor, compressors, zarr_format=self.metadata.zarr_format
+        )
         return Array(
             self._sync(
                 self._async_group.create_array(
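
The pop(..., None) change in the first hunk is the core of the fix for issue 2694: consolidated metadata written by zarr-python 2 may omit the ".zattrs" key, and dict.pop without a default raises. A two-line illustration:

    # Consolidated metadata from zarr-python 2 may lack ".zattrs" entirely.
    v2_consolidated_metadata = {".zgroup": {"zarr_format": 2}}

    v2_consolidated_metadata.pop(".zattrs", None)  # returns None, no error
    # v2_consolidated_metadata.pop(".zattrs")      # raised KeyError before this fix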

tests/test_group.py

Lines changed: 2 additions & 12 deletions

@@ -9,7 +9,7 @@

 import numpy as np
 import pytest
-from numcodecs import Zstd
+from numcodecs import Blosc

 import zarr
 import zarr.api.asynchronous
@@ -499,7 +499,7 @@ def test_group_child_iterators(store: Store, zarr_format: ZarrFormat, consolidat
                 "chunks": (1,),
                 "order": "C",
                 "filters": None,
-                "compressor": Zstd(level=0),
+                "compressor": Blosc(),
                 "zarr_format": zarr_format,
             },
             "subgroup": {
@@ -1505,13 +1505,3 @@ def test_group_members_concurrency_limit(store: MemoryStore) -> None:
     elapsed = time.time() - start

     assert elapsed > num_groups * get_latency
-
-
-@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"])
-def test_deprecated_compressor(store: Store) -> None:
-    g = zarr.group(store=store, zarr_format=2)
-    with pytest.warns(UserWarning, match="The `compressor` argument is deprecated.*"):
-        a = g.create_array(
-            "foo", shape=(100,), chunks=(10,), dtype="i4", compressor={"id": "blosc"}
-        )
-    assert a.metadata.compressor.codec_id == "blosc"

tests/test_metadata/test_consolidated.py

Lines changed: 27 additions & 3 deletions

@@ -5,7 +5,7 @@

 import numpy as np
 import pytest
-from numcodecs import Zstd
+from numcodecs import Blosc

 import zarr.api.asynchronous
 import zarr.api.synchronous
@@ -17,7 +17,7 @@
     open,
     open_consolidated,
 )
-from zarr.core.buffer import default_buffer_prototype
+from zarr.core.buffer import cpu, default_buffer_prototype
 from zarr.core.group import ConsolidatedMetadata, GroupMetadata
 from zarr.core.metadata import ArrayV3Metadata
 from zarr.core.metadata.v2 import ArrayV2Metadata
@@ -476,6 +476,30 @@ async def test_open_consolidated_raises_async(self, zarr_format: ZarrFormat):
         with pytest.raises(ValueError):
             await zarr.api.asynchronous.open_consolidated(store, zarr_format=None)

+    @pytest.fixture
+    async def v2_consolidated_metadata_empty_dataset(
+        self, memory_store: zarr.storage.MemoryStore
+    ) -> AsyncGroup:
+        zgroup_bytes = cpu.Buffer.from_bytes(json.dumps({"zarr_format": 2}).encode())
+        zmetadata_bytes = cpu.Buffer.from_bytes(
+            b'{"metadata":{".zgroup":{"zarr_format":2}},"zarr_consolidated_format":1}'
+        )
+        return AsyncGroup._from_bytes_v2(
+            None, zgroup_bytes, zattrs_bytes=None, consolidated_metadata_bytes=zmetadata_bytes
+        )
+
+    async def test_consolidated_metadata_backwards_compatibility(
+        self, v2_consolidated_metadata_empty_dataset
+    ):
+        """
+        Test that consolidated metadata handles a missing .zattrs key. This is necessary for backwards compatibility with zarr-python 2.x. See https://github.com/zarr-developers/zarr-python/issues/2694
+        """
+        store = zarr.storage.MemoryStore()
+        await zarr.api.asynchronous.open(store=store, zarr_format=2)
+        await zarr.api.asynchronous.consolidate_metadata(store)
+        result = await zarr.api.asynchronous.open_consolidated(store, zarr_format=2)
+        assert result.metadata == v2_consolidated_metadata_empty_dataset.metadata
+
     async def test_consolidated_metadata_v2(self):
         store = zarr.storage.MemoryStore()
         g = await AsyncGroup.from_store(store, attributes={"key": "root"}, zarr_format=2)
@@ -498,7 +522,7 @@ async def test_consolidated_metadata_v2(self):
                 attributes={"key": "a"},
                 chunks=(1,),
                 fill_value=0,
-                compressor=Zstd(level=0),
+                compressor=Blosc(),
                 order="C",
             ),
             "g1": GroupMetadata(
"g1": GroupMetadata(

tests/test_store/test_stateful.py

Lines changed: 8 additions & 3 deletions

@@ -18,7 +18,7 @@ def mk_test_instance_sync() -> ZarrHierarchyStateMachine:
         pytest.skip(reason="ZipStore does not support delete")
     if isinstance(sync_store, MemoryStore):
         run_state_machine_as_test(
-            mk_test_instance_sync, settings=Settings(report_multiple_bugs=False)
+            mk_test_instance_sync, settings=Settings(report_multiple_bugs=False, max_examples=50)
         )


@@ -28,6 +28,11 @@ def mk_test_instance_sync() -> None:

     if isinstance(sync_store, ZipStore):
         pytest.skip(reason="ZipStore does not support delete")
-    if isinstance(sync_store, LocalStore):
+    elif isinstance(sync_store, LocalStore):
         pytest.skip(reason="This test has errors")
-    run_state_machine_as_test(mk_test_instance_sync, settings=Settings(report_multiple_bugs=True))
+    elif isinstance(sync_store, MemoryStore):
+        run_state_machine_as_test(mk_test_instance_sync, settings=Settings(max_examples=50))
+    else:
+        run_state_machine_as_test(
+            mk_test_instance_sync, settings=Settings(report_multiple_bugs=True)
+        )
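
For context on the Hypothesis settings being tuned here, a minimal generic sketch (independent of the zarr state machines defined elsewhere in the test suite): max_examples caps how many generated cases run per test, trading coverage for runtime.

    from hypothesis import given, settings, strategies as st

    # Cap the number of generated examples, as the MemoryStore branches above now do.
    @settings(max_examples=50, report_multiple_bugs=False)
    @given(st.integers())
    def test_roundtrip(x: int) -> None:
        assert int(str(x)) == x

    test_roundtrip()  # a @given-wrapped function can be called directly to run the property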

tests/test_v2.py

Lines changed: 26 additions & 5 deletions

@@ -7,11 +7,13 @@
 import pytest
 from numcodecs import Delta
 from numcodecs.blosc import Blosc
+from numcodecs.zstd import Zstd

 import zarr
 import zarr.core.buffer
 import zarr.storage
 from zarr import config
+from zarr.abc.store import Store
 from zarr.core.buffer.core import default_buffer_prototype
 from zarr.core.sync import sync
 from zarr.storage import MemoryStore, StorePath
@@ -93,11 +95,7 @@ async def test_v2_encode_decode(dtype):
     store = zarr.storage.MemoryStore()
     g = zarr.group(store=store, zarr_format=2)
     g.create_array(
-        name="foo",
-        shape=(3,),
-        chunks=(3,),
-        dtype=dtype,
-        fill_value=b"X",
+        name="foo", shape=(3,), chunks=(3,), dtype=dtype, fill_value=b"X", compressor=None
     )

     result = await store.get("foo/.zarray", zarr.core.buffer.default_buffer_prototype())
@@ -166,6 +164,29 @@ def test_v2_filters_codecs(filters: Any, order: Literal["C", "F"]) -> None:
     np.testing.assert_array_equal(result, array_fixture)


+@pytest.mark.filterwarnings("ignore")
+@pytest.mark.parametrize("store", ["memory"], indirect=True)
+def test_create_array_defaults(store: Store):
+    """
+    Test that passing compressor=None results in no compressor. Also test that the default value of the compressor
+    parameter does produce a compressor.
+    """
+    g = zarr.open(store, mode="w", zarr_format=2)
+    arr = g.create_array("one", dtype="i8", shape=(1,), chunks=(1,), compressor=None)
+    assert arr._async_array.compressor is None
+    assert not (arr.filters)
+    arr = g.create_array("two", dtype="i8", shape=(1,), chunks=(1,))
+    assert arr._async_array.compressor is not None
+    assert not (arr.filters)
+    arr = g.create_array("three", dtype="i8", shape=(1,), chunks=(1,), compressor=Zstd())
+    assert arr._async_array.compressor is not None
+    assert not (arr.filters)
+    with pytest.raises(ValueError):
+        g.create_array(
+            "four", dtype="i8", shape=(1,), chunks=(1,), compressor=None, compressors=None
+        )
+
+
 @pytest.mark.parametrize("array_order", ["C", "F"])
 @pytest.mark.parametrize("data_order", ["C", "F"])
 @pytest.mark.parametrize("memory_order", ["C", "F"])
