Skip to content

Commit 8b67060

Browse files
authored
Properly extract chunk keys for arrays with a single chunk (#523)
* Properly extract chunk keys for arrays with a single chunk * Update test to specify chunk_key_encoding * Lint * Raise NotImplementedError on scalar arrays
1 parent e8f9dad commit 8b67060

File tree

3 files changed

+43
-32
lines changed

3 files changed

+43
-32
lines changed

virtualizarr/manifests/store.py

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -108,36 +108,31 @@ def get_zarr_metadata(manifest_group: ManifestGroup, key: str) -> Buffer:
108108
return dict_to_buffer(metadata, prototype=default_buffer_prototype())
109109

110110

111-
def parse_manifest_index(
112-
key: str, chunk_key_encoding: str = "."
113-
) -> tuple[str, tuple[int, ...]]:
111+
def parse_manifest_index(key: str, chunk_key_encoding: str = ".") -> tuple[int, ...]:
114112
"""
115113
Splits `key` provided to a zarr store into the variable indicated
116114
by the first part and the chunk index from the 3rd through last parts,
117115
which can be used to index into the ndarrays containing paths, offsets,
118116
and lengths in ManifestArrays.
119117
120-
Currently only works for 1d+ arrays with a tree depth of one from the
121-
root Zarr group.
122-
123118
Parameters
124119
----------
125120
key : str
126121
chunk_key_encoding : str
127122
128123
Returns
129124
-------
130-
ManifestIndex
125+
tuple containing chunk indexes
131126
"""
132-
parts = key.split("/")
133-
var = parts[0]
134-
# Assume "c" is the second part
135-
# TODO: Handle scalar array case with "c" holds the data
136-
if chunk_key_encoding == "/":
137-
indexes = tuple(int(ind) for ind in parts[2:])
138-
else:
139-
indexes = tuple(int(ind) for ind in parts[2].split(chunk_key_encoding))
140-
return var, indexes
127+
if key.endswith("c"):
128+
# Scalar arrays hold the data in the "c" key
129+
raise NotImplementedError(
130+
"Scalar arrays are not yet supported by ManifestStore"
131+
)
132+
parts = key.split(
133+
"c/"
134+
) # TODO: Open an issue upstream about the Zarr spec indicating this should be f"c{chunk_key_encoding}" rather than always "c/"
135+
return tuple(int(ind) for ind in parts[1].split(chunk_key_encoding))
141136

142137

143138
def find_matching_store(stores: StoreDict, request_key: str) -> StoreRequest:
@@ -263,13 +258,16 @@ async def get(
263258

264259
if key.endswith("zarr.json"):
265260
return get_zarr_metadata(self._group, key)
266-
var, chunk_key = parse_manifest_index(key)
261+
var = key.split("/")[0]
267262
marr = self._group.arrays[var]
268263
manifest = marr.manifest
269264

270-
path = manifest._paths[*chunk_key]
271-
offset = manifest._offsets[*chunk_key]
272-
length = manifest._lengths[*chunk_key]
265+
chunk_indexes = parse_manifest_index(
266+
key, marr.metadata.chunk_key_encoding.separator
267+
)
268+
path = manifest._paths[*chunk_indexes]
269+
offset = manifest._offsets[*chunk_indexes]
270+
length = manifest._lengths[*chunk_indexes]
273271
# Get the configured object store instance that matches the path
274272
store_request = find_matching_store(stores=self._stores, request_key=path)
275273
# Transform the input byte range to account for the chunk location in the file

virtualizarr/manifests/utils.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import numpy as np
44
from zarr import Array
5+
from zarr.core.chunk_key_encodings import ChunkKeyEncodingLike
56
from zarr.core.metadata.v3 import ArrayV3Metadata
67

78
from virtualizarr.codecs import convert_to_codec_pipeline, get_codecs
@@ -14,9 +15,11 @@ def create_v3_array_metadata(
1415
shape: tuple[int, ...],
1516
data_type: np.dtype,
1617
chunk_shape: tuple[int, ...],
18+
chunk_key_encoding: ChunkKeyEncodingLike = {"name": "default"},
1719
fill_value: Any = None,
1820
codecs: Optional[list[Dict[str, Any]]] = None,
1921
attributes: Optional[Dict[str, Any]] = None,
22+
dimension_names: Optional[tuple[str, ...]] = None,
2023
) -> ArrayV3Metadata:
2124
"""
2225
Create an ArrayV3Metadata instance with standard configuration.
@@ -30,12 +33,16 @@ def create_v3_array_metadata(
3033
The numpy dtype of the array
3134
chunk_shape : tuple[int, ...]
3235
The shape of each chunk
36+
chunk_key_encoding : ChunkKeyEncodingLike
37+
The mapping from chunk grid cell coordinates to keys.
3338
fill_value : Any, optional
3439
The fill value for the array
3540
codecs : list[Dict[str, Any]], optional
3641
List of codec configurations
3742
attributes : Dict[str, Any], optional
3843
Additional attributes for the array
44+
dimension_names : tuple[str], optional
45+
Names of the dimensions
3946
4047
Returns
4148
-------
@@ -49,14 +56,14 @@ def create_v3_array_metadata(
4956
"name": "regular",
5057
"configuration": {"chunk_shape": chunk_shape},
5158
},
52-
chunk_key_encoding={"name": "default"},
59+
chunk_key_encoding=chunk_key_encoding,
5360
fill_value=fill_value,
5461
codecs=convert_to_codec_pipeline(
5562
codecs=codecs or [],
5663
dtype=data_type,
5764
),
5865
attributes=attributes or {},
59-
dimension_names=None,
66+
dimension_names=dimension_names,
6067
storage_transformers=None,
6168
)
6269

virtualizarr/tests/test_manifests/test_store.py

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@
22

33
import json
44
import pickle
5-
from typing import TYPE_CHECKING, Callable
5+
from typing import TYPE_CHECKING
66

7+
import numpy as np
78
import pytest
89
from zarr.abc.store import (
910
OffsetByteRequest,
@@ -19,14 +20,15 @@
1920
ManifestGroup,
2021
ManifestStore,
2122
)
23+
from virtualizarr.manifests.utils import create_v3_array_metadata
2224
from virtualizarr.tests import requires_minio, requires_obstore
2325

2426
if TYPE_CHECKING:
2527
from obstore.store import ObjectStore
2628

2729

2830
def _generate_manifest_store(
29-
store: ObjectStore, *, prefix: str, filepath: str, array_v3_metadata: Callable
31+
store: ObjectStore, *, prefix: str, filepath: str
3032
) -> ManifestStore:
3133
"""
3234
Generate a ManifestStore for testing.
@@ -66,9 +68,15 @@ def _generate_manifest_store(
6668
"1.1": {"path": f"{prefix}/{filepath}", "offset": 12, "length": 4},
6769
}
6870
manifest = ChunkManifest(entries=chunk_dict)
69-
chunks = (1, 4)
70-
shape = (2, 8)
71-
array_metadata = array_v3_metadata(shape=shape, chunks=chunks)
71+
codecs = [{"configuration": {"endian": "little"}, "name": "bytes"}]
72+
array_metadata = create_v3_array_metadata(
73+
shape=(4, 4),
74+
chunk_shape=(2, 2),
75+
data_type=np.dtype("int32"),
76+
codecs=codecs,
77+
chunk_key_encoding={"name": "default", "separator": "."},
78+
fill_value=0,
79+
)
7280
manifest_array = ManifestArray(metadata=array_metadata, chunkmanifest=manifest)
7381
manifest_group = ManifestGroup(
7482
arrays={"foo": manifest_array, "bar": manifest_array},
@@ -78,7 +86,7 @@ def _generate_manifest_store(
7886

7987

8088
@pytest.fixture()
81-
def local_store(tmpdir, array_v3_metadata):
89+
def local_store(tmpdir):
8290
import obstore as obs
8391

8492
store = obs.store.LocalStore()
@@ -88,12 +96,11 @@ def local_store(tmpdir, array_v3_metadata):
8896
store=store,
8997
prefix=prefix,
9098
filepath=filepath,
91-
array_v3_metadata=array_v3_metadata,
9299
)
93100

94101

95102
@pytest.fixture()
96-
def s3_store(minio_bucket, array_v3_metadata):
103+
def s3_store(minio_bucket):
97104
import obstore as obs
98105

99106
store = obs.store.S3Store(
@@ -110,7 +117,6 @@ def s3_store(minio_bucket, array_v3_metadata):
110117
store=store,
111118
prefix=prefix,
112119
filepath=filepath,
113-
array_v3_metadata=array_v3_metadata,
114120
)
115121

116122

@@ -164,7 +170,7 @@ async def test_get_metadata(self, manifest_store, request):
164170
"foo/zarr.json", prototype=default_buffer_prototype()
165171
)
166172
metadata = json.loads(observed.to_bytes())
167-
assert metadata["chunk_grid"]["configuration"]["chunk_shape"] == [1, 4]
173+
assert metadata["chunk_grid"]["configuration"]["chunk_shape"] == [2, 2]
168174
assert metadata["node_type"] == "array"
169175
assert metadata["zarr_format"] == 3
170176

0 commit comments

Comments
 (0)