Skip to content

Commit 370065a

Browse files
committed
Use entrypoint.name as the key for registering chunk key encodings.
- Change register_chunk_key_encoding function to take key as first arg similar to codec.
1 parent c6ee5c6 commit 370065a

File tree

4 files changed

+77
-9
lines changed

4 files changed

+77
-9
lines changed

run.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
from pathlib import Path
2+
3+
import numpy as np
4+
5+
import zarr
6+
import zarr.codecs
7+
from zarr.core.chunk_key_encodings import ChunkKeyEncoding
8+
from zarr.registry import register_chunk_key_encoding
9+
from zarr.storage import LocalStore, WrapperStore
10+
# from numcodecs.zarr3 import
11+
12+
13+
class FanoutStore(WrapperStore):
    """Placeholder ``WrapperStore`` subclass that adds no behaviour of its own."""
15+
16+
17+
class FanoutChunkKeyEncoding(ChunkKeyEncoding):
    """Chunk key encoding identified by the name ``"fanout"``.

    Keys are the literal ``"c"`` followed by the chunk coordinates, all joined
    with ``self.separator`` (presumably supplied by ``ChunkKeyEncoding`` --
    confirm against the base class). A zero-dimensional chunk is just ``"c"``.
    """

    name = "fanout"

    def decode_chunk_key(self, chunk_key: str) -> tuple[int, ...]:
        """Return the chunk coordinates encoded in ``chunk_key``.

        Inverse of :meth:`encode_chunk_key`.

        Raises:
            ValueError: if ``chunk_key`` does not start with ``"c"`` followed
                by the separator, or a coordinate is not an integer.
        """
        if chunk_key == "c":
            return ()
        # encode_chunk_key emits "c<sep>i<sep>j...", so the separator that
        # follows "c" must be dropped as well; slicing off only the "c" leaves
        # an empty first field that int() rejects.
        prefix = "c" + self.separator
        if not chunk_key.startswith(prefix):
            raise ValueError(
                f"Invalid chunk key {chunk_key!r}: expected it to start with {prefix!r}."
            )
        return tuple(map(int, chunk_key[len(prefix) :].split(self.separator)))

    def encode_chunk_key(self, chunk_coords: tuple[int, ...]) -> str:
        """Return the key for ``chunk_coords``: ``"c"`` and each coordinate joined by the separator."""
        return self.separator.join(map(str, ("c",) + chunk_coords))
27+
28+
# Make the encoding resolvable by the string key "fanout" (key first, class second).
register_chunk_key_encoding("fanout", FanoutChunkKeyEncoding)
29+
30+
31+
def main() -> None:
    """Create a scratch Zarr array on local disk, fill it with random data, and print its info.

    The array lives at ``~/data/test.zarr`` behind a ``FanoutStore`` wrapping a
    ``LocalStore`` and is overwritten on every run.
    """
    root = Path("~/data/test.zarr").expanduser()
    store = FanoutStore(LocalStore(root))

    # Alternatives tried during development for ``chunk_key_encoding``:
    # "fanout" or FanoutChunkKeyEncoding(separator="/").
    z = zarr.create_array(
        store,
        shape=(100_000, 3),
        chunks=(1_000, 3),
        dtype=np.uint16,
        chunk_key_encoding={"name": "v2"},
        overwrite=True,
    )

    rng = np.random.default_rng()
    # Size the payload from the array itself: a (10_000_000, 3) block does not
    # match the (100_000, 3) array it is assigned to.
    # NOTE(review): the upper bound of ``integers`` is exclusive, so 65_535
    # itself is never drawn -- confirm whether the full uint16 range is wanted.
    data = rng.integers(0, 65_535, size=z.shape, dtype=np.uint16)
    z[:] = data
    print(z.info)


if __name__ == "__main__":
    main()

src/zarr/core/chunk_key_encodings.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,5 +108,5 @@ def parse_chunk_key_encoding(data: ChunkKeyEncodingLike) -> ChunkKeyEncoding:
108108
return chunk_key_encoding
109109

110110

111-
register_chunk_key_encoding(DefaultChunkKeyEncoding, qualname="default")
112-
register_chunk_key_encoding(V2ChunkKeyEncoding, qualname="v2")
111+
register_chunk_key_encoding("default", DefaultChunkKeyEncoding)
112+
register_chunk_key_encoding("v2", V2ChunkKeyEncoding)

src/zarr/fanout.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
from zarr.core.chunk_key_encodings import ChunkKeyEncoding
2+
3+
4+
class FanoutChunkKeyEncoding(ChunkKeyEncoding):
    """Chunk key encoding identified by the name ``"fanout"``.

    Keys are the literal ``"c"`` followed by the chunk coordinates, all joined
    with ``self.separator`` (presumably supplied by ``ChunkKeyEncoding`` --
    confirm against the base class). A zero-dimensional chunk is just ``"c"``.
    """

    name = "fanout"

    def decode_chunk_key(self, chunk_key: str) -> tuple[int, ...]:
        """Return the chunk coordinates encoded in ``chunk_key``.

        Inverse of :meth:`encode_chunk_key`.

        Raises:
            ValueError: if ``chunk_key`` does not start with ``"c"`` followed
                by the separator, or a coordinate is not an integer.
        """
        if chunk_key == "c":
            return ()
        # encode_chunk_key emits "c<sep>i<sep>j...", so the separator that
        # follows "c" must be dropped as well; slicing off only the "c" leaves
        # an empty first field that int() rejects.
        prefix = "c" + self.separator
        if not chunk_key.startswith(prefix):
            raise ValueError(
                f"Invalid chunk key {chunk_key!r}: expected it to start with {prefix!r}."
            )
        return tuple(map(int, chunk_key[len(prefix) :].split(self.separator)))

    def encode_chunk_key(self, chunk_coords: tuple[int, ...]) -> str:
        """Return the key for ``chunk_coords``: ``"c"`` and each coordinate joined by the separator."""
        return self.separator.join(map(str, ("c",) + chunk_coords))

src/zarr/registry.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
from zarr.core.chunk_key_encodings import ChunkKeyEncoding
2626
from zarr.core.common import JSON
2727

28-
# CHANGE: Consider adding here
2928
__all__ = [
3029
"Registry",
3130
"get_buffer_class",
@@ -48,9 +47,9 @@ def __init__(self) -> None:
4847
super().__init__()
4948
self.lazy_load_list: list[EntryPoint] = []
5049

51-
def lazy_load(self) -> None:
50+
def lazy_load(self, use_entrypoint_name: bool = False) -> None:
5251
for e in self.lazy_load_list:
53-
self.register(e.load())
52+
self.register(e.load(), qualname=e.name if use_entrypoint_name else None)
5453

5554
self.lazy_load_list.clear()
5655

@@ -158,8 +157,8 @@ def register_buffer(cls: type[Buffer], qualname: str | None = None) -> None:
158157
__buffer_registry.register(cls, qualname)
159158

160159

161-
def register_chunk_key_encoding(cls: type, qualname: str | None = None) -> None:
162-
__chunk_key_encoding_registry.register(cls, qualname)
160+
def register_chunk_key_encoding(key: str, cls: type) -> None:
161+
__chunk_key_encoding_registry.register(cls, key)
163162

164163

165164
def get_codec_class(key: str, reload_config: bool = False) -> type[Codec]:
@@ -300,12 +299,11 @@ def get_ndbuffer_class(reload_config: bool = False) -> type[NDBuffer]:
300299

301300

302301
def get_chunk_key_encoding_class(key: str) -> type[ChunkKeyEncoding]:
303-
__chunk_key_encoding_registry.lazy_load()
302+
__chunk_key_encoding_registry.lazy_load(use_entrypoint_name=True)
304303
if key not in __chunk_key_encoding_registry:
305304
raise KeyError(
306305
f"Chunk key encoding '{key}' not found in registered chunk key encodings: {list(__chunk_key_encoding_registry)}."
307306
)
308-
309307
return __chunk_key_encoding_registry[key]
310308

311309

0 commit comments

Comments
 (0)