Skip to content

Commit 5efb587

Browse files
committed
Separate chunk key encoding 'name' (key) from
its implementation indicated by qualname. Set default chunk key encoding implementations for `default` and `v2` in global config.
1 parent f3e4275 commit 5efb587

File tree

3 files changed

+44
-17
lines changed

3 files changed

+44
-17
lines changed

src/zarr/core/chunk_key_encodings.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,5 +108,5 @@ def parse_chunk_key_encoding(data: ChunkKeyEncodingLike) -> ChunkKeyEncoding:
108108
return chunk_key_encoding
109109

110110

111-
register_chunk_key_encoding(DefaultChunkKeyEncoding, qualname="default")
112-
register_chunk_key_encoding(V2ChunkKeyEncoding, qualname="v2")
111+
register_chunk_key_encoding("default", DefaultChunkKeyEncoding)
112+
register_chunk_key_encoding("v2", V2ChunkKeyEncoding)

src/zarr/core/config.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,10 @@ def enable_gpu(self) -> ConfigSet:
126126
"vlen-utf8": "zarr.codecs.vlen_utf8.VLenUTF8Codec",
127127
"vlen-bytes": "zarr.codecs.vlen_utf8.VLenBytesCodec",
128128
},
129+
"chunk_key_encodings": {
130+
"default": "zarr.core.chunk_key_encodings.DefaultChunkKeyEncoding",
131+
"v2": "zarr.core.chunk_key_encodings.V2ChunkKeyEncoding",
132+
},
129133
"buffer": "zarr.buffer.cpu.Buffer",
130134
"ndbuffer": "zarr.buffer.cpu.NDBuffer",
131135
}

src/zarr/registry.py

Lines changed: 38 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ def register(self, cls: type[T], qualname: str | None = None) -> None:
6464
__pipeline_registry: Registry[CodecPipeline] = Registry()
6565
__buffer_registry: Registry[Buffer] = Registry()
6666
__ndbuffer_registry: Registry[NDBuffer] = Registry()
67-
__chunk_key_encoding_registry: Registry[ChunkKeyEncoding] = Registry()
67+
# Now a dict[str, Registry[ChunkKeyEncoding]]
68+
__chunk_key_encoding_registries: dict[str, Registry[ChunkKeyEncoding]] = defaultdict(Registry)
6869

6970
# CHANGE: Consider updating docstring
7071
"""
@@ -105,12 +106,10 @@ def _collect_entrypoints() -> list[Registry[Any]]:
105106
data_type_registry._lazy_load_list.extend(entry_points.select(group="zarr.data_type"))
106107
data_type_registry._lazy_load_list.extend(entry_points.select(group="zarr", name="data_type"))
107108

108-
__chunk_key_encoding_registry.lazy_load_list.extend(
109-
entry_points.select(group="zarr.chunk_key_encoding")
110-
)
111-
__chunk_key_encoding_registry.lazy_load_list.extend(
112-
entry_points.select(group="zarr", name="chunk_key_encoding")
113-
)
109+
for e in entry_points.select(group="zarr.chunk_key_encoding"):
110+
__chunk_key_encoding_registries[e.name].lazy_load_list.append(e)
111+
for e in entry_points.select(group="zarr", name="chunk_key_encoding"):
112+
__chunk_key_encoding_registries[e.name].lazy_load_list.append(e)
114113

115114
__pipeline_registry.lazy_load_list.extend(entry_points.select(group="zarr.codec_pipeline"))
116115
__pipeline_registry.lazy_load_list.extend(
@@ -127,7 +126,7 @@ def _collect_entrypoints() -> list[Registry[Any]]:
127126
__pipeline_registry,
128127
__buffer_registry,
129128
__ndbuffer_registry,
130-
__chunk_key_encoding_registry,
129+
*(__chunk_key_encoding_registries.values()),
131130
]
132131

133132

@@ -158,8 +157,10 @@ def register_buffer(cls: type[Buffer], qualname: str | None = None) -> None:
158157
__buffer_registry.register(cls, qualname)
159158

160159

161-
def register_chunk_key_encoding(cls: type, qualname: str | None = None) -> None:
162-
__chunk_key_encoding_registry.register(cls, qualname)
160+
def register_chunk_key_encoding(
161+
key: str, cke_cls: type[ChunkKeyEncoding], qualname: str | None = None
162+
) -> None:
163+
__chunk_key_encoding_registries[key].register(cke_cls, qualname)
163164

164165

165166
def get_codec_class(key: str, reload_config: bool = False) -> type[Codec]:
@@ -299,14 +300,36 @@ def get_ndbuffer_class(reload_config: bool = False) -> type[NDBuffer]:
299300
)
300301

301302

302-
def get_chunk_key_encoding_class(key: str) -> type[ChunkKeyEncoding]:
303-
__chunk_key_encoding_registry.lazy_load()
304-
if key not in __chunk_key_encoding_registry:
303+
def get_chunk_key_encoding_class(key: str, reload_config: bool = False) -> type[ChunkKeyEncoding]:
304+
if reload_config:
305+
_reload_config()
306+
307+
if key in __chunk_key_encoding_registries:
308+
__chunk_key_encoding_registries[key].lazy_load()
309+
else:
305310
raise KeyError(
306-
f"Chunk key encoding '{key}' not found in registered chunk key encodings: {list(__chunk_key_encoding_registry)}."
311+
f"Chunk key encoding '{key}' not found in registered chunk key encodings: {list(__chunk_key_encoding_registries)}."
307312
)
308313

309-
return __chunk_key_encoding_registry[key]
314+
cke_classes = __chunk_key_encoding_registries[key]
315+
if not cke_classes:
316+
raise KeyError(key)
317+
318+
config_entry = config.get("chunk_key_encodings", {}).get(key)
319+
if config_entry is None:
320+
if len(cke_classes) == 1:
321+
return next(iter(cke_classes.values()))
322+
warnings.warn(
323+
f"Chunk key encoding '{key}' not configured in config. Selecting any implementation.",
324+
stacklevel=2,
325+
category=ZarrUserWarning,
326+
)
327+
return list(cke_classes.values())[-1]
328+
selected_encoding_cls = cke_classes[config_entry]
329+
330+
if selected_encoding_cls:
331+
return selected_encoding_cls
332+
raise KeyError(key)
310333

311334

312335
_collect_entrypoints()

0 commit comments

Comments
 (0)