11from __future__ import annotations
22
3+ from collections .abc import Mapping
34import json
45import re
56from typing import TYPE_CHECKING , Literal
1011from zarr .codecs .bytes import BytesCodec
1112from zarr .core .buffer import default_buffer_prototype
1213from zarr .core .chunk_key_encodings import DefaultChunkKeyEncoding , V2ChunkKeyEncoding
14+ from zarr .core .common import ArrayMetadataJSON_V3 , NamedConfig
1315from zarr .core .config import config
1416from zarr .core .dtype import get_data_type_from_native_dtype
1517from zarr .core .dtype .npy .string import _NUMPY_SUPPORTS_VLEN_STRING
@@ -110,83 +112,84 @@ def test_parse_fill_value_invalid_type_sequence(fill_value: Any, dtype_str: str)
110112 dtype_instance .from_json_scalar (fill_value , zarr_format = 3 )
111113
112114
@pytest.mark.parametrize(
    "chunk_grid", [{"name": "regular", "configuration": {"chunk_shape": (1, 1, 1)}}]
)
@pytest.mark.parametrize("codecs", [({"name": "bytes"},)])
@pytest.mark.parametrize("fill_value", [0, 1])
@pytest.mark.parametrize("data_type", ["int8", "uint8"])
@pytest.mark.parametrize(
    "chunk_key_encoding",
    [
        {"name": "v2", "configuration": {"separator": "."}},
        {"name": "v2", "configuration": {"separator": "/"}},
        {"name": "v2"},
        {"name": "default", "configuration": {"separator": "."}},
        {"name": "default", "configuration": {"separator": "/"}},
        {"name": "default"},
    ],
)
@pytest.mark.parametrize("attributes", ["unset", {"foo": "bar"}])
@pytest.mark.parametrize("dimension_names", [(None, None, None), ("a", "b", None), "unset"])
@pytest.mark.parametrize("storage_transformers", [(), "unset"])
def test_metadata_to_dict(
    chunk_grid: NamedConfig[str, Mapping[str, object]],
    codecs: tuple[dict[str, JSON], ...],
    data_type: str,
    fill_value: Any,
    chunk_key_encoding: NamedConfig[str, Mapping[str, object]],
    dimension_names: tuple[str | None, ...] | Literal["unset"],
    attributes: dict[str, Any] | Literal["unset"],
    storage_transformers: tuple[dict[str, JSON], ...] | Literal["unset"],
) -> None:
    """
    Round-trip a metadata document through ``ArrayV3Metadata.from_dict`` and
    ``ArrayV3Metadata.to_dict`` and check every field of the result.

    The string ``"unset"`` is a sentinel meaning "leave this optional field out
    of the input document"; ``None`` is not used for that purpose here because
    it is a legal element of ``dimension_names``.

    Checks performed on the output document:

    - a field that was present in the input must compare equal to the input,
      except ``chunk_key_encoding``, whose separator may have been filled in;
    - ``chunk_key_encoding`` must keep its name, and its separator must be the
      one given in the input or, when none was given, ``"."`` for ``"v2"`` and
      ``"/"`` for ``"default"``;
    - a field that was absent from the input must come back as ``{}`` for
      ``attributes``, ``()`` for ``storage_transformers`` and ``None`` for
      anything else.
    """
    shape = (1, 2, 3)

    # Separator expected in the output when the input document does not name one.
    default_separators = {"v2": ".", "default": "/"}

    # Optional fields of the array metadata document: only the ones that are
    # not flagged "unset" are written into the input.
    optional_fields = {
        "dimension_names": dimension_names,
        "storage_transformers": storage_transformers,
        "attributes": attributes,
    }
    not_required = {name: value for name, value in optional_fields.items() if value != "unset"}

    source_dict = {
        "zarr_format": 3,
        "node_type": "array",
        "shape": shape,
        "chunk_grid": chunk_grid,
        "data_type": data_type,
        "chunk_key_encoding": chunk_key_encoding,
        "codecs": codecs,
        "fill_value": fill_value,
    } | not_required

    metadata = ArrayV3Metadata.from_dict(source_dict)
    parsed_dict = metadata.to_dict()

    for key, value in parsed_dict.items():
        if key not in source_dict:
            # The field was synthesised while parsing; check its default.
            if key == "attributes":
                assert value == {}
            elif key == "storage_transformers":
                assert value == ()
            else:
                assert value is None
        elif key == "chunk_key_encoding":
            encoding_name = chunk_key_encoding["name"]
            assert value["name"] == encoding_name
            # A single lookup covers all three input shapes: explicit
            # separator, configuration without separator, no configuration.
            expected_separator = chunk_key_encoding.get("configuration", {}).get(
                "separator", default_separators[encoding_name]
            )
            assert value["configuration"]["separator"] == expected_separator
        else:
            assert source_dict[key] == value
191194
192195@pytest .mark .parametrize ("indent" , [2 , 4 , None ])
@@ -201,14 +204,14 @@ def test_json_indent(indent: int):
201204@pytest .mark .parametrize ("precision" , ["ns" , "D" ])
202205async def test_datetime_metadata (fill_value : int , precision : str ) -> None :
203206 dtype = DateTime64 (unit = precision )
204- metadata_dict = {
207+ metadata_dict : ArrayMetadataJSON_V3 = {
205208 "zarr_format" : 3 ,
206209 "node_type" : "array" ,
207210 "shape" : (1 ,),
208211 "chunk_grid" : {"name" : "regular" , "configuration" : {"chunk_shape" : (1 ,)}},
209212 "data_type" : dtype .to_json (zarr_format = 3 ),
210- "chunk_key_encoding" : {"name" : "default" , "separator" : "." },
211- "codecs" : (BytesCodec () ,),
213+ "chunk_key_encoding" : {"name" : "default" , "configuration" : { " separator" : "." } },
214+ "codecs" : ({ "name" : "bytes" } ,),
212215 "fill_value" : dtype .to_json_scalar (
213216 dtype .to_native_dtype ().type (fill_value , dtype .unit ), zarr_format = 3
214217 ),
@@ -225,13 +228,13 @@ async def test_datetime_metadata(fill_value: int, precision: str) -> None:
225228 ("data_type" , "fill_value" ), [("uint8" , {}), ("int32" , [0 , 1 ]), ("float32" , "foo" )]
226229)
227230async def test_invalid_fill_value_raises (data_type : str , fill_value : float ) -> None :
228- metadata_dict = {
231+ metadata_dict : ArrayMetadataJSON_V3 = {
229232 "zarr_format" : 3 ,
230233 "node_type" : "array" ,
231234 "shape" : (1 ,),
232235 "chunk_grid" : {"name" : "regular" , "configuration" : {"chunk_shape" : (1 ,)}},
233236 "data_type" : data_type ,
234- "chunk_key_encoding" : {"name" : "default" , "separator" : "." },
237+ "chunk_key_encoding" : {"name" : "default" , "configuration" : { " separator" : "." } },
235238 "codecs" : ({"name" : "bytes" },),
236239 "fill_value" : fill_value , # this is not a valid fill value for uint8
237240 }
@@ -242,13 +245,13 @@ async def test_invalid_fill_value_raises(data_type: str, fill_value: float) -> N
242245
243246@pytest .mark .parametrize ("fill_value" , [("NaN" ), "Infinity" , "-Infinity" ])
244247async def test_special_float_fill_values (fill_value : str ) -> None :
245- metadata_dict = {
248+ metadata_dict : ArrayMetadataJSON_V3 = {
246249 "zarr_format" : 3 ,
247250 "node_type" : "array" ,
248251 "shape" : (1 ,),
249252 "chunk_grid" : {"name" : "regular" , "configuration" : {"chunk_shape" : (1 ,)}},
250253 "data_type" : "float64" ,
251- "chunk_key_encoding" : {"name" : "default" , "separator" : "." },
254+ "chunk_key_encoding" : {"name" : "default" , "configuration" : { " separator" : "." } },
252255 "codecs" : [{"name" : "bytes" }],
253256 "fill_value" : fill_value , # this is not a valid fill value for uint8
254257 }
0 commit comments