3535 ShapeLike ,
3636 ZarrFormat ,
3737 concurrent_map ,
38+ parse_dtype ,
3839 parse_shapelike ,
3940 product ,
4041)
@@ -365,16 +366,17 @@ async def create(
365366 ) -> AsyncArray [ArrayV2Metadata ] | AsyncArray [ArrayV3Metadata ]:
366367 store_path = await make_store_path (store )
367368
369+ dtype_parsed = parse_dtype (dtype , zarr_format )
368370 shape = parse_shapelike (shape )
369371
370372 if chunks is not None and chunk_shape is not None :
371373 raise ValueError ("Only one of chunk_shape or chunks can be provided." )
372374
373- dtype = np .dtype (dtype )
374375 if chunks :
375- _chunks = normalize_chunks (chunks , shape , dtype .itemsize )
376+ _chunks = normalize_chunks (chunks , shape , dtype_parsed .itemsize )
376377 else :
377- _chunks = normalize_chunks (chunk_shape , shape , dtype .itemsize )
378+ _chunks = normalize_chunks (chunk_shape , shape , dtype_parsed .itemsize )
379+
378380 result : AsyncArray [ArrayV3Metadata ] | AsyncArray [ArrayV2Metadata ]
379381 if zarr_format == 3 :
380382 if dimension_separator is not None :
@@ -396,7 +398,7 @@ async def create(
396398 result = await cls ._create_v3 (
397399 store_path ,
398400 shape = shape ,
399- dtype = dtype ,
401+ dtype = dtype_parsed ,
400402 chunk_shape = _chunks ,
401403 fill_value = fill_value ,
402404 chunk_key_encoding = chunk_key_encoding ,
@@ -406,6 +408,14 @@ async def create(
406408 exists_ok = exists_ok ,
407409 )
408410 elif zarr_format == 2 :
411+ if dtype is str or dtype == "str" :
412+ # another special case: zarr v2 added the vlen-utf8 codec
413+ vlen_codec : dict [str , JSON ] = {"id" : "vlen-utf8" }
414+ if filters and not any (x ["id" ] == "vlen-utf8" for x in filters ):
415+ filters = list (filters ) + [vlen_codec ]
416+ else :
417+ filters = [vlen_codec ]
418+
409419 if codecs is not None :
410420 raise ValueError (
411421 "codecs cannot be used for arrays with version 2. Use filters and compressor instead."
@@ -419,7 +429,7 @@ async def create(
419429 result = await cls ._create_v2 (
420430 store_path ,
421431 shape = shape ,
422- dtype = dtype ,
432+ dtype = dtype_parsed ,
423433 chunks = _chunks ,
424434 dimension_separator = dimension_separator ,
425435 fill_value = fill_value ,
0 commit comments