Skip to content

Commit aa32271

Browse files
committed
route object dtype arrays to vlen string dtype when numpy >= 2
1 parent 2bffe1a commit aa32271

File tree

2 files changed

+6
-8
lines changed

2 files changed

+6
-8
lines changed

src/zarr/core/dtype/__init__.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,10 @@ def get_data_type_from_native_dtype(dtype: npt.DTypeLike) -> ZDType[_BaseDType,
9090
"""
9191
data_type_registry.lazy_load()
9292
if not isinstance(dtype, np.dtype):
93-
if dtype in (str, "str"):
93+
# TODO: This check has a lot of assumptions in it! Chiefly, we assume that the
94+
# numpy object dtype contains variable-length strings, which is not true in general.
95+
# When / if zarr python supports ragged arrays, for example, this check will fail!
96+
if dtype in (str, "str", "|T16", "O", "|O", np.dtypes.ObjectDType()):
9497
if _NUMPY_SUPPORTS_VLEN_STRING:
9598
na_dtype = np.dtype("T")
9699
else:
@@ -99,12 +102,7 @@ def get_data_type_from_native_dtype(dtype: npt.DTypeLike) -> ZDType[_BaseDType,
99102
# this is a valid _VoidDTypeLike check
100103
na_dtype = np.dtype([tuple(d) for d in dtype])
101104
else:
102-
if dtype == "|T16":
103-
# `|T16` is the numpy dtype str form for variable-length strings. Unfortunately,
104-
# numpy cannot create these directly from np.dtype("|T16")
105-
na_dtype = np.dtypes.StringDType()
106-
else:
107-
na_dtype = np.dtype(dtype)
105+
na_dtype = np.dtype(dtype)
108106
else:
109107
na_dtype = dtype
110108
return data_type_registry.match_dtype(na_dtype)

src/zarr/core/dtype/_numpy.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1051,7 +1051,7 @@ def _cast_value_unsafe(self, value: object) -> str:
10511051
return str(value)
10521052

10531053
else:
1054-
1054+
# Numpy pre-2 does not have a variable length string dtype, so we use the Object dtype instead.
10551055
@dataclass(frozen=True, kw_only=True)
10561056
class VariableLengthString(ZDType[np.dtypes.ObjectDType, str]): # type: ignore[no-redef]
10571057
dtype_cls = np.dtypes.ObjectDType

0 commit comments

Comments
 (0)