|
39 | 39 | from zarr.core.chunk_grids import _auto_partition |
40 | 40 | from zarr.core.common import JSON, MemoryOrder, ZarrFormat |
41 | 41 | from zarr.core.dtype import get_data_type_from_native_dtype |
42 | | -from zarr.core.dtype._numpy import Float64, Int16, endianness_from_numpy_str |
| 42 | +from zarr.core.dtype._numpy import ( |
| 43 | + DateTime64, |
| 44 | + Float64, |
| 45 | + Int16, |
| 46 | + Structured, |
| 47 | + endianness_from_numpy_str, |
| 48 | +) |
43 | 49 | from zarr.core.dtype.common import Endianness |
44 | 50 | from zarr.core.dtype.wrapper import ZDType |
45 | 51 | from zarr.core.group import AsyncGroup |
@@ -936,12 +942,59 @@ def test_chunks_and_shards(store: Store) -> None: |
936 | 942 | assert arr_v2.shards is None |
937 | 943 |
|
938 | 944 | @staticmethod |
939 | | - @pytest.mark.parametrize( |
940 | | - ("dtype", "fill_value_expected"), [("<U4", ""), ("<S4", b""), ("i", 0), ("f", 0.0)] |
941 | | - ) |
942 | | - def test_default_fill_value(dtype: str, fill_value_expected: object, store: Store) -> None: |
| 945 | + @pytest.mark.parametrize("dtype", zdtype_examples) |
| 946 | + def test_default_fill_value(dtype: ZDType[Any, Any], store: Store) -> None: |
| 947 | + """ |
| 948 | + Test that the fill value of an array is set to the default value for the dtype object |
| 949 | + """ |
943 | 950 | a = zarr.create_array(store, shape=(5,), chunks=(5,), dtype=dtype) |
944 | | - assert a.fill_value == fill_value_expected |
| 951 | + if isinstance(dtype, DateTime64) and np.isnat(a.fill_value): |
| 952 | + assert np.isnat(dtype.default_value()) |
| 953 | + else: |
| 954 | + assert a.fill_value == dtype.default_value() |
| 955 | + |
| 956 | + @staticmethod |
| 957 | + @pytest.mark.parametrize("dtype", zdtype_examples) |
| 958 | + def test_dtype_forms(dtype: ZDType[Any, Any], store: Store, zarr_format: ZarrFormat) -> None: |
| 959 | + """ |
| 960 | + Test that the same array is produced from a ZDType instance, a numpy dtype, or a numpy string |
| 961 | + """ |
| 962 | + a = zarr.create_array( |
| 963 | + store, name="a", shape=(5,), chunks=(5,), dtype=dtype, zarr_format=zarr_format |
| 964 | + ) |
| 965 | + b = zarr.create_array( |
| 966 | + store, |
| 967 | + name="b", |
| 968 | + shape=(5,), |
| 969 | + chunks=(5,), |
| 970 | + dtype=dtype.to_dtype(), |
| 971 | + zarr_format=zarr_format, |
| 972 | + ) |
| 973 | + assert a.dtype == b.dtype |
| 974 | + |
| 975 | + # Structured dtypes do not have a numpy string representation that uniquely identifies them |
| 976 | + if not isinstance(dtype, Structured): |
| 977 | + c = zarr.create_array( |
| 978 | + store, |
| 979 | + name="c", |
| 980 | + shape=(5,), |
| 981 | + chunks=(5,), |
| 982 | + dtype=dtype.to_dtype().str, |
| 983 | + zarr_format=zarr_format, |
| 984 | + ) |
| 985 | + assert a.dtype == c.dtype |
| 986 | + |
| 987 | + @staticmethod |
| 988 | + @pytest.mark.parametrize("dtype", zdtype_examples) |
| 989 | + def test_dtype_roundtrip( |
| 990 | + dtype: ZDType[Any, Any], store: Store, zarr_format: ZarrFormat |
| 991 | + ) -> None: |
| 992 | + """ |
| 993 | + Test that creating an array, then opening it, gets the same array. |
| 994 | + """ |
| 995 | + a = zarr.create_array(store, shape=(5,), chunks=(5,), dtype=dtype, zarr_format=zarr_format) |
| 996 | + b = zarr.open_array(store) |
| 997 | + assert a.dtype == b.dtype |
945 | 998 |
|
946 | 999 | @staticmethod |
947 | 1000 | @pytest.mark.parametrize("dtype", ["uint8", "float32", "str", "U3", "S4", "V1"]) |
@@ -1266,6 +1319,64 @@ async def test_name(store: Store, zarr_format: ZarrFormat, path: str | None) -> |
1266 | 1319 | store=store, path=parent_path, mode="r", zarr_format=zarr_format |
1267 | 1320 | ) |
1268 | 1321 |
|
| 1322 | + @staticmethod |
| 1323 | + @pytest.mark.parametrize("endianness", get_args(Endianness)) |
| 1324 | + def test_default_endianness( |
| 1325 | + store: Store, zarr_format: ZarrFormat, endianness: Endianness |
| 1326 | + ) -> None: |
| 1327 | + """ |
| 1328 | + Test that endianness is correctly set when creating an array without specifying a serializer |
| 1329 | + """ |
| 1330 | + dtype = Int16(endianness=endianness) |
| 1331 | + arr = zarr.create_array(store=store, shape=(1,), dtype=dtype, zarr_format=zarr_format) |
| 1332 | + assert endianness_from_numpy_str(arr[:].dtype.byteorder) == endianness |
| 1333 | + if zarr_format == 3: |
| 1334 | + assert isinstance(arr.metadata, ArrayV3Metadata) # mypy |
| 1335 | + assert str(arr.metadata.codecs[0].endian.value) == endianness # type: ignore[union-attr] |
| 1336 | + |
| 1337 | + @staticmethod |
| 1338 | + @pytest.mark.parametrize("endianness", get_args(Endianness)) |
| 1339 | + def test_explicit_endianness(store: Store, endianness: Endianness) -> None: |
| 1340 | + """ |
| 1341 | + Test that a mismatch between the bytescodec endianness and the dtype endianness is an error |
| 1342 | + """ |
| 1343 | + if endianness == "little": |
| 1344 | + dtype = Int16(endianness="big") |
| 1345 | + else: |
| 1346 | + dtype = Int16(endianness="little") |
| 1347 | + |
| 1348 | + serializer = BytesCodec(endian=endianness) |
| 1349 | + |
| 1350 | + msg = ( |
| 1351 | + f"The endianness of the requested serializer ({serializer}) does not match the endianness of the dtype ({dtype.endianness}). " |
| 1352 | + "The endianness of the serializer and the dtype must match." |
| 1353 | + ) |
| 1354 | + |
| 1355 | + with pytest.raises(ValueError, match=re.escape(msg)): |
| 1356 | + _ = zarr.create_array( |
| 1357 | + store=store, |
| 1358 | + shape=(1,), |
| 1359 | + dtype=dtype, |
| 1360 | + zarr_format=3, |
| 1361 | + serializer=serializer, |
| 1362 | + ) |
| 1363 | + |
| 1364 | + # additional check for the case where the serializer has endian=None |
| 1365 | + none_serializer = dataclasses.replace(serializer, endian=None) |
| 1366 | + msg = ( |
| 1367 | + f"The endianness of the requested serializer ({none_serializer}) does not match the endianness of the dtype ({dtype.endianness}). " |
| 1368 | + "The endianness of the serializer and the dtype must match." |
| 1369 | + ) |
| 1370 | + |
| 1371 | + with pytest.raises(ValueError, match=re.escape(msg)): |
| 1372 | + _ = zarr.create_array( |
| 1373 | + store=store, |
| 1374 | + shape=(1,), |
| 1375 | + dtype=dtype, |
| 1376 | + zarr_format=3, |
| 1377 | + serializer=none_serializer, |
| 1378 | + ) |
| 1379 | + |
1269 | 1380 |
|
1270 | 1381 | async def test_scalar_array() -> None: |
1271 | 1382 | arr = zarr.array(1.5) |
@@ -1384,61 +1495,3 @@ async def test_sharding_coordinate_selection() -> None: |
1384 | 1495 | ) |
1385 | 1496 | arr[:] = np.arange(2 * 3 * 4).reshape((2, 3, 4)) |
1386 | 1497 | assert (arr[1, [0, 1]] == np.array([[12, 13, 14, 15], [16, 17, 18, 19]])).all() |
1387 | | - |
1388 | | - |
1389 | | -@pytest.mark.parametrize("store", ["memory"], indirect=True) |
1390 | | -@pytest.mark.parametrize("endianness", get_args(Endianness)) |
1391 | | -def test_default_endianness(store: Store, zarr_format: ZarrFormat, endianness: Endianness) -> None: |
1392 | | - """ |
1393 | | - Test that that endianness is correctly set when creating an array when not specifying a serializer |
1394 | | - """ |
1395 | | - dtype = Int16(endianness=endianness) |
1396 | | - arr = zarr.create_array(store=store, shape=(1,), dtype=dtype, zarr_format=zarr_format) |
1397 | | - assert endianness_from_numpy_str(arr[:].dtype.byteorder) == endianness |
1398 | | - if zarr_format == 3: |
1399 | | - assert isinstance(arr.metadata, ArrayV3Metadata) # mypy |
1400 | | - assert str(arr.metadata.codecs[0].endian.value) == endianness # type: ignore[union-attr] |
1401 | | - |
1402 | | - |
1403 | | -@pytest.mark.parametrize("store", ["memory"], indirect=True) |
1404 | | -@pytest.mark.parametrize("endianness", get_args(Endianness)) |
1405 | | -def test_explicit_endianness(store: Store, endianness: Endianness) -> None: |
1406 | | - """ |
1407 | | - Test that that a mismatch between the bytescodec endianness and the dtype endianness is an error |
1408 | | - """ |
1409 | | - if endianness == "little": |
1410 | | - dtype = Int16(endianness="big") |
1411 | | - else: |
1412 | | - dtype = Int16(endianness="little") |
1413 | | - |
1414 | | - serializer = BytesCodec(endian=endianness) |
1415 | | - |
1416 | | - msg = ( |
1417 | | - f"The endianness of the requested serializer ({serializer}) does not match the endianness of the dtype ({dtype.endianness}). " |
1418 | | - "The endianness of the serializer and the dtype must match." |
1419 | | - ) |
1420 | | - |
1421 | | - with pytest.raises(ValueError, match=re.escape(msg)): |
1422 | | - _ = zarr.create_array( |
1423 | | - store=store, |
1424 | | - shape=(1,), |
1425 | | - dtype=dtype, |
1426 | | - zarr_format=3, |
1427 | | - serializer=serializer, |
1428 | | - ) |
1429 | | - |
1430 | | - # additional check for the case where the serializer has endian=None |
1431 | | - none_serializer = dataclasses.replace(serializer, endian=None) |
1432 | | - msg = ( |
1433 | | - f"The endianness of the requested serializer ({none_serializer}) does not match the endianness of the dtype ({dtype.endianness}). " |
1434 | | - "The endianness of the serializer and the dtype must match." |
1435 | | - ) |
1436 | | - |
1437 | | - with pytest.raises(ValueError, match=re.escape(msg)): |
1438 | | - _ = zarr.create_array( |
1439 | | - store=store, |
1440 | | - shape=(1,), |
1441 | | - dtype=dtype, |
1442 | | - zarr_format=3, |
1443 | | - serializer=none_serializer, |
1444 | | - ) |
|
0 commit comments