diff --git a/pyproject.toml b/pyproject.toml index 530ddedcd..75703438b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ dependencies = [ "h5py>=3.6.0", "xarray>=0.20.2", "PyYAML>=6.0", - 'numpy>=1.22.4,<2.0.0', + 'numpy', "pandas>=1.3.2", "ase>=3.19.0", "mergedeep", @@ -140,7 +140,7 @@ select = [ "UP", # pyupgrade # "F401", # remove unused import "I001", # sort imports - # "NPY201", # reactivate when np>2.0 is used + "NPY201", # upgrade to numpy>2 ] ignore = [ "E402", # Module level import not at top of file diff --git a/src/pynxtools/dataconverter/hdfdict.py b/src/pynxtools/dataconverter/hdfdict.py index 119e7fb6c..929e2fd07 100644 --- a/src/pynxtools/dataconverter/hdfdict.py +++ b/src/pynxtools/dataconverter/hdfdict.py @@ -7,7 +7,6 @@ import h5py import yaml -from numpy import string_ TYPE = "_type_" @@ -45,7 +44,8 @@ def unpack_dataset(item): """ value = item[()] - type_id = item.attrs.get(TYPE, string_()).astype(str) + type_id = item.attrs.get(TYPE, "") + if type_id == "datetime": if hasattr(value, "__iter__"): value = [datetime.fromtimestamp(ts) for ts in value] @@ -62,7 +62,7 @@ def unpack_dataset(item): value = tuple(value) elif type_id == "str": - value = string_(value).astype(str) + value = value.decode("utf-8") if isinstance(value, bytes) else value return value @@ -181,15 +181,16 @@ def pack_dataset(hdfobject, key, value): attr_data = None if attr_data: - ds.attrs.create(name=TYPE, data=string_(attr_data)) + ds.attrs.create(name=TYPE, data=attr_data.encode("utf-8")) except (TypeError, ValueError): # Obviously the data was not serializable. To give it # a last try; serialize it to yaml # and save it to the hdf file: - ds = hdfobject.create_dataset(name=key, data=string_(yaml.safe_dump(value))) - - ds.attrs.create(name=TYPE, data=string_("yaml")) + ds = hdfobject.create_dataset( + name=key, data=yaml.safe_dump(value).encode("utf-8") + ) + ds.attrs.create(name=TYPE, data=b"yaml") # if this fails again, restructure your data! 
diff --git a/tests/dataconverter/test_validation.py b/tests/dataconverter/test_validation.py index d740a67f3..82e528af2 100644 --- a/tests/dataconverter/test_validation.py +++ b/tests/dataconverter/test_validation.py @@ -32,6 +32,12 @@ from .test_helpers import alter_dict # pylint: disable=unused-import +# Workaround for different str representation of np.bool +if np.lib.NumpyVersion(np.__version__) >= "2.0.0": + np_bool = "numpy.bool" +else: + np_bool = "numpy.bool_" + def set_to_none_in_dict(data_dict: Optional[Template], key: str, optionality: str): """Helper function to forcefully set path to 'None'""" @@ -468,8 +474,7 @@ def format_error_message(msg: str) -> str: "NOT_TRUE_OR_FALSE", ), [ - "The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value should be one of the following Python types: " - "(<class 'bool'>, <class 'numpy.bool_'>), as defined in the NXDL as NX_BOOLEAN." + f"The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value should be one of the following Python types: (<class 'bool'>, <class '{np_bool}'>), as defined in the NXDL as NX_BOOLEAN." ], id="string-instead-of-bool", ), @@ -1906,7 +1911,7 @@ def format_error_message(msg: str) -> str: alter_dict( TEMPLATE, "/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value", - {"compress": np.int64(2), "strength": 11}, + {"compress": 2, "strength": 11}, ), [ "Compression strength for /ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value = " @@ -2015,7 +2020,7 @@ def test_validate_data_dict(data_dict, error_messages, caplog, request): ), [ "The value at /my_entry/nxodd_name/bool_value should be one of the following Python types: " - "(<class 'bool'>, <class 'numpy.bool_'>), as defined in the NXDL as NX_BOOLEAN." + f"(<class 'bool'>, <class '{np_bool}'>), as defined in the NXDL as NX_BOOLEAN." ], id="string-instead-of-bool", ),