Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ dependencies = [
"h5py>=3.6.0",
"xarray>=0.20.2",
"PyYAML>=6.0",
'numpy>=1.22.4,<2.0.0',
'numpy',
"pandas>=1.3.2",
"ase>=3.19.0",
"mergedeep",
Expand Down Expand Up @@ -140,7 +140,7 @@ select = [
"UP", # pyupgrade
# "F401", # remove unused import
"I001", # sort imports
# "NPY201", # reactivate when np>2.0 is used
"NPY201", # upgrade to numpy>2
]
ignore = [
"E402", # Module level import not at top of file
Expand Down
15 changes: 8 additions & 7 deletions src/pynxtools/dataconverter/hdfdict.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

import h5py
import yaml
from numpy import string_

TYPE = "_type_"

Expand Down Expand Up @@ -45,7 +44,8 @@ def unpack_dataset(item):

"""
value = item[()]
type_id = item.attrs.get(TYPE, string_()).astype(str)
type_id = item.attrs.get(TYPE, "")

if type_id == "datetime":
if hasattr(value, "__iter__"):
value = [datetime.fromtimestamp(ts) for ts in value]
Expand All @@ -62,7 +62,7 @@ def unpack_dataset(item):
value = tuple(value)

elif type_id == "str":
value = string_(value).astype(str)
value = value.decode("utf-8") if isinstance(value, bytes) else value
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we wish to support arbitrary string payloads, then we need to think about what happens when decode() raises a UnicodeError.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's okay to address this via #705.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please note that the whole hdfdict.py is just a copy of https://github.com/SiggiGue/hdfdict/blob/master/hdfdict/hdfdict.py (which we had to copy because we wanted to have a stable version and there was no release management on the existing tool at the time). I only made changes here that are related to upgrading to numpy>2, so any decoding problems we had before will be the same. We can discuss this elsewhere later.


return value

Expand Down Expand Up @@ -181,15 +181,16 @@ def pack_dataset(hdfobject, key, value):
attr_data = None

if attr_data:
ds.attrs.create(name=TYPE, data=string_(attr_data))
ds.attrs.create(name=TYPE, data=attr_data.encode("utf-8"))

except (TypeError, ValueError):
# Obviously the data was not serializable. To give it
# a last try; serialize it to yaml
# and save it to the hdf file:
ds = hdfobject.create_dataset(name=key, data=string_(yaml.safe_dump(value)))

ds.attrs.create(name=TYPE, data=string_("yaml"))
ds = hdfobject.create_dataset(
name=key, data=yaml.safe_dump(value).encode("utf-8")
)
ds.attrs.create(name=TYPE, data=b"yaml")
# if this fails again, restructure your data!
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"restructure your data" should be emitted as a message if it fails again

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above, I did not change the code. Could be made more robust in the future, but not here.



Expand Down
13 changes: 9 additions & 4 deletions tests/dataconverter/test_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@

from .test_helpers import alter_dict # pylint: disable=unused-import

# Workaround for different str representation of np.bool
if np.lib.NumpyVersion(np.__version__) >= "2.0.0":
np_bool = "numpy.bool"
else:
np_bool = "numpy.bool_"


def set_to_none_in_dict(data_dict: Optional[Template], key: str, optionality: str):
"""Helper function to forcefully set path to 'None'"""
Expand Down Expand Up @@ -468,8 +474,7 @@ def format_error_message(msg: str) -> str:
"NOT_TRUE_OR_FALSE",
),
[
"The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value should be one of the following Python types: "
"(<class 'bool'>, <class 'numpy.bool_'>), as defined in the NXDL as NX_BOOLEAN."
f"The value at /ENTRY[my_entry]/NXODD_name[nxodd_name]/bool_value should be one of the following Python types: (<class 'bool'>, <class '{np_bool}'>), as defined in the NXDL as NX_BOOLEAN."
],
id="string-instead-of-bool",
),
Expand Down Expand Up @@ -1906,7 +1911,7 @@ def format_error_message(msg: str) -> str:
alter_dict(
TEMPLATE,
"/ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value",
{"compress": np.int64(2), "strength": 11},
{"compress": 2, "strength": 11},
),
[
"Compression strength for /ENTRY[my_entry]/NXODD_name[nxodd_name]/int_value = "
Expand Down Expand Up @@ -2015,7 +2020,7 @@ def test_validate_data_dict(data_dict, error_messages, caplog, request):
),
[
"The value at /my_entry/nxodd_name/bool_value should be one of the following Python types: "
"(<class 'bool'>, <class 'numpy.bool_'>), as defined in the NXDL as NX_BOOLEAN."
f"(<class 'bool'>, <class '{np_bool}'>), as defined in the NXDL as NX_BOOLEAN."
],
id="string-instead-of-bool",
),
Expand Down
Loading