-
Notifications
You must be signed in to change notification settings - Fork 23
Open
Description
Behavior
Calling DecodedFeatures.get() in an environment with numpy>=2.0 installed raises the following error:
RemoteTraceback:
"""
Traceback (most recent call last):
File "/project-dir/.venv/lib/python3.11/site-packages/bdpy/dataform/features.py", line 30, in _load_array_with_key
return sio.loadmat(path)[key]
^^^^^^^^^^^^^^^^^
File "/project-dir/.venv/lib/python3.11/site-packages/scipy/io/matlab/_mio.py", line 234, in loadmat
MR, _ = mat_reader_factory(f, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/project-dir/.venv/lib/python3.11/site-packages/scipy/io/matlab/_mio.py", line 80, in mat_reader_factory
raise NotImplementedError('Please use HDF reader for matlab v7.3 '
NotImplementedError: Please use HDF reader for matlab v7.3 files, e.g. h5py
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/python-share-dir/lib/python3.11/multiprocessing/pool.py", line 125, in worker
result = (True, func(*args, **kwds))
^^^^^^^^^^^^^^^^^^^
File "/python-share-dir/lib/python3.11/multiprocessing/pool.py", line 48, in mapstar
return list(map(*args))
^^^^^^^^^^^^^^^^
File "/project-dir/.venv/lib/python3.11/site-packages/bdpy/dataform/features.py", line 33, in _load_array_with_key
return hdf5storage.loadmat(path)[key]
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/project-dir/.venv/lib/python3.11/site-packages/hdf5storage/__init__.py", line 1764, in loadmat
options = Options(marshaller_collection=marshaller_collection)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/project-dir/.venv/lib/python3.11/site-packages/hdf5storage/__init__.py", line 260, in __init__
self.marshaller_collection = MarshallerCollection()
^^^^^^^^^^^^^^^^^^^^^^
File "/project-dir/.venv/lib/python3.11/site-packages/hdf5storage/__init__.py", line 902, in __init__
self._builtin_marshallers = [m() for key, m in dict(
^^^^^^^^^^^^^^^^^^^^^^^^
File "/project-dir/.venv/lib/python3.11/site-packages/hdf5storage/__init__.py", line 902, in <listcomp>
self._builtin_marshallers = [m() for key, m in dict(
^^^
File "/project-dir/.venv/lib/python3.11/site-packages/hdf5storage/Marshallers.py", line 492, in __init__
np.bytes_, np.unicode_, np.object_]
^^^^^^^^^^^
File "/project-dir/.venv/lib/python3.11/site-packages/numpy/__init__.py", line 400, in __getattr__
raise AttributeError(
AttributeError: `np.unicode_` was removed in the NumPy 2.0 release. Use `np.str_` instead.
"""
The above exception was the direct cause of the following exception:
AttributeError Traceback (most recent call last)
Cell In[12], line 1
----> 1 decoded_features = decoded_features_dataset.get(layer=layer_path_name, subject=subject, roi=roi)
File /project-dir/src/dists_like_loss/dataset.py:112, in DecodedFeaturesDataset.get(self, layer, subject, roi, fold, label)
111 def get(self, layer=None, subject=None, roi=None, fold=None, label=None):
--> 112 return self._decoded_features_store.get(
113 layer=layer,
114 subject=subject,
115 roi=roi,
116 fold=fold,
117 label=label,
118 )
File /project-dir/.venv/lib/python3.11/site-packages/bdpy/dataform/features.py:390, in DecodedFeatures.get(self, layer, subject, roi, fold, label, image)
388 else:
389 with Pool(processes=num_parallel) as pool:
--> 390 features = np.concatenate(pool.map(partial(_load_array_with_key, self.__file_key), files), axis=0)
392 if self.__squeeze:
393 features = np.squeeze(features)
File /python-share-dir/lib/python3.11/multiprocessing/pool.py:367, in Pool.map(self, func, iterable, chunksize)
362 def map(self, func, iterable, chunksize=None):
363 '''
364 Apply `func` to each element in `iterable`, collecting the results
365 in a list that is returned.
366 '''
--> 367 return self._map_async(func, iterable, mapstar, chunksize).get()
File /python-share-dir/lib/python3.11/multiprocessing/pool.py:774, in ApplyResult.get(self, timeout)
772 return self._value
773 else:
--> 774 raise self._value
AttributeError: `np.unicode_` was removed in the NumPy 2.0 release. Use `np.str_` instead.

Idea on the solution
This problem is most likely caused by hdf5storage.loadmat: running hdf5storage.loadmat() directly in an environment with numpy>=2.0 raises the same error. The last commit on the hdf5storage GitHub repository was two years ago, so we cannot expect it to be actively maintained.
Based on this observation, I propose to change the following loader function:
bdpy/bdpy/dataform/features.py
Lines 25 to 32 in d86199f
| import hdf5storage | |
| def _load_array_with_key(key: str, path: str) -> np.ndarray: | |
| try: | |
| return sio.loadmat(path)[key] | |
| except (NotImplementedError, ValueError): | |
| return hdf5storage.loadmat(path)[key] |
as
import h5py
def _load_array_with_key(key: str, path: str) -> np.ndarray:
try:
return sio.loadmat(path)[key]
except (NotImplementedError, ValueError):
with h5py.File(path, "r") as f:
return f[key][:].transpose()

hdf5storage is also used in other places, and I think it will be necessary to gradually replace it with h5py.
$ grep -r hdf5storage . --include "*.py"
./bdpy/dataform/datastore.py:import hdf5storage
./bdpy/dataform/datastore.py: r = hdf5storage.loadmat(fpath)[self.__variable]
./bdpy/dataform/sparse.py:import hdf5storage
./bdpy/dataform/sparse.py: return hdf5storage.loadmat(fname)[key]
./bdpy/dataform/sparse.py: hdf5storage.savemat(fname,
./bdpy/dataform/sparse.py: hdf5storage.savemat(fname,
./bdpy/dataform/sparse.py: hdf5storage.savemat(fname, {key: {u'__bdpy_sparse_arrray': True,
./bdpy/dataform/sparse.py: data = hdf5storage.loadmat(fname)[key]
./bdpy/dataform/features.py:import hdf5storage
./bdpy/dataform/features.py: return hdf5storage.loadmat(path)[key]
./bdpy/dataform/features.py: self.__feat_index_table = hdf5storage.loadmat(feature_index)['index']
./bdpy/dataform/features.py: hdf5storage.savemat(save_file, {'feat': feature})
./tests/dataform/test_features.py:import hdf5storage
./tests/dataform/test_features.py: hdf5storage.savemat(

Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels