Skip to content

Commit 1c7895f

Browse files
bendichter and rly authored
add can_read method to HDMFIO and HDF5IO (#875)
Co-authored-by: Ryan Ly <[email protected]>
1 parent 6b1a55f commit 1c7895f

File tree

4 files changed

+43
-11
lines changed

4 files changed

+43
-11
lines changed

CHANGELOG.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,17 @@
11
# HDMF Changelog
22

3-
## HMDF 3.7.0 (Upcoming)
3+
## HDMF 3.7.0 (Upcoming)
44

55
### New features and minor improvements
66
- Updated `ExternalResources` to have EntityKeyTable with updated tests/documentation and minor bug fix to ObjectKeyTable. @mavaylon1 [#872](https://github.com/hdmf-dev/hdmf/pull/872)
7+
- Added abstract static method `HDMFIO.can_read()` and concrete static method `HDF5IO.can_read()`. @bendichter [#875](https://github.com/hdmf-dev/hdmf/pull/875)
78
- Added warning for `DynamicTableRegion` links that are not added to the same parent as the original container object. @mavaylon1 [#891](https://github.com/hdmf-dev/hdmf/pull/891)
89
- Added the `TermSet` class along with integrated validation methods for any child of `AbstractContainer`, e.g., `VectorData`, `Data`, `DynamicTable`. @mavaylon1 [#880](https://github.com/hdmf-dev/hdmf/pull/880)
910
- Allow for `datetime.date` to be used instead of `datetime.datetime`. @bendichter [#874](https://github.com/hdmf-dev/hdmf/pull/874)
1011
- Updated `HDMFIO` and `HDF5IO` to support `ExternalResources`. @mavaylon1 [#895](https://github.com/hdmf-dev/hdmf/pull/895)
1112
- Dropped Python 3.7 support. @rly [#897](https://github.com/hdmf-dev/hdmf/pull/897)
1213

1314
### Documentation and tutorial enhancements:
14-
1515
- Added tutorial for the new `TermSet` class @mavaylon1 [#880](https://github.com/hdmf-dev/hdmf/pull/880)
1616

1717
### Bug fixes

src/hdmf/backends/hdf5/h5tools.py

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,17 @@ class HDF5IO(HDMFIO):
3636

3737
__ns_spec_path = 'namespace' # path to the namespace dataset within a namespace group
3838

39+
@staticmethod
40+
def can_read(path):
41+
"""Determines whether a given path is readable by the HDF5IO class"""
42+
if not os.path.isfile(path):
43+
return False
44+
try:
45+
with h5py.File(path, "r"):
46+
return True
47+
except IOError:
48+
return False
49+
3950
@docval({'name': 'path', 'type': (str, Path), 'doc': 'the path to the HDF5 file', 'default': None},
4051
{'name': 'mode', 'type': str,
4152
'doc': ('the mode to open the HDF5 file with, one of ("w", "r", "r+", "a", "w-", "x"). '
@@ -82,8 +93,8 @@ def __init__(self, **kwargs):
8293
self.__file = file_obj
8394
super().__init__(manager, source=path, external_resources_path=external_resources_path)
8495
# NOTE: source is not set if path is None and file_obj is passed
85-
self.__built = dict() # keep track of each builder for each dataset/group/link for each file
86-
self.__read = dict() # keep track of which files have been read. Key is the filename value is the builder
96+
self.__built = dict() # keep track of each builder for each dataset/group/link for each file
97+
self.__read = dict() # keep track of which files have been read. Key is the filename value is the builder
8798
self.__ref_queue = deque() # a queue of the references that need to be added
8899
self.__dci_queue = HDF5IODataChunkIteratorQueue() # a queue of DataChunkIterators that need to be exhausted
89100
ObjectMapper.no_convert(Dataset)
@@ -603,7 +614,7 @@ def __read_group(self, h5obj, name=None, ignore=set()):
603614
builder = self.__read_dataset(target_obj, builder_name)
604615
else:
605616
builder = self.__read_group(target_obj, builder_name, ignore=ignore)
606-
self.__set_built(sub_h5obj.file.filename, target_obj.id, builder)
617+
self.__set_built(sub_h5obj.file.filename, target_obj.id, builder)
607618
link_builder = LinkBuilder(builder=builder, name=k, source=os.path.abspath(h5obj.file.filename))
608619
link_builder.location = h5obj.name
609620
self.__set_written(link_builder)
@@ -648,7 +659,7 @@ def __read_dataset(self, h5obj, name=None):
648659
name = str(os.path.basename(h5obj.name))
649660
kwargs['source'] = os.path.abspath(h5obj.file.filename)
650661
ndims = len(h5obj.shape)
651-
if ndims == 0: # read scalar
662+
if ndims == 0: # read scalar
652663
scalar = h5obj[()]
653664
if isinstance(scalar, bytes):
654665
scalar = scalar.decode('UTF-8')
@@ -678,7 +689,7 @@ def __read_dataset(self, h5obj, name=None):
678689
elif isinstance(elem1, Reference):
679690
d = BuilderH5ReferenceDataset(h5obj, self)
680691
kwargs['dtype'] = d.dtype
681-
elif h5obj.dtype.kind == 'V': # table / compound data type
692+
elif h5obj.dtype.kind == 'V': # table / compound data type
682693
cpd_dt = h5obj.dtype
683694
ref_cols = [check_dtype(ref=cpd_dt[i]) or check_dtype(vlen=cpd_dt[i]) for i in range(len(cpd_dt))]
684695
d = BuilderH5TableDataset(h5obj, self, ref_cols)
@@ -708,7 +719,7 @@ def __compound_dtype_to_list(cls, h5obj_dtype, dset_dtype):
708719
def __read_attrs(self, h5obj):
709720
ret = dict()
710721
for k, v in h5obj.attrs.items():
711-
if k == SPEC_LOC_ATTR: # ignore cached spec
722+
if k == SPEC_LOC_ATTR: # ignore cached spec
712723
continue
713724
if isinstance(v, RegionReference):
714725
raise ValueError("cannot read region reference attributes yet")
@@ -925,14 +936,14 @@ def set_attributes(self, **kwargs):
925936
self.logger.debug("Setting %s '%s' attribute '%s' to %s"
926937
% (obj.__class__.__name__, obj.name, key, value.__class__.__name__))
927938
obj.attrs[key] = value
928-
elif isinstance(value, (Container, Builder, ReferenceBuilder)): # a reference
939+
elif isinstance(value, (Container, Builder, ReferenceBuilder)): # a reference
929940
self.__queue_ref(self._make_attr_ref_filler(obj, key, value))
930941
else:
931942
self.logger.debug("Setting %s '%s' attribute '%s' to %s"
932943
% (obj.__class__.__name__, obj.name, key, value.__class__.__name__))
933944
if isinstance(value, np.ndarray) and value.dtype.kind == 'U':
934945
value = np.array(value, dtype=H5_TEXT)
935-
obj.attrs[key] = value # a regular scalar
946+
obj.attrs[key] = value # a regular scalar
936947
except Exception as e:
937948
msg = "unable to write attribute '%s' on object '%s'" % (key, obj.name)
938949
raise RuntimeError(msg) from e
@@ -1079,7 +1090,7 @@ def write_dataset(self, **kwargs): # noqa: C901
10791090
name = builder.name
10801091
data = builder.data
10811092
dataio = None
1082-
options = dict() # dict with additional
1093+
options = dict() # dict with additional
10831094
if isinstance(data, H5DataIO):
10841095
options['io_settings'] = data.io_settings
10851096
dataio = data

src/hdmf/backends/io.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,13 @@
1010

1111

1212
class HDMFIO(metaclass=ABCMeta):
13+
14+
@staticmethod
15+
@abstractmethod
16+
def can_read(path):
17+
"""Determines whether a given path is readable by this HDMFIO class"""
18+
pass
19+
1320
@docval({'name': 'manager', 'type': BuildManager,
1421
'doc': 'the BuildManager to use for I/O', 'default': None},
1522
{"name": "source", "type": (str, Path),

tests/unit/test_io_hdf5_h5tools.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3228,6 +3228,10 @@ def test_non_manager_container(self):
32283228

32293229
class OtherIO(HDMFIO):
32303230

3231+
@staticmethod
3232+
def can_read(path):
3233+
pass
3234+
32313235
def read_builder(self):
32323236
pass
32333237

@@ -3257,6 +3261,10 @@ def test_non_HDF5_src_link_data_true(self):
32573261

32583262
class OtherIO(HDMFIO):
32593263

3264+
@staticmethod
3265+
def can_read(path):
3266+
pass
3267+
32603268
def __init__(self, manager):
32613269
super().__init__(manager=manager)
32623270

@@ -3570,3 +3578,9 @@ def test_dataio_shape_then_data(self):
35703578
dataio = H5DataIO(shape=(10, 10), dtype=int)
35713579
with self.assertRaisesRegex(ValueError, "Setting data when dtype and shape are not None is not supported"):
35723580
dataio.data = list()
3581+
3582+
3583+
def test_hdf5io_can_read():
3584+
assert not HDF5IO.can_read("not_a_file")
3585+
assert HDF5IO.can_read("tests/unit/back_compat_tests/1.0.5.h5")
3586+
assert not HDF5IO.can_read(__file__) # this file is not an HDF5 file

0 commit comments

Comments
 (0)