diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6b0ac581b..8c042ec7d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,27 @@
 # HDMF Changelog

+## HDMF 5.0.0 (Upcoming)
+
+### Changed
+- New spec resolution system: Instead of resolving includes during spec loading, resolution now happens after all specs are loaded via `NamespaceCatalog.resolve_all_specs()`. @rly [#1312](https://github.com/hdmf-dev/hdmf/pull/1312)
+  - New method: `BaseStorageSpec.resolve_inc_spec()` replaces the old `BaseStorageSpec.resolve_spec()` method
+  - Resolution tracking: New properties `BaseStorageSpec.resolved` and `BaseStorageSpec.inc_spec_resolved` track resolution state
+  - Cross-namespace resolution: The system can now resolve specs that include types from different namespaces
+  - `dtype`, `shape`, `dims`, `value`, and `default_value` in `DatasetSpec` and `AttributeSpec` are now inherited and validated from the parent data type spec
+- If `dims` is not provided in a `DatasetSpec` or `AttributeSpec` but `shape` is provided, `dims` will be set to a list of dummy dimension names, e.g., "dim_0", "dim_1", etc. @rly [#1312](https://github.com/hdmf-dev/hdmf/pull/1312)
+- Deprecated `BaseStorageSpec.add_attribute`, `GroupSpec.add_group`, `GroupSpec.add_dataset`, and `GroupSpec.add_link`. Use `set_attribute`, `set_group`, `set_dataset`, and `set_link` instead. @rly [#1333](https://github.com/hdmf-dev/hdmf/pull/1333)
+- Deprecated unused `BaseStorageSpec.get_data_type_spec` and `BaseStorageSpec.get_namespace_spec`. @rly [#1333](https://github.com/hdmf-dev/hdmf/pull/1333)
+
+### Added
+- Warning when `data_type_def` and `data_type_inc` are the same in a spec. @rly [#1312](https://github.com/hdmf-dev/hdmf/pull/1312)
+- Added abstract methods `HDMFIO.load_namespaces` and `HDMFIO.load_namespaces_io`. @rly [#1299](https://github.com/hdmf-dev/hdmf/pull/1299)
+
+### Removed
+- Removed unused and undocumented `hdmf.monitor` module. @rly [#1327](https://github.com/hdmf-dev/hdmf/pull/1327)
+- Removed deprecated `Data.set_data_io` usage and the deprecated `HERDManager.link_resources` and `HERDManager.get_linked_resources` methods. @rly [#1328](https://github.com/hdmf-dev/hdmf/pull/1328)
+- Removed deprecated `HDF5IO.copy_file` method. Use `HDF5IO.export` or `h5py.File.copy` instead. @stephprince [#1332](https://github.com/hdmf-dev/hdmf/pull/1332)
+- Removed deprecated `extensions` kwarg from the `get_type_map` function. @stephprince [#1332](https://github.com/hdmf-dev/hdmf/pull/1332)
+
 ## HDMF 4.1.1 (Upcoming)

 ### Fixed
diff --git a/docs/source/extensions.rst b/docs/source/extensions.rst
index 1206d836a..4cfcb79c6 100644
--- a/docs/source/extensions.rst
+++ b/docs/source/extensions.rst
@@ -207,7 +207,7 @@
 The following code demonstrates how to load custom namespaces.

 ..
code-block:: python - from hdmf import load_namespaces + from hdmf.common import load_namespaces namespace_path = 'my_namespace.yaml' load_namespaces(namespace_path) diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index e1873dd5d..38be3ea9d 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -159,25 +159,34 @@ def __resolve_file_obj(cls, path, file_obj, driver, aws_region=None): return file_obj @classmethod - @docval({'name': 'namespace_catalog', 'type': (NamespaceCatalog, TypeMap), - 'doc': 'the NamespaceCatalog or TypeMap to load namespaces into'}, - {'name': 'path', 'type': (str, Path), 'doc': 'the path to the HDF5 file', 'default': None}, - {'name': 'namespaces', 'type': list, 'doc': 'the namespaces to load', 'default': None}, - {'name': 'file', 'type': File, 'doc': 'a pre-existing h5py.File object', 'default': None}, - {'name': 'driver', 'type': str, 'doc': 'driver for h5py to use when opening HDF5 file', 'default': None}, - {'name': 'aws_region', 'type': str, 'doc': 'If driver is ros3, then specify the aws region of the url.', - 'default': None}, - returns=("dict mapping the names of the loaded namespaces to a dict mapping included namespace names and " - "the included data types"), - rtype=dict) + @docval( + { + 'name': 'namespace_catalog', + 'type': (NamespaceCatalog, TypeMap), + 'doc': 'the NamespaceCatalog or TypeMap to load namespaces into' + }, + {'name': 'path', 'type': (str, Path), 'doc': 'the path to the HDF5 file', 'default': None}, + {'name': 'namespaces', 'type': list, 'doc': 'the namespaces to load', 'default': None}, + {'name': 'file', 'type': File, 'doc': 'a pre-existing h5py.File object', 'default': None}, + {'name': 'driver', 'type': str, 'doc': 'driver for h5py to use when opening HDF5 file', 'default': None}, + { + 'name': 'aws_region', + 'type': str, + 'doc': 'If driver is ros3, then specify the aws region of the url.', + 'default': None + }, + returns=("dict mapping the names of the loaded namespaces to a dict mapping included namespace names and " + "the included data types"), + rtype=dict + ) def load_namespaces(cls, **kwargs): - """Load cached namespaces from a file. + """Load cached namespaces from a file into the provided NamespaceCatalog or TypeMap. If `file` is not supplied, then an :py:class:`h5py.File` object will be opened for the given `path`, the namespaces will be read, and the File object will be closed. If `file` is supplied, then the given File object will be read from and not closed. - :raises ValueError: if both `path` and `file` are supplied but `path` is not the same as the path of `file`. 
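For illustration, a minimal sketch of calling the class-level `load_namespaces` documented in this hunk; the file name `data.h5` is hypothetical, and the target can be either a `NamespaceCatalog` or a `TypeMap` per the docval above:

```python
from hdmf.backends.hdf5 import HDF5IO
from hdmf.spec import NamespaceCatalog

catalog = NamespaceCatalog()
# Open the file, read the cached namespaces into the catalog, and close the file.
# Per the docval above, this returns a dict mapping each loaded namespace name to
# a dict of included namespace names and included data types.
deps = HDF5IO.load_namespaces(namespace_catalog=catalog, path='data.h5')
```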
+        :raises ValueError: if both `path` and `file` are supplied but `path` is not the same as the path of `file`
         """
         namespace_catalog, path, namespaces, file_obj, driver, aws_region = popargs(
             'namespace_catalog', 'path', 'namespaces', 'file', 'driver', 'aws_region', kwargs)
@@ -188,12 +197,25 @@ def load_namespaces(cls, **kwargs):
                 return cls.__load_namespaces(namespace_catalog, namespaces, open_file_obj)
         return cls.__load_namespaces(namespace_catalog, namespaces, open_file_obj)

+    @docval(
+        {
+            'name': 'namespace_catalog',
+            'type': (NamespaceCatalog, TypeMap),
+            'doc': 'the NamespaceCatalog or TypeMap to load namespaces into'
+        },
+        {'name': 'namespaces', 'type': list, 'doc': 'the namespaces to load', 'default': None}
+    )
+    def load_namespaces_io(self, **kwargs):
+        """Load cached namespaces from this HDF5IO object into the provided NamespaceCatalog or TypeMap."""
+        namespace_catalog, namespaces = getargs('namespace_catalog', 'namespaces', kwargs)
+        if not self.__file:
+            raise UnsupportedOperation("Cannot load namespaces from closed HDF5 file '%s'" % self.source)
+        return self.__load_namespaces(namespace_catalog, namespaces, self.__file)
+
     @classmethod
     def __load_namespaces(cls, namespace_catalog, namespaces, file_obj):
-        d = {}
         if not cls.__check_specloc(file_obj):
-            return d
+            return {}

         namespace_versions = cls.__get_namespaces(file_obj)
@@ -205,11 +227,9 @@ def __load_namespaces(cls, namespace_catalog, namespaces, file_obj):
         for ns in namespaces:
             latest_version = namespace_versions[ns]
             ns_group = spec_group[ns][latest_version]
-            reader = H5SpecReader(ns_group)
-            readers[ns] = reader
-
-        d.update(namespace_catalog.load_namespaces(cls.__ns_spec_path, reader=readers))
+            readers[ns] = H5SpecReader(ns_group)
+        d = namespace_catalog.load_namespaces(cls.__ns_spec_path, reader=readers)
         return d

     @classmethod
@@ -274,58 +294,6 @@ def __get_namespaces(cls, file_obj):

         return used_version_names

-    @classmethod
-    @docval({'name': 'source_filename', 'type': str, 'doc': 'the path to the HDF5 file to copy'},
-            {'name': 'dest_filename', 'type': str, 'doc': 'the name of the destination file'},
-            {'name': 'expand_external', 'type': bool, 'doc': 'expand external links into new objects', 'default': True},
-            {'name': 'expand_refs', 'type': bool, 'doc': 'copy objects which are pointed to by reference',
-             'default': False},
-            {'name': 'expand_soft', 'type': bool, 'doc': 'expand soft links into new objects', 'default': False}
-            )
-    def copy_file(self, **kwargs):
-        """
-        Convenience function to copy an HDF5 file while allowing external links to be resolved.
-
-        .. warning::
-
-            As of HDMF 2.0, this method is no longer supported and may be removed in a future version.
-            Please use the export method or h5py.File.copy method instead.
-
-        .. note::
-
-            The source file will be opened in 'r' mode and the destination file will be opened in 'w' mode
-            using h5py. To avoid possible collisions, care should be taken that, e.g., the source file is
-            not opened already when calling this function.
-
-        """
-
-        warnings.warn("The copy_file class method is no longer supported and may be removed in a future version of " "HDMF.
Please use the export method or h5py.File.copy method instead.", - category=DeprecationWarning, - stacklevel=3) - - source_filename, dest_filename, expand_external, expand_refs, expand_soft = getargs('source_filename', - 'dest_filename', - 'expand_external', - 'expand_refs', - 'expand_soft', - kwargs) - source_file = File(source_filename, 'r') - dest_file = File(dest_filename, 'w') - for objname in source_file["/"].keys(): - source_file.copy(source=objname, - dest=dest_file, - name=objname, - expand_external=expand_external, - expand_refs=expand_refs, - expand_soft=expand_soft, - shallow=False, - without_attrs=False, - ) - for objname in source_file['/'].attrs: - dest_file['/'].attrs[objname] = source_file['/'].attrs[objname] - source_file.close() - dest_file.close() @docval({'name': 'container', 'type': Container, 'doc': 'the Container object to write'}, {'name': 'cache_spec', 'type': bool, diff --git a/src/hdmf/backends/io.py b/src/hdmf/backends/io.py index 972b57a37..c19d08fdf 100644 --- a/src/hdmf/backends/io.py +++ b/src/hdmf/backends/io.py @@ -1,11 +1,13 @@ from abc import ABCMeta, abstractmethod import os from pathlib import Path +from typing import Union, Optional -from ..build import BuildManager, GroupBuilder +from ..build import BuildManager, GroupBuilder, TypeMap from ..container import Container, HERDManager from .errors import UnsupportedOperation from ..utils import docval, getargs, popargs, get_basic_array_info, generate_array_html_repr +from ..spec import NamespaceCatalog from warnings import warn @@ -188,6 +190,44 @@ def close(self): ''' Close this HDMFIO object to further reading/writing''' pass + @classmethod + @abstractmethod + def load_namespaces(cls, + namespace_catalog: Union[NamespaceCatalog, TypeMap], + path: Optional[Union[str, Path]] = None, + namespaces: Optional[list[str]] = None, + io: Optional['HDMFIO'] = None, + **kwargs + ) -> dict: + """Load the namespaces from the file at the given path into the provided NamespaceCatalog or TypeMap. + + This method should be implemented by subclasses to load the namespaces that are relevant for the backend. + + :param namespace_catalog: The NamespaceCatalog (or TypeMap) to load the namespaces into. + :param path: The path to the file from which to load the namespaces. + :param namespaces: A list of namespace names to load. If None, all namespaces will be loaded. + :param kwargs: Additional keyword arguments that may be needed for the specific backend. + :return: A dictionary mapping namespace names to their dependencies. + """ + pass + + @abstractmethod + def load_namespaces_io(self, + namespace_catalog: Union[NamespaceCatalog, TypeMap], + namespaces: Optional[list[str]] = None, + ) -> dict: + """Load the namespaces from this HDMFIO object into the provided NamespaceCatalog or TypeMap. + + Similar to `load_namespaces`, but uses the already opened HDMFIO object. + This method should be implemented by subclasses to load the namespaces that are relevant for the backend. + + :param namespace_catalog: The NamespaceCatalog (or TypeMap) to load the namespaces into. + :param namespaces: A list of namespace names to load. If None, all namespaces will be loaded. + :return: A dictionary mapping namespace names to their dependencies. 
+        """
+        # NOTE: this function is separated from load_namespaces for developer clarity
+        pass
+
     @staticmethod
     def generate_dataset_html(dataset):
         """Generates an html representation for a dataset"""
diff --git a/src/hdmf/common/__init__.py b/src/hdmf/common/__init__.py
index 36a35dfa8..7ba964371 100644
--- a/src/hdmf/common/__init__.py
+++ b/src/hdmf/common/__init__.py
@@ -168,40 +168,13 @@ def get_class(**kwargs):
     return __TYPE_MAP.get_dt_container_cls(data_type, namespace, post_init_method)

-@docval({'name': 'extensions', 'type': (str, TypeMap, list),
-         'doc': 'a path to a namespace, a TypeMap, or a list consisting of paths to namespaces and TypeMaps',
-         'default': None},
-        returns="the namespaces loaded from the given file", rtype=tuple,
+@docval(returns="a deep copy of the TypeMap for the core namespace", rtype=TypeMap,
        is_method=False)
-def get_type_map(**kwargs):
+def get_type_map():
     '''
-    Get a BuildManager to use for I/O using the given extensions. If no extensions are provided,
-    return a BuildManager that uses the core namespace
+    Get a TypeMap to use for I/O with the core namespace.
     '''
-    extensions = getargs('extensions', kwargs)
-    type_map = None
-    if extensions is None:
-        type_map = deepcopy(__TYPE_MAP)
-    else:
-        warnings.warn("The 'extensions' argument is deprecated and will be removed in HDMF 5.0", DeprecationWarning)
-        if isinstance(extensions, TypeMap):
-            type_map = extensions
-        else:
-            type_map = deepcopy(__TYPE_MAP)
-        if isinstance(extensions, list):
-            for ext in extensions:
-                if isinstance(ext, str):
-                    type_map.load_namespaces(ext)
-                elif isinstance(ext, TypeMap):
-                    type_map.merge(ext)
-                else:
-                    msg = 'extensions must be a list of paths to namespace specs or TypeMaps'
-                    raise ValueError(msg)
-        elif isinstance(extensions, str):
-            type_map.load_namespaces(extensions)
-        elif isinstance(extensions, TypeMap):
-            type_map.merge(extensions)
-    return type_map
+    return deepcopy(__TYPE_MAP)

 @docval(*get_docval(get_type_map),
diff --git a/src/hdmf/container.py b/src/hdmf/container.py
index dd30d208b..04ef7ddea 100644
--- a/src/hdmf/container.py
+++ b/src/hdmf/container.py
@@ -37,27 +37,6 @@ class HERDManager:
     When this class is used as a mixin for a Container, it enables setting and getting an instance of HERD.
     """

-    @docval({'name': 'herd', 'type': 'HERD',
-             'doc': 'The external resources to be used for the container.'},)
-    def link_resources(self, **kwargs):
-        """
-        Method to attach an instance of HERD in order to auto-add terms/references to data.
-        """
-        msg = (
-            "link_resources is deprecated and will be removed in HDMF 5.0. "
-            "Use the external_resources property instead."
-        )
-        warn(msg, DeprecationWarning, stacklevel=2)
-        self.external_resources = kwargs['herd']
-
-    def get_linked_resources(self):
-        msg = (
-            "get_linked_resources is deprecated and will be removed in HDMF 5.0. "
-            "Use the external_resources property instead."
-        )
-        warn(msg, DeprecationWarning, stacklevel=2)
-        return self.external_resources
-
     @property
     def external_resources(self):
         return self._herd if hasattr(self, "_herd") else None
@@ -865,9 +844,9 @@ def set_data_io(
         self,
         dataset_name: str,
         data_io_class: Type[DataIO],
-        data_io_kwargs: dict = None,
+        data_io_kwargs: dict,
         data_chunk_iterator_class: Optional[Type[AbstractDataChunkIterator]] = None,
-        data_chunk_iterator_kwargs: dict = None, **kwargs
+        data_chunk_iterator_kwargs: Optional[dict] = None,
     ):
         """
         Apply DataIO object to a dataset field of the Container.
@@ -884,23 +863,12 @@ def set_data_io(
            Class to use for DataChunkIterator.
If None, no DataChunkIterator is used. data_chunk_iterator_kwargs: dict keyword arguments passed to the constructor of the DataChunkIterator class. - **kwargs: - DEPRECATED. Use data_io_kwargs instead. - kwargs are passed to the constructor of the DataIO class. Notes ----- If data_chunk_iterator_class is not None, the data is wrapped in the DataChunkIterator before being wrapped in the DataIO. This allows for rewriting the backend configuration of hdf5 datasets. """ - if kwargs or (data_io_kwargs is None): - warn( - "Use of **kwargs in Container.set_data_io() is deprecated. Please pass the DataIO kwargs as a " - "dictionary to the `data_io_kwargs` parameter instead.", - DeprecationWarning, - stacklevel=2 - ) - data_io_kwargs = kwargs data = self.fields.get(dataset_name) data_chunk_iterator_kwargs = data_chunk_iterator_kwargs or dict() if data is None: diff --git a/src/hdmf/monitor.py b/src/hdmf/monitor.py deleted file mode 100644 index 9f8e7e0cc..000000000 --- a/src/hdmf/monitor.py +++ /dev/null @@ -1,81 +0,0 @@ -from abc import ABCMeta, abstractmethod -import warnings - -from .data_utils import AbstractDataChunkIterator, DataChunkIterator, DataChunk -from .utils import docval, getargs - -warnings.warn( - "The hdmf.monitor module is deprecated and will be removed in HDMF 5.0. If you are using this module, " - "please copy this module to your codebase or raise an issue in the HDMF repository: " - "https://github.com/hdmf-dev/hdmf/issues", - DeprecationWarning, -) - - -class NotYetExhausted(Exception): - pass - - -class DataChunkProcessor(AbstractDataChunkIterator, metaclass=ABCMeta): - - @docval({'name': 'data', 'type': DataChunkIterator, 'doc': 'the DataChunkIterator to analyze'}) - def __init__(self, **kwargs): - """Initialize the DataChunkIterator""" - # Get the user parameters - self.__dci = getargs('data', kwargs) - - def __next__(self): - try: - dc = self.__dci.__next__() - except StopIteration as e: - self.__done = True - raise e - self.process_data_chunk(dc) - return dc - - def __iter__(self): - return iter(self.__dci) - - def recommended_chunk_shape(self): - return self.__dci.recommended_chunk_shape() - - def recommended_data_shape(self): - return self.__dci.recommended_data_shape() - - def get_final_result(self, **kwargs): - ''' Return the result of processing data fed by this DataChunkIterator ''' - if not self.__done: - raise NotYetExhausted() - return self.compute_final_result() - - @abstractmethod - @docval({'name': 'data_chunk', 'type': DataChunk, 'doc': 'a chunk to process'}) - def process_data_chunk(self, **kwargs): - ''' This method should take in a DataChunk, - and process it. 
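For illustration, a minimal sketch of the tightened `set_data_io` signature above, assuming `container` is an existing, already-built `Container` with a `data` dataset field; `data_io_kwargs` must now be passed as a dict rather than through the removed `**kwargs`:

```python
from hdmf.backends.hdf5 import H5DataIO

# 'container' is an assumed Container instance with a 'data' field.
container.set_data_io(
    dataset_name='data',
    data_io_class=H5DataIO,
    data_io_kwargs=dict(compression='gzip', chunks=True),  # required dict, no loose kwargs
)
```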
- ''' - pass - - @abstractmethod - @docval(returns='the result of processing this stream') - def compute_final_result(self, **kwargs): - ''' Return the result of processing this stream - Should raise NotYetExhaused exception - ''' - pass - - -class NumSampleCounter(DataChunkProcessor): - - def __init__(self, **kwargs): - super().__init__(**kwargs) - self.__sample_count = 0 - - @docval({'name': 'data_chunk', 'type': DataChunk, 'doc': 'a chunk to process'}) - def process_data_chunk(self, **kwargs): - dc = getargs('data_chunk', kwargs) - self.__sample_count += len(dc) - - @docval(returns='the result of processing this stream') - def compute_final_result(self, **kwargs): - return self.__sample_count diff --git a/src/hdmf/spec/catalog.py b/src/hdmf/spec/catalog.py index f93f70abe..c40d2130c 100644 --- a/src/hdmf/spec/catalog.py +++ b/src/hdmf/spec/catalog.py @@ -56,7 +56,7 @@ def register_spec(self, **kwargs): self.__spec_source_files[type_name] = source_file @docval({'name': 'data_type', 'type': str, 'doc': 'the data_type to get the Spec for'}, - returns="the specification for writing the given object type to HDF5 ", rtype='Spec') + returns="the specification for writing the given object type to HDF5 ", rtype=BaseStorageSpec) def get_spec(self, **kwargs): ''' Get the Spec object for the given type @@ -129,6 +129,8 @@ def get_hierarchy(self, **kwargs): hierarchy = list() parent = data_type while parent is not None: + if parent in hierarchy: + raise ValueError(f"Circular reference detected in type hierarchy for {data_type}") hierarchy.append(parent) parent = self.__parent_types.get(parent) # store the computed hierarchy for data_type and all types in between it and diff --git a/src/hdmf/spec/namespace.py b/src/hdmf/spec/namespace.py index ae9af8b99..dc7d238d1 100644 --- a/src/hdmf/spec/namespace.py +++ b/src/hdmf/spec/namespace.py @@ -6,9 +6,10 @@ from copy import copy from datetime import datetime from warnings import warn +import graphlib from .catalog import SpecCatalog -from .spec import DatasetSpec, GroupSpec +from .spec import DatasetSpec, GroupSpec, BaseStorageSpec from ..utils import docval, getargs, popargs, get_docval, is_newer_version _namespace_args = [ @@ -248,8 +249,6 @@ def __init__(self, **kwargs): self.__included_specs = dict() self.__included_sources = dict() - self._loaded_specs = self.__loaded_specs - def __copy__(self): ret = NamespaceCatalog(self.__group_spec_cls, self.__dataset_spec_cls, @@ -384,7 +383,7 @@ def get_types(self, **kwargs): ret = tuple() return ret - def __load_spec_file(self, reader, spec_source, catalog, types_to_load=None, resolve=True): + def __load_spec_file(self, reader, spec_source, catalog, types_to_load): ret = self.__loaded_specs.get(spec_source) if ret is not None: raise ValueError("spec source '%s' already loaded" % spec_source) @@ -396,8 +395,6 @@ def __reg_spec(spec_cls, spec_dict): raise ValueError(msg) if types_to_load and dt_def not in types_to_load: return - if resolve: - self.__resolve_includes(spec_cls, spec_dict, catalog) spec_obj = spec_cls.build_spec(spec_dict) return catalog.auto_register(spec_obj, spec_source) @@ -426,25 +423,99 @@ def __convert_spec_cls_keys(self, parent_cls, spec_cls, spec_dict): if parent_cls.inc_key() in spec_dict: spec_dict[spec_cls.inc_key()] = spec_dict.pop(parent_cls.inc_key()) - def __resolve_includes(self, spec_cls, spec_dict, catalog): - """Replace data type inc strings with the spec definition so the new spec is built with included fields. 
- """ - dt_def = spec_dict.get(spec_cls.def_key()) - dt_inc = spec_dict.get(spec_cls.inc_key()) - if dt_inc is not None and dt_def is not None: - parent_spec = catalog.get_spec(dt_inc) - if parent_spec is None: - msg = "Cannot resolve include spec '%s' for type '%s'" % (dt_inc, dt_def) - raise ValueError(msg) - # replace the inc key value from string to the inc spec so that the spec can be updated with all of the - # attributes, datasets, groups, and links of the inc spec when spec_cls.build_spec(spec_dict) is called - spec_dict[spec_cls.inc_key()] = parent_spec - for subspec_dict in spec_dict.get('groups', list()): - self.__resolve_includes(self.__group_spec_cls, subspec_dict, catalog) - for subspec_dict in spec_dict.get('datasets', list()): - self.__resolve_includes(self.__dataset_spec_cls, subspec_dict, catalog) - - def __load_namespace(self, namespace, reader, resolve=True): + def __collect_nested_subspecs(self, spec: GroupSpec) -> list[BaseStorageSpec]: + """Collect all nested subspecs of the given group spec.""" + nested_subspecs = list(spec.groups + spec.datasets) + for subgroup_spec in spec.groups: + nested_subspecs.extend(self.__collect_nested_subspecs(subgroup_spec)) + return nested_subspecs + + def __get_spec_dependencies(self, spec: BaseStorageSpec) -> set[tuple[str, str]]: + """Get the set of edges representing the dependencies of the given spec.""" + edges = set() + if spec.data_type_inc is not None: + # The included spec should be resolved before this spec + edges.add((spec.data_type_def, spec.data_type_inc)) + if isinstance(spec, GroupSpec): + # For each nested subspec, the included specs of that nested subspec should be resolved before + # this spec + nested_subspecs = self.__collect_nested_subspecs(spec) + for subspec in nested_subspecs: + if subspec.data_type_inc is not None: + # TODO: cycles are not yet supported + # if spec.data_type_def == subspec.data_type_inc: + # # Allow the simple case of a "cycle" where A contains B, and B includes A + # # but do not add this edge to the graph because it makes a cycle. + # continue + edges.add((spec.data_type_def, subspec.data_type_inc)) + return edges + + def __resolve_local(self, namespace: SpecNamespace, spec: BaseStorageSpec) -> None: + if spec.data_type_inc is not None and not spec.inc_spec_resolved: + # NOTE: The included spec may have already been resolved into the current spec if the current spec + # was copied (included) from another spec. For example, if A has a subspec B that includes C, and + # D includes A, then when resolving D, first, already resolved subspec B is copied from A to D, and + # then resolve_local may be called on B again + included_spec = self.get_spec(namespace.name, spec.data_type_inc) + + # NOTE: In most cases, because we are resolving specs in topological order, the included spec + # should have already been resolved. However, in the case of the "cycle" described above where + # A contains B, and B includes A, then the included spec will not have been resolved yet. + + # Resolve the included spec into this spec + spec.resolve_inc_spec(included_spec, namespace) + + if isinstance(spec, GroupSpec): + # Recursively resolve all subspecs + nested_subspecs = self.__collect_nested_subspecs(spec) + for subspec in nested_subspecs: + self.__resolve_local(namespace, subspec) + + # Mark this spec as resolved if the included spec has been resolved and all subspecs have been resolved. + # This is not necessary / not used anywhere, but may be useful for debugging. 
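The edges returned by `__get_spec_dependencies` drive a standard-library `graphlib` topological sort (see `__resolve_namespace_specs` below). A self-contained sketch of that ordering, using the A/B/C/D example from the comments:

```python
import graphlib

# If A includes B, A has a subspec that includes C, and B includes D,
# the edges are A -> B, A -> C, B -> D, as in __get_spec_dependencies.
ts = graphlib.TopologicalSorter()
for node, dep in [('A', 'B'), ('A', 'C'), ('B', 'D')]:
    ts.add(node, dep)  # 'dep' must be resolved before 'node'

# Dependencies always precede dependents; one valid order is ['C', 'D', 'B', 'A'].
print(list(ts.static_order()))
```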
+ spec.resolved = True + + def resolve_all_specs(self) -> None: + """Resolve all specs in all namespaces in the catalog.""" + for namespace in self.__namespaces.values(): + self.__resolve_namespace_specs(namespace) + + def __resolve_namespace_specs(self, namespace: SpecNamespace) -> None: + """Resolve all specs in the catalog.""" + # Build a graph of all type dependencies + # For example, if A includes B, A has subspec that includes C, and B includes D, then A -> B, A -> C, B -> D + ts = graphlib.TopologicalSorter() + specs_without_deps = set() # track specs that have no dependencies + for type_name in namespace.catalog.get_registered_types(): + spec = namespace.catalog.get_spec(type_name) + edges = self.__get_spec_dependencies(spec) + if not edges: + specs_without_deps.add(type_name) + else: + for e in edges: + ts.add(*e) + + # Check for cycles and get static topological order + # For example, in the ABCD example above, the static order is D, B, C, A + try: + static_order = list(ts.static_order()) + except graphlib.CycleError: # pragma: no cover + # This should not happen because cycles will cause an error during spec object creation + raise ValueError("Cycle detected in specification dependencies. Cannot resolve specifications.") + + # In rare cases, a namespace may have specs that have no dependencies and are not included by any other + # spec, so they will not be in the topological sort. Add them to the front of the order. + for s in specs_without_deps: + if s not in static_order: + static_order.insert(0, s) + + # Resolve specs in topological order + for type_name in static_order: + spec = self.get_spec(namespace.name, type_name) + self.__resolve_local(namespace, spec) + + + def __load_namespace(self, namespace, reader): ns_name = namespace['name'] if ns_name in self.__namespaces: # pragma: no cover raise KeyError("namespace '%s' already exists" % ns_name) @@ -458,7 +529,7 @@ def __load_namespace(self, namespace, reader, resolve=True): types_to_load = set(types_to_load) if 'source' in s: # read specs from file - self.__load_spec_file(reader, s['source'], catalog, types_to_load=types_to_load, resolve=resolve) + self.__load_spec_file(reader, s['source'], catalog, types_to_load) self.__included_sources.setdefault(ns_name, list()).append(s['source']) elif 'namespace' in s: # load specs from namespace @@ -484,6 +555,7 @@ def __load_namespace(self, namespace, reader, resolve=True): self._check_namespace_conflicts(extension_ns_name=ns_name, extension_ns_source=s.get('source'), catalog=catalog) + return included_types def __register_type(self, ndt, inc_ns, catalog, registered_types): @@ -527,7 +599,9 @@ def __register_dependent_types_helper(spec, inc_ns, catalog, registered_types): @docval({'name': 'namespace_path', 'type': str, 'doc': 'the path to the file containing the namespaces(s) to load'}, {'name': 'resolve', 'type': bool, - 'doc': 'whether or not to include objects from included/parent spec objects', 'default': True}, + 'doc': ('whether or not to include objects from included/parent spec objects. 
In practice, this is ' + 'False when generating documentation where it is useful to show the unresolved specs'), + 'default': True}, {'name': 'reader', 'type': (SpecReader, dict), 'doc': 'the SpecReader or dict of SpecReader classes to use for reading specifications', @@ -582,9 +656,12 @@ def load_namespaces(self, **kwargs): # now load specs into namespace for ns in to_load: - ret[ns['name']] = self.__load_namespace(ns, r, resolve=resolve) + ret[ns['name']] = self.__load_namespace(ns, r) self.__included_specs[ns_path_key] = ret + if resolve: + self.resolve_all_specs() + # warn if there are any ignored namespaces if ignored_namespaces: self.warn_for_ignored_namespaces(ignored_namespaces) diff --git a/src/hdmf/spec/spec.py b/src/hdmf/spec/spec.py index 665be4a34..20ced242b 100644 --- a/src/hdmf/spec/spec.py +++ b/src/hdmf/spec/spec.py @@ -1,10 +1,15 @@ -import re from abc import ABCMeta from collections import OrderedDict +from copy import copy +from itertools import chain +from typing import Union, TYPE_CHECKING from warnings import warn from ..utils import docval, getargs, popargs, get_docval +if TYPE_CHECKING: + from .namespace import SpecNamespace # noqa: F401 + NAME_WILDCARD = None # this is no longer used, but kept for backward compatibility ZERO_OR_ONE = '?' ZERO_OR_MANY = '*' @@ -76,6 +81,151 @@ def check_dtype(dtype): % (dtype, str(DtypeHelper.valid_primary_dtypes))) return dtype + # all keys and values should be keys in primary_dtype_synonyms + additional_allowed = { + 'float': ['double'], + 'int8': ['short', 'int', 'long'], + 'short': ['int', 'long'], + 'int': ['long'], + 'uint8': ['uint16', 'uint32', 'uint64'], + 'uint16': ['uint32', 'uint64'], + 'uint32': ['uint64'], + 'utf': ['ascii'] + } + + # if the spec dtype is a key in __allowable, then all types in __allowable[key] are valid + allowable = dict() + for dt, dt_syn in primary_dtype_synonyms.items(): + allow = copy(dt_syn) + if dt in additional_allowed: + for addl in additional_allowed[dt]: + allow.extend(primary_dtype_synonyms[addl]) + for syn in dt_syn: + allowable[syn] = allow + allowable['numeric'].extend(set(chain.from_iterable(v for k, v in allowable.items() if 'int' in k or 'float' in k))) + + @staticmethod + def is_allowed_dtype(new: str, orig: str): + if orig not in DtypeHelper.allowable: + raise ValueError(f"Unknown dtype '{orig}'") + return new in DtypeHelper.allowable[orig] + + +def _is_sub_dtype(new: Union[str, "RefSpec"], orig: Union[str, "RefSpec"]): + if isinstance(orig, RefSpec) != isinstance(new, RefSpec): + return False + + if isinstance(orig, RefSpec): # both are RefSpec + # check ref target is a subtype of the original ref target + # TODO: implement subtype check for RefSpec. might need to resolve RefSpec target type to a spec first + # return orig == new + return True + else: + return DtypeHelper.is_allowed_dtype(new, orig) + + +def _resolve_inc_spec_dtype( + spec: Union['AttributeSpec', 'DatasetSpec'], + inc_spec: Union['AttributeSpec', 'DatasetSpec'] + ): + if inc_spec.dtype is None: + # nothing to include/check + return + + if spec.dtype is None: + # no dtype defined, just use the included spec dtype + spec['dtype'] = inc_spec.dtype + return + + # both inc_spec and spec have dtype defined + if not isinstance(spec.dtype, list): + # spec is a simple dtype. 
make sure it is a subtype of the included spec dtype + if isinstance(inc_spec.dtype, list): + msg = 'Cannot extend compound data type to simple data type' + raise ValueError(msg) + if not _is_sub_dtype(spec.dtype, inc_spec.dtype): + msg = f'Cannot extend {str(inc_spec.dtype)} to {str(spec.dtype)}' + raise ValueError(msg) + return + + # spec is a compound dtype. make sure it is a subtype of the included spec dtype + if not isinstance(inc_spec.dtype, list): + msg = 'Cannot extend simple data type to compound data type' + raise ValueError(msg) + inc_spec_order = OrderedDict() + for dt in inc_spec.dtype: + inc_spec_order[dt['name']] = dt + for dt in spec.dtype: + name = dt['name'] + if name in inc_spec_order: + # verify that the extension has supplied + # a valid subtyping of existing type + inc_sub_dtype = inc_spec_order[name].dtype + new_sub_dtype = dt.dtype + if not _is_sub_dtype(new_sub_dtype, inc_sub_dtype): + msg = f'Cannot extend {str(inc_sub_dtype)} to {str(new_sub_dtype)}' + raise ValueError(msg) + # TODO do we want to disallow adding columns? (if name not in inc_spec_order) + # add/replace the new spec + inc_spec_order[name] = dt + # keep the order of the included spec + spec['dtype'] = list(inc_spec_order.values()) + +def _resolve_inc_spec_shape( + spec: Union['AttributeSpec', 'DatasetSpec'], + inc_spec: Union['AttributeSpec', 'DatasetSpec'] + ): + if inc_spec.shape is None: + # nothing to include/check + return + + if spec.shape is None: + # no shape defined, just use the included spec shape + spec['shape'] = inc_spec.shape + return + + # both inc_spec and self have shape defined + if len(spec.shape) > len(inc_spec.shape): + msg = f"Cannot extend shape {str(inc_spec.shape)} to {str(spec.shape)}" + raise ValueError(msg) + # TODO: make sure the new shape is a subset of the included shape + +def _resolve_inc_spec_dims( + spec: Union['AttributeSpec', 'DatasetSpec'], + inc_spec: Union['AttributeSpec', 'DatasetSpec'] + ): + # NOTE: In theory, the shape check above and shape & dims consistency check will catch all issues with dims + # before this function is called + if inc_spec.dims is None: + # nothing to include/check + return + + if spec.dims is None: + # no dims defined, just use the included spec dims + spec['dims'] = inc_spec.dims + return + + # both inc_spec and spec have dims defined + if len(spec.dims) > len(inc_spec.dims): # pragma: no cover + msg = f"Cannot extend dims {str(inc_spec.dims)} to {str(spec.dims)}" + raise ValueError(msg) + # TODO: make sure the new dims is a subset of the included dims + + +def _resolve_inc_spec_value( + spec: Union['AttributeSpec', 'DatasetSpec'], + inc_spec: Union['AttributeSpec', 'DatasetSpec'] + ): + # handle both default_value and value + if spec.default_value is None and inc_spec.default_value is not None: + spec['default_value'] = inc_spec.default_value + if spec.value is None and inc_spec.value is not None: + spec['value'] = inc_spec.value + + # cannot specify both value and default_value. use value if both are specified + if spec.value is not None and spec.default_value is not None: + spec['default_value'] = None + class ConstructableDict(dict, metaclass=ABCMeta): @classmethod @@ -235,9 +385,11 @@ def __init__(self, **kwargs): self['required'] = False if shape is not None: self['shape'] = shape + if dims is None: # set dummy dims "dim_0", "dim_1", ... 
if shape is specified but dims is not + self['dims'] = tuple(['dim_%d' % i for i in range(len(shape))]) if dims is not None: self['dims'] = dims - if 'shape' not in self: + if 'shape' not in self: # set dummy shape (None, None, ...) if dims is specified but shape is not self['shape'] = tuple([None] * len(dims)) if self.shape is not None and self.dims is not None: if len(self['dims']) != len(self['shape']): @@ -293,8 +445,7 @@ def build_const_args(cls, spec_dict): {'name': 'linkable', 'type': bool, 'doc': 'whether or not this group can be linked', 'default': True}, {'name': 'quantity', 'type': (str, int), 'doc': 'the required number of allowed instance', 'default': 1}, {'name': 'data_type_def', 'type': str, 'doc': 'the data type this specification represents', 'default': None}, - {'name': 'data_type_inc', 'type': (str, 'BaseStorageSpec'), - 'doc': 'the data type this specification extends', 'default': None}, + {'name': 'data_type_inc', 'type': str, 'doc': 'the data type this specification extends', 'default': None}, ] @@ -335,19 +486,16 @@ def __init__(self, **kwargs): self['quantity'] = quantity if not linkable: self['linkable'] = False - resolve = False + if data_type_inc is not None: - if isinstance(data_type_inc, BaseStorageSpec): - self[self.inc_key()] = data_type_inc.data_type_def + if data_type_def == data_type_inc: + msg = f"data_type_inc and data_type_def cannot be the same: {data_type_inc}. Ignoring data_type_inc." + warn(msg) else: self[self.inc_key()] = data_type_inc if data_type_def is not None: self.pop('required', None) self[self.def_key()] = data_type_def - # resolve inherited and overridden fields only if data_type_inc is a spec - # NOTE: this does not happen when loading specs from a file - if data_type_inc is not None and isinstance(data_type_inc, BaseStorageSpec): - resolve = True # self.attributes / self['attributes']: tuple/list of attributes # self.__attributes: dict of all attributes, including attributes from parent (data_type_inc) types @@ -359,36 +507,56 @@ def __init__(self, **kwargs): self.set_attribute(attribute) self.__new_attributes = set(self.__attributes.keys()) self.__overridden_attributes = set() + self.__inc_spec_resolved = False self.__resolved = False - if resolve: - self.resolve_spec(data_type_inc) @property def default_name(self): '''The default name for this spec''' return self.get('default_name', None) + @property + def inc_spec_resolved(self): + return self.__inc_spec_resolved + @property def resolved(self): return self.__resolved + @resolved.setter + def resolved(self, val: bool): + if not isinstance(val, bool): + raise ValueError("resolved must be a boolean") + self.__resolved = val + @property def required(self): ''' Whether or not the this spec represents a required field ''' return self.quantity not in (ZERO_OR_ONE, ZERO_OR_MANY) - @docval({'name': 'inc_spec', 'type': 'hdmf.spec.spec.BaseStorageSpec', - 'doc': 'the data type this specification represents'}) - def resolve_spec(self, **kwargs): - """Add attributes from the inc_spec to this spec and track which attributes are new and overridden.""" - inc_spec = getargs('inc_spec', kwargs) - for attribute in inc_spec.attributes: - self.__new_attributes.discard(attribute.name) - if attribute.name in self.__attributes: - self.__overridden_attributes.add(attribute.name) + def resolve_inc_spec(self, inc_spec: 'BaseStorageSpec', namespace: 'SpecNamespace'): + """Add attributes from the inc_spec to this spec and track which attributes are new and overridden. 
+ + Parameters + ---------- + inc_spec : BaseStorageSpec + The BaseStorageSpec to inherit from + namespace : SpecNamespace + The namespace containing the specs - this is unused here + """ + for inc_spec_attribute in inc_spec.attributes: + self.__new_attributes.discard(inc_spec_attribute.name) + if inc_spec_attribute.name in self.__attributes: + self.__overridden_attributes.add(inc_spec_attribute.name) + new_attribute = self.__attributes[inc_spec_attribute.name] + _resolve_inc_spec_dtype(new_attribute, inc_spec_attribute) + _resolve_inc_spec_shape(new_attribute, inc_spec_attribute) + _resolve_inc_spec_dims(new_attribute, inc_spec_attribute) + _resolve_inc_spec_value(new_attribute, inc_spec_attribute) else: - self.set_attribute(attribute) - self.__resolved = True + # TODO: would be nice to have inherited attributes come before new attributes in the attributes list + self.set_attribute(inc_spec_attribute) + self.__inc_spec_resolved = True @docval({'name': 'spec', 'type': Spec, 'doc': 'the specification to check'}) def is_inherited_spec(self, **kwargs): @@ -443,10 +611,12 @@ def is_many(self): @classmethod def get_data_type_spec(cls, data_type_def): # unused + warn("get_data_type_spec is deprecated and will be removed in HDMF 6.0.", DeprecationWarning) return AttributeSpec(cls.type_key(), 'the data type of this object', 'text', value=data_type_def) @classmethod def get_namespace_spec(cls): # unused + warn("get_namespace_spec is deprecated and will be removed in HDMF 6.0.", DeprecationWarning) return AttributeSpec('namespace', 'the namespace for the data type of this object', 'text', required=False) @property @@ -517,6 +687,9 @@ def quantity(self): @docval(*_attr_args) def add_attribute(self, **kwargs): ''' Add an attribute to this specification ''' + warn("BaseStorageSpec.add_attribute is deprecated and will be removed in HDMF 6.0. " + "Use BaseStorageSpec.set_attribute instead.", + DeprecationWarning, stacklevel=2) spec = AttributeSpec(**kwargs) self.set_attribute(spec) return spec @@ -651,7 +824,7 @@ def build_const_args(cls, spec_dict): {'name': 'default_value', 'type': None, 'doc': 'a default value for this dataset', 'default': None}, {'name': 'value', 'type': None, 'doc': 'a fixed value for this dataset', 'default': None}, {'name': 'data_type_def', 'type': str, 'doc': 'the data type this specification represents', 'default': None}, - {'name': 'data_type_inc', 'type': (str, 'DatasetSpec'), + {'name': 'data_type_inc', 'type': str, 'doc': 'the data type this specification extends', 'default': None}, ] @@ -668,9 +841,11 @@ def __init__(self, **kwargs): default_value, value = popargs('default_value', 'value', kwargs) if shape is not None: self['shape'] = shape + if dims is None: # set dummy dims "dim_0", "dim_1", ... if shape is specified but dims is not + self['dims'] = tuple(['dim_%d' % i for i in range(len(shape))]) if dims is not None: self['dims'] = dims - if 'shape' not in self: + if 'shape' not in self: # set dummy shape (None, None, ...) 
if dims is specified but shape is not self['shape'] = tuple([None] * len(dims)) if self.shape is not None and self.dims is not None: if len(self['dims']) != len(self['shape']): @@ -688,6 +863,8 @@ def __init__(self, **kwargs): super().__init__(doc, **kwargs) if default_value is not None: self['default_value'] = default_value + if value is not None: + raise ValueError("cannot specify 'value' and 'default_value'") if value is not None: self['value'] = value if self.name is not None: @@ -696,56 +873,24 @@ def __init__(self, **kwargs): raise ValueError("quantity %s invalid for spec with fixed name. Valid values are: %s" % (self.quantity, str(valid_quant_vals))) - @classmethod - def __get_prec_level(cls, dtype): - m = re.search('[0-9]+', dtype) - if m is not None: - prec = int(m.group()) - else: - prec = 32 - return (dtype[0], prec) - @classmethod - def __is_sub_dtype(cls, orig, new): - if isinstance(orig, RefSpec): - if not isinstance(new, RefSpec): - return False - return orig == new - else: - orig_prec = cls.__get_prec_level(orig) - new_prec = cls.__get_prec_level(new) - if orig_prec[0] != new_prec[0]: - # cannot extend int to float and vice-versa - return False - return new_prec >= orig_prec - - @docval({'name': 'inc_spec', 'type': 'hdmf.spec.spec.DatasetSpec', - 'doc': 'the data type this specification represents'}) - def resolve_spec(self, **kwargs): - inc_spec = getargs('inc_spec', kwargs) - if isinstance(self.dtype, list): - # merge the new types - inc_dtype = inc_spec.dtype - if isinstance(inc_dtype, str): - msg = 'Cannot extend simple data type to compound data type' - raise ValueError(msg) - order = OrderedDict() - if inc_dtype is not None: - for dt in inc_dtype: - order[dt['name']] = dt - for dt in self.dtype: - name = dt['name'] - if name in order: - # verify that the extension has supplied - # a valid subtyping of existing type - orig = order[name].dtype - new = dt.dtype - if not self.__is_sub_dtype(orig, new): - msg = 'Cannot extend %s to %s' % (str(orig), str(new)) - raise ValueError(msg) - order[name] = dt - self['dtype'] = list(order.values()) - super().resolve_spec(inc_spec) + def resolve_inc_spec(self, inc_spec: 'DatasetSpec', namespace: 'SpecNamespace'): + """Add fields and attributes from the inc_spec to this spec. 
+ + Parameters + ---------- + inc_spec : DatasetSpec + The DatasetSpec to inherit from + namespace : SpecNamespace + The namespace containing the specs - this is unused here + """ + if not isinstance(inc_spec, DatasetSpec): # TODO: replace with Pydantic type checking + raise TypeError("Cannot resolve included spec: expected DatasetSpec, got %s" % type(inc_spec)) + _resolve_inc_spec_dtype(self, inc_spec) + _resolve_inc_spec_shape(self, inc_spec) + _resolve_inc_spec_dims(self, inc_spec) + _resolve_inc_spec_value(self, inc_spec) + super().resolve_inc_spec(inc_spec, namespace) @property def dims(self): @@ -869,7 +1014,7 @@ def required(self): 'default': 1, }, {'name': 'data_type_def', 'type': str, 'doc': 'the data type this specification represents', 'default': None}, - {'name': 'data_type_inc', 'type': (str, 'GroupSpec'), + {'name': 'data_type_inc', 'type': str, 'doc': 'the data type this specification data_type_inc', 'default': None}, ] @@ -902,9 +1047,22 @@ def __init__(self, **kwargs): self.__overridden_groups = set() super().__init__(doc, **kwargs) - @docval({'name': 'inc_spec', 'type': 'GroupSpec', 'doc': 'the data type this specification represents'}) - def resolve_spec(self, **kwargs): - inc_spec = getargs('inc_spec', kwargs) + def resolve_inc_spec(self, inc_spec: 'GroupSpec', namespace: 'SpecNamespace'): # noqa: C901 + """Add groups, datasets, links, and attributes from the inc_spec to this spec and track which ones are new and + overridden. + + Note that data_types and target_types are not added to this spec, but are used to determine if any datasets or + links need to be added to this spec. + + Parameters + ---------- + inc_spec : GroupSpec + The GroupSpec to inherit from + namespace : SpecNamespace + The namespace containing the specs + """ + if not isinstance(inc_spec, GroupSpec): # TODO: replace with Pydantic type checking + raise TypeError("Cannot resolve included spec: expected GroupSpec, got %s" % type(inc_spec)) data_types = list() target_types = list() # resolve inherited datasets @@ -914,10 +1072,24 @@ def resolve_spec(self, **kwargs): continue self.__new_datasets.discard(dataset.name) if dataset.name in self.__datasets: + # check compatibility between data_type_inc of the existing dataset spec and the included dataset spec + if ( + dataset.data_type_inc != self.__datasets[dataset.name].data_type_inc and + (dataset.data_type_inc is None or self.__datasets[dataset.name].data_type_inc is None or + dataset.data_type_inc not in namespace.get_hierarchy(self.__datasets[dataset.name].data_type_inc) + ) + ): + msg = ("Cannot resolve included dataset spec '%s' with data_type_inc '%s' because a dataset " + "spec with the same name already exists with data_type_inc '%s', and data type '%s' " + "is not a child type of data type '%s'." 
+ % (dataset.name, dataset.data_type_inc, self.__datasets[dataset.name].data_type_inc, + self.__datasets[dataset.name].data_type_inc, dataset.data_type_inc)) + raise ValueError(msg) + # if the included dataset spec was added earlier during resolution, don't add it again # but resolve the spec using the included dataset spec - the included spec may contain # properties not specified in the version of this spec added earlier during resolution - self.__datasets[dataset.name].resolve_spec(dataset) + self.__datasets[dataset.name].resolve_inc_spec(dataset, namespace) self.__overridden_datasets.add(dataset.name) else: self.set_dataset(dataset) @@ -928,7 +1100,24 @@ def resolve_spec(self, **kwargs): continue self.__new_groups.discard(group.name) if group.name in self.__groups: - self.__groups[group.name].resolve_spec(group) + # check compatibility between data_type_inc of the existing group spec and the included group spec + if ( + group.data_type_inc != self.__groups[group.name].data_type_inc and + (group.data_type_inc is None or self.__groups[group.name].data_type_inc is None or + group.data_type_inc not in namespace.get_hierarchy(self.__groups[group.name].data_type_inc) + ) + ): + msg = ("Cannot resolve included group spec '%s' with data_type_inc '%s' because a group " + "spec with the same name already exists with data_type_inc '%s', and data type '%s' " + "is not a child type of data type '%s'." + % (group.name, group.data_type_inc, self.__groups[group.name].data_type_inc, + self.__groups[group.name].data_type_inc, group.data_type_inc)) + raise ValueError(msg) + + # if the included group spec was added earlier during resolution, don't add it again + # but resolve the spec using the included group spec - the included spec may contain + # properties not specified in the version of this spec added earlier during resolution + self.__groups[group.name].resolve_inc_spec(group, namespace) self.__overridden_groups.add(group.name) else: self.set_group(group) @@ -939,6 +1128,7 @@ def resolve_spec(self, **kwargs): continue self.__new_links.discard(link.name) if link.name in self.__links: + # TODO: check compatibility between target_type of the existing link spec and the included link spec self.__overridden_links.add(link.name) else: self.set_link(link) @@ -965,7 +1155,7 @@ def resolve_spec(self, **kwargs): (isinstance(existing_dt_spec, list) or existing_dt_spec.name is not None) and link_spec.name is None): self.set_link(link_spec) - super().resolve_spec(inc_spec) + super().resolve_inc_spec(inc_spec, namespace) @docval({'name': 'name', 'type': str, 'doc': 'the name of the dataset'}, raises="ValueError, if 'name' is not part of this spec") @@ -1273,6 +1463,8 @@ def links(self): @docval(*_group_args) def add_group(self, **kwargs): ''' Add a new specification for a subgroup to this group specification ''' + warn("GroupSpec.add_group is deprecated and will be removed in HDMF 6.0. Use GroupSpec.set_group instead.", + DeprecationWarning, stacklevel=2) spec = self.__class__(**kwargs) self.set_group(spec) return spec @@ -1306,6 +1498,8 @@ def get_group(self, **kwargs): @docval(*_dataset_args) def add_dataset(self, **kwargs): ''' Add a new specification for a dataset to this group specification ''' + warn("GroupSpec.add_dataset is deprecated and will be removed in HDMF 6.0. 
Use GroupSpec.set_dataset instead.", + DeprecationWarning, stacklevel=2) spec = self.dataset_spec_cls()(**kwargs) self.set_dataset(spec) return spec @@ -1339,6 +1533,8 @@ def get_dataset(self, **kwargs): @docval(*_link_args) def add_link(self, **kwargs): ''' Add a new specification for a link to this group specification ''' + warn("GroupSpec.add_link is deprecated and will be removed in HDMF 6.0. Use GroupSpec.set_link instead.", + DeprecationWarning, stacklevel=2) spec = self.link_spec_cls()(**kwargs) self.set_link(spec) return spec diff --git a/src/hdmf/validate/validator.py b/src/hdmf/validate/validator.py index 1a0be981d..9f5bb927d 100644 --- a/src/hdmf/validate/validator.py +++ b/src/hdmf/validate/validator.py @@ -1,9 +1,7 @@ import re from abc import ABCMeta, abstractmethod -from copy import copy from itertools import chain from collections import defaultdict, OrderedDict - import numpy as np from .errors import Error, DtypeError, MissingError, MissingDataType, ShapeError, IllegalLinkError, IncorrectDataType @@ -17,29 +15,7 @@ from ..query import ReferenceResolver -__synonyms = DtypeHelper.primary_dtype_synonyms - -__additional = { - 'float': ['double'], - 'int8': ['short', 'int', 'long'], - 'short': ['int', 'long'], - 'int': ['long'], - 'uint8': ['uint16', 'uint32', 'uint64'], - 'uint16': ['uint32', 'uint64'], - 'uint32': ['uint64'], - 'utf': ['ascii'] -} - -# if the spec dtype is a key in __allowable, then all types in __allowable[key] are valid -__allowable = dict() -for dt, dt_syn in __synonyms.items(): - allow = copy(dt_syn) - if dt in __additional: - for addl in __additional[dt]: - allow.extend(__synonyms[addl]) - for syn in dt_syn: - __allowable[syn] = allow -__allowable['numeric'] = set(chain.from_iterable(__allowable[k] for k in __allowable if 'int' in k or 'float' in k)) +__allowable = DtypeHelper.allowable def check_type(expected, received, string_format=None): diff --git a/tests/unit/build_tests/test_classgenerator.py b/tests/unit/build_tests/test_classgenerator.py index 16136a8da..0a39a0f7d 100644 --- a/tests/unit/build_tests/test_classgenerator.py +++ b/tests/unit/build_tests/test_classgenerator.py @@ -135,10 +135,12 @@ def test_multi_container_post_init(self): ], attributes=[AttributeSpec(name='attr1', doc='a string attribute', dtype='text')]) - multi_spec = GroupSpec(doc='A test extension that contains a multi', - data_type_def='Multi', - groups=[GroupSpec(data_type_inc=bar_spec, doc='test multi', quantity='*')], - attributes=[AttributeSpec(name='attr1', doc='a float attribute', dtype='float')]) + multi_spec = GroupSpec( + doc='A test extension that contains a multi', + data_type_def='Multi', + groups=[GroupSpec(data_type_inc='Bar', doc='test multi', quantity='*')], + attributes=[AttributeSpec(name='attr1', doc='a float attribute', dtype='float')] + ) spec_catalog = SpecCatalog() spec_catalog.register_spec(bar_spec, 'test.yaml') @@ -152,6 +154,7 @@ def test_multi_container_post_init(self): ) namespace_catalog = NamespaceCatalog() namespace_catalog.add_namespace(CORE_NAMESPACE, namespace) + namespace_catalog.resolve_all_specs() type_map = TypeMap(namespace_catalog) Multi = type_map.get_dt_container_cls('Multi', CORE_NAMESPACE, self.post_init) @@ -180,11 +183,12 @@ def setUp(self): def test_dynamic_container_creation(self): baz_spec = GroupSpec('A test extension with no Container class', - data_type_def='Baz', data_type_inc=self.bar_spec, + data_type_def='Baz', data_type_inc='Bar', attributes=[AttributeSpec('attr3', 'a float attribute', 'float'), AttributeSpec('attr4', 
'another float attribute', 'float'), AttributeSpec('attr_array', 'an array attribute', 'text', shape=(None,)),]) self.spec_catalog.register_spec(baz_spec, 'extension.yaml') + self.type_map.namespace_catalog.resolve_all_specs() cls = self.type_map.get_dt_container_cls('Baz', CORE_NAMESPACE) expected_args = {'name', 'data', 'attr1', 'attr2', 'attr3', 'attr4', 'attr_array', 'skip_post_init'} received_args = set() @@ -209,10 +213,11 @@ def test_dynamic_container_default_name(self): def test_dynamic_container_creation_defaults(self): baz_spec = GroupSpec('A test extension with no Container class', - data_type_def='Baz', data_type_inc=self.bar_spec, + data_type_def='Baz', data_type_inc='Bar', attributes=[AttributeSpec('attr3', 'a float attribute', 'float'), AttributeSpec('attr4', 'another float attribute', 'float')]) self.spec_catalog.register_spec(baz_spec, 'extension.yaml') + self.type_map.namespace_catalog.resolve_all_specs() cls = self.type_map.get_dt_container_cls('Baz', CORE_NAMESPACE) expected_args = {'name', 'data', 'attr1', 'attr2', 'attr3', 'attr4', 'attr_array', 'foo', 'skip_post_init'} received_args = set(map(lambda x: x['name'], get_docval(cls.__init__))) @@ -222,10 +227,11 @@ def test_dynamic_container_creation_defaults(self): def test_dynamic_container_constructor(self): baz_spec = GroupSpec('A test extension with no Container class', - data_type_def='Baz', data_type_inc=self.bar_spec, + data_type_def='Baz', data_type_inc='Bar', attributes=[AttributeSpec('attr3', 'a float attribute', 'float'), AttributeSpec('attr4', 'another float attribute', 'float')]) self.spec_catalog.register_spec(baz_spec, 'extension.yaml') + self.type_map.namespace_catalog.resolve_all_specs() cls = self.type_map.get_dt_container_cls('Baz', CORE_NAMESPACE) # TODO: test that constructor works! 
inst = cls(name='My Baz', data=[1, 2, 3, 4], attr1='string attribute', attr2=1000, attr3=98.6, attr4=1.0) @@ -239,11 +245,12 @@ def test_dynamic_container_constructor(self): def test_dynamic_container_constructor_name(self): # name is specified in spec and cannot be changed baz_spec = GroupSpec('A test extension with no Container class', - data_type_def='Baz', data_type_inc=self.bar_spec, + data_type_def='Baz', data_type_inc='Bar', name='A fixed name', attributes=[AttributeSpec('attr3', 'a float attribute', 'float'), AttributeSpec('attr4', 'another float attribute', 'float')]) self.spec_catalog.register_spec(baz_spec, 'extension.yaml') + self.type_map.namespace_catalog.resolve_all_specs() cls = self.type_map.get_dt_container_cls('Baz', CORE_NAMESPACE) with self.assertRaises(TypeError): @@ -261,12 +268,13 @@ def test_dynamic_container_constructor_name_default_name(self): # if both name and default_name are specified, name should be used with self.assertWarns(Warning): baz_spec = GroupSpec('A test extension with no Container class', - data_type_def='Baz', data_type_inc=self.bar_spec, + data_type_def='Baz', data_type_inc='Bar', name='A fixed name', default_name='A default name', attributes=[AttributeSpec('attr3', 'a float attribute', 'float'), AttributeSpec('attr4', 'another float attribute', 'float')]) self.spec_catalog.register_spec(baz_spec, 'extension.yaml') + self.type_map.namespace_catalog.resolve_all_specs() cls = self.type_map.get_dt_container_cls('Baz', CORE_NAMESPACE) inst = cls(data=[1, 2, 3, 4], attr1='string attribute', attr2=1000, attr3=98.6, attr4=1.0) @@ -274,17 +282,18 @@ def test_dynamic_container_constructor_name_default_name(self): def test_dynamic_container_composition(self): baz_spec2 = GroupSpec('A composition inside', data_type_def='Baz2', - data_type_inc=self.bar_spec, + data_type_inc='Bar', attributes=[ AttributeSpec('attr3', 'a float attribute', 'float'), AttributeSpec('attr4', 'another float attribute', 'float')]) - baz_spec1 = GroupSpec('A composition test outside', data_type_def='Baz1', data_type_inc=self.bar_spec, + baz_spec1 = GroupSpec('A composition test outside', data_type_def='Baz1', data_type_inc='Bar', attributes=[AttributeSpec('attr3', 'a float attribute', 'float'), AttributeSpec('attr4', 'another float attribute', 'float')], groups=[GroupSpec('A composition inside', data_type_inc='Baz2')]) self.spec_catalog.register_spec(baz_spec1, 'extension.yaml') self.spec_catalog.register_spec(baz_spec2, 'extension.yaml') + self.type_map.namespace_catalog.resolve_all_specs() Baz2 = self.type_map.get_dt_container_cls('Baz2', CORE_NAMESPACE) Baz1 = self.type_map.get_dt_container_cls('Baz1', CORE_NAMESPACE) Baz1(name='My Baz', data=[1, 2, 3, 4], attr1='string attribute', attr2=1000, attr3=98.6, attr4=1.0, @@ -299,17 +308,18 @@ def test_dynamic_container_composition(self): def test_dynamic_container_composition_reverse_order(self): baz_spec2 = GroupSpec('A composition inside', data_type_def='Baz2', - data_type_inc=self.bar_spec, + data_type_inc='Bar', attributes=[ AttributeSpec('attr3', 'a float attribute', 'float'), AttributeSpec('attr4', 'another float attribute', 'float')]) - baz_spec1 = GroupSpec('A composition test outside', data_type_def='Baz1', data_type_inc=self.bar_spec, + baz_spec1 = GroupSpec('A composition test outside', data_type_def='Baz1', data_type_inc='Bar', attributes=[AttributeSpec('attr3', 'a float attribute', 'float'), AttributeSpec('attr4', 'another float attribute', 'float')], groups=[GroupSpec('A composition inside', data_type_inc='Baz2')]) 
self.spec_catalog.register_spec(baz_spec1, 'extension.yaml') self.spec_catalog.register_spec(baz_spec2, 'extension.yaml') + self.type_map.namespace_catalog.resolve_all_specs() Baz1 = self.type_map.get_dt_container_cls('Baz1', CORE_NAMESPACE) Baz2 = self.type_map.get_dt_container_cls('Baz2', CORE_NAMESPACE) Baz1(name='My Baz', data=[1, 2, 3, 4], attr1='string attribute', attr2=1000, attr3=98.6, attr4=1.0, @@ -323,21 +333,23 @@ def test_dynamic_container_composition_reverse_order(self): attr2=1000, attr3=98.6, attr4=1.0, baz2=bar) def test_dynamic_container_composition_missing_type(self): - baz_spec1 = GroupSpec('A composition test outside', data_type_def='Baz1', data_type_inc=self.bar_spec, + baz_spec1 = GroupSpec('A composition test outside', data_type_def='Baz1', data_type_inc='Bar', attributes=[AttributeSpec('attr3', 'a float attribute', 'float'), AttributeSpec('attr4', 'another float attribute', 'float')], groups=[GroupSpec('A composition inside', data_type_inc='Baz2')]) self.spec_catalog.register_spec(baz_spec1, 'extension.yaml') + # this error will be raised during load_namespaces when resolving all specs msg = "No specification for 'Baz2' in namespace 'test_core'" with self.assertRaisesWith(ValueError, msg): - self.type_map.get_dt_container_cls('Baz1', CORE_NAMESPACE) + self.type_map.namespace_catalog.resolve_all_specs() def test_dynamic_container_fixed_name(self): """Test that dynamic class generation for an extended type with a fixed name works.""" baz_spec = GroupSpec('A test extension with no Container class', - data_type_def='Baz', data_type_inc=self.bar_spec, name='Baz') + data_type_def='Baz', data_type_inc='Bar', name='Baz') self.spec_catalog.register_spec(baz_spec, 'extension.yaml') + self.type_map.namespace_catalog.resolve_all_specs() Baz = self.type_map.get_dt_container_cls('Baz', CORE_NAMESPACE) obj = Baz(data=[1, 2, 3, 4], attr1='string attribute', attr2=1000) self.assertEqual(obj.name, 'Baz') @@ -359,10 +371,11 @@ def __init__(self, **kwargs): self.type_map.register_container_type(CORE_NAMESPACE, "Bar", FixedAttrBar) baz_spec = GroupSpec('A test extension with no Container class', - data_type_def='Baz', data_type_inc=self.bar_spec, + data_type_def='Baz', data_type_inc='Bar', attributes=[AttributeSpec('attr3', 'a float attribute', 'float'), AttributeSpec('attr4', 'another float attribute', 'float')]) self.spec_catalog.register_spec(baz_spec, 'extension.yaml') + self.type_map.namespace_catalog.resolve_all_specs() cls = self.type_map.get_dt_container_cls('Baz', CORE_NAMESPACE) expected_args = {'name', 'data', 'attr2', 'attr3', 'attr4', 'skip_post_init'} received_args = set() @@ -382,13 +395,14 @@ def test_multi_container_spec(self): doc='A test extension that contains a multi', data_type_def='Multi', groups=[ - GroupSpec(data_type_inc=self.bar_spec, doc='test multi', quantity='*') + GroupSpec(data_type_inc='Bar', doc='test multi', quantity='*') ], attributes=[ AttributeSpec(name='attr3', doc='a float attribute', dtype='float') ] ) self.spec_catalog.register_spec(multi_spec, 'extension.yaml') + self.type_map.namespace_catalog.resolve_all_specs() Bar = self.type_map.get_dt_container_cls('Bar', CORE_NAMESPACE) Multi = self.type_map.get_dt_container_cls('Multi', CORE_NAMESPACE) assert issubclass(Multi, MultiContainerInterface) @@ -414,15 +428,16 @@ def test_multi_container_spec_with_inc(self): multi_spec = GroupSpec( doc='A test extension that contains a multi', data_type_def='Multi', - data_type_inc=self.bar_spec, + data_type_inc='Bar', groups=[ - 
GroupSpec(data_type_inc=self.bar_spec, doc='test multi', quantity='*') + GroupSpec(data_type_inc='Bar', doc='test multi', quantity='*') ], attributes=[ AttributeSpec(name='attr3', doc='a float attribute', dtype='float') ] ) self.spec_catalog.register_spec(multi_spec, 'extension.yaml') + self.type_map.namespace_catalog.resolve_all_specs() Bar = self.type_map.get_dt_container_cls('Bar', CORE_NAMESPACE) Multi = self.type_map.get_dt_container_cls('Multi', CORE_NAMESPACE) assert issubclass(Multi, MultiContainerInterface) @@ -457,13 +472,14 @@ def test_multi_container_spec_zero_or_more(self): doc='A test extension that contains a multi', data_type_def='Multi', groups=[ - GroupSpec(data_type_inc=self.bar_spec, doc='test multi', quantity='*') + GroupSpec(data_type_inc='Bar', doc='test multi', quantity='*') ], attributes=[ AttributeSpec(name='attr3', doc='a float attribute', dtype='float') ] ) self.spec_catalog.register_spec(multi_spec, 'extension.yaml') + self.type_map.namespace_catalog.resolve_all_specs() Multi = self.type_map.get_dt_container_cls('Multi', CORE_NAMESPACE) multi = Multi( name='my_multi', @@ -476,13 +492,14 @@ def test_multi_container_spec_one_or_more_missing(self): doc='A test extension that contains a multi', data_type_def='Multi', groups=[ - GroupSpec(data_type_inc=self.bar_spec, doc='test multi', quantity='+') + GroupSpec(data_type_inc='Bar', doc='test multi', quantity='+') ], attributes=[ AttributeSpec(name='attr3', doc='a float attribute', dtype='float') ] ) self.spec_catalog.register_spec(multi_spec, 'extension.yaml') + self.type_map.namespace_catalog.resolve_all_specs() Multi = self.type_map.get_dt_container_cls('Multi', CORE_NAMESPACE) with self.assertRaisesWith(TypeError, "MCIClassGenerator.set_init..__init__: missing argument 'bars'"): Multi( @@ -495,13 +512,14 @@ def test_multi_container_spec_one_or_more_ok(self): doc='A test extension that contains a multi', data_type_def='Multi', groups=[ - GroupSpec(data_type_inc=self.bar_spec, doc='test multi', quantity='+') + GroupSpec(data_type_inc='Bar', doc='test multi', quantity='+') ], attributes=[ AttributeSpec(name='attr3', doc='a float attribute', dtype='float') ] ) self.spec_catalog.register_spec(multi_spec, 'extension.yaml') + self.type_map.namespace_catalog.resolve_all_specs() Multi = self.type_map.get_dt_container_cls('Multi', CORE_NAMESPACE) multi = Multi( name='my_multi', @@ -732,8 +750,10 @@ def _build_separate_namespaces(self): data_type_def='Baz', data_type_inc='Bar', groups=[ - GroupSpec(data_type_inc='Qux', doc='a qux', quantity='?'), - GroupSpec(data_type_inc='Bar', doc='a bar', quantity='?') + GroupSpec(data_type_inc='Bar', doc='a bar', quantity='?'), + ], + datasets=[ + DatasetSpec(data_type_inc='Qux', doc='a qux', quantity='?'), ] ) moo_spec = DatasetSpec( diff --git a/tests/unit/helpers/io.py b/tests/unit/helpers/io.py new file mode 100644 index 000000000..5bacab60f --- /dev/null +++ b/tests/unit/helpers/io.py @@ -0,0 +1,26 @@ +from hdmf.backends.io import HDMFIO + +class DoNothingIO(HDMFIO): + + @staticmethod + def can_read(path): + pass + + def read_builder(self): + pass + + def write_builder(self, **kwargs): + pass + + def open(self): + pass + + def close(self): + pass + + @classmethod + def load_namespaces(cls, namespace_catalog, path, namespaces): + pass + + def load_namespaces_io(self, namespace_catalog, namespaces): + pass diff --git a/tests/unit/helpers/utils.py b/tests/unit/helpers/utils.py index 0f4b3c4bf..14e70da4b 100644 --- a/tests/unit/helpers/utils.py +++ b/tests/unit/helpers/utils.py @@ 
-500,6 +500,7 @@ def create_test_type_map(specs, container_classes, mappers=None): ) namespace_catalog = NamespaceCatalog() namespace_catalog.add_namespace(CORE_NAMESPACE, namespace) + namespace_catalog.resolve_all_specs() type_map = TypeMap(namespace_catalog) for type_name, container_cls in container_classes.items(): type_map.register_container_type(CORE_NAMESPACE, type_name, container_cls) @@ -627,19 +628,6 @@ def __init__(self, **kwargs): def dataset_spec_cls(cls): return CustomDatasetSpec - @docval(*deepcopy(swap_inc_def(GroupSpec, "CustomGroupSpec"))) - def add_group(self, **kwargs): - spec = CustomGroupSpec(**kwargs) - self.set_group(spec) - return spec - - @docval(*deepcopy(swap_inc_def(DatasetSpec, "CustomDatasetSpec"))) - def add_dataset(self, **kwargs): - """Add a new specification for a subgroup to this group specification""" - spec = CustomDatasetSpec(**kwargs) - self.set_dataset(spec) - return spec - class CustomDatasetSpec(BaseStorageOverride, DatasetSpec): @docval(*deepcopy(swap_inc_def(DatasetSpec, "CustomDatasetSpec"))) diff --git a/tests/unit/spec_tests/test_attribute_spec.py b/tests/unit/spec_tests/test_attribute_spec.py index bac8e12a3..807afb5d0 100644 --- a/tests/unit/spec_tests/test_attribute_spec.py +++ b/tests/unit/spec_tests/test_attribute_spec.py @@ -57,6 +57,7 @@ def test_shape(self): shape=shape) self.assertEqual(spec['shape'], shape) self.assertEqual(spec.shape, shape) + self.assertEqual(spec.dims, ('dim_0', 'dim_1')) def test_dims_without_shape(self): spec = AttributeSpec('attribute1', diff --git a/tests/unit/spec_tests/test_dataset_spec.py b/tests/unit/spec_tests/test_dataset_spec.py index 60025fd7e..e3b3a93fe 100644 --- a/tests/unit/spec_tests/test_dataset_spec.py +++ b/tests/unit/spec_tests/test_dataset_spec.py @@ -44,13 +44,34 @@ def test_constructor_datatype(self): def test_constructor_shape(self): shape = [None, 2] - spec = DatasetSpec('my first dataset', - 'int', - name='dataset1', - shape=shape, - attributes=self.attributes) + spec = DatasetSpec( + doc='my first dataset', + dtype='int', + name='dataset1', + shape=shape, + ) self.assertEqual(spec['shape'], shape) self.assertEqual(spec.shape, shape) + self.assertEqual(spec.dims, ('dim_0', 'dim_1')) + + def test_dims_without_shape(self): + spec = DatasetSpec( + doc='my first dataset', + dtype='int', + name='dataset1', + dims=("test",), + ) + self.assertEqual(spec.shape, (None, )) + + def test_colliding_shape_and_dims(self): + with self.assertRaises(ValueError): + DatasetSpec( + doc='my first dataset', + dtype='int', + name='dataset1', + dims=("test",), + shape=[None, 2], + ) def test_constructor_invalidate_dtype(self): with self.assertRaises(ValueError): @@ -73,40 +94,6 @@ def test_constructor_ref_spec(self): data_type_def='EphysData') self.assertDictEqual(spec['dtype'], dtype) - def test_datatype_extension(self): - base = DatasetSpec('my first dataset', - 'int', - name='dataset1', - attributes=self.attributes, - linkable=False, - data_type_def='EphysData') - - attributes = [AttributeSpec('attribute3', 'my first extending attribute', 'float')] - ext = DatasetSpec('my first dataset extension', - 'int', - name='dataset1', - attributes=attributes, - linkable=False, - data_type_inc=base, - data_type_def='SpikeData') - self.assertDictEqual(ext['attributes'][0], attributes[0]) - self.assertDictEqual(ext['attributes'][1], self.attributes[0]) - self.assertDictEqual(ext['attributes'][2], self.attributes[1]) - ext_attrs = ext.attributes - self.assertIs(ext, ext_attrs[0].parent) - self.assertIs(ext, 
ext_attrs[1].parent) - self.assertIs(ext, ext_attrs[2].parent) - - def test_datatype_extension_groupspec(self): - '''Test to make sure DatasetSpec catches when a GroupSpec used as data_type_inc''' - base = GroupSpec('a fake grop', - data_type_def='EphysData') - with self.assertRaises(TypeError): - DatasetSpec('my first dataset extension', - 'int', - name='dataset1', - data_type_inc=base, - data_type_def='SpikeData') def test_constructor_table(self): dtype1 = DtypeSpec('column1', 'the first column', 'int') @@ -165,72 +152,6 @@ def test_name_with_compatible_quantity(self): name='ds1', quantity=1) - def test_datatype_table_extension(self): - dtype1 = DtypeSpec('column1', 'the first column', 'int') - dtype2 = DtypeSpec('column2', 'the second column', 'float') - base = DatasetSpec('my first table', - [dtype1, dtype2], - attributes=self.attributes, - data_type_def='SimpleTable') - self.assertEqual(base['dtype'], [dtype1, dtype2]) - self.assertEqual(base['doc'], 'my first table') - dtype3 = DtypeSpec('column3', 'the third column', 'text') - ext = DatasetSpec('my first table extension', - [dtype3], - data_type_inc=base, - data_type_def='ExtendedTable') - self.assertEqual(ext['dtype'], [dtype1, dtype2, dtype3]) - self.assertEqual(ext['doc'], 'my first table extension') - - def test_datatype_table_extension_higher_precision(self): - dtype1 = DtypeSpec('column1', 'the first column', 'int') - dtype2 = DtypeSpec('column2', 'the second column', 'float32') - base = DatasetSpec('my first table', - [dtype1, dtype2], - attributes=self.attributes, - data_type_def='SimpleTable') - self.assertEqual(base['dtype'], [dtype1, dtype2]) - self.assertEqual(base['doc'], 'my first table') - dtype3 = DtypeSpec('column2', 'the second column, with greater precision', 'float64') - ext = DatasetSpec('my first table extension', - [dtype3], - data_type_inc=base, - data_type_def='ExtendedTable') - self.assertEqual(ext['dtype'], [dtype1, dtype3]) - self.assertEqual(ext['doc'], 'my first table extension') - - def test_datatype_table_extension_lower_precision(self): - dtype1 = DtypeSpec('column1', 'the first column', 'int') - dtype2 = DtypeSpec('column2', 'the second column', 'float64') - base = DatasetSpec('my first table', - [dtype1, dtype2], - attributes=self.attributes, - data_type_def='SimpleTable') - self.assertEqual(base['dtype'], [dtype1, dtype2]) - self.assertEqual(base['doc'], 'my first table') - dtype3 = DtypeSpec('column2', 'the second column, with greater precision', 'float32') - with self.assertRaisesWith(ValueError, 'Cannot extend float64 to float32'): - DatasetSpec('my first table extension', - [dtype3], - data_type_inc=base, - data_type_def='ExtendedTable') - - def test_datatype_table_extension_diff_format(self): - dtype1 = DtypeSpec('column1', 'the first column', 'int') - dtype2 = DtypeSpec('column2', 'the second column', 'float64') - base = DatasetSpec('my first table', - [dtype1, dtype2], - attributes=self.attributes, - data_type_def='SimpleTable') - self.assertEqual(base['dtype'], [dtype1, dtype2]) - self.assertEqual(base['doc'], 'my first table') - dtype3 = DtypeSpec('column2', 'the second column, with greater precision', 'int32') - with self.assertRaisesWith(ValueError, 'Cannot extend float64 to int32'): - DatasetSpec('my first table extension', - [dtype3], - data_type_inc=base, - data_type_def='ExtendedTable') - def test_data_type_property_value(self): """Test that the property data_type has the expected value""" test_cases = { @@ -275,3 +196,8 @@ def test_constructor_validates_default_name(self): "Default 
name 'one/two' is invalid. Names of Groups and Datasets cannot contain '/'", ): DatasetSpec(doc='my first dataset', dtype='int', default_name='one/two', data_type_def='test') + + def test_constructor_value_default_value(self): + msg = "cannot specify 'value' and 'default_value'" + with self.assertRaisesWith(ValueError, msg): + DatasetSpec(doc='my first dataset', dtype='int', name='dataset1', value=42, default_value=0) diff --git a/tests/unit/spec_tests/test_dtype_spec.py b/tests/unit/spec_tests/test_dtype_spec.py index 946bbb9b7..300e7ecc6 100644 --- a/tests/unit/spec_tests/test_dtype_spec.py +++ b/tests/unit/spec_tests/test_dtype_spec.py @@ -41,6 +41,21 @@ def test_check_dtype_ref(self): refspec = RefSpec(target_type='target', reftype='object') self.assertIs(refspec, DtypeHelper.check_dtype(refspec)) + def test_is_allowed(self): + self.assertTrue(DtypeHelper.is_allowed_dtype('int32', 'int')) + self.assertTrue(DtypeHelper.is_allowed_dtype('float64', 'float')) + self.assertFalse(DtypeHelper.is_allowed_dtype('int32', 'float')) + self.assertFalse(DtypeHelper.is_allowed_dtype('string', 'int')) + self.assertTrue(DtypeHelper.is_allowed_dtype('object', 'object')) + self.assertTrue(DtypeHelper.is_allowed_dtype('int64', 'numeric')) + self.assertTrue(DtypeHelper.is_allowed_dtype('float32', 'numeric')) + self.assertFalse(DtypeHelper.is_allowed_dtype('string', 'numeric')) + self.assertTrue(DtypeHelper.is_allowed_dtype('numeric', 'numeric')) + + msg = "Unknown dtype 'bad dtype'" + with self.assertRaisesRegex(ValueError, msg): + DtypeHelper.is_allowed_dtype('int32', 'bad dtype') + class DtypeSpecTests(TestCase): def setUp(self): diff --git a/tests/unit/spec_tests/test_group_spec.py b/tests/unit/spec_tests/test_group_spec.py index 4cf6ad71d..789b012de 100644 --- a/tests/unit/spec_tests/test_group_spec.py +++ b/tests/unit/spec_tests/test_group_spec.py @@ -98,6 +98,19 @@ def test_set_dataset(self): spec.set_dataset(self.datasets[0]) self.assertIs(spec, self.datasets[0].parent) + def test_add_dataset(self): + group = GroupSpec( + doc='A test group', + name='root' + ) + with self.assertWarns(DeprecationWarning): + group.add_dataset( + doc='A test dataset', + dtype='int', + name='dataset' + ) + self.assertIsInstance(group.get_dataset('dataset'), DatasetSpec) + def test_set_link(self): group = GroupSpec( doc='A test group', @@ -117,11 +130,12 @@ def test_add_link(self): doc='A test group', name='root' ) - group.add_link( - 'A test link', - 'LinkTarget', - name='link_name' - ) + with self.assertWarns(DeprecationWarning): + group.add_link( + doc='A test link', + target_type='LinkTarget', + name='link_name' + ) self.assertIsInstance(group.get_link('link_name'), LinkSpec) def test_set_group(self): @@ -141,66 +155,13 @@ def test_add_group(self): doc='A test group', name='root' ) - group.add_group( - 'A test group', - name='subgroup' - ) + with self.assertWarns(DeprecationWarning): + group.add_group( + 'A test group', + name='subgroup' + ) self.assertIsInstance(group.get_group('subgroup'), GroupSpec) - def test_type_extension(self): - spec = GroupSpec('A test group', - name='parent_type', - datasets=self.datasets, - attributes=self.attributes, - linkable=False, - data_type_def='EphysData') - dset1_attributes_ext = [ - AttributeSpec('dset1_extra_attribute', 'an extra attribute for the first dataset', 'text') - ] - ext_datasets = [ - DatasetSpec('my first dataset extension', - 'int', - name='dataset1', - attributes=dset1_attributes_ext, - linkable=True), - ] - ext_attributes = [ - AttributeSpec('ext_extra_attribute', 
'an extra attribute for the group', 'text'), - ] - ext = GroupSpec('A test group extension', - name='child_type', - datasets=ext_datasets, - attributes=ext_attributes, - linkable=False, - data_type_inc=spec, - data_type_def='SpikeData') - ext_dset1 = ext.get_dataset('dataset1') - ext_dset1_attrs = ext_dset1.attributes - self.assertDictEqual(ext_dset1_attrs[0], dset1_attributes_ext[0]) - self.assertDictEqual(ext_dset1_attrs[1], self.dset1_attributes[0]) - self.assertDictEqual(ext_dset1_attrs[2], self.dset1_attributes[1]) - self.assertEqual(ext.data_type_def, 'SpikeData') - self.assertEqual(ext.data_type_inc, 'EphysData') - - ext_dset2 = ext.get_dataset('dataset2') - self.maxDiff = None - # this will suffice for now, assertDictEqual doesn't do deep equality checks - self.assertEqual(str(ext_dset2), str(self.datasets[1])) - self.assertAttributesEqual(ext_dset2, self.datasets[1]) - - res_attrs = ext.attributes - self.assertDictEqual(res_attrs[0], ext_attributes[0]) - self.assertDictEqual(res_attrs[1], self.attributes[0]) - self.assertDictEqual(res_attrs[2], self.attributes[1]) - - # test that inherited specs are tracked appropriate - for d in self.datasets: - with self.subTest(dataset=d.name): - self.assertTrue(ext.is_inherited_spec(d)) - self.assertFalse(spec.is_inherited_spec(d)) - - json.dumps(spec) - def assertDatasetsEqual(self, spec1, spec2): spec1_dsets = spec1.datasets spec2_dsets = spec2.datasets @@ -226,7 +187,8 @@ def test_add_attribute(self): datasets=self.datasets, linkable=False) for attrspec in self.attributes: - spec.add_attribute(**attrspec) + with self.assertWarns(DeprecationWarning): + spec.add_attribute(**attrspec) self.assertListEqual(spec['attributes'], self.attributes) self.assertListEqual(spec['datasets'], self.datasets) self.assertNotIn('data_type_def', spec) @@ -308,11 +270,13 @@ def test_data_type_property_value(self): def test_get_data_type_spec(self): expected = AttributeSpec('data_type', 'the data type of this object', 'text', value='MyType') - self.assertDictEqual(GroupSpec.get_data_type_spec('MyType'), expected) + with self.assertWarns(DeprecationWarning): + self.assertDictEqual(GroupSpec.get_data_type_spec('MyType'), expected) def test_get_namespace_spec(self): expected = AttributeSpec('namespace', 'the namespace for the data type of this object', 'text', required=False) - self.assertDictEqual(GroupSpec.get_namespace_spec(), expected) + with self.assertWarns(DeprecationWarning): + self.assertDictEqual(GroupSpec.get_namespace_spec(), expected) def test_build_warn_extra_args(self): spec_dict = { @@ -331,335 +295,17 @@ def test_no_name_no_def_no_inc(self): msg = ("Cannot create Group or Dataset spec with no name without specifying 'data_type_def' " "and/or 'data_type_inc'.") with self.assertRaisesWith(ValueError, msg): - GroupSpec('A test group') + GroupSpec(doc='A test group') def test_name_with_multiple(self): msg = ("Cannot give specific name to something that can exist multiple times: name='MyGroup', quantity='*'") with self.assertRaisesWith(ValueError, msg): - GroupSpec('A test group', name='MyGroup', quantity='*') - - -class TestResolveAttrs(TestCase): - - def setUp(self): - self.def_group_spec = GroupSpec( - doc='A test group', - name='root', - data_type_def='MyGroup', - attributes=[AttributeSpec('attribute1', 'my first attribute', 'text'), - AttributeSpec('attribute2', 'my second attribute', 'text')] - ) - self.inc_group_spec = GroupSpec( - doc='A test group', - name='root', - data_type_inc='MyGroup', - attributes=[AttributeSpec('attribute2', 'my second 
attribute', 'text', value='fixed'), - AttributeSpec('attribute3', 'my third attribute', 'text', value='fixed')] - ) - self.inc_group_spec.resolve_spec(self.def_group_spec) - - def test_resolved(self): - self.assertTupleEqual(self.inc_group_spec.attributes, ( - AttributeSpec('attribute2', 'my second attribute', 'text', value='fixed'), - AttributeSpec('attribute3', 'my third attribute', 'text', value='fixed'), - AttributeSpec('attribute1', 'my first attribute', 'text') - )) - - self.assertEqual(self.inc_group_spec.get_attribute('attribute1'), - AttributeSpec('attribute1', 'my first attribute', 'text')) - self.assertEqual(self.inc_group_spec.get_attribute('attribute2'), - AttributeSpec('attribute2', 'my second attribute', 'text', value='fixed')) - self.assertEqual(self.inc_group_spec.get_attribute('attribute3'), - AttributeSpec('attribute3', 'my third attribute', 'text', value='fixed')) - - self.assertTrue(self.inc_group_spec.resolved) - - def test_is_inherited_spec(self): - self.assertFalse(self.def_group_spec.is_inherited_spec(self.def_group_spec.attributes[0])) - self.assertFalse(self.def_group_spec.is_inherited_spec(self.def_group_spec.attributes[1])) - - attr_spec_map = {attr.name: attr for attr in self.inc_group_spec.attributes} - self.assertTrue(self.inc_group_spec.is_inherited_spec(attr_spec_map["attribute1"])) - self.assertTrue(self.inc_group_spec.is_inherited_spec(attr_spec_map["attribute2"])) - self.assertFalse(self.inc_group_spec.is_inherited_spec(attr_spec_map["attribute3"])) - - def test_is_overridden_spec(self): - self.assertFalse(self.def_group_spec.is_overridden_spec(self.def_group_spec.attributes[0])) - self.assertFalse(self.def_group_spec.is_overridden_spec(self.def_group_spec.attributes[0])) - - attr_spec_map = {attr.name: attr for attr in self.inc_group_spec.attributes} - self.assertFalse(self.inc_group_spec.is_overridden_spec(attr_spec_map["attribute1"])) - self.assertTrue(self.inc_group_spec.is_overridden_spec(attr_spec_map["attribute2"])) - self.assertFalse(self.inc_group_spec.is_overridden_spec(attr_spec_map["attribute3"])) - - def test_is_inherited_attribute(self): - self.assertFalse(self.def_group_spec.is_inherited_attribute('attribute1')) - self.assertFalse(self.def_group_spec.is_inherited_attribute('attribute2')) - self.assertTrue(self.inc_group_spec.is_inherited_attribute('attribute1')) - self.assertTrue(self.inc_group_spec.is_inherited_attribute('attribute2')) - self.assertFalse(self.inc_group_spec.is_inherited_attribute('attribute3')) - with self.assertRaisesWith(ValueError, "Attribute 'attribute4' not found"): - self.inc_group_spec.is_inherited_attribute('attribute4') - - def test_is_overridden_spec_nested(self): - """Test that is_overridden_spec correctly identifies overridden specs in nested structures.""" - # Create base spec with a dataset containing an attribute - base_dataset = DatasetSpec('Base dataset', - 'int', - name='test_dataset', - attributes=[AttributeSpec('attr1', 'Base attr', 'text')]) - base_group = GroupSpec('Base group', - name='test_group', - attributes=[AttributeSpec('attr1', 'Base attr', 'text')]) - base_spec = GroupSpec('A base group', - data_type_def='BaseType', - datasets=[base_dataset], - groups=[base_group]) - - # Create extending spec that overrides both dataset and group with new attribute values - override_dataset = DatasetSpec('Override dataset', - 'int', - name='test_dataset', - attributes=[AttributeSpec('attr1', 'Override attr', 'text')]) - override_group = GroupSpec('Override group', - name='test_group', - 
attributes=[AttributeSpec('attr1', 'Override attr', 'text')]) - ext_spec = GroupSpec('An extending group', - data_type_inc='BaseType', - data_type_def='ExtType', - datasets=[override_dataset], - groups=[override_group]) - - # Resolve the extension - ext_spec.resolve_spec(base_spec) - - # Test attribute in overridden dataset is marked as overridden - dataset_attr = ext_spec.get_dataset('test_dataset').get_attribute('attr1') - self.assertTrue(ext_spec.is_overridden_spec(dataset_attr)) - - # Test attribute in overridden group is marked as overridden - group_attr = ext_spec.get_group('test_group').get_attribute('attr1') - self.assertTrue(ext_spec.is_overridden_spec(group_attr)) - - # Test attributes in base spec are not marked as overridden - base_dataset_attr = base_spec.get_dataset('test_dataset').get_attribute('attr1') - base_group_attr = base_spec.get_group('test_group').get_attribute('attr1') - self.assertFalse(base_spec.is_overridden_spec(base_dataset_attr)) - self.assertFalse(base_spec.is_overridden_spec(base_group_attr)) - - def test_is_overridden_group(self): - """Test that is_overridden_group correctly identifies overridden groups.""" - # Create base spec with a group - base_group = GroupSpec('Base group', - name='test_group', - attributes=[]) - base_spec = GroupSpec('A base group', - data_type_def='BaseType', - groups=[base_group]) - - # Create extending spec that overrides the group - override_group = GroupSpec('Override group', - name='test_group', - attributes=[]) - ext_spec = GroupSpec('An extending group', - data_type_inc='BaseType', - data_type_def='ExtType', - groups=[override_group]) - - # Resolve the extension - ext_spec.resolve_spec(base_spec) - - # Test base spec has no overridden groups - self.assertFalse(base_spec.is_overridden_group('test_group')) - - # Test extending spec correctly identifies overridden group - self.assertTrue(ext_spec.is_overridden_group('test_group')) - - # Test non-existent group raises error - with self.assertRaisesWith(ValueError, "Group 'nonexistent_group' not found in spec"): - ext_spec.is_overridden_group('nonexistent_group') - - # Test new group in extending spec is not overridden - new_group = GroupSpec('New group', - name='new_group', - attributes=[]) - ext_spec.set_group(new_group) - self.assertFalse(ext_spec.is_overridden_group('new_group')) - - def test_is_overridden_attribute(self): - self.assertFalse(self.def_group_spec.is_overridden_attribute('attribute1')) - self.assertFalse(self.def_group_spec.is_overridden_attribute('attribute2')) - self.assertFalse(self.inc_group_spec.is_overridden_attribute('attribute1')) - self.assertTrue(self.inc_group_spec.is_overridden_attribute('attribute2')) - self.assertFalse(self.inc_group_spec.is_overridden_attribute('attribute3')) - with self.assertRaisesWith(ValueError, "Attribute 'attribute4' not found"): - self.inc_group_spec.is_overridden_attribute('attribute4') - - def test_resolve_group_inheritance(self): - """Test resolution of inherited groups in GroupSpec.resolve_spec.""" - # Create base group with named and unnamed groups - unnamed_group = GroupSpec('An unnamed group', - data_type_def='UnnamedType', - attributes=[]) - named_group = GroupSpec('A named group', - name='named_group', - attributes=[]) - base_groups = [unnamed_group, named_group] - - base_spec = GroupSpec('A test group', - data_type_def='BaseType', - groups=base_groups) - - # Create extending group that overrides the named group and adds a new one - override_group = GroupSpec('Override named group', - name='named_group', - 
attributes=[]) - new_group = GroupSpec('A new group', - name='new_group', - attributes=[]) - ext_groups = [override_group, new_group] - - ext_spec = GroupSpec('An extending group', - data_type_inc='BaseType', - data_type_def='ExtType', - groups=ext_groups) - - # Resolve the extension - ext_spec.resolve_spec(base_spec) - - # Test unnamed group is added to data_types - self.assertEqual(ext_spec.get_data_type('UnnamedType'), unnamed_group) - - # Test named group is overridden - resolved_group = ext_spec.get_group('named_group') - self.assertEqual(resolved_group.doc, 'Override named group') - self.assertTrue(ext_spec.is_overridden_spec(resolved_group)) - - # Test new group is added - new_resolved = ext_spec.get_group('new_group') - self.assertEqual(new_resolved.doc, 'A new group') - self.assertFalse(ext_spec.is_overridden_spec(new_resolved)) - - def test_resolve_group_inheritance_multiple(self): - """Test resolution of multiple levels of group inheritance.""" - # Base spec with a named group - base_group = GroupSpec('Base group', - name='test_group', - attributes=[]) - base_spec = GroupSpec('A base group', - data_type_def='BaseType', - groups=[base_group]) - - # First extension overrides the group - mid_group = GroupSpec('Mid group', - name='test_group', - attributes=[]) - mid_spec = GroupSpec('A middle group', - data_type_inc='BaseType', - data_type_def='MidType', - groups=[mid_group]) - - # Second extension inherits without override - ext_spec = GroupSpec('An extending group', - data_type_inc='MidType', - data_type_def='ExtType') - - # Resolve the extensions - mid_spec.resolve_spec(base_spec) - ext_spec.resolve_spec(mid_spec) - - # Test group inheritance through multiple levels - resolved_group = ext_spec.get_group('test_group') - self.assertEqual(resolved_group.doc, 'Mid group') - self.assertTrue(ext_spec.is_inherited_spec(resolved_group)) - - -class TestResolveGroupSameAttributeName(TestCase): - # https://github.com/hdmf-dev/hdmf/issues/1121 - - def test_is_inherited_two_different_datasets(self): - self.def_group_spec = GroupSpec( - doc='A test group', - data_type_def='MyGroup', - datasets=[ - DatasetSpec( - name='dset1', - doc="dset1", - dtype='int', - attributes=[AttributeSpec('attr1', 'MyGroup.dset1.attr1', 'text')] - ), - ] - ) - self.inc_group_spec = GroupSpec( - doc='A test subgroup', - data_type_def='SubGroup', - data_type_inc='MyGroup', - datasets=[ - DatasetSpec( - name='dset2', - doc="dset2", - dtype='int', - attributes=[AttributeSpec('attr1', 'SubGroup.dset2.attr1', 'text')] - ), - ] - ) - self.inc_group_spec.resolve_spec(self.def_group_spec) - - self.assertFalse(self.def_group_spec.is_inherited_spec(self.def_group_spec.datasets[0].attributes[0])) - - dset_spec_map = {dset.name: dset for dset in self.inc_group_spec.datasets} - self.assertFalse(self.inc_group_spec.is_inherited_spec(dset_spec_map["dset2"].attributes[0])) - self.assertTrue(self.inc_group_spec.is_inherited_spec(dset_spec_map["dset1"].attributes[0])) - - def test_is_inherited_different_groups_and_datasets(self): - self.def_group_spec = GroupSpec( - doc='A test group', - data_type_def='MyGroup', - attributes=[AttributeSpec('attr1', 'MyGroup.attr1', 'text')], # <-- added from above - datasets=[ - DatasetSpec( - name='dset1', - doc="dset1", - dtype='int', - attributes=[AttributeSpec('attr1', 'MyGroup.dset1.attr1', 'text')] - ), - ] - ) - self.inc_group_spec = GroupSpec( - doc='A test subgroup', - data_type_def='SubGroup', - data_type_inc='MyGroup', - attributes=[AttributeSpec('attr1', 'SubGroup.attr1', 'text')], # <-- 
added from above - datasets=[ - DatasetSpec( - name='dset2', - doc="dset2", - dtype='int', - attributes=[AttributeSpec('attr1', 'SubGroup.dset2.attr1', 'text')] - ), - ] - ) - self.inc_group_spec.resolve_spec(self.def_group_spec) - - self.assertFalse(self.def_group_spec.is_inherited_spec(self.def_group_spec.datasets[0].attributes[0])) - - dset_spec_map = {dset.name: dset for dset in self.inc_group_spec.datasets} - self.assertFalse(self.inc_group_spec.is_inherited_spec(dset_spec_map["dset2"].attributes[0])) - self.assertTrue(self.inc_group_spec.is_inherited_spec(dset_spec_map["dset1"].attributes[0])) - self.assertTrue(self.inc_group_spec.is_inherited_spec(self.inc_group_spec.attributes[0])) - - self.inc_group_spec2 = GroupSpec( - doc='A test subsubgroup', - data_type_def='SubSubGroup', - data_type_inc='SubGroup', - ) - self.inc_group_spec2.resolve_spec(self.inc_group_spec) - - dset_spec_map = {dset.name: dset for dset in self.inc_group_spec2.datasets} - self.assertTrue(self.inc_group_spec2.is_inherited_spec(dset_spec_map["dset1"].attributes[0])) - self.assertTrue(self.inc_group_spec2.is_inherited_spec(dset_spec_map["dset2"].attributes[0])) - self.assertTrue(self.inc_group_spec2.is_inherited_spec(self.inc_group_spec2.attributes[0])) + GroupSpec(doc='A test group', name='MyGroup', quantity='*') + def test_same_data_type_def_inc(self): + msg = ("data_type_inc and data_type_def cannot be the same: MyType. Ignoring data_type_inc.") + with self.assertWarnsWith(UserWarning, msg): + GroupSpec(doc='A test group', data_type_def='MyType', data_type_inc='MyType') @@ -678,58 +324,6 @@ def test_constructor(self): self.assertIs(spec, links[1].parent) json.dumps(spec) - def test_extension_no_overwrite(self): - link0 = LinkSpec(doc='Link 0', target_type='TargetType0') # test unnamed - link1 = LinkSpec(doc='Link 1', target_type='TargetType1', name='MyType1') # test named - link2 = LinkSpec(doc='Link 2', target_type='TargetType2', quantity='*') # test named, multiple - links = [link0, link1, link2] - parent_spec = GroupSpec( - doc='A test group', - name='parent', - links=links, - data_type_def='ParentType' - ) - child_spec = GroupSpec( - doc='A test group', - name='child', - data_type_inc=parent_spec, - data_type_def='ChildType' - ) - - for link in links: - with self.subTest(link_target_type=link.target_type): - self.assertTrue(child_spec.is_inherited_spec(link)) - self.assertFalse(child_spec.is_overridden_spec(link)) - - def test_extension_overwrite(self): - link0 = LinkSpec(doc='Link 0', target_type='TargetType0', name='MyType0') - link1 = LinkSpec(doc='Link 1', target_type='TargetType1', name='MyType1') - # NOTE overwriting unnamed LinkSpec is not allowed - # NOTE overwriting spec with quantity that could be >1 is not allowed - links = [link0, link1] - parent_spec = GroupSpec( - doc='A test group', - name='parent', - links=links, - data_type_def='ParentType' - ) - - link0_overwrite = LinkSpec(doc='New link 0', target_type='TargetType0', name='MyType0') - link1_overwrite = LinkSpec(doc='New link 1', target_type='TargetType1Child', name='MyType1') - overwritten_links = [link0_overwrite, link1_overwrite] - child_spec = GroupSpec( - doc='A test group', - name='child', - links=overwritten_links, - data_type_inc=parent_spec, - data_type_def='ChildType' - ) - - for link in overwritten_links: - with self.subTest(link_target_type=link.target_type): - self.assertTrue(child_spec.is_inherited_spec(link)) - self.assertTrue(child_spec.is_overridden_spec(link)) - class SpecWithDupsTest(TestCase): diff --git 
a/tests/unit/spec_tests/test_load_namespace.py b/tests/unit/spec_tests/test_load_namespace.py index 381b530b6..46d699593 100644 --- a/tests/unit/spec_tests/test_load_namespace.py +++ b/tests/unit/spec_tests/test_load_namespace.py @@ -96,32 +96,6 @@ def tearDown(self): if os.path.exists(self.specs_path): os.remove(self.specs_path) - def test_inherited_attributes(self): - self.ns_catalog.load_namespaces(self.namespace_path, resolve=True) - ts_spec = self.ns_catalog.get_spec(self.NS_NAME, 'EphysData') - es_spec = self.ns_catalog.get_spec(self.NS_NAME, 'SpikeData') - ts_attrs = {s.name for s in ts_spec.attributes} - es_attrs = {s.name for s in es_spec.attributes} - for attr in ts_attrs: - with self.subTest(attr=attr): - self.assertIn(attr, es_attrs) - # self.assertSetEqual(ts_attrs, es_attrs) - ts_dsets = {s.name for s in ts_spec.datasets} - es_dsets = {s.name for s in es_spec.datasets} - for dset in ts_dsets: - with self.subTest(dset=dset): - self.assertIn(dset, es_dsets) - # self.assertSetEqual(ts_dsets, es_dsets) - - def test_inherited_attributes_not_resolved(self): - self.ns_catalog.load_namespaces(self.namespace_path, resolve=False) - es_spec = self.ns_catalog.get_spec(self.NS_NAME, 'SpikeData') - src_attrs = {s.name for s in self.ext_attributes} - ext_attrs = {s.name for s in es_spec.attributes} - self.assertSetEqual(src_attrs, ext_attrs) - src_dsets = {s.name for s in self.ext_datasets} - ext_dsets = {s.name for s in es_spec.datasets} - self.assertSetEqual(src_dsets, ext_dsets) class TestSpecLoadEdgeCase(TestCase): diff --git a/tests/unit/spec_tests/test_spec_catalog.py b/tests/unit/spec_tests/test_spec_catalog.py index 5a4cceebe..9a03f8f82 100644 --- a/tests/unit/spec_tests/test_spec_catalog.py +++ b/tests/unit/spec_tests/test_spec_catalog.py @@ -161,6 +161,18 @@ def test_get_full_hierarchy(self): } self.assertDictEqual(full_hierarchy, expected_hierarchy) + def test_circular_hierarchy(self): + """Test that circular dependencies are detected and raise an error.""" + # Create circular dependency: A -> B -> A + spec_a = GroupSpec(data_type_inc="TypeB", data_type_def="TypeA", doc="Group A") + spec_b = GroupSpec(data_type_inc="TypeA", data_type_def="TypeB", doc="Group B") + self.catalog.register_spec(spec_a, 'test.yaml') + self.catalog.register_spec(spec_b, 'test.yaml') + + msg = "Circular reference detected in type hierarchy for TypeA" + with self.assertRaisesWith(ValueError, msg): + self.catalog.get_hierarchy('TypeA') + def test_copy_spec_catalog(self): # Register the spec first self.catalog.register_spec(self.spec, 'test.yaml') diff --git a/tests/unit/spec_tests/test_spec_resolution.py b/tests/unit/spec_tests/test_spec_resolution.py new file mode 100644 index 000000000..d7c4a9d2a --- /dev/null +++ b/tests/unit/spec_tests/test_spec_resolution.py @@ -0,0 +1,1733 @@ +""" +Tests for the spec resolution system. + +This module tests the resolution functionality that allows specs to be resolved after loading, including +cross-namespace resolution capabilities. 
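+
+Example (sketch; the type names below are illustrative only): ``resolve_inc_spec``
+merges an included base spec into the extension spec in place::
+
+    base = DatasetSpec(data_type_def="Base", dtype="int", doc="base")
+    ext = DatasetSpec(data_type_inc="Base", data_type_def="Ext", doc="ext")
+    ext.resolve_inc_spec(base, None)  # the 'int' dtype is now inherited by ext
+    assert ext.inc_spec_resolved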
+"""
+
+import os
+from pathlib import Path
+import tempfile
+import json
+from typing import Union
+import ruamel.yaml as yaml
+import shutil
+
+from hdmf.spec import (
+    AttributeSpec,
+    DatasetSpec,
+    DtypeSpec,
+    GroupSpec,
+    LinkSpec,
+    RefSpec,
+    SpecNamespace,
+    NamespaceCatalog,
+)
+from hdmf.spec.spec import BaseStorageSpec
+from hdmf.testing import TestCase
+
+
+class TestSpecResolution(TestCase):
+    """Test the spec resolution system."""
+
+    # NOTE: tests of _resolve_inc_spec_dtype, _resolve_inc_spec_shape, _resolve_inc_spec_dims, etc.
+    # are done for dataset specs below, so they are not repeated here for attributes
+    # NOTE: tests of resolving attributes are done for group specs below and not repeated for datasets
+    # because resolution of attributes is managed by BaseStorageSpec.resolve_inc_spec
+
+    def setUp(self):
+        """Set up test specs."""
+        # Create base specs
+        self.base_attr = AttributeSpec(name="base_attr", dtype="text", doc="Base attribute")
+        self.base_dataset = DatasetSpec(
+            data_type_def="BaseDataset",
+            name="base_dataset",
+            dtype="int",
+            doc="Base dataset",
+            attributes=[self.base_attr],
+        )
+        self.base_group = GroupSpec(
+            data_type_def="BaseGroup",
+            name="base_group",
+            doc="Base group",
+            datasets=[
+                DatasetSpec(
+                    data_type_inc="BaseDataset",
+                    name="base_dataset",
+                    doc="Base dataset reference",
+                )
+            ],
+        )
+
+    def test_resolved_property_setter(self):
+        """Test that the resolved property can be set."""
+        ext_attr = AttributeSpec(name="ext_attr", dtype="text", doc="Extension attribute")
+        ext_dataset = DatasetSpec(
+            data_type_inc="BaseDataset",
+            data_type_def="ExtDataset",
+            name="ext_dataset",
+            dtype="int",
+            doc="Extended dataset",
+            attributes=[ext_attr],
+        )
+
+        self.assertFalse(ext_dataset.resolved)
+
+        # Set resolved to True
+        ext_dataset.resolved = True
+        self.assertTrue(ext_dataset.resolved)
+
+        # Set resolved to False
+        ext_dataset.resolved = False
+        self.assertFalse(ext_dataset.resolved)
+
+        # Invalid value should raise error
+        with self.assertRaises(ValueError):
+            ext_dataset.resolved = "not a boolean"
+
+    def test_resolve_inc_spec_dataset_add_attribute(self):
+        """Test extending a dataset and adding an attribute."""
+        # Create an extension dataset that adds an attribute
+        # This should be the same as an extension group that adds a dataset because this is managed by
+        # BaseStorageSpec.resolve_inc_spec
+        ext_attr = AttributeSpec(name="ext_attr", dtype="text", doc="Extension attribute")
+        ext_dataset = DatasetSpec(
+            data_type_inc="BaseDataset",
+            data_type_def="ExtDataset",
+            name="ext_dataset",
+            dtype="int",
+            doc="Extended dataset",
+            attributes=[ext_attr],
+        )
+
+        # Initially, the extension should not be resolved
+        # Note: the `resolved` property is set by the owning NamespaceCatalog once all subspecs have been resolved
+        self.assertFalse(ext_dataset.inc_spec_resolved)
+
+        # Resolve the extension (no namespace needed for this test)
+        ext_dataset.resolve_inc_spec(self.base_dataset, None)
+
+        # Check that resolution flags are set
+        self.assertTrue(ext_dataset.inc_spec_resolved)
+
+        # Check that attributes are inherited (ext_attr should be present, base_attr should be inherited)
+        ext_attrs = {attr.name: attr for attr in ext_dataset.attributes}
+        self.assertIn("base_attr", ext_attrs)
+        self.assertIn("ext_attr", ext_attrs)
+
+        # Check inheritance tracking
+        self.assertTrue(ext_dataset.is_inherited_attribute("base_attr"))
+        self.assertFalse(ext_dataset.is_inherited_attribute("ext_attr"))
+
+    def 
test_resolve_inc_spec_group_add_dataset(self): + """Test extending a group and adding a dataset.""" + # Create an extension group that adds a dataset + # NOTE: technically ExtDataset does not need to be defined for this test, but included for completeness + _ = DatasetSpec( + data_type_inc="BaseDataset", + data_type_def="ExtDataset", + name="ext_dataset", + dtype="int", + doc="Extended dataset", + ) + ext_group = GroupSpec( + data_type_inc="BaseGroup", + data_type_def="ExtGroup", + name="ext_group", + doc="Extended group", + datasets=[ + DatasetSpec( + data_type_inc="ExtDataset", + name="ext_dataset", + doc="Extended dataset reference", + ) + ], + ) + + # Initially, the extension should not be resolved + self.assertFalse(ext_group.resolved) + self.assertFalse(ext_group.inc_spec_resolved) + + # Resolve the extension (no namespace needed for this test) + ext_group.resolve_inc_spec(self.base_group, None) + + # Check that resolution flags are set + self.assertTrue(ext_group.inc_spec_resolved) + # Note: GroupSpec resolution depends on subspecs being resolved + + # Check that datasets are inherited + ext_datasets = [dset.name for dset in ext_group.datasets] + ext_datasets_expected = ["ext_dataset", "base_dataset"] + self.assertEqual(ext_datasets, ext_datasets_expected) + + # Check inheritance tracking + self.assertTrue(ext_group.is_inherited_dataset("base_dataset")) + self.assertFalse(ext_group.is_inherited_dataset("ext_dataset")) + + def test_resolve_inc_spec_dtype_inheritance(self): + """Test that dtype is inherited correctly.""" + base_dataset = DatasetSpec( + data_type_def="BaseWithShape", + dtype="int", + dims=("x", "y"), + shape=(None, 3), + doc="Base dataset with shape", + ) + + ext_dataset = DatasetSpec( + data_type_inc="BaseWithShape", + data_type_def="ExtWithShape", + doc="Extended dataset", + ) + + # Resolve the extension (no namespace needed for this test) + ext_dataset.resolve_inc_spec(base_dataset, None) + + # Check that dtype is inherited + self.assertEqual(ext_dataset.dtype, "int") + + # without data_type_def + ext_dataset2 = DatasetSpec( + data_type_inc="BaseWithShape", + doc="Extended dataset", + ) + + # Resolve the extension (no namespace needed for this test) + ext_dataset2.resolve_inc_spec(base_dataset, None) + + # Check that dtype is inherited + self.assertEqual(ext_dataset2.dtype, "int") + + def test_resolve_inc_spec_attribute_simple_override(self): + """Test that attribute overrides work correctly.""" + base_group = GroupSpec( + doc="A test group", + data_type_def="MyGroup", + attributes=[ + AttributeSpec(name="attribute1", doc="my first attribute", dtype="text"), + AttributeSpec(name="attribute2", doc="my second attribute", dtype="text"), + ], + ) + ext_group = GroupSpec( + doc="A test group", + name="root", + data_type_inc="MyGroup", + attributes=[ + AttributeSpec( + name="attribute2", + doc="my second attribute", + dtype="text", + value="fixed", + ), + AttributeSpec( + name="attribute3", + doc="my third attribute", + dtype="text", + value="fixed", + ), + ], + ) + # Resolve the extension (no namespace needed for this test) + ext_group.resolve_inc_spec(base_group, None) + + self.assertTupleEqual( + ext_group.attributes, + ( + AttributeSpec( + name="attribute2", + doc="my second attribute", + dtype="text", + value="fixed", + ), + AttributeSpec( + name="attribute3", + doc="my third attribute", + dtype="text", + value="fixed", + ), + AttributeSpec(name="attribute1", doc="my first attribute", dtype="text"), + ), + ) + + self.assertEqual( + 
ext_group.get_attribute("attribute1"), + AttributeSpec(name="attribute1", doc="my first attribute", dtype="text"), + ) + self.assertEqual( + ext_group.get_attribute("attribute2"), + AttributeSpec( + name="attribute2", + doc="my second attribute", + dtype="text", + value="fixed", + ), + ) + self.assertEqual( + ext_group.get_attribute("attribute3"), + AttributeSpec(name="attribute3", doc="my third attribute", dtype="text", value="fixed"), + ) + + # Check is_inherited_spec + self.assertFalse(base_group.is_inherited_spec(base_group.attributes[0])) + self.assertFalse(base_group.is_inherited_spec(base_group.attributes[1])) + + attr_spec_map = {attr.name: attr for attr in ext_group.attributes} + self.assertTrue(ext_group.is_inherited_spec(attr_spec_map["attribute1"])) + self.assertTrue(ext_group.is_inherited_spec(attr_spec_map["attribute2"])) + self.assertFalse(ext_group.is_inherited_spec(attr_spec_map["attribute3"])) + + # Check is_overridden_spec + self.assertFalse(base_group.is_overridden_spec(base_group.attributes[0])) + self.assertFalse(base_group.is_overridden_spec(base_group.attributes[1])) + + attr_spec_map = {attr.name: attr for attr in ext_group.attributes} + self.assertFalse(ext_group.is_overridden_spec(attr_spec_map["attribute1"])) + self.assertTrue(ext_group.is_overridden_spec(attr_spec_map["attribute2"])) + self.assertFalse(ext_group.is_overridden_spec(attr_spec_map["attribute3"])) + + # Check is_inherited_attribute + self.assertFalse(base_group.is_inherited_attribute("attribute1")) + self.assertFalse(base_group.is_inherited_attribute("attribute2")) + self.assertTrue(ext_group.is_inherited_attribute("attribute1")) + self.assertTrue(ext_group.is_inherited_attribute("attribute2")) + self.assertFalse(ext_group.is_inherited_attribute("attribute3")) + with self.assertRaisesWith(ValueError, "Attribute 'attribute4' not found"): + ext_group.is_inherited_attribute("attribute4") + + # Check is_overridden_attribute + self.assertFalse(base_group.is_overridden_attribute("attribute1")) + self.assertFalse(base_group.is_overridden_attribute("attribute2")) + self.assertFalse(ext_group.is_overridden_attribute("attribute1")) + self.assertTrue(ext_group.is_overridden_attribute("attribute2")) + self.assertFalse(ext_group.is_overridden_attribute("attribute3")) + with self.assertRaisesWith(ValueError, "Attribute 'attribute4' not found"): + ext_group.is_overridden_attribute("attribute4") + + def test_resolve_inc_spec_is_overridden_spec_nested(self): + """Test that is_overridden_spec correctly identifies overridden specs in nested structures.""" + # Create base spec with a dataset containing an attribute + base_dataset = DatasetSpec( + doc="Base dataset", + dtype="int", + name="test_dataset", + attributes=[AttributeSpec(name="attr1", doc="Base attr", dtype="text")], + ) + base_group = GroupSpec( + doc="Base group", + name="test_group", + attributes=[AttributeSpec(name="attr1", doc="Base attr", dtype="text")], + ) + base_spec = GroupSpec( + doc="A base group", + data_type_def="BaseType", + datasets=[base_dataset], + groups=[base_group], + ) + # Create extending spec that overrides both dataset and group with new attribute values + override_dataset = DatasetSpec( + doc="Override dataset", + dtype="int", + name="test_dataset", + attributes=[AttributeSpec(name="attr1", doc="Override attr", dtype="text")], + ) + override_group = GroupSpec( + doc="Override group", + name="test_group", + attributes=[AttributeSpec(name="attr1", doc="Override attr", dtype="text")], + ) + ext_spec = GroupSpec( + "An extending group", 
+ data_type_inc="BaseType", + data_type_def="ExtType", + datasets=[override_dataset], + groups=[override_group], + ) + + # Resolve the extension (no namespace needed for this test) + ext_spec.resolve_inc_spec(base_spec, None) + + # Test attribute in overridden dataset is marked as overridden + dataset_attr = ext_spec.get_dataset("test_dataset").get_attribute("attr1") + self.assertTrue(ext_spec.is_overridden_spec(dataset_attr)) + + # Test attribute in overridden group is marked as overridden + group_attr = ext_spec.get_group("test_group").get_attribute("attr1") + self.assertTrue(ext_spec.is_overridden_spec(group_attr)) + + # Test attributes in base spec are not marked as overridden + base_dataset_attr = base_spec.get_dataset("test_dataset").get_attribute("attr1") + base_group_attr = base_spec.get_group("test_group").get_attribute("attr1") + self.assertFalse(base_spec.is_overridden_spec(base_dataset_attr)) + self.assertFalse(base_spec.is_overridden_spec(base_group_attr)) + + def test_resolve_inc_spec_group_spec_is_overridden_group(self): + """Test that is_overridden_group correctly identifies overridden groups.""" + # Create base spec with a group + base_group = GroupSpec(doc="Base group", name="test_group") + base_spec = GroupSpec(doc="A base group", data_type_def="BaseType", groups=[base_group]) + + # Create extending spec that overrides the group + override_group = GroupSpec(doc="Override group", name="test_group") + ext_spec = GroupSpec( + doc="An extending group", + data_type_inc="BaseType", + data_type_def="ExtType", + groups=[override_group], + ) + + # Resolve the extension (no namespace needed for this test) + ext_spec.resolve_inc_spec(base_spec, None) + + # Test base spec has no overridden groups + self.assertFalse(base_spec.is_overridden_group("test_group")) + + # Test extending spec correctly identifies overridden group + self.assertTrue(ext_spec.is_overridden_group("test_group")) + + # Test non-existent group raises error + with self.assertRaisesWith(ValueError, "Group 'nonexistent_group' not found in spec"): + ext_spec.is_overridden_group("nonexistent_group") + + # Test new group in extending spec is not overridden + new_group = GroupSpec(doc="New group", name="new_group") + ext_spec.set_group(new_group) + self.assertFalse(ext_spec.is_overridden_group("new_group")) + + def test_resolve_inc_spec_group_spec_inheritance(self): + """Test resolution of inherited groups in GroupSpec.resolve_inc_spec.""" + # Create base group with named and unnamed groups + unnamed_group = GroupSpec(doc="An unnamed group", data_type_def="UnnamedType") + named_group = GroupSpec(doc="A named group", name="named_group") + base_groups = [unnamed_group, named_group] + + base_spec = GroupSpec(doc="A test group", data_type_def="BaseType", groups=base_groups) + + # Create extending group that overrides the named group and adds a new one + override_group = GroupSpec(doc="Override named group", name="named_group") + new_group = GroupSpec(doc="A new group", name="new_group") + ext_groups = [override_group, new_group] + + ext_spec = GroupSpec( + doc="An extending group", + data_type_inc="BaseType", + data_type_def="ExtType", + groups=ext_groups, + ) + + # Resolve the extension (no namespace needed for this test) + ext_spec.resolve_inc_spec(base_spec, None) + + # Test unnamed group is added to data_types + self.assertEqual(ext_spec.get_data_type("UnnamedType"), unnamed_group) + + # Test named group is overridden + resolved_group = ext_spec.get_group("named_group") + self.assertEqual(resolved_group.doc, "Override 
named group") + self.assertTrue(ext_spec.is_overridden_spec(resolved_group)) + + # Test new group is added + new_resolved = ext_spec.get_group("new_group") + self.assertEqual(new_resolved.doc, "A new group") + self.assertFalse(ext_spec.is_overridden_spec(new_resolved)) + + def test_resolve_inc_spec_group_spec_inheritance_multiple(self): + """Test resolution of multiple levels of group inheritance.""" + # Base spec with a named group + base_group = GroupSpec(doc="Base group", name="test_group") + base_spec = GroupSpec(doc="A base group", data_type_def="BaseType", groups=[base_group]) + + # First extension overrides the group + mid_group = GroupSpec(doc="Mid group", name="test_group") + mid_spec = GroupSpec( + doc="A middle group", + data_type_inc="BaseType", + data_type_def="MidType", + groups=[mid_group], + ) + + # Second extension inherits without override + ext_spec = GroupSpec(doc="An extending group", data_type_inc="MidType", data_type_def="ExtType") + + # Resolve the extensions (no namespace needed for this test) + mid_spec.resolve_inc_spec(base_spec, None) + ext_spec.resolve_inc_spec(mid_spec, None) + + # Test group inheritance through multiple levels + resolved_group = ext_spec.get_group("test_group") + self.assertEqual(resolved_group.doc, "Mid group") + self.assertTrue(ext_spec.is_inherited_spec(resolved_group)) + + def test_resolve_inc_spec_group_spec_links_no_overwrite(self): + link0 = LinkSpec(doc="Link 0", target_type="TargetType0") # test unnamed + link1 = LinkSpec(doc="Link 1", target_type="TargetType1", name="MyType1") # test named + link2 = LinkSpec(doc="Link 2", target_type="TargetType2", quantity="*") # test named, multiple + links = [link0, link1, link2] + parent_spec = GroupSpec( + data_type_def="ParentType", + doc="A test group", + links=links, + ) + child_spec = GroupSpec( + data_type_def="ChildType", + data_type_inc="ParentType", + doc="A test group", + ) + # Resolve the extension (no namespace needed for this test) + child_spec.resolve_inc_spec(parent_spec, None) + + for link in links: + with self.subTest(link_target_type=link.target_type): + self.assertTrue(child_spec.is_inherited_spec(link)) + self.assertFalse(child_spec.is_overridden_spec(link)) + + def test_resolve_inc_spec_group_spec_links_overwrite(self): + link0 = LinkSpec(doc="Link 0", target_type="TargetType0", name="MyType0") + link1 = LinkSpec(doc="Link 1", target_type="TargetType1", name="MyType1") + # NOTE overwriting unnamed LinkSpec is not allowed + # TODO test overwriting LinkSpec or DatasetSpec with mismatched quantity + links = [link0, link1] + parent_spec = GroupSpec( + data_type_def="ParentType", + doc="A test group", + links=links, + ) + + link0_overwrite = LinkSpec(doc="New link 0", target_type="TargetType0", name="MyType0") + link1_overwrite = LinkSpec(doc="New link 1", target_type="TargetType1Child", name="MyType1") + overwritten_links = [link0_overwrite, link1_overwrite] + child_spec = GroupSpec( + data_type_def="ChildType", + data_type_inc="ParentType", + doc="A test group", + links=overwritten_links, + ) + # Resolve the extension (no namespace needed for this test) + child_spec.resolve_inc_spec(parent_spec, None) + + for link in overwritten_links: + with self.subTest(link_target_type=link.target_type): + self.assertTrue(child_spec.is_inherited_spec(link)) + self.assertTrue(child_spec.is_overridden_spec(link)) + + def test_resolve_inc_spec_is_inherited_two_different_datasets(self): + """Test is_inherited_spec with different attribute names in base and extension.""" + # 
https://github.com/hdmf-dev/hdmf/issues/1121 + base_group = GroupSpec( + doc="A test group", + data_type_def="MyGroup", + datasets=[ + DatasetSpec( + name="dset1", + doc="dset1", + dtype="int", + attributes=[AttributeSpec("attr1", "MyGroup.dset1.attr1", "text")], + ), + ], + ) + ext_group = GroupSpec( + doc="A test subgroup", + data_type_def="SubGroup", + data_type_inc="MyGroup", + datasets=[ + DatasetSpec( + name="dset2", + doc="dset2", + dtype="int", + attributes=[AttributeSpec("attr1", "SubGroup.dset2.attr1", "text")], + ), + ], + ) + # Resolve the extension (no namespace needed for this test) + ext_group.resolve_inc_spec(base_group, None) + + self.assertFalse(base_group.is_inherited_spec(base_group.datasets[0].attributes[0])) + + dset_spec_map = {dset.name: dset for dset in ext_group.datasets} + self.assertFalse(ext_group.is_inherited_spec(dset_spec_map["dset2"].attributes[0])) + self.assertTrue(ext_group.is_inherited_spec(dset_spec_map["dset1"].attributes[0])) + + def test_resolve_inc_spec_is_inherited_same_name(self): + """Test is_inherited_spec with same attribute name in base and extension.""" + # https://github.com/hdmf-dev/hdmf/issues/1121 + base_group = GroupSpec( + doc="A test group", + data_type_def="MyGroup", + attributes=[AttributeSpec("attr1", "MyGroup.attr1", "text")], # <-- added from above test + datasets=[ + DatasetSpec( + name="dset1", + doc="dset1", + dtype="int", + attributes=[AttributeSpec("attr1", "MyGroup.dset1.attr1", "text")], + ), + ], + ) + ext_group = GroupSpec( + doc="A test subgroup", + data_type_def="SubGroup", + data_type_inc="MyGroup", + attributes=[AttributeSpec("attr1", "SubGroup.attr1", "text")], # <-- added from above test + datasets=[ + DatasetSpec( + name="dset2", + doc="dset2", + dtype="int", + attributes=[AttributeSpec("attr1", "SubGroup.dset2.attr1", "text")], + ), + ], + ) + # Resolve the extension (no namespace needed for this test) + ext_group.resolve_inc_spec(base_group, None) + + self.assertFalse(base_group.is_inherited_spec(base_group.datasets[0].attributes[0])) + + dset_spec_map = {dset.name: dset for dset in ext_group.datasets} + self.assertFalse(ext_group.is_inherited_spec(dset_spec_map["dset2"].attributes[0])) + self.assertTrue(ext_group.is_inherited_spec(dset_spec_map["dset1"].attributes[0])) + self.assertTrue(ext_group.is_inherited_spec(ext_group.attributes[0])) + + ext_group2 = GroupSpec( + doc="A test subsubgroup", + data_type_def="SubSubGroup", + data_type_inc="SubGroup", + ) + # Resolve the extension (no namespace needed for this test) + ext_group2.resolve_inc_spec(ext_group, None) + + dset_spec_map = {dset.name: dset for dset in ext_group2.datasets} + self.assertTrue(ext_group2.is_inherited_spec(dset_spec_map["dset1"].attributes[0])) + self.assertTrue(ext_group2.is_inherited_spec(dset_spec_map["dset2"].attributes[0])) + self.assertTrue(ext_group2.is_inherited_spec(ext_group2.attributes[0])) + + def test_resolve_inc_spec_cpd_dtype_extension_new_col(self): + """Test that adding a column to a compound dtype in an extension works correctly.""" + # Create a base dataset with compound dtype + base_dtype = [ + DtypeSpec(name="col1", dtype="int", doc="First column"), + DtypeSpec(name="col2", dtype="float", doc="Second column"), + ] + base_dataset = DatasetSpec(data_type_def="BaseCompound", dtype=base_dtype, doc="Base compound dataset") + + # Create an extension that adds a column + ext_dtype = [DtypeSpec(name="col3", dtype="text", doc="Third column")] + ext_dataset = DatasetSpec( + data_type_inc="BaseCompound", + 
data_type_def="ExtCompound", + dtype=ext_dtype, + doc="Extended compound dataset", + ) + + # Resolve the extension (no namespace needed for this test) + ext_dataset.resolve_inc_spec(base_dataset, None) + + # Check that all columns are present + self.assertEqual(len(ext_dataset.dtype), 3) + col_names = [col.name for col in ext_dataset.dtype] + self.assertIn("col1", col_names) + self.assertIn("col2", col_names) + self.assertIn("col3", col_names) + + def test_resolve_inc_spec_cpd_dtype_override_higher_precision(self): + """Test that overriding to higher precision dtypes in compound dtypes works correctly.""" + base_dtype = [ + DtypeSpec(name="col1", dtype="int32", doc="First column"), + DtypeSpec(name="col2", dtype="float32", doc="Second column"), + ] + base_dataset = DatasetSpec(data_type_def="BaseCompound", dtype=base_dtype, doc="Base compound dataset") + + # Create an extension that overrides col2 with higher precision + ext_dtype = [DtypeSpec(name="col2", dtype="float64", doc="Second column with higher precision")] + ext_dataset = DatasetSpec( + data_type_inc="BaseCompound", + data_type_def="ExtCompound", + dtype=ext_dtype, + doc="Extended compound dataset", + ) + + # Resolve the extension (no namespace needed for this test) + ext_dataset.resolve_inc_spec(base_dataset, None) + + # Check that col2 was overridden with higher precision + col2 = next(col for col in ext_dataset.dtype if col.name == "col2") + self.assertEqual(col2.dtype, "float64") + self.assertEqual(col2.doc, "Second column with higher precision") + + def test_resolve_inc_spec_cpd_dtype_override_lower_precision_error(self): + """Test that overriding to lower precision dtypes in compound dtypes raises an error.""" + base_dtype = [DtypeSpec(name="col1", dtype="float64", doc="First column")] + base_dataset = DatasetSpec(data_type_def="BaseCompound", dtype=base_dtype, doc="Base compound dataset") + + # Create an extension that tries to override col1 with lower precision + ext_dtype = [DtypeSpec(name="col1", dtype="float32", doc="First column with lower precision")] + ext_dataset = DatasetSpec( + data_type_inc="BaseCompound", + data_type_def="ExtCompound", + dtype=ext_dtype, + doc="Extended compound dataset", + ) + + # This should raise an error + msg = "Cannot extend float64 to float32" + with self.assertRaisesWith(ValueError, msg): + ext_dataset.resolve_inc_spec(base_dataset, None) + + def test_resolve_inc_spec_cpd_dtype_override_incompatible_dtype_error(self): + """Test that overriding to incompatible dtypes in compound dtypes raises an error.""" + base_dtype = [DtypeSpec(name="col1", dtype="float64", doc="First column")] + base_dataset = DatasetSpec(data_type_def="BaseCompound", dtype=base_dtype, doc="Base compound dataset") + + # Create an extension that tries to override col1 with incompatible dtype + ext_dtype = [DtypeSpec(name="col1", dtype="text", doc="First column with incompatible dtype")] + ext_dataset = DatasetSpec( + data_type_inc="BaseCompound", + data_type_def="ExtCompound", + dtype=ext_dtype, + doc="Extended compound dataset", + ) + + # This should raise an error + msg = "Cannot extend float64 to text" + with self.assertRaisesWith(ValueError, msg): + ext_dataset.resolve_inc_spec(base_dataset, None) + + def test_resolve_inc_spec_compound_to_simple_dtype_error(self): + """Test error when trying to extend compound dtype to simple dtype.""" + # Base with compound dtype + base_dtype = [DtypeSpec(name="col1", dtype="int", doc="Column 1")] + base_dataset = DatasetSpec(data_type_def="BaseCompound", dtype=base_dtype, 
doc="Base dataset") + + # Extension with simple dtype + ext_dataset = DatasetSpec( + data_type_inc="BaseCompound", + data_type_def="ExtSimple", + dtype="int", + doc="Extended dataset", + ) + + # This should raise an error + msg = "Cannot extend compound data type to simple data type" + with self.assertRaisesWith(ValueError, msg): + ext_dataset.resolve_inc_spec(base_dataset, None) + + def test_resolve_inc_spec_simple_to_compound_dtype_error(self): + """Test error when trying to extend simple dtype to compound dtype.""" + # Base with simple dtype + base_dataset = DatasetSpec(data_type_def="BaseSimple", dtype="int", doc="Base dataset") + + # Extension with compound dtype + ext_dtype = [DtypeSpec(name="col1", dtype="int", doc="Column 1")] + ext_dataset = DatasetSpec( + data_type_inc="BaseSimple", + data_type_def="ExtCompound", + dtype=ext_dtype, + doc="Extended dataset", + ) + + # This should raise an error + msg = "Cannot extend simple data type to compound data type" + with self.assertRaisesWith(ValueError, msg): + ext_dataset.resolve_inc_spec(base_dataset, None) + + def test_resolve_inc_spec_ref_dtype_same(self): + """Test that ref dtypes are resolved correctly.""" + base_dataset = DatasetSpec( + data_type_def="BaseWithRef", + dtype=RefSpec(target_type="OtherType", reftype="object"), + doc="Base dataset with ref dtype", + ) + + ext_dataset = DatasetSpec( + data_type_inc="BaseWithRef", + data_type_def="ExtWithRef", + dtype=RefSpec(target_type="OtherType", reftype="object"), + doc="Extended dataset with same ref dtype", + ) + + # Resolve the extension (no namespace needed for this test) + ext_dataset.resolve_inc_spec(base_dataset, None) + + # Check that dtype is inherited + self.assertEqual(ext_dataset.dtype, RefSpec(target_type="OtherType", reftype="object")) + + def test_resolve_inc_spec_ref_dtype_to_simple_error(self): + """Test that resolving ref dtypes mismatches raises an error.""" + base_dataset = DatasetSpec( + data_type_def="BaseWithRef", + dtype=RefSpec(target_type="AType", reftype="object"), + doc="Base dataset with ref dtype", + ) + + ext_dataset = DatasetSpec( + data_type_inc="BaseWithRef", + data_type_def="ExtWithRef", + dtype="int", + doc="Extended dataset with int dtype", + ) + + # Resolve the extension + msg = "Cannot extend {'target_type': 'AType', 'reftype': 'object'} to int" + with self.assertRaisesWith(ValueError, msg): + ext_dataset.resolve_inc_spec(base_dataset, None) + + def test_resolve_inc_spec_simple_to_ref_dtype_error(self): + """Test that resolving ref dtypes mismatches raises an error.""" + base_dataset = DatasetSpec( + data_type_def="BaseWithRef", + dtype="int", + doc="Base dataset", + ) + + ext_dataset = DatasetSpec( + data_type_inc="BaseWithRef", + data_type_def="ExtWithRef", + dtype=RefSpec(target_type="AType", reftype="object"), + doc="Extended dataset with a ref dtype", + ) + + # Resolve the extension + msg = "Cannot extend int to {'target_type': 'AType', 'reftype': 'object'}" + with self.assertRaisesWith(ValueError, msg): + ext_dataset.resolve_inc_spec(base_dataset, None) + + def test_resolve_inc_spec_override_higher_precision(self): + """Test that overriding to higher precision dtypes works correctly.""" + base_dataset = DatasetSpec(data_type_def="BaseCompound", dtype="int32", doc="Base dataset") + + # Create an extension that overrides BaseCompound with higher precision + ext_dataset = DatasetSpec( + data_type_inc="BaseCompound", + data_type_def="ExtCompound", + dtype="int64", + doc="Extended dataset", + ) + + # Resolve the extension (no namespace needed 
for this test) + ext_dataset.resolve_inc_spec(base_dataset, None) + + # Check that the dtype was overridden with higher precision + self.assertEqual(ext_dataset.dtype, "int64") + + def test_resolve_inc_spec_override_lower_precision_error(self): + """Test that overriding to lower precision dtypes raises an error.""" + base_dataset = DatasetSpec(data_type_def="BaseCompound", dtype="int64", doc="Base dataset") + + # Create an extension that overrides BaseCompound with lower precision + ext_dataset = DatasetSpec( + data_type_inc="BaseCompound", + data_type_def="ExtCompound", + dtype="int32", + doc="Extended dataset", + ) + + # This should raise an error + msg = "Cannot extend int64 to int32" + with self.assertRaisesWith(ValueError, msg): + ext_dataset.resolve_inc_spec(base_dataset, None) + + def test_resolve_inc_spec_override_incompatible_dtype_error(self): + """Test that overriding to an incompatible dtype raises an error.""" + base_dataset = DatasetSpec(data_type_def="BaseCompound", dtype="int64", doc="Base dataset") + + # Create an extension that overrides BaseCompound with incompatible dtype + ext_dataset = DatasetSpec( + data_type_inc="BaseCompound", + data_type_def="ExtCompound", + dtype="text", + doc="Extended dataset", + ) + + # This should raise an error + msg = "Cannot extend int64 to text" + with self.assertRaisesWith(ValueError, msg): + ext_dataset.resolve_inc_spec(base_dataset, None) + + def test_resolve_inc_spec_override_numeric_to_numeric_dtype(self): + """Test that overriding a numeric dtype to numeric dtype works correctly.""" + # numeric is a special case that needs to be handled specially + base_dataset = DatasetSpec(data_type_def="BaseCompound", dtype="numeric", doc="Base dataset") + + # Create an extension that overrides BaseCompound with a compatible dtype + ext_dataset = DatasetSpec( + data_type_inc="BaseCompound", + dtype="numeric", + doc="Extended dataset", + ) + + ext_dataset.resolve_inc_spec(base_dataset, None) + self.assertEqual(ext_dataset.dtype, "numeric") + + def test_resolve_inc_spec_override_numeric_to_float_dtype(self): + """Test that overriding a numeric dtype to float dtype works correctly.""" + base_dataset = DatasetSpec(data_type_def="BaseCompound", dtype="numeric", doc="Base dataset") + + # Create an extension that overrides BaseCompound with a compatible dtype + ext_dataset = DatasetSpec( + data_type_inc="BaseCompound", + dtype="float32", + doc="Extended dataset", + ) + + ext_dataset.resolve_inc_spec(base_dataset, None) + self.assertEqual(ext_dataset.dtype, "float32") + + def test_resolve_inc_spec_shape_dims_inheritance(self): + """Test that shape and dims are inherited correctly.""" + base_dataset = DatasetSpec( + data_type_def="BaseWithShape", + dtype="int", + dims=("x", "y"), + shape=(None, 3), + doc="Base dataset with shape", + ) + + ext_dataset = DatasetSpec( + data_type_inc="BaseWithShape", + data_type_def="ExtWithShape", + doc="Extended dataset", + ) + + # Resolve the extension (no namespace needed for this test) + ext_dataset.resolve_inc_spec(base_dataset, None) + + # Check that shape and dims are inherited + self.assertEqual(ext_dataset.shape, (None, 3)) + self.assertEqual(ext_dataset.dims, ("x", "y")) + + # test without data_type_def + ext_dataset2 = DatasetSpec( + data_type_inc="BaseWithShape", + doc="Extended dataset", + ) + + # Resolve the extension (no namespace needed for this test) + ext_dataset2.resolve_inc_spec(base_dataset, None) + + # Check that shape and dims are inherited + self.assertEqual(ext_dataset2.shape, (None, 3)) + 
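+
+    # Sketch of the dtype rule the override tests above exercise (assumed
+    # semantics, inferred from the asserted error messages): an extension may
+    # keep a dtype or widen it, but never narrow it or change its kind:
+    #
+    #     base = DatasetSpec(data_type_def="B", dtype="int32", doc="base")
+    #     ext = DatasetSpec(data_type_inc="B", data_type_def="E", dtype="int64", doc="ext")
+    #     ext.resolve_inc_spec(base, None)  # OK: int32 -> int64 widens
+    #     # dtype="int16" or dtype="text" here would raise ValueError instead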
+
+    def test_resolve_inc_spec_shape_dims_inheritance(self):
+        """Test that shape and dims are inherited correctly."""
+        base_dataset = DatasetSpec(
+            data_type_def="BaseWithShape",
+            dtype="int",
+            dims=("x", "y"),
+            shape=(None, 3),
+            doc="Base dataset with shape",
+        )
+
+        ext_dataset = DatasetSpec(
+            data_type_inc="BaseWithShape",
+            data_type_def="ExtWithShape",
+            doc="Extended dataset",
+        )
+
+        # Resolve the extension (no namespace needed for this test)
+        ext_dataset.resolve_inc_spec(base_dataset, None)
+
+        # Check that shape and dims are inherited
+        self.assertEqual(ext_dataset.shape, (None, 3))
+        self.assertEqual(ext_dataset.dims, ("x", "y"))
+
+        # test without data_type_def
+        ext_dataset2 = DatasetSpec(
+            data_type_inc="BaseWithShape",
+            doc="Extended dataset",
+        )
+
+        # Resolve the extension (no namespace needed for this test)
+        ext_dataset2.resolve_inc_spec(base_dataset, None)
+
+        # Check that shape and dims are inherited
+        self.assertEqual(ext_dataset2.shape, (None, 3))
+        self.assertEqual(ext_dataset2.dims, ("x", "y"))
+
+    def test_resolve_inc_spec_shape_extension_error(self):
+        """Test error when trying to extend to an incompatible shape."""
+        # Base with 2D shape
+        base_dataset = DatasetSpec(data_type_def="Base2D", dtype="int", shape=(None, 3), doc="Base dataset")
+
+        # Extension with 3D shape (more dimensions)
+        ext_dataset = DatasetSpec(
+            data_type_inc="Base2D",
+            data_type_def="Ext3D",
+            dtype="int",
+            shape=(None, 3, 4),
+            doc="Extended dataset",
+        )
+
+        # This should raise an error
+        msg = "Cannot extend shape (None, 3) to (None, 3, 4)"
+        with self.assertRaisesWith(ValueError, msg):
+            ext_dataset.resolve_inc_spec(base_dataset, None)
+
+    # TODO: re-enable when this is implemented
+    # def test_resolve_inc_spec_shape_list_extension(self):
+    #     """Test trying to restrict a list of allowed shapes."""
+    #     # Base with two allowed shapes
+    #     base_dataset = DatasetSpec(
+    #         data_type_def="Base2D",
+    #         dtype="int",
+    #         shape=((None, 3), (None, None, 3)),
+    #         doc="Base dataset",
+    #     )
+
+    #     # Extension with one of the allowed shapes
+    #     ext_dataset = DatasetSpec(
+    #         data_type_inc="Base2D",
+    #         data_type_def="Ext3D",
+    #         dtype="int",
+    #         shape=(None, None, 3),
+    #         doc="Extended dataset",
+    #     )
+
+    #     ext_dataset.resolve_inc_spec(base_dataset, None)
+    #     self.assertEqual(ext_dataset.shape, (None, None, 3))
+
+    # TODO: re-enable when this is implemented
+    # def test_resolve_inc_spec_shape_list_extension_error(self):
+    #     """Test error when trying to extend a list of allowed shapes."""
+    #     # Base with two allowed shapes
+    #     base_dataset = DatasetSpec(
+    #         data_type_def="Base2D",
+    #         dtype="int",
+    #         shape=((None, 3), (None, None, 3)),
+    #         doc="Base dataset",
+    #     )
+
+    #     # Extension with not one of the allowed shapes
+    #     ext_dataset1 = DatasetSpec(
+    #         data_type_inc="Base2D",
+    #         data_type_def="Ext3D",
+    #         dtype="int",
+    #         shape=(None,),
+    #         doc="Extended dataset",
+    #     )
+
+    #     msg = r"Cannot extend shape \(None, 3\), \(None, None, 3\) to \(None,\)"
+    #     with self.assertRaisesWith(ValueError, msg):
+    #         ext_dataset1.resolve_inc_spec(base_dataset, None)
+
+    #     # Extension with not one of the allowed shapes
+    #     ext_dataset2 = DatasetSpec(
+    #         data_type_inc="Base2D",
+    #         data_type_def="Ext3D",
+    #         dtype="int",
+    #         shape=(None, 2),
+    #         doc="Extended dataset",
+    #     )
+
+    #     msg = r"Cannot extend shape \(None, 3\), \(None, None, 3\) to \(None, 2\)"
+    #     with self.assertRaisesWith(ValueError, msg):
+    #         ext_dataset2.resolve_inc_spec(base_dataset, None)
+
+    #     # Extension with not one of the allowed shapes
+    #     ext_dataset3 = DatasetSpec(
+    #         data_type_inc="Base2D",
+    #         data_type_def="Ext3D",
+    #         dtype="int",
+    #         shape=((None, 4), (None, None, 2)),
+    #         doc="Extended dataset",
+    #     )
+
+    #     msg = r"Cannot extend shape \(None, 3\), \(None, None, 3\) to \(None, 4\), \(None, None, 2\)"
+    #     with self.assertRaisesWith(ValueError, msg):
+    #         ext_dataset3.resolve_inc_spec(base_dataset, None)
+
+    def test_resolve_inc_spec_default_value_inheritance(self):
+        """Test that default_value is inherited correctly."""
+        base_dataset = DatasetSpec(
+            data_type_def="BaseWithValue",
+            dtype="int",
+            default_value=42,
+            doc="Base dataset with value",
+        )
+
+        ext_dataset = DatasetSpec(
+            data_type_inc="BaseWithValue",
+            data_type_def="ExtWithValue",
+            dtype="int",
+            doc="Extended dataset",
+        )
+
+        # Resolve the extension (no namespace needed for this test)
+        ext_dataset.resolve_inc_spec(base_dataset, None)
+
+        # Check that default_value is inherited
+        self.assertEqual(ext_dataset.default_value, 42)
+
+    def test_resolve_inc_spec_value_inheritance(self):
+        """Test that value is inherited correctly."""
+        base_dataset = DatasetSpec(
+            data_type_def="BaseWithValue",
+            dtype="int",
+            value=42,
+            doc="Base dataset with value",
+        )
+
+        ext_dataset = DatasetSpec(
+            data_type_inc="BaseWithValue",
+            data_type_def="ExtWithValue",
+            dtype="int",
+            doc="Extended dataset",
+        )
+
+        # Resolve the extension (no namespace needed for this test)
+        ext_dataset.resolve_inc_spec(base_dataset, None)
+
+        # Check that value is inherited
+        self.assertEqual(ext_dataset.value, 42)
+
+    def test_resolve_inc_spec_default_value_value_inheritance(self):
+        """Test that default_value and value are inherited correctly."""
+        base_dataset = DatasetSpec(
+            data_type_def="BaseWithValue",
+            dtype="int",
+            default_value=42,
+            doc="Base dataset with value",
+        )
+
+        ext_dataset = DatasetSpec(
+            data_type_inc="BaseWithValue",
+            data_type_def="ExtWithValue",
+            value=100,
+            dtype="int",
+            doc="Extended dataset",
+        )
+
+        # Resolve the extension (no namespace needed for this test)
+        ext_dataset.resolve_inc_spec(base_dataset, None)
+
+        # Check that default_value is nullified by the fixed value
+        self.assertEqual(ext_dataset.value, 100)
+        self.assertIsNone(ext_dataset.default_value)
+
+    def test_resolve_inc_spec_wrong_spec_type(self):
+        """Test error when trying to resolve a DatasetSpec with a GroupSpec and vice versa."""
+        base_group = GroupSpec(data_type_def="BaseGroup", doc="Base group")
+        ext_dataset = DatasetSpec(
+            data_type_inc="BaseGroup",
+            data_type_def="ExtDataset",
+            dtype="int",
+            doc="Extended dataset",
+        )
+
+        # This should raise an error from docval type checking
+        with self.assertRaises(TypeError):
+            ext_dataset.resolve_inc_spec(base_group, None)
+
+        base_dataset = DatasetSpec(data_type_def="BaseDataset", doc="Base dataset", dtype="int")
+        ext_group = GroupSpec(
+            data_type_inc="BaseDataset",
+            data_type_def="ExtGroup",
+            doc="Extended group",
+        )
+
+        # This should raise an error from docval type checking
+        with self.assertRaises(TypeError):
+            ext_group.resolve_inc_spec(base_dataset, None)
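+
+# The class below tests catalog-level resolution. The basic workflow it relies
+# on, sketched here for orientation (assumed from the tests themselves; the
+# file name is hypothetical):
+#
+#     catalog = NamespaceCatalog()
+#     catalog.load_namespaces("test.namespace.yaml", resolve=False)
+#     catalog.resolve_all_specs()  # resolve includes across all loaded namespaces
+#     spec = catalog.get_spec("test", "ExtType")
+#     assert spec.resolved and spec.inc_spec_resolved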
{"namespaces": [namespace]} + + with open(ns_path, "w") as f: + yaml_obj = yaml.YAML(typ="safe", pure=True) + yaml_obj.default_flow_style = False + yaml_obj.dump(json.loads(json.dumps(ns_file_dict)), f) + + return str(ns_path) + + def test_resolve_all_specs_simple(self): + """Test NamespaceCatalog.resolve_all_specs with simple inheritance.""" + # Create base and extension specs + base_spec = GroupSpec(data_type_def="BaseType", doc="Base group") + ext_spec = GroupSpec(data_type_inc="BaseType", data_type_def="ExtType", doc="Extended group") + + ns_path = self.create_test_namespace("test", [base_spec, ext_spec]) + + # Load namespace without resolution + self.ns_catalog.load_namespaces(ns_path, resolve=False) + + # Check that specs are not resolved + ext_loaded = self.ns_catalog.get_spec("test", "ExtType") + self.assertFalse(ext_loaded.resolved) + self.assertFalse(ext_loaded.inc_spec_resolved) + + # Resolve all specs + self.ns_catalog.resolve_all_specs() + + # Check that specs are now resolved + ext_loaded = self.ns_catalog.get_spec("test", "ExtType") + self.assertTrue(ext_loaded.resolved) + self.assertTrue(ext_loaded.inc_spec_resolved) + + def test_resolve_all_specs_cross_namespace(self): + """Test resolve_all_specs with cross-namespace inheritance.""" + # Create base namespace + base_spec = GroupSpec(data_type_def="BaseType", doc="Base group") + base_ns_path = self.create_test_namespace("base", [base_spec]) + + # Create extension namespace that depends on base + ext_spec = GroupSpec(data_type_inc="BaseType", data_type_def="ExtType", doc="Extended group") + ext_ns_path = self.create_test_namespace("ext", [ext_spec], dependencies=["base"]) + + # Load both namespaces + self.ns_catalog.load_namespaces(base_ns_path, resolve=False) + self.ns_catalog.load_namespaces(ext_ns_path, resolve=False) + + # Check that extension spec is not resolved + ext_loaded = self.ns_catalog.get_spec("ext", "ExtType") + self.assertFalse(ext_loaded.resolved) + + # Resolve all specs + self.ns_catalog.resolve_all_specs() + + # Check that extension spec is now resolved + ext_loaded = self.ns_catalog.get_spec("ext", "ExtType") + self.assertTrue(ext_loaded.resolved) + self.assertTrue(ext_loaded.inc_spec_resolved) + + def test_resolve_all_specs_complex_hierarchy(self): + """Test resolve_all_specs with complex inheritance hierarchy.""" + # Create a chain of inheritance: Base -> Mid -> Ext + base_spec = GroupSpec(data_type_def="BaseType", doc="Base group") + mid_spec = GroupSpec(data_type_inc="BaseType", data_type_def="MidType", doc="Mid group") + ext_spec = GroupSpec(data_type_inc="MidType", data_type_def="ExtType", doc="Extended group") + + ns_path = self.create_test_namespace("test", [base_spec, mid_spec, ext_spec]) + + # Load namespace without resolution + self.ns_catalog.load_namespaces(ns_path, resolve=False) + + # Resolve all specs + self.ns_catalog.resolve_all_specs() + + # Check that all specs are resolved + for type_name in ["BaseType", "MidType", "ExtType"]: + spec = self.ns_catalog.get_spec("test", type_name) + self.assertTrue(spec.resolved) + if spec.data_type_inc: + self.assertTrue(spec.inc_spec_resolved) + + def test_circular_hierarchy(self): + """Test that circular dependencies are detected during load_namespaces and raises an error.""" + # Create circular dependency: A -> B -> A + spec_a = GroupSpec(data_type_inc="TypeB", data_type_def="TypeA", doc="Group A") + spec_b = GroupSpec(data_type_inc="TypeA", data_type_def="TypeB", doc="Group B") + ns_path = self.create_test_namespace("test", [spec_a, spec_b]) + + # 
Load namespace without resolution + msg = "Circular reference detected in type hierarchy for TypeA" + with self.assertRaisesWith(ValueError, msg): + self.ns_catalog.load_namespaces(ns_path, resolve=False) + + def test_resolve_all_specs_named_group_contains_itself(self): + """Test that a group containing itself as a subspec is detected and allowed.""" + # Create circular dependency: Spec A contains named subspec B, B includes A + # Also create the very odd case where the root A has attr1 defined as 1D numeric but all nested A's + # have attr1 defined as int (and the 1D shape should propagate down) + # TODO: This is a rare case that is not yet supported due to infinite recursion between GroupSpec.build_spec and + # GroupSpec.build_const_args + # spec_a = GroupSpec( + # data_type_def="TypeA", + # doc="Group A", + # groups=[ + # GroupSpec( + # data_type_inc="TypeA", + # name="subgroup", + # doc="Subgroup", + # attributes=[AttributeSpec(name="attr1", dtype="int", doc="Attribute 1")], + # ), + # ], + # attributes=[AttributeSpec(name="attr1", dtype="numeric", doc="Attribute 1", shape=[None,])], + # ) + + # ns_path = self.create_test_namespace("test", [spec_a]) + + # # Load namespace without resolution + # self.ns_catalog.load_namespaces(ns_path) # should resolve without error + + # # Check that spec is resolved + # spec = self.ns_catalog.get_spec("test", "TypeA") + # self.assertTrue(spec.resolved) + # self.assertTrue(spec.inc_spec_resolved) + # self.assertEqual(spec.attributes[0].dtype, "numeric") + # self.assertEqual(spec.groups[0].attributes[0].dtype, "int") + # self.assertEqual(spec.attributes[0].shape, [None,]) + # self.assertEqual(spec.groups[0].attributes[0].shape, [None,]) + + def test_resolve_all_specs_with_subspecs(self): + """Test resolve_all_specs with specs that have subspecs.""" + # Create base dataset + base_dataset = DatasetSpec(data_type_def="BaseDataset", dtype="int", doc="Base dataset") + + # Create group with dataset subspec + group_with_dataset = GroupSpec( + data_type_def="GroupWithDataset", + doc="Group with dataset", + datasets=[DatasetSpec(data_type_inc="BaseDataset", name="sub_dataset", doc="Sub dataset")], + ) + + ns_path = self.create_test_namespace("test", [base_dataset, group_with_dataset]) + + # Load namespace with resolution + self.ns_catalog.load_namespaces(ns_path) + + # Check that group and its subspecs are resolved + group_spec = self.ns_catalog.get_spec("test", "GroupWithDataset") + self.assertTrue(group_spec.resolved) + + # Check that the dataset subspec is also resolved + dataset_subspec = group_spec.datasets[0] + self.assertTrue(dataset_subspec.resolved) + self.assertTrue(dataset_subspec.inc_spec_resolved) + + def test_resolve_all_specs_invalid_inc_spec_error(self): + """Test error when a spec extends a non-existent type.""" + # Create a spec that extends a non-existent type + invalid_spec = GroupSpec( + data_type_inc="NonExistentType", + data_type_def="InvalidType", + doc="Invalid group", + ) + + ns_path = self.create_test_namespace("test", [invalid_spec]) + + # Load namespace without resolution + self.ns_catalog.load_namespaces(ns_path, resolve=False) + + # Attempting to resolve should raise an error + msg = "No specification for 'NonExistentType' in namespace 'test'" + with self.assertRaisesWith(ValueError, msg): + self.ns_catalog.resolve_all_specs() + + def test_resolve_all_specs_complex(self): + # DatasetSpec D1 has 1D, 2D, or 3D shape, any dtype, no attributes + # GroupSpec A1 contains a DatasetSpec D1 dataset named "col" + # GroupSpec A2 extends A1 + 
# A2 specifies that the dataset "col" should have 1D or 2D shape, dtype int32, and an extra attribute attr1 + # Check that after resolution, A2/col has attributes [attr1] + # GroupSpec A3 extends A2 + # A3 specifies that the dataset "col" should have 1D shape, dtype int64, and an extra attribute attr2 + # Check that after resolution, A3/col has attributes [attr2, attr1] + # DatasetSpec D2 extends D1 that specifies shape 2D or 3D shape, dtype float64, and an extra attribute attr3 + # GroupSpec A4 extends A1 + # A4 specifies that the dataset "col" should be of type D2 and have 2D shape and an extra attribute attr4 + # Check that after resolution, A4/col has attributes [attr4, attr3] and dtype float64 + d1 = DatasetSpec( + data_type_def="D1", + name="col", + dtype=None, + shape=((None,), (None, None), (None, None, None)), + doc="Dataset D1", + ) + a1 = GroupSpec( + data_type_def="A1", + datasets=[DatasetSpec(name="col", data_type_inc="D1", doc="D1 col in A1")], + doc="Group A1", + ) + a2 = GroupSpec( + data_type_def="A2", + data_type_inc="A1", + datasets=[ + DatasetSpec( + name="col", + data_type_inc="D1", + shape=((None,), (None, None)), + dtype="int32", + attributes=[AttributeSpec(name="attr1", dtype="int", doc="Attribute 1")], + doc="Extended D1 col in A2 with restrictions", + ) + ], + doc="Group A2", + ) + a3 = GroupSpec( + data_type_def="A3", + data_type_inc="A2", + datasets=[ + DatasetSpec( + name="col", + data_type_inc="D1", + shape=(None,), + dtype="int64", + attributes=[AttributeSpec(name="attr2", dtype="text", doc="Attribute 2")], + doc="Extended D1 col in A3 with further restrictions", + ) + ], + doc="Group A3", + ) + d2 = DatasetSpec( + data_type_def="D2", + data_type_inc="D1", + shape=((None, None), (None, None, None)), + dtype="float64", + attributes=[AttributeSpec(name="attr3", dtype="float", doc="Attribute 3")], + doc="Dataset D2 extending D1 with restrictions", + ) + a4 = GroupSpec( + data_type_def="A4", + data_type_inc="A1", + datasets=[ + DatasetSpec( + name="col", + data_type_inc="D2", # D2 extends D1 so this is valid + shape=(None, None), + attributes=[AttributeSpec(name="attr4", dtype="float", doc="Attribute 4")], + doc="D2 col in A4 with restrictions", + ) + ], + doc="Group A4", + ) + ns_path = self.create_test_namespace("test", [d1, d2, a1, a2, a3, a4]) + self.ns_catalog.load_namespaces(ns_path) # check no errors + + a2_loaded = self.ns_catalog.get_spec("test", "A2") + self.assertEqual( + a2_loaded.datasets[0].attributes, + (AttributeSpec(name="attr1", dtype="int", doc="Attribute 1"),), + ) + + a3_loaded = self.ns_catalog.get_spec("test", "A3") + self.assertEqual( + a3_loaded.datasets[0].attributes, + ( + AttributeSpec(name="attr2", dtype="text", doc="Attribute 2"), + AttributeSpec(name="attr1", dtype="int", doc="Attribute 1"), + ), + ) + + a4_loaded = self.ns_catalog.get_spec("test", "A4") + self.assertTrue(a4_loaded.datasets[0].resolved) + self.assertEqual( + a4_loaded.datasets[0].attributes, + ( + AttributeSpec(name="attr4", dtype="float", doc="Attribute 4"), + # AttributeSpec(name="attr3", dtype="float", doc="Attribute 3"), # TODO this should exist + ), + ) + # self.assertEqual(a4_loaded.datasets[0].dtype, "float64") # TODO this should work + + def test_resolve_all_specs_subspec_dataset_data_type_mismatch_error1(self): + # DatasetSpec D1 has 1D, 2D, or 3D shape, any dtype, no attributes + # GroupSpec A1 contains a DatasetSpec D1 dataset named "col" + # GroupSpec A2 extends A1 + # A2 specifies that the dataset "col" does not have a data type - this should cause an 
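+
+    # For orientation, create_test_namespace("test", [...]) above writes two
+    # YAML files of roughly this shape (a sketch; exact key order and defaults
+    # may differ):
+    #
+    #     # test.specs.yaml
+    #     groups:
+    #     - data_type_def: A1
+    #       doc: Group A1
+    #     datasets:
+    #     - data_type_def: D1
+    #       doc: Dataset D1
+    #
+    #     # test.namespace.yaml
+    #     namespaces:
+    #     - name: test
+    #       version: 1.0.0
+    #       doc: Test namespace test
+    #       schema:
+    #       - source: test.specs.yaml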
error + # because A1/col is of type D1 + d1 = DatasetSpec( + data_type_def="D1", + name="col", + dtype=None, + shape=((None,), (None, None), (None, None, None)), + doc="Dataset D1", + ) + a1 = GroupSpec( + data_type_def="A1", + datasets=[DatasetSpec(name="col", data_type_inc="D1", doc="D1 col in A1")], + doc="Group A1", + ) + a2 = GroupSpec( + data_type_def="A2", + data_type_inc="A1", + datasets=[ + DatasetSpec( + # no data_type_inc here should cause an error + name="col", + shape=((None,), (None, None), (None, None, None)), + dtype="int32", + doc="Column in A2 that conflicts with A1/col data type", + ) + ], + doc="Group A2", + ) + ns_path = self.create_test_namespace("test", [d1, a1, a2]) + self.ns_catalog.load_namespaces(ns_path, resolve=False) + + msg = ( + "Cannot resolve included dataset spec 'col' with data_type_inc 'D1' because a dataset spec with the " + "same name already exists with data_type_inc 'None', and data type 'None' is not a child type of data " + "type 'D1'." + ) + with self.assertRaisesWith(ValueError, msg): + self.ns_catalog.resolve_all_specs() + + def test_resolve_all_specs_subspec_dataset_data_type_mismatch_error2(self): + # DatasetSpecs D1 and D2 are not related + # GroupSpec A1 contains a DatasetSpec D1 dataset named "col" + # GroupSpec A2 extends A1 + # A2 specifies that the dataset "col" has data type D2 that does not inherit from D1 - this should cause an + # error because A1/col is of type D1 + d1 = DatasetSpec( + data_type_def="D1", + name="col", + dtype=None, + shape=(None,), + doc="Dataset D1", + ) + d2 = DatasetSpec( + data_type_def="D2", + name="col", + dtype=None, + shape=(None,), + doc="Dataset D2", + ) + a1 = GroupSpec( + data_type_def="A1", + datasets=[ + DatasetSpec( + name="col", + data_type_inc="D1", + doc="D1 col in A1", + ) + ], + doc="Group A1", + ) + a2 = GroupSpec( + data_type_def="A2", + data_type_inc="A1", + datasets=[ + DatasetSpec( + name="col", + data_type_inc="D2", # conflicting data type inc + doc="Column in A2 that conflicts with A1/col data type", + ) + ], + doc="Group A2", + ) + ns_path = self.create_test_namespace("test", [d1, d2, a1, a2]) + self.ns_catalog.load_namespaces(ns_path, resolve=False) + + msg = ( + "Cannot resolve included dataset spec 'col' with data_type_inc 'D1' because a dataset spec with the " + "same name already exists with data_type_inc 'D2', and data type 'D2' is not a child type of data " + "type 'D1'." 
+ ) + with self.assertRaisesWith(ValueError, msg): + self.ns_catalog.resolve_all_specs() + + def test_resolve_all_specs_subspec_group_data_type_mismatch_error1(self): + # GroupSpec D1 + # GroupSpec A1 contains a GroupSpec D1 group named "col" + # GroupSpec A2 extends A1 + # A2 specifies that the group "col" does not have a data type - this should cause an error + # because A1/col is of type D1 + d1 = GroupSpec( + data_type_def="D1", + name="col", + doc="Group D1", + ) + a1 = GroupSpec( + data_type_def="A1", + groups=[GroupSpec(name="col", data_type_inc="D1", doc="D1 col in A1")], + doc="Group A1", + ) + a2 = GroupSpec( + data_type_def="A2", + data_type_inc="A1", + groups=[ + GroupSpec( + # no data_type_inc here should cause an error + name="col", + doc="Column in A2 that conflicts with A1/col data type", + ) + ], + doc="Group A2", + ) + ns_path = self.create_test_namespace("test", [d1, a1, a2]) + self.ns_catalog.load_namespaces(ns_path, resolve=False) + + msg = ( + "Cannot resolve included group spec 'col' with data_type_inc 'D1' because a group spec with the " + "same name already exists with data_type_inc 'None', and data type 'None' is not a child type of data " + "type 'D1'." + ) + with self.assertRaisesWith(ValueError, msg): + self.ns_catalog.resolve_all_specs() + + def test_resolve_all_specs_subspec_group_data_type_mismatch_error2(self): + # GroupSpecs D1 and D2 are not related + # GroupSpec A1 contains a GroupSpec D1 group named "col" + # GroupSpec A2 extends A1 + # A2 specifies that the group "col" has data type D2 that does not inherit from D1 - this should cause an + # error because A1/col is of type D1 + d1 = GroupSpec( + data_type_def="D1", + name="col", + doc="Group D1", + ) + d2 = GroupSpec( + data_type_def="D2", + name="col", + doc="Group D2", + ) + a1 = GroupSpec( + data_type_def="A1", + groups=[ + GroupSpec( + name="col", + data_type_inc="D1", + doc="D1 col in A1", + ) + ], + doc="Group A1", + ) + a2 = GroupSpec( + data_type_def="A2", + data_type_inc="A1", + groups=[ + GroupSpec( + name="col", + data_type_inc="D2", # conflicting data type inc + doc="Column in A2 that conflicts with A1/col data type", + ) + ], + doc="Group A2", + ) + ns_path = self.create_test_namespace("test", [d1, d2, a1, a2]) + self.ns_catalog.load_namespaces(ns_path, resolve=False) + + msg = ( + "Cannot resolve included group spec 'col' with data_type_inc 'D1' because a group spec with the " + "same name already exists with data_type_inc 'D2', and data type 'D2' is not a child type of data " + "type 'D1'." + ) + with self.assertRaisesWith(ValueError, msg): + self.ns_catalog.resolve_all_specs() + + # TODO: this is a complex case that has not been observed in practice and is not yet supported + # def test_resolve_all_specs_complex_error(self): + # # DatasetSpec D1 has 1D, 2D, or 3D shape, any dtype, no attributes + # # GroupSpec A1 contains a DatasetSpec D1 dataset named "col" + # # GroupSpec A2 extends A1 + # # A2 specifies that the dataset "col" should have 1D or 2D shape, dtype int32, and an extra attribute attr1 + # # GroupSpec A5 extends A2 + # # A5 specifies that the dataset "col" should be of type D2. This will rarely happen. A2/col should be + # # brought in first when resolving A5's inc spec. Then the refinement of "col" in A5 to say that it should be + # # of type D2 should cause an error when it is found that D2's dtype is incompatible with A2/col's dtype. 
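+
+    # Common thread of the four mismatch tests above (assumed rule, inferred
+    # from the asserted messages): a named subspec in an extension may keep the
+    # parent's data_type_inc or narrow it to a child type; dropping it or
+    # switching to an unrelated type fails resolution, e.g.:
+    #
+    #     A1 defines "col" with data_type_inc="D1"
+    #     A2 (extends A1) redefines "col" with data_type_inc="D2", where D2 does
+    #     not inherit from D1  ->  ValueError from resolve_all_specs()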
+
+    # TODO: this is a complex case that has not been observed in practice and is not yet supported
+    # def test_resolve_all_specs_complex_error(self):
+    #     # DatasetSpec D1 has 1D, 2D, or 3D shape, any dtype, no attributes
+    #     # GroupSpec A1 contains a DatasetSpec D1 dataset named "col"
+    #     # GroupSpec A2 extends A1
+    #     # A2 specifies that the dataset "col" should have 1D or 2D shape, dtype int32, and an extra attribute attr1
+    #     # GroupSpec A5 extends A2
+    #     # A5 specifies that the dataset "col" should be of type D2. This will rarely happen. A2/col should be
+    #     # brought in first when resolving A5's inc spec. Then the refinement of "col" in A5 to say that it should be
+    #     # of type D2 should cause an error when it is found that D2's dtype is incompatible with A2/col's dtype.
+    #     d1 = DatasetSpec(
+    #         data_type_def="D1",
+    #         name="col",
+    #         dtype=None,
+    #         shape=((None,), (None, None), (None, None, None)),
+    #         doc="Dataset D1",
+    #     )
+    #     a1 = GroupSpec(
+    #         data_type_def="A1",
+    #         name="A1",
+    #         datasets=[DatasetSpec(name="col", data_type_inc="D1", doc="D1 col in A1")],
+    #         doc="Group A1",
+    #     )
+    #     a2 = GroupSpec(
+    #         data_type_def="A2",
+    #         data_type_inc="A1",
+    #         name="A2",
+    #         datasets=[
+    #             DatasetSpec(
+    #                 name="col",
+    #                 data_type_inc="D1",  # TODO test whether this is necessary
+    #                 shape=((None,), (None, None)),
+    #                 dtype="int32",
+    #                 attributes=[AttributeSpec(name="attr1", dtype="int", doc="Attribute 1")],
+    #                 doc="Extended D1 col in A2 with restrictions and new attribute attr1",
+    #             )
+    #         ],
+    #         doc="Group A2",
+    #     )
+    #     d2 = DatasetSpec(
+    #         data_type_def="D2",
+    #         data_type_inc="D1",
+    #         shape=((None, None), (None, None, None)),
+    #         dtype="float64",
+    #         attributes=[AttributeSpec(name="attr3", dtype="float", doc="Attribute 3")],
+    #         doc="Dataset D2 extending D1 with restrictions and new attribute attr3",
+    #     )
+    #     a5 = GroupSpec(
+    #         data_type_def="A5",
+    #         data_type_inc="A2",
+    #         name="A5",
+    #         datasets=[
+    #             # A5 defines "col" to be of type D2 (dtype float64, shape (2D, 3D)), which is incompatible with
+    #             # A2/col (dtype int32, shape (1D, 2D)), and that should cause an error during resolution
+    #             DatasetSpec(
+    #                 name="col",
+    #                 data_type_inc="D2",
+    #                 doc="D2 col in A5",
+    #             )
+    #         ],
+    #         doc="Group A5",
+    #     )
+    #     ns_path = self.create_test_namespace("test", [d1, d2, a1, a2, a5])
+    #     self.ns_catalog.load_namespaces(ns_path, resolve=False)
+
+    #     msg = ("Could not resolve all specifications. The following specifications could not be resolved: "
+    #            "A5, col in A5")
+    #     with self.assertRaisesWith(RuntimeError, msg):
+    #         self.ns_catalog.resolve_all_specs()
+
+    def test_resolve_inc_spec_ref_dtype_subtype(self):
+        """Test that resolving a ref dtype subtype raises no error."""
+        g1 = GroupSpec(data_type_def="G1", doc="A group type")
+        g2 = GroupSpec(data_type_def="G2", data_type_inc="G1", doc="A group subtype")
+
+        d1 = DatasetSpec(
+            data_type_def="D1",
+            dtype=RefSpec(target_type="G1", reftype="object"),
+            doc="Base dataset with ref dtype",
+        )
+
+        d2 = DatasetSpec(
+            data_type_inc="D1",
+            data_type_def="D2",
+            dtype=RefSpec(target_type="G2", reftype="object"),
+            doc="Extended dataset with ref dtype that is a subtype of D1's ref dtype",
+        )
+
+        ns_path = self.create_test_namespace("test", [g1, g2, d1, d2])
+        self.ns_catalog.load_namespaces(ns_path)  # check no errors
+
+        self.assertEqual(d2.dtype, RefSpec(target_type="G2", reftype="object"))
+
+    # TODO: this is not yet implemented
+    # def test_resolve_inc_spec_ref_dtype_mismatch_error(self):
+    #     """Test that resolving mismatched ref dtypes raises an error."""
+    #     # Not sure if this should be tested through NamespaceCatalog.resolve_all_specs or on
+    #     # DatasetSpec.resolve_inc_spec directly
+    #     g1 = GroupSpec(data_type_def="G1", doc="A group type")
+    #     h1 = GroupSpec(data_type_def="H1", doc="An unrelated group type")
+
+    #     d1 = DatasetSpec(
+    #         data_type_def="D1",
+    #         dtype=RefSpec(target_type="G1", reftype="object"),
+    #         doc="Base dataset with ref dtype",
+    #     )
+
+    #     d2 = DatasetSpec(
+    #         data_type_inc="D1",
+    #         data_type_def="D2",
+    #         dtype=RefSpec(target_type="H1", reftype="object"),
+    #         doc="Extended dataset with ref dtype that is not a subtype of D1's ref dtype",
+    #     )
+
+    #     ns_path = self.create_test_namespace("test", [g1, h1, d1, d2])
+    #     self.ns_catalog.load_namespaces(ns_path, resolve=False)
+
+    #     msg = "TODO"
+    #     with self.assertRaisesWith(ValueError, msg):
+    #         self.ns_catalog.resolve_all_specs()
+
+    #     self.assertEqual(d2.dtype, RefSpec(target_type="H1", reftype="object"))
diff --git a/tests/unit/spec_tests/test_spec_write.py b/tests/unit/spec_tests/test_spec_write.py
index 4bcebe8bf..1b4dbe883 100644
--- a/tests/unit/spec_tests/test_spec_write.py
+++ b/tests/unit/spec_tests/test_spec_write.py
@@ -1,9 +1,8 @@
 import datetime
 import os
 
-from hdmf.spec.namespace import SpecNamespace, NamespaceCatalog
-from hdmf.spec.spec import GroupSpec
-from hdmf.spec.write import NamespaceBuilder, YAMLSpecWriter, export_spec
+from hdmf.spec import SpecNamespace, NamespaceCatalog, GroupSpec, DatasetSpec, NamespaceBuilder, export_spec
+from hdmf.spec.write import YAMLSpecWriter
 from hdmf.testing import TestCase
 
 
@@ -33,15 +32,11 @@ def setUp(self):
 
         ext2 = GroupSpec('An extension of a DataSeries interface',
                          attributes=[],
-                         datasets=[],
+                         datasets=[DatasetSpec(doc='test', dtype='float', name='testdata')],
                          groups=[],
                          data_type_inc='MyDataSeries',
                          data_type_def='MyExtendedMyDataSeries')
-        ext2.add_dataset(doc='test',
-                         dtype='float',
-                         name='testdata')
-
         self.data_types = [ext1, ext2]
 
         # add the extension
 
diff --git a/tests/unit/test_container.py b/tests/unit/test_container.py
index e644be922..0fb9d7dd9 100644
--- a/tests/unit/test_container.py
+++ b/tests/unit/test_container.py
@@ -8,8 +8,8 @@
 from hdmf.utils import docval
 from hdmf.common import DynamicTable, VectorData, DynamicTableRegion
 from hdmf.backends.hdf5.h5tools import HDF5IO
-from hdmf.backends.io import HDMFIO
+from tests.unit.helpers.io import DoNothingIO
 
 
 class Subcontainer(Container):
     pass
@@ -25,6 +25,7 @@ def __init__(self, **kwargs):
 
 
 class TestHERDManager(TestCase):
+
     def test_get_and_set_resources(self):
         em = HERDManager()
         er = HERD()
@@ -35,21 +36,6 @@ def test_get_and_set_resources(self):
         er_get = em.external_resources
         self.assertEqual(er, er_get)
 
-    def test_link_resources_deprecated(self):
-        em = HERDManager()
-        er = HERD()
-        with self.assertWarns(DeprecationWarning):
-            em.link_resources(herd=er)
-        self.assertEqual(em.external_resources, er)
-
-    def test_get_linked_resources_deprecated(self):
-        em = HERDManager()
-        er = HERD()
-        em.external_resources = er
-        with self.assertWarns(DeprecationWarning):
-            herd = em.get_linked_resources()
-        self.assertEqual(herd, er)
-
 
 class TestContainer(TestCase):
@@ -535,25 +521,7 @@ def test_repr_html_hdmf_io(self):
         dataset = io._file.create_dataset(name='my_dataset', data=np.array([1, 2, 3, 4], dtype=np.int64))
         obj = self.ContainerWithData(data=dataset, str="hello")
 
-        class OtherIO(HDMFIO):
-
-            @staticmethod
-            def can_read(path):
-                pass
-
-            def read_builder(self):
-                pass
-
-            def write_builder(self, **kwargs):
-                pass
-
-            def open(self):
-                pass
-
-            def close(self):
-                pass
-
-        obj.read_io = OtherIO()
+        obj.read_io = DoNothingIO()
 
         expected_html_table = (
             'class="container-fields">