diff --git a/src/hdmf/build/classgenerator.py b/src/hdmf/build/classgenerator.py
index dcdb49b77..00f20675f 100644
--- a/src/hdmf/build/classgenerator.py
+++ b/src/hdmf/build/classgenerator.py
@@ -1,11 +1,12 @@
 from copy import deepcopy
 from datetime import datetime, date
 from collections.abc import Callable
+import warnings
 
 import numpy as np
 
 from ..container import Container, Data, MultiContainerInterface
-from ..spec import AttributeSpec, LinkSpec, RefSpec, GroupSpec
+from ..spec import AttributeSpec, LinkSpec, RefSpec, GroupSpec, DatasetSpec
 from ..spec.spec import BaseStorageSpec, ZERO_OR_MANY, ONE_OR_MANY
 from ..utils import docval, getargs, ExtenderMeta, get_docval, popargs, AllowPositional
 
@@ -79,7 +80,7 @@ def generate_class(self, **kwargs):
                     break  # each field_spec should be processed by only one generator
         for class_generator in self.__custom_generators:
-            class_generator.post_process(classdict, bases, docval_args, spec)
+            class_generator.post_process(classdict, bases, docval_args, spec, type_map)
 
         for class_generator in reversed(self.__custom_generators):
             # go in reverse order so that base init is added first and
@@ -252,7 +253,7 @@ def process_field_spec(cls, classdict, docval_args, parent_cls, attr_name, not_i
         docval_arg = dict(
             name=attr_name,
             doc=field_spec.doc,
-            type=cls._get_type(field_spec, type_map)
+            type=dtype,
         )
         shape = getattr(field_spec, 'shape', None)
         if shape is not None:
@@ -285,12 +286,13 @@ def _add_to_docval_args(cls, docval_args, arg, err_if_present=False):
             docval_args.append(arg)
 
     @classmethod
-    def post_process(cls, classdict, bases, docval_args, spec):
+    def post_process(cls, classdict, bases, docval_args, spec, type_map):
         """Convert classdict['__fields__'] to tuple and update docval args for a fixed name and default name.
 
         :param classdict: The class dictionary to convert with '__fields__' key (or a different bases[0]._fieldsname)
        :param bases: The list of base classes.
        :param docval_args: The dict of docval arguments.
        :param spec: The spec for the container class to generate.
+       :param type_map: The type map to use.
        """
        # convert classdict['__fields__'] from list to tuple if present
        for b in bases:
@@ -308,6 +310,33 @@ def post_process(cls, classdict, bases, docval_args, spec):
         # set default name in docval args if provided
         cls._set_default_name(docval_args, spec.default_name)
 
+        if isinstance(spec, DatasetSpec):
+            # handle the data field specially
+            # fixed and default values are not supported for datasets
+            if getattr(spec, 'value', None) is not None:
+                warnings.warn(
+                    "Generating a class for a dataset with a fixed value is not supported. "
+                    "The fixed value will be ignored."
+                )
+            if getattr(spec, 'default_value', None) is not None:
+                warnings.warn(
+                    "Generating a class for a dataset with a default value is not supported. "
+                    "The default value will be ignored."
+                )
+
+            data_docval_arg = dict(name='data', doc=spec.doc)
+            shape = spec.shape
+            if shape is None and spec.dims is None:
+                if spec.dtype is not None:
+                    dtype = cls._get_type_from_spec_dtype(spec.dtype)
+                else:
+                    dtype = ('scalar_data', 'array_data', 'data')
+            else:
+                dtype = ('array_data', 'data')
+            data_docval_arg['shape'] = shape
+            data_docval_arg['type'] = dtype
+            cls._add_to_docval_args(docval_args, data_docval_arg)
+
     @classmethod
     def _get_attrs_not_to_set_init(cls, classdict, parent_docval_args):
         return parent_docval_args
@@ -413,12 +442,13 @@ def process_field_spec(cls, classdict, docval_args, parent_cls, attr_name, not_i
             cls._add_to_docval_args(docval_args, docval_arg)
 
     @classmethod
-    def post_process(cls, classdict, bases, docval_args, spec):
+    def post_process(cls, classdict, bases, docval_args, spec, type_map):
         """Add MultiContainerInterface to the list of base classes.
 
         :param classdict: The class dictionary.
         :param bases: The list of base classes.
         :param docval_args: The dict of docval arguments.
         :param spec: The spec for the container class to generate.
+        :param type_map: The type map to use.
         """
         if '__clsconf__' in classdict:
             # do not add MCI as a base if a base is already a subclass of MultiContainerInterface
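
Note (editorial): the new `DatasetSpec` branch in `post_process` above decides how the generated class types its `data` argument. A scalar spec (no `shape` and no `dims`) with a known `dtype` gets a concrete Python type from `_get_type_from_spec_dtype`; everything else falls back to the docval macros. Below is a minimal standalone sketch of the docval argument this produces for a scalar `'datetime'` dataset, under the assumption that `_get_type_from_spec_dtype('datetime')` resolves to `(datetime, date)`:

```python
from datetime import datetime, date

# Hypothetical spec values, mirroring the post_process logic above.
spec_doc = 'a scalar datetime dataset'  # spec.doc
spec_shape = None                       # spec.shape
spec_dims = None                        # spec.dims
spec_dtype = 'datetime'                 # spec.dtype

data_docval_arg = dict(name='data', doc=spec_doc)
if spec_shape is None and spec_dims is None:
    if spec_dtype is not None:
        # assumption: _get_type_from_spec_dtype('datetime') maps to (datetime, date)
        dtype = (datetime, date)
    else:
        dtype = ('scalar_data', 'array_data', 'data')
else:
    dtype = ('array_data', 'data')
data_docval_arg['shape'] = spec_shape
data_docval_arg['type'] = dtype

print(data_docval_arg)
# roughly: {'name': 'data', 'doc': 'a scalar datetime dataset', 'shape': None,
#           'type': (<class 'datetime.datetime'>, <class 'datetime.date'>)}
```
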
diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py
index e01eabf11..af5e4ba1a 100644
--- a/src/hdmf/build/objectmapper.py
+++ b/src/hdmf/build/objectmapper.py
@@ -1,3 +1,4 @@
+import datetime
 import logging
 import re
 import warnings
@@ -92,6 +93,17 @@ def _ascii(s):
         raise ValueError("Expected unicode or ascii string, got %s" % type(s))
 
 
+def _isoformat(s):
+    """
+    A helper function for converting to ISO format
+    """
+    if isinstance(s, (datetime.datetime, datetime.date)):
+        return s.isoformat()
+    elif isinstance(s, str):  # probably already converted to isoformat
+        return s
+    else:
+        raise ValueError("Expected datetime, got %s" % type(s))
+
 class ObjectMapper(metaclass=ExtenderMeta):
     '''A class for mapping between Spec objects and AbstractContainer attributes
@@ -125,8 +137,8 @@ class ObjectMapper(metaclass=ExtenderMeta):
         "utf-8": _unicode,
         "ascii": _ascii,
         "bytes": _ascii,
-        "isodatetime": _ascii,
-        "datetime": _ascii,
+        "isodatetime": _isoformat,
+        "datetime": _isoformat,
     }
 
     __no_convert = set()
@@ -230,8 +242,8 @@ def convert_dtype(cls, spec, value, spec_dtype=None) -> tuple:  # noqa: C901
                 else:
                     ret = value.astype('U')
                 ret_dtype = "utf8"
-            elif spec_dtype_type is _ascii:
-                ret = value.astype('S')
+            elif spec_dtype_type in (_ascii, _isoformat):
+                ret = value.astype('S')  # this works for datetime objects
                 ret_dtype = "ascii"
             else:
                 dtype_func, warning_msg = cls.__resolve_numeric_dtype(value.dtype, spec_dtype_type)
@@ -245,7 +257,7 @@ def convert_dtype(cls, spec, value, spec_dtype=None) -> tuple:  # noqa: C901
             if len(value) == 0:
                 if spec_dtype_type is _unicode:
                     ret_dtype = 'utf8'
-                elif spec_dtype_type is _ascii:
+                elif spec_dtype_type in (_ascii, _isoformat):
                     ret_dtype = 'ascii'
                 else:
                     ret_dtype = spec_dtype_type
@@ -261,15 +273,16 @@ def convert_dtype(cls, spec, value, spec_dtype=None) -> tuple:  # noqa: C901
             ret = value
             if spec_dtype_type is _unicode:
                 ret_dtype = "utf8"
-            elif spec_dtype_type is _ascii:
+            elif spec_dtype_type in (_ascii, _isoformat):
                 ret_dtype = "ascii"
             else:
                 ret_dtype, warning_msg = cls.__resolve_numeric_dtype(value.dtype, spec_dtype_type)
         else:
-            if spec_dtype_type in (_unicode, _ascii):
-                ret_dtype = 'ascii'
+            if spec_dtype_type in (_unicode, _ascii, _isoformat):
                 if spec_dtype_type is _unicode:
                     ret_dtype = 'utf8'
+                else:
+                    ret_dtype = 'ascii'
                 ret = spec_dtype_type(value)
             else:
                 dtype_func, warning_msg = cls.__resolve_numeric_dtype(type(value), spec_dtype_type)
@@ -343,6 +356,8 @@ def __check_edgecases(cls, spec, value, spec_dtype):  # noqa: C901
             elif np.issubdtype(value.dtype, np.dtype('O')):
                 # Only variable-length strings should ever appear as generic objects.
                 # Everything else should have a well-defined type
+                # NOTE: a datetime object would be converted to a string by this check
+                # but users should not provide arrays of datetime objects to an untyped/generic spec
                 ret_dtype = 'utf8'
             else:
                 ret_dtype = value.dtype.type
@@ -357,7 +372,7 @@ def __check_edgecases(cls, spec, value, spec_dtype):  # noqa: C901
             cls.__check_convert_numeric(ret_dtype)
             if ret_dtype is str:
                 ret_dtype = 'utf8'
-            elif ret_dtype is bytes:
+            elif ret_dtype in (bytes, datetime.datetime, datetime.date):
                 ret_dtype = 'ascii'
             return value, ret_dtype
         if isinstance(spec_dtype, RefSpec):
@@ -636,6 +651,7 @@ def __get_data_type(cls, spec):
 
     def __convert_string(self, value, spec):
         """Convert string types to the specified dtype."""
+        # TODO: combine this with the logic in convert_dtype
 
         def __apply_string_type(value, string_type):
             # NOTE: if a user passes a h5py.Dataset that is not wrapped with a hdmf.utils.StrDataset,
             # then this conversion may not be correct. Users should unpack their string h5py.Datasets
@@ -660,7 +676,7 @@ def __apply_string_type(value, string_type):
                 string_type = str
             elif 'ascii' in spec.dtype:
                 string_type = bytes
-            elif 'isodatetime' in spec.dtype:
+            elif 'datetime' in spec.dtype:
                 def string_type(x):
                     return x.isoformat()  # method works for both date and datetime
         if string_type is not None:
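
Note (editorial): `_isoformat` is the piece that lets the dtype map above serialize datetimes as strings, while passing through values that were already converted. A standalone copy of the helper, for illustration only:

```python
import datetime

def _isoformat(s):
    # copy of the helper added in the diff above, for illustration
    if isinstance(s, (datetime.datetime, datetime.date)):
        return s.isoformat()
    elif isinstance(s, str):  # probably already converted to isoformat
        return s
    else:
        raise ValueError("Expected datetime, got %s" % type(s))

print(_isoformat(datetime.datetime(2020, 1, 1, 0, 0, 0)))  # 2020-01-01T00:00:00
print(_isoformat(datetime.date(2020, 1, 1)))               # 2020-01-01
print(_isoformat('2020-01-01T00:00:00'))                   # passed through unchanged
```
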
diff --git a/src/hdmf/common/io/table.py b/src/hdmf/common/io/table.py
index 379553c07..8cc56bdfb 100644
--- a/src/hdmf/common/io/table.py
+++ b/src/hdmf/common/io/table.py
@@ -99,12 +99,13 @@ def process_field_spec(cls, classdict, docval_args, parent_cls, attr_name, not_i
         # do not add DynamicTable columns to init docval
 
     @classmethod
-    def post_process(cls, classdict, bases, docval_args, spec):
+    def post_process(cls, classdict, bases, docval_args, spec, type_map):
         """Convert classdict['__columns__'] to tuple.
 
         :param classdict: The class dictionary.
         :param bases: The list of base classes.
         :param docval_args: The dict of docval arguments.
         :param spec: The spec for the container class to generate.
+        :param type_map: The type map to use.
         """
         # convert classdict['__columns__'] from list to tuple if present
         columns = classdict.get('__columns__')
""" # convert classdict['__columns__'] from list to tuple if present columns = classdict.get('__columns__') diff --git a/src/hdmf/utils.py b/src/hdmf/utils.py index 4244d776b..8c51a8c09 100644 --- a/src/hdmf/utils.py +++ b/src/hdmf/utils.py @@ -1,5 +1,6 @@ import collections import copy as _copy +import datetime import re import types import warnings @@ -12,7 +13,7 @@ __macros = { 'array_data': [np.ndarray, list, tuple, h5py.Dataset], - 'scalar_data': [str, int, float, bytes, bool], + 'scalar_data': [str, int, float, bytes, bool, datetime.datetime, datetime.date], 'data': [] } diff --git a/tests/unit/build_tests/test_classgenerator.py b/tests/unit/build_tests/test_classgenerator.py index 16136a8da..432970304 100644 --- a/tests/unit/build_tests/test_classgenerator.py +++ b/tests/unit/build_tests/test_classgenerator.py @@ -1,3 +1,4 @@ +import datetime import numpy as np import os import shutil @@ -35,7 +36,7 @@ def process_field_spec(cls, classdict, docval_args, parent_cls, attr_name, not_i classdict.setdefault('process_field_spec', list()).append(attr_name) @classmethod - def post_process(cls, classdict, bases, docval_args, spec): + def post_process(cls, classdict, bases, docval_args, spec, type_map): classdict['post_process'] = True spec = GroupSpec( @@ -510,6 +511,36 @@ def test_multi_container_spec_one_or_more_ok(self): ) assert len(multi.bars) == 1 + def test_get_class_include_scalar_datetime_attribute(self): + """Test that get_class resolves a scalar datetime attribute.""" + goo_spec = GroupSpec( + doc='A test group that has a scalar datetime attribute', + data_type_def='Goo', + attributes=[ + AttributeSpec( + name='attr1', + doc='a scalar datetime attribute', + dtype='datetime', + ), + ] + ) + self.spec_catalog.register_spec(goo_spec, 'extension.yaml') + goo_cls = self.type_map.get_dt_container_cls('Goo', CORE_NAMESPACE) + goo = goo_cls(name='my_goo', attr1=datetime.datetime(2020, 1, 1, 0, 0, 0)) + self.assertEqual(goo.attr1, datetime.datetime(2020, 1, 1, 0, 0, 0)) + + def test_get_class_include_scalar_datetime_dataset(self): + """Test that get_class resolves a scalar datetime dataset.""" + goo_spec = DatasetSpec( + doc='A test dataset with dtype datetime', + data_type_def='Goo', + dtype='datetime', + ) + self.spec_catalog.register_spec(goo_spec, 'extension.yaml') + goo_cls = self.type_map.get_dt_container_cls('Goo', CORE_NAMESPACE) + goo = goo_cls(name='my_goo', data=datetime.datetime(2020, 1, 1, 0, 0, 0)) + self.assertEqual(goo.data, datetime.datetime(2020, 1, 1, 0, 0, 0)) + class TestDynamicContainerFixedValue(TestCase): @@ -1321,7 +1352,7 @@ def test_post_process_fixed_name(self): docval_args = [{'name': 'name', 'type': str, 'doc': 'name'}, {'name': 'attr1', 'type': ('array_data', 'data'), 'doc': 'a string attribute', 'shape': [None]}] - CustomClassGenerator.post_process(classdict, bases, docval_args, spec) + CustomClassGenerator.post_process(classdict, bases, docval_args, spec, self.type_map) expected = [{'name': 'attr1', 'type': ('array_data', 'data'), 'doc': 'a string attribute', 'shape': [None]}] @@ -1348,7 +1379,7 @@ def test_post_process_default_name(self): docval_args = [{'name': 'name', 'type': str, 'doc': 'name'}, {'name': 'attr1', 'type': ('array_data', 'data'), 'doc': 'a string attribute', 'shape': [None]}] - CustomClassGenerator.post_process(classdict, bases, docval_args, spec) + CustomClassGenerator.post_process(classdict, bases, docval_args, spec, self.type_map) expected = [{'name': 'name', 'type': str, 'doc': 'name', 'default': 'MyBaz'}, {'name': 'attr1', 'type': 
diff --git a/tests/unit/build_tests/test_classgenerator.py b/tests/unit/build_tests/test_classgenerator.py
index 16136a8da..432970304 100644
--- a/tests/unit/build_tests/test_classgenerator.py
+++ b/tests/unit/build_tests/test_classgenerator.py
@@ -1,3 +1,4 @@
+import datetime
 import numpy as np
 import os
 import shutil
@@ -35,7 +36,7 @@ def process_field_spec(cls, classdict, docval_args, parent_cls, attr_name, not_i
                 classdict.setdefault('process_field_spec', list()).append(attr_name)
 
             @classmethod
-            def post_process(cls, classdict, bases, docval_args, spec):
+            def post_process(cls, classdict, bases, docval_args, spec, type_map):
                 classdict['post_process'] = True
 
         spec = GroupSpec(
@@ -510,6 +511,36 @@ def test_multi_container_spec_one_or_more_ok(self):
         )
         assert len(multi.bars) == 1
 
+    def test_get_class_include_scalar_datetime_attribute(self):
+        """Test that get_class resolves a scalar datetime attribute."""
+        goo_spec = GroupSpec(
+            doc='A test group that has a scalar datetime attribute',
+            data_type_def='Goo',
+            attributes=[
+                AttributeSpec(
+                    name='attr1',
+                    doc='a scalar datetime attribute',
+                    dtype='datetime',
+                ),
+            ]
+        )
+        self.spec_catalog.register_spec(goo_spec, 'extension.yaml')
+        goo_cls = self.type_map.get_dt_container_cls('Goo', CORE_NAMESPACE)
+        goo = goo_cls(name='my_goo', attr1=datetime.datetime(2020, 1, 1, 0, 0, 0))
+        self.assertEqual(goo.attr1, datetime.datetime(2020, 1, 1, 0, 0, 0))
+
+    def test_get_class_include_scalar_datetime_dataset(self):
+        """Test that get_class resolves a scalar datetime dataset."""
+        goo_spec = DatasetSpec(
+            doc='A test dataset with dtype datetime',
+            data_type_def='Goo',
+            dtype='datetime',
+        )
+        self.spec_catalog.register_spec(goo_spec, 'extension.yaml')
+        goo_cls = self.type_map.get_dt_container_cls('Goo', CORE_NAMESPACE)
+        goo = goo_cls(name='my_goo', data=datetime.datetime(2020, 1, 1, 0, 0, 0))
+        self.assertEqual(goo.data, datetime.datetime(2020, 1, 1, 0, 0, 0))
+
 
 class TestDynamicContainerFixedValue(TestCase):
 
@@ -1321,7 +1352,7 @@ def test_post_process_fixed_name(self):
         docval_args = [{'name': 'name', 'type': str, 'doc': 'name'},
                        {'name': 'attr1', 'type': ('array_data', 'data'), 'doc': 'a string attribute',
                         'shape': [None]}]
-        CustomClassGenerator.post_process(classdict, bases, docval_args, spec)
+        CustomClassGenerator.post_process(classdict, bases, docval_args, spec, self.type_map)
 
         expected = [{'name': 'attr1', 'type': ('array_data', 'data'), 'doc': 'a string attribute',
                      'shape': [None]}]
@@ -1348,7 +1379,7 @@ def test_post_process_default_name(self):
         docval_args = [{'name': 'name', 'type': str, 'doc': 'name'},
                        {'name': 'attr1', 'type': ('array_data', 'data'), 'doc': 'a string attribute',
                         'shape': [None]}]
-        CustomClassGenerator.post_process(classdict, bases, docval_args, spec)
+        CustomClassGenerator.post_process(classdict, bases, docval_args, spec, self.type_map)
 
         expected = [{'name': 'name', 'type': str, 'doc': 'name', 'default': 'MyBaz'},
                     {'name': 'attr1', 'type': ('array_data', 'data'), 'doc': 'a string attribute',
@@ -1450,7 +1481,7 @@ def test_post_process(self):
         )
         bases = [Bar]
         docval_args = []
-        MCIClassGenerator.post_process(classdict, bases, docval_args, multi_spec)
+        MCIClassGenerator.post_process(classdict, bases, docval_args, multi_spec, self.type_map)
         self.assertEqual(bases, [Bar, MultiContainerInterface])
 
     def test_post_process_already_multi(self):
@@ -1478,7 +1509,7 @@ class Multi1(MultiContainerInterface):
         )
         bases = [Multi1]
         docval_args = []
-        MCIClassGenerator.post_process(classdict, bases, docval_args, multi_spec)
+        MCIClassGenerator.post_process(classdict, bases, docval_args, multi_spec, self.type_map)
         self.assertEqual(bases, [Multi1])
 
     def test_post_process_container(self):
@@ -1505,5 +1536,5 @@ class Multi1(MultiContainerInterface):
         )
         bases = [Container]
         docval_args = []
-        MCIClassGenerator.post_process(classdict, bases, docval_args, multi_spec)
+        MCIClassGenerator.post_process(classdict, bases, docval_args, multi_spec, self.type_map)
         self.assertEqual(bases, [MultiContainerInterface, Container])
diff --git a/tests/unit/test_hdf5_roundtrip_extensions.py b/tests/unit/test_hdf5_roundtrip_extensions.py
new file mode 100644
index 000000000..19508e9ec
--- /dev/null
+++ b/tests/unit/test_hdf5_roundtrip_extensions.py
@@ -0,0 +1,130 @@
+import datetime
+import os
+import shutil
+import tempfile
+
+from hdmf.backends.hdf5 import HDF5IO
+from hdmf.common import get_type_map
+from hdmf.testing import TestCase
+from hdmf.spec import AttributeSpec, DatasetSpec, GroupSpec, SpecCatalog, SpecNamespace, NamespaceCatalog
+from hdmf.build import BuildManager, TypeMap
+from tests.unit.helpers.utils import CORE_NAMESPACE
+
+
+class TestExtensionDatetime(TestCase):
+
+    def setUp(self):
+        self.dataset1_spec = DatasetSpec(
+            data_type_def='TestDatasetNoDtypeInDef',
+            data_type_inc='Data',
+            doc='a test Dataset without a specified dtype',  # this is overridden where it is used
+        )
+        self.dataset2_spec = DatasetSpec(
+            data_type_def='TestDatasetWithDtypeInDef',
+            data_type_inc='Data',
+            doc='a test Dataset with a specified dtype',  # this is overridden where it is used
+            dtype='datetime',
+            attributes=[
+                AttributeSpec(
+                    name='my_attr',
+                    doc='a scalar datetime attribute',
+                    dtype='datetime',
+                    required=False,
+                )
+            ]
+        )
+
+        self.group_spec = GroupSpec(
+            data_type_def='TestGroup',
+            data_type_inc='Container',
+            doc='A test group that contains a dataset',
+            datasets=[
+                # NOTE: these are all scalar datasets
+                DatasetSpec(
+                    data_type_inc='TestDatasetNoDtypeInDef',
+                    name='my_data1',
+                    doc='a test Dataset without a specified dtype where the dtype is added in the data_type_inc',
+                    dtype='datetime',
+                    quantity='?',
+                ),
+                DatasetSpec(
+                    data_type_inc='TestDatasetWithDtypeInDef',
+                    name='my_data2',
+                    doc='a test Dataset with a specified dtype where the dtype is specified in the data_type_def',
+                    quantity='?',
+                ),
+                DatasetSpec(
+                    name='my_data3',
+                    doc='a test Dataset with no data_type_inc',
+                    dtype='datetime',
+                    quantity='?',
+                ),
+            ],
+            attributes=[
+                AttributeSpec(
+                    name='my_attr',
+                    doc='a scalar datetime attribute',
+                    dtype='datetime',
+                    required=False,
+                )
+            ]
+        )
+
+        from hdmf.spec.write import YAMLSpecWriter
+        writer = YAMLSpecWriter(outdir='.')
+
+        self.spec_catalog = SpecCatalog()
+        self.spec_catalog.register_spec(self.dataset1_spec, 'test.yaml')
+        self.spec_catalog.register_spec(self.dataset2_spec, 'test.yaml')
+        self.spec_catalog.register_spec(self.group_spec, 'test.yaml')
+        self.namespace = SpecNamespace(
+            doc='a test namespace',
+            name=CORE_NAMESPACE,
+            schema=[
+                dict(namespace='hdmf-common'),
+                dict(source='test.yaml'),
+            ],
+            version='0.1.0',
+            catalog=self.spec_catalog
+        )
+
+        self.test_dir = tempfile.mkdtemp()
+        spec_fpath = os.path.join(self.test_dir, 'test.yaml')
+        namespace_fpath = os.path.join(self.test_dir, 'test-namespace.yaml')
+        writer.write_spec(dict(datasets=[self.dataset1_spec, self.dataset2_spec], groups=[self.group_spec]), spec_fpath)
+        writer.write_namespace(self.namespace, namespace_fpath)
+        self.namespace_catalog = NamespaceCatalog()
+        # We only use Container and Data from hdmf-common
+        hdmf_typemap = get_type_map()
+        self.type_map = TypeMap(self.namespace_catalog)
+        self.type_map.merge(hdmf_typemap, ns_catalog=True)
+        self.type_map.load_namespaces(namespace_fpath)
+        self.manager = BuildManager(self.type_map)
+
+        self.TestDatasetNoDtypeInDef = self.type_map.get_dt_container_cls('TestDatasetNoDtypeInDef', CORE_NAMESPACE)
+        self.TestDatasetWithDtypeInDef = self.type_map.get_dt_container_cls('TestDatasetWithDtypeInDef', CORE_NAMESPACE)
+        self.TestGroup = self.type_map.get_dt_container_cls('TestGroup', CORE_NAMESPACE)
+
+    def tearDown(self) -> None:
+        shutil.rmtree(self.test_dir)
+
+    def test_roundtrip(self):
+        group = self.TestGroup(name='my_group')
+        group.my_data1 = self.TestDatasetNoDtypeInDef(name='my_data1', data=datetime.datetime(2020, 1, 1, 0, 0, 0))
+        group.my_data2 = self.TestDatasetWithDtypeInDef(
+            name='my_data2',
+            data=datetime.datetime(2020, 1, 1, 0, 0, 0),
+            my_attr=datetime.datetime(2020, 1, 1, 0, 0, 0),
+        )
+        group.my_data3 = datetime.datetime(2020, 1, 1, 0, 0, 0)
+        group.my_attr = datetime.datetime(2020, 1, 1, 0, 0, 0)
+
+        with HDF5IO(os.path.join(self.test_dir, 'test.h5'), 'w', manager=self.manager) as f:
+            f.write(group)
+
+        with HDF5IO(os.path.join(self.test_dir, 'test.h5'), 'r', manager=self.manager) as f:
+            group_read = f.read()
+            self.assertContainerEqual(group_read, group)
+            self.assertEqual(group_read.my_data1.data, datetime.datetime(2020, 1, 1, 0, 0, 0))
+            self.assertEqual(group_read.my_data2.data, datetime.datetime(2020, 1, 1, 0, 0, 0))
+            self.assertEqual(group_read.my_data3.data, datetime.datetime(2020, 1, 1, 0, 0, 0))
+            self.assertEqual(group_read.my_data2.my_attr, datetime.datetime(2020, 1, 1, 0, 0, 0))
+            self.assertEqual(group_read.my_attr, datetime.datetime(2020, 1, 1, 0, 0, 0))
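
Note (editorial): given the `_isoformat` dtype mapping in objectmapper.py, each datetime written by the roundtrip test above should land on disk as an ascii ISO 8601 string. A hedged sketch of a manual check with h5py; the HDF5 paths are assumptions based on the container names used in the test, not something the diff guarantees:

```python
import h5py

# Assumed layout: the top-level group is written at the file root under its name.
with h5py.File('test.h5', 'r') as f:
    print(f['my_group/my_data3'][()])      # expected: b'2020-01-01T00:00:00'
    print(f['my_group'].attrs['my_attr'])  # expected: ISO string (bytes or str, per storage)
```
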