-
Notifications
You must be signed in to change notification settings - Fork 25
Modify dset/attr builders based on sidecar JSON #677
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Draft
rly
wants to merge
27
commits into
dev
Choose a base branch
from
sidecar_mods
base: dev
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Draft
Changes from 3 commits
Commits
Show all changes
27 commits
Select commit
Hold shift + click to select a range
9e4ba60
Add first at reading sidecar modifications
rly 1f53919
Pretty-print json
rly dafc650
Update to work if json is not present
rly de5fefe
Refactor BuilderUpdater functionality to sep class
rly 3f1f8f2
Merge branch 'dev' into sidecar_mods
rly 036fa1e
Handle changing sub-dataset attr, add sidecar fields
rly b4b5419
Use semantic versioning in version label
rly 151c69d
Add jsonschema for sidecar json
rly 32d1397
Add validation to read
rly 933ef40
Update to use new schema. More tests needed
rly 393e5b3
Update tests (more to do)
rly 2fda06d
Add description, author, and contact to sidecar JSON, fix tests
rly 28c6893
Merge branch 'dev' into sidecar_mods
rly 6da168d
Merge branch 'dev' into sidecar_mods
rly 618ab1c
Merge branch 'dev' of https://github.com/hdmf-dev/hdmf into sidecar_mods
rly 393ffdf
Merge branch 'sidecar_mods' of https://github.com/hdmf-dev/hdmf into …
rly 729e989
Update documentation, refactor, and add test cases
rly ecd244d
Update
rly 168f4a9
Add link to sidecar json schema
rly 1c57573
Add examples to doc
rly 62ed248
Update sidecar.rst
rly 7078ca1
Merge branch 'dev' into sidecar_mods
rly 9faf7a2
Update sidecar.rst
rly 827d61d
Update docs/source/sidecar.rst
rly ef22dc5
Update sidecar.rst
rly 2bb7185
Merge branch 'dev' into sidecar_mods
rly fee5245
Merge branch 'dev' into sidecar_mods
rly File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,3 +1,4 @@ | ||
| import json | ||
| import logging | ||
| import os.path | ||
| import warnings | ||
|
|
@@ -514,6 +515,7 @@ def read_builder(self): | |
| if f_builder is None: | ||
| f_builder = self.__read_group(self.__file, ROOT_NAME, ignore=ignore) | ||
| self.__read[self.__file] = f_builder | ||
| self.update_builder_from_sidecar(f_builder) | ||
| return f_builder | ||
|
|
||
| def __set_written(self, builder): | ||
|
|
@@ -1549,3 +1551,63 @@ def set_dataio(cls, **kwargs): | |
| """ | ||
| cargs, ckwargs = fmt_docval_args(H5DataIO.__init__, kwargs) | ||
| return H5DataIO(*cargs, **ckwargs) | ||
|
|
||
@docval(
    {'name': 'f_builder', 'type': GroupBuilder, 'doc': 'A GroupBuilder representing the main file object.'},
    returns='The same input GroupBuilder, now modified.',
    rtype='GroupBuilder'
)
def update_builder_from_sidecar(self, **kwargs):
    """Apply the changes listed in the sidecar JSON file to the builder tree, in place.

    The sidecar file is expected at the same path as the open HDF5 file but with a
    ``.json`` suffix. If no sidecar file exists, the builder is returned unmodified.
    Each entry in the sidecar's "versions" list holds a list of "changes"; each change
    identifies a builder by object ID and either an attribute or sub-dataset by
    relative path, and supplies the new value.

    :raises ValueError: if an object ID from the sidecar is not found in the file, or
        a relative path cannot be resolved to an attribute or dataset.
    """
    f_builder = getargs('f_builder', kwargs)
    sidecar_path = Path(self.__file.filename).with_suffix('.json')
    if not sidecar_path.is_file():
        # no sidecar -- return the builder so the declared rtype holds on all paths
        # (previously this returned None here, inconsistent with the docval rtype)
        return f_builder

    with open(sidecar_path, 'r') as f:
        versions = json.load(f)['versions']

    builder_map = self.__get_object_id_map(f_builder)
    for version_dict in versions:
        # default to an empty list so a version with no "changes" key is a no-op
        # instead of raising TypeError from iterating None
        for change_dict in version_dict.get('changes', []):
            object_id = change_dict['object_id']
            relative_path = change_dict.get('relative_path')
            new_value = change_dict['new_value']

            if object_id not in builder_map:
                # clearer than the bare KeyError a direct lookup would raise
                raise ValueError("Object ID '%s' from sidecar file '%s' does not match any "
                                 "object in the file." % (object_id, sidecar_path))
            builder = builder_map[object_id]
            if relative_path in builder.attributes:
                # TODO handle different dtypes
                builder.attributes[relative_path] = new_value
            elif isinstance(builder, GroupBuilder):
                obj = builder.get(relative_path)
                if isinstance(obj, DatasetBuilder):  # update data in sub-DatasetBuilder
                    self.__update_dataset_builder(obj, new_value)
                else:
                    # bug fix: the original omitted the % argument, leaving a literal
                    # '%s' in the raised message
                    raise ValueError("Relative path '%s' not recognized as a dataset or attribute"
                                     % relative_path)
            else:  # DatasetBuilder has object_id
                if not relative_path:  # update data
                    self.__update_dataset_builder(builder, new_value)
                else:
                    # bug fix: supply the missing format argument
                    raise ValueError("Relative path '%s' not recognized as None or attribute"
                                     % relative_path)
            # TODO handle compound dtypes

    return f_builder
|
|
||
def __update_dataset_builder(self, dset_builder, value):
    """Replace the data of *dset_builder* with *value*, in place."""
    # TODO handle different dtypes
    dset_builder['data'] = value
|
|
||
def __get_object_id_map(self, builder):
    """Return a dict mapping object ID -> builder for every builder in the tree
    (rooted at *builder*) that carries an 'object_id' attribute."""
    mapping = dict()
    pending = [builder]
    while pending:
        current = pending.pop()
        if 'object_id' in current.attributes:
            mapping[current.attributes['object_id']] = current
        if isinstance(current, GroupBuilder):
            # descend into both sub-groups and datasets of a group
            pending.extend(current.groups.values())
            pending.extend(current.datasets.values())
    return mapping
Empty file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,162 @@ | ||
| import json | ||
| import os | ||
|
|
||
| from hdmf import Container | ||
| from hdmf.backends.hdf5.h5tools import HDF5IO | ||
| from hdmf.build import BuildManager, TypeMap, ObjectMapper | ||
| from hdmf.spec import AttributeSpec, DatasetSpec, GroupSpec, SpecCatalog, SpecNamespace, NamespaceCatalog | ||
| from hdmf.testing import TestCase | ||
| from hdmf.utils import getargs, docval | ||
|
|
||
|
|
||
class TestBasic(TestCase):
    """Test that modifications listed in a sidecar JSON file are applied on read."""

    def setUp(self):
        # Write an HDF5 file containing a root Foo with a child sub-Foo, then write a
        # sidecar JSON file next to it (same stem, .json suffix) describing changes.
        self.h5_path = "./tests/unit/io_tests/test_sidecar.h5"
        foo2 = Foo('sub_foo', [-1, -2, -3], 'OLD', [-17])
        foo1 = Foo('foo1', [1, 2, 3], 'old', [17], foo2)
        with HDF5IO(self.h5_path, manager=_get_manager(), mode='w') as io:
            io.write(foo1)

        # version 2: change an attribute and the dataset data on the root Foo
        version2 = {
            "label": "version 2",
            "description": "change attr1 from 'old' to 'my experiment' and my_data from [1, 2, 3] to [4, 5, 6, 7]",
            "changes": [
                {
                    "object_id": foo1.object_id,
                    "relative_path": "attr1",
                    "new_value": "my experiment"
                },
                {
                    "object_id": foo1.object_id,
                    "relative_path": "my_data",
                    "new_value": [4, 5, 6, 7]
                }
            ]
        }

        # version 3: change the dataset data on the child sub-Foo
        version3 = {
            "label": "version 3",
            "description": "change sub_foo/my_data from [-1, -2, -3] to [[0]]",
            "changes": [
                {
                    "object_id": foo2.object_id,
                    "relative_path": "my_data",
                    "new_value": [[0]]
                }
            ]
        }

        sidecar = dict()
        sidecar["versions"] = [version2, version3]

        self.json_path = "./tests/unit/io_tests/test_sidecar.json"
        with open(self.json_path, 'w') as outfile:
            json.dump(sidecar, outfile, indent=4)

    def tearDown(self):
        # remove both the HDF5 file and its sidecar written by setUp
        if os.path.exists(self.h5_path):
            os.remove(self.h5_path)
        if os.path.exists(self.json_path):
            os.remove(self.json_path)

    def test_update_builder(self):
        """Reading the file should reflect all sidecar changes, applied in order."""
        io = HDF5IO(self.h5_path, 'r', manager=_get_manager())
        foo1 = io.read()
        assert foo1.attr1 == "my experiment"
        assert foo1.my_data == [4, 5, 6, 7]
        assert foo1.sub_foo.my_data == [[0]]
|
|
||
|
|
||
class Foo(Container):
    """A test container with a dataset, two attributes, and an optional child Foo."""

    @docval({'name': 'name', 'type': str, 'doc': 'the name of this Foo'},
            {'name': 'my_data', 'type': ('array_data', 'data'), 'doc': 'a 1-D integer dataset'},
            {'name': 'attr1', 'type': str, 'doc': 'a string attribute'},
            {'name': 'attr2', 'type': ('array_data', 'data'), 'doc': 'a 1-D integer attribute'},
            {'name': 'sub_foo', 'type': 'Foo', 'doc': 'a child Foo', 'default': None})
    def __init__(self, **kwargs):
        name, my_data, attr1, attr2, sub_foo = getargs('name', 'my_data', 'attr1', 'attr2', 'sub_foo', kwargs)
        super().__init__(name=name)
        self.__data = my_data
        self.__attr1 = attr1
        self.__attr2 = attr2
        self.__sub_foo = sub_foo
        if sub_foo is not None:
            assert sub_foo.name == 'sub_foo'  # on read mapping will not work otherwise
            self.__sub_foo.parent = self

    @property
    def my_data(self):
        """The 1-D integer dataset passed to the constructor."""
        return self.__data

    @property
    def attr1(self):
        """The string attribute passed to the constructor."""
        return self.__attr1

    @property
    def attr2(self):
        """The 1-D integer attribute passed to the constructor."""
        return self.__attr2

    @property
    def sub_foo(self):
        """The child Foo, or None if no child was given."""
        return self.__sub_foo
|
|
||
|
|
||
def _get_manager():
    """Build and return a BuildManager for a test namespace containing the Foo type.

    The Foo spec has an optional child Foo group named 'sub_foo', a 1-D integer
    dataset 'my_data' carrying a 1-D integer attribute 'attr2', and a string
    attribute 'attr1'.
    """
    foo_spec = GroupSpec(
        doc='A test group specification with a data type',
        data_type_def='Foo',
        groups=[
            GroupSpec(
                doc='a child Foo',
                data_type_inc='Foo',
                name='sub_foo',
                quantity='?',
            )
        ],
        datasets=[
            DatasetSpec(
                doc='a 1-D integer dataset',
                dtype='int',
                name='my_data',
                shape=[None, ],
                attributes=[
                    AttributeSpec(
                        name='attr2',
                        doc='a 1-D integer attribute',
                        dtype='int',
                        shape=[None, ],
                    )
                ]
            )
        ],
        attributes=[
            AttributeSpec(name='attr1', doc='a string attribute', dtype='text'),
        ]
    )

    class FooMapper(ObjectMapper):
        """Remap 'attr2' attribute on Foo container to 'my_data' dataset spec > 'attr2' attribute spec."""
        def __init__(self, spec):
            super().__init__(spec)
            my_data_spec = spec.get_dataset('my_data')
            self.map_spec('attr2', my_data_spec.get_attribute('attr2'))

    # register the spec in a catalog, wrap it in a namespace, and wire the
    # namespace, container class, and mapper into a TypeMap for the manager
    spec_catalog = SpecCatalog()
    spec_catalog.register_spec(foo_spec, 'test.yaml')
    namespace_name = 'test_core'
    namespace = SpecNamespace(
        doc='a test namespace',
        name=namespace_name,
        schema=[{'source': 'test.yaml'}],
        version='0.1.0',
        catalog=spec_catalog
    )
    namespace_catalog = NamespaceCatalog()
    namespace_catalog.add_namespace(namespace_name, namespace)
    type_map = TypeMap(namespace_catalog)
    type_map.register_container_type(namespace_name, 'Foo', Foo)
    type_map.register_map(Foo, FooMapper)
    manager = BuildManager(type_map)
    return manager
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.