Skip to content

Commit 4eb8468

Browse files
authored
Add support and documentation for export from HDMF version 2 (#1280)
* Add roundtrip export testing * Stash changes * Fix test warning * Remove logging * Clean up roundtrip mixin * Use HDMF 2.1.0 * First pass at adding export and export docs * Update tutorial * Add tutorial on export * Fix rst formatting * Change docs file name and add section on object IDs and HDMF export * Fix generate new ID text * Update changelog
1 parent 3d00833 commit 4eb8468

File tree

5 files changed

+297
-6
lines changed

5 files changed

+297
-6
lines changed

CHANGELOG.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
# PyNWB Changelog
22

3-
## PyNWB 1.4.0 (August 11, 2020)
3+
## PyNWB 1.4.0 (August 12, 2020)
4+
5+
Users can now add/remove containers from a written NWB file and export the modified NWBFile to a new file path.
6+
@rly (#1280)
7+
- See https://pynwb.readthedocs.io/en/stable/tutorials/general/add-remove-containers.html for examples and more
8+
information.
49

510
### Internal improvements:
611
- Update requirements to use HDMF 2.1.0. @rly (#1256)
Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
"""
2+
.. _modifying_data:
3+
4+
Adding/removing containers from an NWB file
5+
============================================
6+
7+
This tutorial explains how to add and remove containers from an existing NWB file and either write the data back to the
8+
same file or export the data to a new file.
9+
"""
10+
11+
###############################################################################
12+
# Adding objects to an NWB file in read/write mode
13+
# ----------------------------------------------------
14+
# PyNWB supports adding container objects to an existing NWB file - that is, reading data from an NWB file, adding a
15+
# container object, such as a new :py:class:`~pynwb.base.TimeSeries` object, and writing the modified
16+
# :py:class:`~pynwb.file.NWBFile` back to the same file path on disk. To do so:
17+
#
18+
# 1. open the file with an :py:class:`~pynwb.NWBHDF5IO` object in read/write mode (``mode='r+'`` or ``mode='a'``)
19+
# 2. read the :py:class:`~pynwb.file.NWBFile`
20+
# 3. add container objects to the :py:class:`~pynwb.file.NWBFile` object
21+
# 4. write the modified :py:class:`~pynwb.file.NWBFile` using the same :py:class:`~pynwb.NWBHDF5IO` object
22+
#
23+
# For example:
24+
25+
from pynwb import NWBFile, NWBHDF5IO, TimeSeries
26+
import datetime
27+
import numpy as np
28+
29+
# first, write a test NWB file
30+
nwbfile = NWBFile(
    session_description='demonstrate adding to an NWB file',
    identifier='NWB123',
    session_start_time=datetime.datetime.now(datetime.timezone.utc),
)

filename = 'nwbfile.nwb'
with NWBHDF5IO(filename, 'w') as io:
    io.write(nwbfile)

# open the NWB file in r+ mode
# the IO object must stay open until the modified file has been written back
with NWBHDF5IO(filename, 'r+') as io:
    read_nwbfile = io.read()

    # create a TimeSeries and add it to the file under the acquisition group
    data = list(range(100, 200, 10))
    # use np.float64 explicitly: the np.float alias was deprecated in NumPy 1.20 and later removed
    timestamps = np.arange(10, dtype=np.float64)
    test_ts = TimeSeries(
        name='test_timeseries',
        data=data,
        unit='m',
        timestamps=timestamps
    )
    read_nwbfile.add_acquisition(test_ts)

    # write the modified NWB file
    io.write(read_nwbfile)

# confirm the file contains the new TimeSeries in acquisition
with NWBHDF5IO(filename, 'r') as io:
    read_nwbfile = io.read()
    print(read_nwbfile)
62+
63+
###############################################################################
64+
# .. note::
65+
#
66+
# You cannot remove objects from an NWB file using the above method.
67+
68+
###############################################################################
69+
# Modifying an NWB file in this way has limitations. The destination file path must be the same as the source
70+
# file path, and it is not possible to remove objects from an NWB file. You can use the
71+
# :py:meth:`NWBHDF5IO.export <pynwb.NWBHDF5IO.export>` method, detailed below, to modify an NWB file in these ways.
72+
#
73+
# .. warning::
74+
#
75+
# NWB datasets that have been written to disk are read as :py:class:`h5py.Dataset <h5py.Dataset>` objects.
76+
# Directly modifying the data in these :py:class:`h5py.Dataset <h5py.Dataset>` objects immediately
77+
# modifies the data on disk
78+
# (the :py:meth:`NWBHDF5IO.write <pynwb.NWBHDF5IO.write>` method does not need to be called and the
79+
# :py:class:`~pynwb.NWBHDF5IO` instance does not need to be closed). Directly modifying datasets in this way
80+
# can lead to files that do not validate or cannot be opened, so take caution when using this method.
81+
# Note: only chunked datasets or datasets with ``maxshape`` set can be resized.
82+
# See the `h5py chunked storage documentation <https://docs.h5py.org/en/stable/high/dataset.html#chunked-storage>`_
83+
# for more details.
84+
85+
###############################################################################
86+
# .. note::
87+
#
88+
# It is not possible to modify the attributes (fields) of an NWB container in memory.
89+
90+
###############################################################################
91+
# Exporting a written NWB file to a new file path
92+
# ---------------------------------------------------
93+
# Use the :py:meth:`NWBHDF5IO.export <pynwb.NWBHDF5IO.export>` method to read data from an existing NWB file,
94+
# modify the data, and write the modified data to a new file path. Modifications to the data can be additions or
95+
# removals of objects, such as :py:class:`~pynwb.base.TimeSeries` objects. This is especially useful if you
96+
# have raw data and processed data in the same NWB file and you want to create a new NWB file with all of the
97+
# contents of the original file except for the raw data for sharing with collaborators.
98+
#
99+
# To remove existing containers, use the :py:meth:`~hdmf.utils.LabelledDict.pop` method on any
100+
# :py:class:`~hdmf.utils.LabelledDict` object, such as ``NWBFile.acquisition``, ``NWBFile.processing``,
101+
# ``NWBFile.analysis``, ``NWBFile.scratch``, ``NWBFile.devices``, ``NWBFile.stimulus``,
102+
# ``NWBFile.stimulus_template``, ``NWBFile.electrode_groups``, ``NWBFile.imaging_planes``,
103+
# ``NWBFile.icephys_electrodes``, ``NWBFile.ogen_sites``, ``NWBFile.lab_meta_data``,
104+
# and :py:class:`~pynwb.base.ProcessingModule` objects.
105+
#
106+
# For example:
107+
108+
# first, create a test NWB file with a TimeSeries in the acquisition group
109+
nwbfile = NWBFile(
    session_description='demonstrate export of an NWB file',
    identifier='NWB123',
    session_start_time=datetime.datetime.now(datetime.timezone.utc),
)
data1 = list(range(100, 200, 10))
# use np.float64 explicitly: the np.float alias was deprecated in NumPy 1.20 and later removed
timestamps1 = np.arange(10, dtype=np.float64)
test_ts1 = TimeSeries(
    name='test_timeseries1',
    data=data1,
    unit='m',
    timestamps=timestamps1
)
nwbfile.add_acquisition(test_ts1)

# then, create a processing module for processed behavioral data
nwbfile.create_processing_module(
    name='behavior',
    description='processed behavioral data'
)
data2 = list(range(100, 200, 10))
timestamps2 = np.arange(10, dtype=np.float64)
test_ts2 = TimeSeries(
    name='test_timeseries2',
    data=data2,
    unit='m',
    timestamps=timestamps2
)
nwbfile.processing['behavior'].add(test_ts2)

# write these objects to an NWB file
filename = 'nwbfile.nwb'
with NWBHDF5IO(filename, 'w') as io:
    io.write(nwbfile)

# read the written file
# the source IO object must remain open until the export below has finished
export_filename = 'exported_nwbfile.nwb'
with NWBHDF5IO(filename, mode='r') as read_io:
    read_nwbfile = read_io.read()

    # add a new TimeSeries to the behavior processing module
    data3 = list(range(100, 200, 10))
    timestamps3 = np.arange(10, dtype=np.float64)
    test_ts3 = TimeSeries(
        name='test_timeseries3',
        data=data3,
        unit='m',
        timestamps=timestamps3
    )
    read_nwbfile.processing['behavior'].add(test_ts3)

    # use the pop method to remove the original TimeSeries from the acquisition group
    read_nwbfile.acquisition.pop('test_timeseries1')

    # use the pop method to remove a TimeSeries from a processing module
    read_nwbfile.processing['behavior'].data_interfaces.pop('test_timeseries2')

    # call the export method to write the modified NWBFile instance to a new file path
    # the original file is not modified
    with NWBHDF5IO(export_filename, mode='w') as export_io:
        export_io.export(src_io=read_io, nwbfile=read_nwbfile)

# confirm the exported file does not contain TimeSeries with names 'test_timeseries1' or 'test_timeseries2'
# but does contain a new TimeSeries in processing['behavior'] with name 'test_timeseries3'
with NWBHDF5IO(export_filename, 'r') as io:
    read_nwbfile = io.read()
    print(read_nwbfile)
    print(read_nwbfile.processing['behavior'])
177+
178+
###############################################################################
179+
# .. note::
180+
#
181+
# :py:class:`~pynwb.epoch.TimeIntervals` objects, such as ``NWBFile.epochs``, ``NWBFile.trials``,
182+
# ``NWBFile.invalid_times``, and custom :py:class:`~pynwb.epoch.TimeIntervals` objects cannot be
183+
# removed (popped) from ``NWBFile.intervals``.
184+
185+
###############################################################################
186+
# .. warning::
187+
#
188+
# Removing an object from an NWBFile may break links and references within the file and across files.
189+
# This is analogous to having shortcuts/aliases to a file on your filesystem and then deleting the file.
190+
# Extra caution should be taken when removing heavily referenced items such as
191+
# :py:class:`~pynwb.device.Device` objects,
192+
# :py:class:`~pynwb.ecephys.ElectrodeGroup` objects, the electrodes table, and the
193+
# :py:class:`~pynwb.ophys.PlaneSegmentation` table.
194+
195+
###############################################################################
196+
# Exporting with new object IDs
197+
# ---------------------------------
198+
# When exporting a read NWB file to a new file path, the object IDs within the original NWB file will be copied to the
199+
# new file. To make the exported NWB file contain a new set of object IDs, call
200+
# :py:meth:`~hdmf.container.AbstractContainer.generate_new_id` on your :py:class:`~pynwb.file.NWBFile` object.
201+
# This will generate a new object ID for the :py:class:`~pynwb.file.NWBFile` object and all of the objects within
202+
# the NWB file.
203+
204+
# destination path for the exported copy
export_filename = 'exported_nwbfile.nwb'

# keep the source file open for the duration of the export
with NWBHDF5IO(filename, mode='r') as read_io:
    read_nwbfile = read_io.read()

    # request fresh object IDs before exporting
    read_nwbfile.generate_new_id()

    # write the in-memory NWBFile to the new file path
    with NWBHDF5IO(export_filename, mode='w') as export_io:
        export_io.export(src_io=read_io, nwbfile=read_nwbfile)
211+
212+
###############################################################################
213+
# More information about export
214+
# ---------------------------------
215+
# For more information about the export functionality, see https://hdmf.readthedocs.io/en/latest/export.html

src/pynwb/__init__.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,17 @@ def __init__(self, **kwargs):
245245
manager = get_manager()
246246
super(NWBHDF5IO, self).__init__(path, manager=manager, mode=mode, file=file_obj, comm=comm)
247247

248+
@docval({'name': 'src_io', 'type': HDMFIO, 'doc': 'the HDMFIO object for reading the data to export'},
        {'name': 'nwbfile', 'type': 'NWBFile',
         'doc': 'the NWBFile object to export. If None, then the entire contents of src_io will be exported',
         'default': None},
        {'name': 'write_args', 'type': dict, 'doc': 'arguments to pass to :py:meth:`write_builder`',
         'default': dict()})
def export(self, **kwargs):
    """Export data read through ``src_io`` by delegating to the parent class ``export`` method.

    The ``nwbfile`` argument is forwarded to the parent implementation under the name ``container``.
    """
    # the parent export method takes the object to export as 'container', so rename the argument
    kwargs['container'] = popargs('nwbfile', kwargs)
    call_docval_func(super().export, kwargs)
258+
248259

249260
from . import io as __io # noqa: F401,E402
250261
from .core import NWBContainer, NWBData # noqa: F401,E402

src/pynwb/testing/testh5io.py

Lines changed: 64 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,20 @@ def setUp(self):
3737
self.create_date = datetime(2018, 4, 15, 12, tzinfo=tzlocal())
3838
self.container_type = self.container.__class__.__name__
3939
self.filename = 'test_%s.nwb' % self.container_type
40+
self.export_filename = 'test_export_%s.nwb' % self.container_type
4041
self.writer = None
4142
self.reader = None
43+
self.export_reader = None
4244

4345
def tearDown(self):
4446
if self.writer is not None:
4547
self.writer.close()
4648
if self.reader is not None:
4749
self.reader.close()
50+
if self.export_reader is not None:
51+
self.export_reader.close()
4852
remove_test_file(self.filename)
53+
remove_test_file(self.export_filename)
4954

5055
@abstractmethod
5156
def setUpContainer(self):
@@ -62,9 +67,23 @@ def test_roundtrip(self):
6267
self.assertIsNotNone(str(self.read_container))
6368
# make sure we get a completely new object
6469
self.assertNotEqual(id(self.container), id(self.read_container))
70+
# make sure the object ID is preserved
6571
self.assertIs(self.read_nwbfile.objects[self.container.object_id], self.read_container)
6672
self.assertContainerEqual(self.read_container, self.container)
6773

74+
def test_roundtrip_export(self):
    """
    Write the test Container, export the read file to a second file, and check that the Container read
    back from the exported file matches the original test Container (the files are validated on the way).
    """
    exported_container = self.roundtripExportContainer()
    self.read_container = exported_container

    # converting to str exercises the printing code path
    self.assertIsNotNone(str(exported_container))

    # the container read from the exported file must be a distinct Python object
    self.assertNotEqual(id(self.container), id(exported_container))

    # looking up the original object ID in the exported file must yield the container that was read
    original_id = self.container.object_id
    self.assertIs(self.read_exported_nwbfile.objects[original_id], exported_container)

    self.assertContainerEqual(exported_container, self.container, ignore_hdmf_attrs=True)
86+
6887
def roundtripContainer(self, cache_spec=False):
6988
"""
7089
Add the test Container to an NWBFile, write it to file, read the file, and return the test Container from the
@@ -76,9 +95,8 @@ def roundtripContainer(self, cache_spec=False):
7695
self.addContainer(nwbfile)
7796

7897
with warnings.catch_warnings(record=True) as ws:
79-
self.writer = NWBHDF5IO(self.filename, mode='w')
80-
self.writer.write(nwbfile, cache_spec=cache_spec)
81-
self.writer.close()
98+
with NWBHDF5IO(self.filename, mode='w') as write_io:
99+
write_io.write(nwbfile, cache_spec=cache_spec)
82100

83101
self.validate()
84102

@@ -101,6 +119,41 @@ def roundtripContainer(self, cache_spec=False):
101119
self.reader = None
102120
raise e
103121

122+
def roundtripExportContainer(self, cache_spec=False):
    """
    Run the regular write/read roundtrip, export the read file to ``self.export_filename``, read the
    exported file, and return the test Container obtained from it.

    Warnings recorded during export/validation/read are inspected afterwards: MissingRequiredWarning,
    OrphanContainerWarning and BrokenLinkWarning are turned into exceptions, all others are re-issued.
    """
    self.roundtripContainer(cache_spec=cache_spec)  # self.read_nwbfile is now set

    fatal_categories = (MissingRequiredWarning,
                        OrphanContainerWarning,
                        BrokenLinkWarning)

    with warnings.catch_warnings(record=True) as caught:
        NWBHDF5IO.export_io(
            src_io=self.reader,
            path=self.export_filename,
            cache_spec=cache_spec,
        )

        self.validate()

        # the reader is kept on self; the except branch below closes it if getContainer fails
        self.export_reader = NWBHDF5IO(self.export_filename, mode='r')
        self.read_exported_nwbfile = self.export_reader.read()

    for recorded in caught:
        if issubclass(recorded.category, fatal_categories):
            raise Exception('%s: %s' % (recorded.category.__name__, recorded.message))
        warnings.warn(recorded.message, recorded.category)

    try:
        return self.getContainer(self.read_exported_nwbfile)
    except Exception:
        # release the file handle before propagating the failure
        self.export_reader.close()
        self.export_reader = None
        raise
156+
104157
@abstractmethod
105158
def addContainer(self, nwbfile):
106159
""" Should add the test Container to the given NWBFile """
@@ -112,14 +165,21 @@ def getContainer(self, nwbfile):
112165
raise NotImplementedError('Cannot run test unless getContainer is implemented')
113166

114167
def validate(self):
    """
    Validate the created files.

    Both the written file (``self.filename``) and the exported file (``self.export_filename``) are
    validated if they exist on disk. An Exception is raised for the first validation error found.
    """
    # validate each file against its own path; previously the exported-file branch
    # re-opened self.filename, so the exported file was never actually validated
    for path in (self.filename, self.export_filename):
        if not os.path.exists(path):
            continue
        with NWBHDF5IO(path, mode='r') as io:
            errors = pynwb_validate(io)
            if errors:
                for err in errors:
                    raise Exception(err)
182+
123183

124184
class AcquisitionH5IOMixin(NWBH5IOMixin):
125185
"""

tests/integration/hdf5/test_ophys.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ def setUpContainer(self):
3434
indicator='GFP',
3535
location='somewhere in the brain',
3636
reference_frame='unknown',
37-
origin_coords=[10, 20],
37+
origin_coords=[10., 20.],
3838
origin_coords_unit='millimeters',
3939
grid_spacing=[0.001, 0.001],
4040
grid_spacing_unit='millimeters',

0 commit comments

Comments
 (0)