Skip to content

Commit f46215a

Browse files
authored
Adding a simple python hdf5 wrapper module (#1056)
* Adding a simple python hdf5 wrapper module * Adding hdf5_wrapper use example, documentation
1 parent a1e32fb commit f46215a

File tree

4 files changed

+260
-0
lines changed

4 files changed

+260
-0
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
2+
from .wrapper import hdf5_wrapper
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
2+
import numpy as np
3+
import hdf5_wrapper
4+
5+
6+
def print_database_iterative(database, level=0):
    """
    @brief print the database targets iteratively by level
    @param database the wrapper for the current database
    @param level the depth within the database

    @details Each entry name is printed with an indent proportional to its
             depth.  Groups are descended into recursively; anything else
             (typically an array) is printed followed by a blank line.
    """
    indent = ' ' * level

    for k in database.keys():
        print('%s%s' % (indent, k))

        # Fetch the entry once: every lookup on the wrapper goes back to the
        # underlying database, so repeating it would read the data twice
        entry = database[k]

        if isinstance(entry, hdf5_wrapper.hdf5_wrapper):
            # This is a group, so continue iterating downward
            print_database_iterative(entry, level + 1)
        else:
            # This is likely to be an array
            print(entry)
            print()
23+
24+
25+
def read_write_hdf5_database_example():
    """
    @brief walk through writing, linking and reading databases with hdf5_wrapper

    @details Two databases are written to disk (one entry-by-entry, one from
             a nested dict), a third database is created that links to both,
             and finally the linked database is read back and printed.
    """

    # ------------------------
    # Generate test data
    # ------------------------
    double_1d = np.random.randn(10)
    strings = np.array(['a', 'list', 'of', 'strings'])
    double_2d = np.random.randn(2, 3)

    integer_1d = np.random.randint(0, 100, 5)
    double_3d = np.random.randn(4, 5, 2)
    tree_b = {'1D_integer_array': integer_1d,
              'child_b': {'3D_double_array': double_3d}}

    # ------------------------
    # Write databases to file
    # ------------------------
    # First database: populated one entry at a time.
    # Leaving the with-scope closes the database automatically.
    with hdf5_wrapper.hdf5_wrapper('database_a.hdf5', mode='a') as db_a:
        # Two arrays stored at the root level
        db_a['1D_double_array'] = double_1d
        db_a['string_array'] = strings

        # Requesting a missing name in write mode creates the child group
        group_a = db_a['child_a']
        group_a['2D_double_array'] = double_2d

    # Second database: the whole nested dict is written in one assignment
    with hdf5_wrapper.hdf5_wrapper('database_b.hdf5', mode='a') as db_b:
        db_b['/'] = tree_b

    # Third database: links to the other two
    with hdf5_wrapper.hdf5_wrapper('database_c.hdf5', mode='a') as db_c:
        for link_name in ('database_a', 'database_b'):
            db_c.link(link_name, '%s.hdf5' % link_name)

    # ---------------------------------------
    # Read the databases from the filesystem
    # ---------------------------------------
    print('Database contents:')
    with hdf5_wrapper.hdf5_wrapper('database_c.hdf5') as db_c:
        # Recursively print everything reachable from the linking database
        print_database_iterative(db_c, 1)

        # The underlying h5py object stays reachable through .target,
        # which gives access to low-level h5py functionality, e.g.:
        print('Database attributes:')
        print(' ', db_c.target.attrs)
75+
76+
77+
if __name__ == "__main__":
    # Run the demonstration only when this file is executed as a script
    read_write_hdf5_database_example()
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
2+
import h5py
3+
import numpy as np
4+
from numpy.core.defchararray import encode, decode
5+
6+
7+
class hdf5_wrapper():
    """
    @brief a class for reading/writing hdf5 files, which behaves similar to a native dict

    @details A wrapper either owns an h5py file (when constructed from a
             filename) or refers to a group inside an already open file
             (when constructed from a target handle).  Only wrappers that
             own a file close anything when they are closed, deleted or
             used as a context manager.
    """

    # Modes for which the wrapper allows writing to the database
    _WRITE_MODES = ('w', 'a')

    def __init__(self, fname='', target='', mode='r'):
        """
        @brief initialize the hdf5_wrapper class
        @param fname the filename of a new or existing hdf5 database
        @param target the handle of an existing hdf5 dataset
        @param mode the read/write behavior of the database (default='r')

        @details If the fname is supplied (either by a positional or keyword argument),
                 the wrapper will open a hdf5 database from the filesystem.  The recommended
                 options for the mode flag include 'r' for read-only and 'a' for read/write
                 access.  If write mode is enabled, and the fname does not point
                 to an existing file, a new database will be created.

                 If the target is supplied, then a new instance of the wrapper will
                 be created using an existing database handle.
        """
        self.mode = mode
        self.target = target
        if fname:
            self.target = h5py.File(fname, self.mode)

    @staticmethod
    def _decode_strings(data):
        """
        @brief convert byte strings held in an array into native strings
        @param data a numpy ndarray read from the database
        @return the decoded array; arrays that hold no byte strings are returned unchanged
        """
        kind = data.dtype.kind

        if kind == 'S':
            # Fixed-length byte strings
            return np.char.decode(data)

        if kind == 'O':
            # Object arrays cannot be handled by np.char, so decode
            # element-wise, and only when every element is string-like
            items = list(data.flat)
            if all(isinstance(x, (bytes, str)) for x in items):
                text = [x.decode() if isinstance(x, bytes) else x for x in items]
                return np.array(text, dtype=str).reshape(data.shape)

        return data

    def __getitem__(self, k):
        """
        @brief get a target from the database
        @param k name of target group or array

        @return the returned value depends on the type of the target:
                - An existing hdf5 group will return an instance of hdf5_wrapper
                - An existing array will return an numpy ndarray
                - If the target is not present in the datastructure and the
                  database is open in read/write mode, the wrapper will create a
                  new group and return an hdf5_wrapper
                - Otherwise, this will throw an error
        """
        if k not in self.target:
            if self.mode in self._WRITE_MODES:
                self.target.create_group(k)
            else:
                raise ValueError('Entry does not exist in database: %s' % (k))

        tmp = self.target[k]

        if isinstance(tmp, h5py.Group):
            return hdf5_wrapper(target=tmp, mode=self.mode)

        if isinstance(tmp, h5py.Dataset):
            # Decode any string types
            tmp = self._decode_strings(np.array(tmp))

            # Convert any 0-length arrays to native types
            if not tmp.shape:
                tmp = tmp[()]

        return tmp

    def __setitem__(self, k, value):
        """
        @brief write an object to the database if write-mode is enabled
        @param k the name of the object
        @param value the object to be written

        @details A dict is written recursively as a group and its children.
                 Anything else is converted to a numpy ndarray and replaces
                 any existing entry with the same name.  A ValueError is
                 raised if the wrapper was not opened in a write mode.
        """
        if self.mode not in self._WRITE_MODES:
            raise ValueError('Cannot write to an hdf5 opened in read-only mode!  This can be changed by overriding the default mode argument for the wrapper.')

        if isinstance(value, dict):
            # Recursively add groups and their children
            if k not in self.target:
                self.target.create_group(k)
            new_group = self[k]
            for x in value:
                new_group[x] = value[x]
            return

        # Convert before touching the database, so that a failed conversion
        # does not destroy the existing entry
        tmp = np.array(value)
        if tmp.dtype.kind in ('U', 'O'):
            # Byte strings (kind 'S') are already encoded and are stored as-is
            tmp = np.char.encode(tmp)

        # Delete the old copy if necessary
        if k in self.target:
            del self.target[k]

        self.target[k] = tmp

    def link(self, k, target):
        """
        @brief link an external hdf5 file to this location in the database
        @param k the name of the new link in the database
        @param target the path to the external database

        @details An existing entry with the same name is replaced, matching
                 the behavior of item assignment.
        """
        if k in self.target:
            del self.target[k]
        self.target[k] = h5py.ExternalLink(target, '/')

    def keys(self):
        """
        @brief get a list of groups and arrays located at the current level
        @return a list of strings
        """
        if isinstance(self.target, h5py.Group):
            return list(self.target)
        raise ValueError('Object not a group!')

    def __iter__(self):
        """
        @brief iterate over the names located at the current level
        @return an iterator over the same names returned by keys()
        """
        return iter(self.keys())

    def __enter__(self):
        """
        @brief entry point for a context manager (with-statement)
        @return the wrapper itself
        """
        return self

    def __exit__(self, type, value, traceback):
        """
        @brief end point for a context manager (with-statement)

        @details The database is closed only if this wrapper holds a file
                 handle; wrappers around groups have nothing to close.
        """
        self.close()

    def __del__(self):
        """
        @brief closes the database on wrapper deletion
        """
        # Best effort only: the wrapper may be partially constructed or the
        # interpreter may be shutting down, so failures here are ignored
        try:
            self.close()
        except Exception:
            pass

    def close(self):
        """
        @brief closes the database
        """
        if isinstance(self.target, h5py.File):
            self.target.close()

    def get_copy(self):
        """
        @brief copy the entire database into memory
        @return a dictionary holding the database contents
        """
        tmp = {}
        self.copy(tmp)
        return tmp

    def copy(self, output):
        """
        @brief pack the contents of the current database level onto the target dict
        @param output the dictionary to pack objects into
        """
        for k in self.keys():
            tmp = self[k]

            if isinstance(tmp, hdf5_wrapper):
                # Groups become nested dictionaries
                output[k] = {}
                tmp.copy(output[k])
            else:
                output[k] = tmp
169+
170+

hdf5_wrapper_package/setup.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Prefer setuptools: distutils does not understand install_requires (it only
# warns about an unknown option) and is no longer part of the standard
# library from Python 3.12 onward.  Fall back to distutils where setuptools
# is unavailable.
try:
    from setuptools import setup
except ImportError:
    from distutils.core import setup

setup(name='hdf5_wrapper',
      version='0.1.0',
      description='Simple wrapper for h5py objects',
      author='Chris Sherman',
      author_email='[email protected]',
      packages=['hdf5_wrapper'],
      install_requires=['h5py', 'numpy'])
10+

0 commit comments

Comments
 (0)