Skip to content

Commit 06b5a6f

Browse files
committed
added h5py writer
1 parent 48d43e4 commit 06b5a6f

File tree

8 files changed

+559
-266
lines changed

8 files changed

+559
-266
lines changed

src/h5json/h5tojson/h5tojson.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
# request a copy from help@hdfgroup.org. #
1111
##############################################################################
1212
import sys
13-
import argparse
1413
import os.path as op
1514
import logging
1615
import logging.handlers
@@ -21,16 +20,18 @@
2120

2221

2322
def main():
24-
parser = argparse.ArgumentParser(usage="%(prog)s [-h] [-D|-d] <hdf5_file>")
25-
parser.add_argument("-D", action="store_true", help="suppress all data output")
26-
parser.add_argument(
27-
"-d",
28-
action="store_true",
29-
help="suppress data output for" + " datasets (but not attribute values)",
30-
)
31-
parser.add_argument("filename", nargs="+", help="HDF5 to be converted to json")
32-
args = parser.parse_args()
33-
23+
if len(sys.argv) < 2 or sys.argv[1] in ("-h", "--help"):
24+
print(f"usage: {sys.argv[0]} [-h] [--nodata] <hdf5_file>")
25+
sys.exit(0)
26+
27+
no_data = False
28+
filename = None
29+
for i in range(1, len(sys.argv)):
30+
if sys.argv[i] == "--nodata":
31+
no_data = True
32+
else:
33+
filename = sys.argv[i]
34+
3435
# create logger
3536
log = logging.getLogger("h5tojson")
3637
# log.setLevel(logging.WARN)
@@ -41,15 +42,14 @@ def main():
4142
# add handler to logger
4243
log.addHandler(handler)
4344

44-
filename = args.filename[0]
4545
if not op.isfile(filename):
4646
sys.exit(f"Cannot find file: {filename}")
4747

4848
log.info(f"h5tojson {filename}")
4949

5050
kwargs = {"app_logger": log}
5151

52-
with Hdf5db(h5_reader=H5pyReader(filename, **kwargs), h5_writer=H5JsonWriter("/tmp/foo.json", no_data=False, **kwargs), **kwargs) as db:
52+
with Hdf5db(h5_reader=H5pyReader(filename, **kwargs), h5_writer=H5JsonWriter(None, no_data=no_data, **kwargs), **kwargs) as db:
5353
pass
5454

5555
if __name__ == "__main__":

src/h5json/hdf5db.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ def getObjectIdByPath(self, h5path, parent_id=None):
224224

225225
def getObjectByPath(self, path):
226226
""" Get Object JSON at given path """
227-
obj_id = self.getObjectIDByPath(path)
227+
obj_id = self.getObjectIdByPath(path)
228228
obj_json = self.getObjectById(obj_id)
229229
return obj_json
230230

src/h5json/jsontoh5/jsontoh5.py

Lines changed: 28 additions & 249 deletions
Original file line numberDiff line numberDiff line change
@@ -9,238 +9,35 @@
99
# distribution tree. If you do not have access to this file, you may #
1010
# request a copy from help@hdfgroup.org. #
1111
##############################################################################
12-
import json
13-
import argparse
14-
import h5py
12+
import sys
13+
import os.path as op
1514
import logging
1615
import logging.handlers
1716

1817
from h5json import Hdf5db
18+
from h5json.writer.h5py_writer import H5pyWriter
19+
from h5json.reader.h5json_reader import H5JsonReader
1920

20-
21-
"""
22-
Writeh5 - return json representation of all objects within the given file
23-
h5writer = Writeh5(db, h5json)
24-
h5writer.writeFile()
25-
"""
26-
27-
28-
class Writeh5:
29-
def __init__(self, db, json, options=None):
30-
self.options = options
31-
self.db = db
32-
self.json = json
33-
self.root_uuid = None
34-
35-
#
36-
# Create a hard, soft, or external link
37-
#
38-
def createLink(self, link_obj, parent_uuid):
39-
title = link_obj["title"]
40-
link_class = link_obj["class"]
41-
if link_class == "H5L_TYPE_HARD":
42-
child_uuid = link_obj["id"]
43-
self.db.linkObject(parent_uuid, child_uuid, title)
44-
elif link_class == "H5L_TYPE_SOFT":
45-
h5path = link_obj["h5path"]
46-
self.db.createSoftLink(parent_uuid, h5path, title)
47-
elif link_class == "H5L_TYPE_EXTERNAL":
48-
h5path = link_obj["h5path"]
49-
link_file = link_obj["file"]
50-
self.db.createExternalLink(parent_uuid, link_file, h5path, title)
51-
else:
52-
print("Unable to create link with class:", link_class)
53-
54-
#
55-
# Create HDF5 dataset object and write data values
56-
#
57-
def createDataset(self, uuid, body):
58-
datatype = body["type"]
59-
if isinstance(datatype, str) and datatype.startswith("datatypes/"):
60-
# committed datatype, just pass in the UUID part
61-
datatype = datatype[len("datatypes/") :]
62-
dims = () # if no space in body, default to scalar
63-
max_shape = None
64-
creation_props = None
65-
if "creationProperties" in body:
66-
creation_props = body["creationProperties"]
67-
if "shape" in body:
68-
shape = body["shape"]
69-
if shape["class"] == "H5S_SIMPLE":
70-
dims = shape["dims"]
71-
if isinstance(dims, int):
72-
# convert int to array
73-
dim1 = shape
74-
dims = [dim1]
75-
if "maxdims" in shape:
76-
max_shape = shape["maxdims"]
77-
if isinstance(max_shape, int):
78-
# convert to array
79-
dim1 = max_shape
80-
max_shape = [dim1]
81-
# convert H5S_UNLIMITED's to None's
82-
for i in range(len(max_shape)):
83-
if max_shape[i] == "H5S_UNLIMITED":
84-
max_shape[i] = None
85-
elif shape["class"] == "H5S_NULL":
86-
dims = None
87-
88-
self.db.createDataset(
89-
datatype,
90-
dims,
91-
max_shape=max_shape,
92-
creation_props=creation_props,
93-
obj_uuid=uuid,
94-
)
95-
96-
if "value" in body:
97-
data = body["value"]
98-
if data:
99-
data = self.db.toRef(len(dims), datatype, data)
100-
self.db.setDatasetValuesByUuid(uuid, data)
101-
102-
def createAttribute(self, attr_json, col_name, uuid):
103-
attr_name = attr_json["name"]
104-
datatype = attr_json["type"]
105-
if isinstance(datatype, str) and datatype.startswith("datatypes/"):
106-
# committed datatype, just pass in the UUID part
107-
datatype = datatype[len("datatypes/") :]
108-
109-
attr_value = None
110-
if "value" in attr_json:
111-
attr_value = attr_json["value"]
112-
dims = None
113-
if "shape" in attr_json:
114-
shape = attr_json["shape"]
115-
if shape["class"] == "H5S_SIMPLE":
116-
dims = shape["dims"]
117-
if isinstance(dims, int):
118-
# convert int to array
119-
dim1 = shape
120-
dims = [dim1]
121-
elif shape["class"] == "H5S_SCALAR":
122-
dims = () # empty tuple for scalar
123-
self.db.createAttribute(col_name, uuid, attr_name, dims, datatype, attr_value)
124-
125-
#
126-
# create committed datatype HDF5 object
127-
#
128-
def createDatatype(self, uuid, body):
129-
datatype = body["type"]
130-
self.db.createCommittedType(datatype, obj_uuid=uuid)
131-
132-
#
133-
# Create HDF5 group object (links and attributes will be added later)
134-
#
135-
def createGroup(self, uuid, body):
136-
if uuid != self.root_uuid:
137-
self.db.createGroup(obj_uuid=uuid)
138-
139-
#
140-
# Create all the HDF5 objects defined in the JSON file
141-
#
142-
def createObjects(self):
143-
# create datatypes
144-
if "datatypes" in self.json:
145-
datatypes = self.json["datatypes"]
146-
for uuid in datatypes:
147-
json_obj = datatypes[uuid]
148-
self.createDatatype(uuid, json_obj)
149-
# create groups
150-
if "groups" in self.json:
151-
groups = self.json["groups"]
152-
for uuid in groups:
153-
json_obj = groups[uuid]
154-
self.createGroup(uuid, json_obj)
155-
# create datasets
156-
if "datasets" in self.json:
157-
datasets = self.json["datasets"]
158-
for uuid in datasets:
159-
json_obj = datasets[uuid]
160-
self.createDataset(uuid, json_obj)
161-
162-
#
163-
# Create all the attributes for HDF5 objects defined in the JSON file
164-
# Note: this needs to be done after createObjects since an attribute
165-
# may use a committed datatype
166-
#
167-
def createAttributes(self):
168-
dimension_list_attrs = [] # track dimension list attributes
169-
# create datatype attributes
170-
if "datatypes" in self.json:
171-
datatypes = self.json["datatypes"]
172-
for uuid in datatypes:
173-
body = datatypes[uuid]
174-
if "attributes" in body:
175-
attributes = body["attributes"]
176-
for attribute in attributes:
177-
self.createAttribute(attribute, "datatypes", uuid)
178-
# create group attributes
179-
if "groups" in self.json:
180-
groups = self.json["groups"]
181-
for uuid in groups:
182-
body = groups[uuid]
183-
if "attributes" in body:
184-
attributes = body["attributes"]
185-
for attribute in attributes:
186-
self.createAttribute(attribute, "groups", uuid)
187-
# create datasets
188-
if "datasets" in self.json:
189-
datasets = self.json["datasets"]
190-
for uuid in datasets:
191-
body = datasets[uuid]
192-
if "attributes" in body:
193-
attributes = body["attributes"]
194-
for attribute in attributes:
195-
if attribute["name"] == "DIMENSION_LIST":
196-
# defer dimension list creation until after we've created all other
197-
# attributes (otherwise attach_scale may fail)
198-
dimension_list_attrs.append(
199-
{"attribute": attribute, "uuid": uuid}
200-
)
201-
else:
202-
self.createAttribute(attribute, "datasets", uuid)
203-
204-
# finally, do dimension_list attributes
205-
for item in dimension_list_attrs:
206-
attribute = item["attribute"]
207-
uuid = item["uuid"]
208-
self.createAttribute(attribute, "datasets", uuid)
209-
210-
#
211-
# Link all the objects
212-
# Note: this will "de-anonymous-ize" objects defined in the HDF5 file
213-
# Any non-linked objects will be deleted when the __db__ group is deleted
214-
#
215-
def createLinks(self):
216-
if "groups" in self.json:
217-
groups = self.json["groups"]
218-
for uuid in groups:
219-
json_obj = groups[uuid]
220-
if "links" in json_obj:
221-
links = json_obj["links"]
222-
for link in links:
223-
self.createLink(link, uuid)
224-
225-
def writeFile(self):
226-
227-
self.root_uuid = self.json["root"]
228-
229-
self.createObjects() # create datasets, groups, committed datatypes
230-
self.createAttributes() # create attributes for objects
231-
self.createLinks() # link it all together
232-
21+
23322

23423
def main():
235-
parser = argparse.ArgumentParser(usage="%(prog)s [-h] <json_file> <h5_file>")
236-
parser.add_argument(
237-
"in_filename", nargs="+", help="JSON file to be converted to h5"
238-
)
239-
parser.add_argument("out_filename", nargs="+", help="name of HDF5 output file")
240-
args = parser.parse_args()
241-
24+
if len(sys.argv) < 3 or sys.argv[1] in ("-h", "--help"):
25+
print(f"usage: {sys.argv[0]} [-h] [--nodata] <json_file> <h5_file>")
26+
sys.exit(0)
27+
28+
no_data = False
29+
json_filename = None
30+
hdf5_filename = None
31+
for i in range(1, len(sys.argv)):
32+
if sys.argv[i] == "--nodata":
33+
no_data = True
34+
elif not json_filename:
35+
json_filename = sys.argv[i]
36+
else:
37+
hdf5_filename = sys.argv[i]
38+
24239
# create logger
243-
log = logging.getLogger("h5serv")
40+
log = logging.getLogger("h5json")
24441
# log.setLevel(logging.WARN)
24542
log.setLevel(logging.INFO)
24643
# add log handler
@@ -249,34 +46,16 @@ def main():
24946
# add handler to logger
25047
log.addHandler(handler)
25148

252-
text = open(args.in_filename[0]).read()
253-
254-
# parse the json file
255-
h5json = json.loads(text)
256-
257-
if "root" not in h5json:
258-
raise Exception("no root key in input file")
259-
root_uuid = h5json["root"]
260-
261-
filename = args.out_filename[0]
262-
263-
# create the file, will raise IOError if there's a problem
264-
Hdf5db.createHDF5File(filename)
49+
if not op.isfile(json_filename):
50+
sys.exit(f"Cannot find file: {json_filename}")
26551

266-
with Hdf5db(
267-
filename, root_uuid=root_uuid, update_timestamps=False, app_logger=log
268-
) as db:
269-
h5writer = Writeh5(db, h5json)
270-
h5writer.writeFile()
52+
log.info(f"jsontoh5 {json_filename} to {hdf5_filename}")
27153

272-
# open with h5py and remove the _db_ group
273-
# Note: this will delete any anonymous (un-linked) objects
274-
f = h5py.File(filename, "a")
275-
if "__db__" in f:
276-
del f["__db__"]
277-
f.close()
54+
kwargs = {"app_logger": log}
55+
56+
with Hdf5db(h5_reader=H5JsonReader(json_filename, **kwargs), h5_writer=H5pyWriter(hdf5_filename, no_data=no_data, **kwargs), **kwargs) as db:
57+
pass
27858

279-
print("done!")
28059

28160

28261
if __name__ == "__main__":

src/h5json/reader/h5py_reader.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,12 @@ def get_root_id(self):
108108
""" Return root id """
109109
return self._root_id
110110

111+
def getObjIdByAddress(self, addr):
112+
if addr in self._addr_map:
113+
return self._addr_map[addr]
114+
else:
115+
return None
116+
111117
def getAttribute(self, obj_id, name, include_data=True):
112118
""" Return JSON for the given attribute """
113119

@@ -130,7 +136,7 @@ def getAttribute(self, obj_id, name, include_data=True):
130136
type_uuid = None
131137
addr = h5py.h5o.get_info(typeid).addr
132138
type_uuid = self.getObjIdByAddress(addr)
133-
committedType = self.getCommittedTypeItemByUuid(type_uuid)
139+
committedType = self._id_map[type_uuid]
134140
type_item = committedType["type"].copy()
135141
type_item["id"] = type_uuid
136142
else:

src/h5json/writer/h5json_writer.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,9 @@ def __init__(
3030
no_data=False,
3131
app_logger=None
3232
):
33-
super().__init__(filepath, append=append, app_logger=app_logger)
33+
super().__init__(filepath, append=append, no_data=no_data, app_logger=app_logger)
3434
self.alias_db = {}
3535
self.json = {}
36-
self._no_data = no_data
3736
self._root_uuid = None
3837

3938
def flush(self):

0 commit comments

Comments
 (0)