Skip to content

Commit 4b9cb68

Browse files
committed
added h5json_writer
1 parent 2f546b9 commit 4b9cb68

File tree

9 files changed

+719
-33
lines changed

9 files changed

+719
-33
lines changed

src/h5json/dset_util.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ def make_new_dset(
9494

9595

9696
# TBD - other properties
97-
dset_json = {"shape": shape_json, "type": type_json, "cpl": cpl}
97+
dset_json = {"shape": shape_json, "type": type_json, "cpl": cpl, "attributes": {}}
9898
dset_json["created"] = time.time()
9999
dset_json["modified"] = None
100100

src/h5json/h5json_writer.py

Lines changed: 256 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,256 @@
1+
##############################################################################
2+
# Copyright by The HDF Group. #
3+
# All rights reserved. #
4+
# #
5+
# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and #
6+
# Utilities. The full HDF5 REST Server copyright notice, including #
7+
# terms governing use, modification, and redistribution, is contained in #
8+
# the file COPYING, which can be found at the root of the source code #
9+
# distribution tree. If you do not have access to this file, you may #
10+
# request a copy from help@hdfgroup.org. #
11+
##############################################################################
12+
13+
import json
14+
15+
from .h5writer import H5Writer
16+
from .objid import stripId, getCollectionForId
17+
18+
class H5JsonWriter(H5Writer):
    """
    Writer that serializes the contents of an Hdf5db instance to the
    HDF5-JSON format.

    The JSON document is assembled and emitted in a single pass when
    close() is called; incremental updates are not supported.
    """

    def __init__(
        self,
        filepath,
        append=False,
        no_data=False,
        app_logger=None
    ):
        """
        filepath: target path for the JSON output
        append: passed through to the H5Writer base class
        no_data: if True, dataset values are omitted from the output
        app_logger: optional logger; passed through to H5Writer
        """
        super().__init__(filepath, append=append, app_logger=app_logger)
        self.alias_db = {}  # obj_id -> list of h5paths that link to the object
        self.json = {}  # the JSON document assembled by dumpFile
        self._no_data = no_data
        self._root_uuid = None

    def flush(self):
        """ Write dirty items """
        # json writer doesn't support incremental updates, so we'll wait
        # for close to write out database
        self.log.info("flush")

    def close(self):
        """ close storage handle - triggers writing out the JSON document """
        self.dumpFile()

    def _setAlias(self, obj_id, id_set, h5path):
        """ Add the given h5path to the object's alias list.

        If the object is a group, recurse through each hard link.
        id_set holds the ids of groups on the current traversal path so
        that circular link structures don't cause infinite recursion.
        """
        obj_json = self.db.getObjectById(obj_id)
        alias_list = self.alias_db[obj_id]
        if h5path in alias_list:
            return  # nothing to do
        alias_list.append(h5path)
        if getCollectionForId(obj_id) != "groups":
            return  # done - only groups have links to follow
        id_set.add(obj_id)  # keep track of objects we've visited to avoid loops
        links = obj_json["links"]
        if h5path[-1] != '/':
            h5path += '/'

        for link_name in links:
            link_json = links[link_name]
            if link_json["class"] == "H5L_TYPE_HARD":
                tgt_id = link_json["id"]
                if tgt_id in id_set:
                    # fix: was an f-string with no placeholders
                    self.log.info("_setAlias - circular loop found")
                else:
                    self._setAlias(tgt_id, id_set, h5path + link_name)
        id_set.remove(obj_id)

    def getAliasList(self):
        """ Update the alias list for each object in the database """
        # clear existing aliases
        obj_ids = self.db.getCollection()
        for obj_id in obj_ids:
            self.alias_db[obj_id] = []

        self._setAlias(self._root_uuid, set(), "/")

    def dumpAttribute(self, obj_id, attr_name):
        """ Return a JSON item for the named attribute of the given object """
        self.log.info(f"dumpAttribute: [{attr_name}]")
        item = self.db.getAttribute(obj_id, attr_name)
        response = {"name": attr_name}
        response["type"] = item["type"]
        response["shape"] = item["shape"]
        # fix: removed always-true 'if True: # not self.options.D' placeholder
        # TBD: add an option to skip attribute values (like h5dump's -D flag)
        if "value" not in item:
            self.log.warning("no value key in attribute: " + attr_name)
        else:
            response["value"] = item["value"]
        return response

    def dumpAttributes(self, obj_id):
        """ Return a list of JSON items for all attributes of the object """
        attrs = self.db.getAttributes(obj_id)
        self.log.info(f"dumpAttributes: {obj_id}")
        items = []
        for attr_name in attrs:
            item = self.dumpAttribute(obj_id, attr_name)
            items.append(item)

        return items

    def dumpLink(self, obj_id, name):
        """ Return a JSON item for the named link of the given group """
        item = self.db.getLink(obj_id, name)
        response = {"class": item["class"]}
        if "id" in item:
            tgt_id = item["id"]
            response["collection"] = getCollectionForId(tgt_id)
            response["id"] = stripId(tgt_id)

        for key in item:
            if key in ("id", "created", "modified"):
                continue  # id handled above; timestamps not part of the output
            response[key] = item[key]
        response["title"] = name
        return response

    def dumpLinks(self, obj_id):
        """ Return a list of JSON items for all links of the group """
        links = self.db.getLinks(obj_id)
        items = []
        for link_name in links:
            item = self.dumpLink(obj_id, link_name)
            items.append(item)
        return items

    def dumpGroup(self, obj_id):
        """ Return a JSON item for the given group """
        item = self.db.getObjectById(obj_id)
        response = {}
        alias = self.alias_db[obj_id]
        response["alias"] = alias

        if "cpl" in item:
            # fix: was assigned into item (the db object) instead of the
            # response, so group creation properties never reached the output
            response["creationProperties"] = item["cpl"]
        attributes = self.dumpAttributes(obj_id)
        if attributes:
            response["attributes"] = attributes
        links = self.dumpLinks(obj_id)
        if links:
            response["links"] = links
        return response

    def dumpGroups(self):
        """ Add all groups (root first) to the JSON document """
        groups = {}
        item = self.dumpGroup(self._root_uuid)
        root_uuid = stripId(self._root_uuid)
        groups[root_uuid] = item
        obj_ids = self.db.getCollection("groups")
        for obj_id in obj_ids:
            if obj_id == self._root_uuid:
                continue  # root was already added
            item = self.dumpGroup(obj_id)
            obj_uuid = stripId(obj_id)
            groups[obj_uuid] = item

        self.json["groups"] = groups

    def dumpDataset(self, obj_id):
        """ Return a JSON item for the given dataset """
        response = {}
        self.log.info("dumpDataset: " + obj_id)
        item = self.db.getObjectById(obj_id)
        if "alias" in item:
            alias = item["alias"]
            if alias:
                self.log.info(f"dumpDataset alias: [{alias[0]}]")
            response["alias"] = item["alias"]

        response["type"] = item["type"]
        shapeItem = item["shape"]
        shape_rsp = {}
        num_elements = 1  # scalar shapes have no "dims" and one element
        shape_rsp["class"] = shapeItem["class"]
        if "dims" in shapeItem:
            shape_rsp["dims"] = shapeItem["dims"]
            for dim in shapeItem["dims"]:
                num_elements *= dim
        if "maxdims" in shapeItem:
            maxdims = []
            for dim in shapeItem["maxdims"]:
                if dim == 0:
                    # 0 is the internal marker for an unlimited dimension
                    maxdims.append("H5S_UNLIMITED")
                else:
                    maxdims.append(dim)
            shape_rsp["maxdims"] = maxdims
        response["shape"] = shape_rsp

        if "cpl" in item:
            response["creationProperties"] = item["cpl"]

        attributes = self.dumpAttributes(obj_id)
        if attributes:
            response["attributes"] = attributes

        if not self._no_data:
            if num_elements > 0:
                value = self.db.getDatasetValues(obj_id)
                response["value"] = value  # dump values unless no_data was passed
            else:
                response["value"] = []  # empty list
        return response

    def dumpDatasets(self):
        """ Add all datasets to the JSON document """
        obj_ids = self.db.getCollection("datasets")
        if obj_ids:
            datasets = {}
            for obj_id in obj_ids:
                item = self.dumpDataset(obj_id)
                # fix: key by the stripped uuid for consistency with
                # dumpGroups and with the ids emitted in link items
                datasets[stripId(obj_id)] = item

            self.json["datasets"] = datasets

    def dumpDatatype(self, obj_id):
        """ Return a JSON item for the given committed datatype """
        response = {}
        item = self.db.getObjectById(obj_id)
        # NOTE(review): unlike dumpDataset, this assumes "alias" is always
        # present on the db item - confirm or guard as dumpDataset does
        response["alias"] = item["alias"]
        response["type"] = item["type"]
        if "cpl" in item:
            response["creationProperties"] = item["cpl"]
        attributes = self.dumpAttributes(obj_id)
        if attributes:
            response["attributes"] = attributes
        return response

    def dumpDatatypes(self):
        """ Add all committed datatypes to the JSON document """
        obj_ids = self.db.getCollection("datatypes")
        if obj_ids:
            datatypes = {}
            for obj_id in obj_ids:
                item = self.dumpDatatype(obj_id)
                # fix: key by the stripped uuid for consistency with dumpGroups
                datatypes[stripId(obj_id)] = item

            self.json["datatypes"] = datatypes

    def dumpFile(self):
        """ Assemble the complete JSON document from the database """
        self._root_uuid = self.db.getObjectIdByPath("/")

        db_version_info = self.db.getVersionInfo()

        self.json["apiVersion"] = db_version_info["hdf5-json-version"]
        self.json["root"] = stripId(self._root_uuid)
        self.getAliasList()  # create alias_db with obj_id to alias list dict
        self.dumpGroups()

        self.dumpDatasets()

        self.dumpDatatypes()

        # TODO(review): presumably this should write to the filepath given to
        # the constructor rather than stdout - confirm intended behavior
        print(json.dumps(self.json, sort_keys=True, indent=4))

src/h5json/h5py_reader.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@
99
# distribution tree. If you do not have access to this file, you may #
1010
# request a copy from help@hdfgroup.org. #
1111
##############################################################################
12-
import logging
13-
1412
import h5py
1513
import numpy as np
1614

@@ -42,13 +40,16 @@ def __init__(
4240
filepath,
4341
app_logger=None
4442
):
43+
self._id_map = {}
44+
self._addr_map = {}
45+
"""
4546
if app_logger:
4647
self.log = app_logger
4748
else:
4849
self.log = logging.getLogger()
49-
self._id_map = {}
50-
self._addr_map = {}
5150
self._filepath = filepath
51+
"""
52+
super().__init__(filepath, app_logger=app_logger)
5253
f = h5py.File(self._filepath)
5354
self._f = f
5455
self._root_id = createObjId(obj_type="groups")
@@ -182,7 +183,7 @@ def _getLinks(self, grp):
182183
return items
183184

184185
def _getGroup(self, grp, include_links=True):
185-
self.log.info("_getGroup alias: [{grp.name}]")
186+
self.log.info(f"_getGroup alias: [{grp.name}]")
186187

187188
item = {"alias": grp.name}
188189

@@ -192,15 +193,15 @@ def _getGroup(self, grp, include_links=True):
192193
return item
193194

194195
def _getDatatype(self, ctype, include_attrs=True):
    """Return a JSON item for the given committed datatype.

    ctype: h5py named-datatype object (has .name and .dtype)
    include_attrs: unused in this body - presumably handled by the
        caller or a later revision; TODO confirm
    """
    # fix: log message had a mismatched bracket: "]{ctype.name}"
    self.log.info(f"getDatatype alias: [{ctype.name}]")
    item = {"alias": ctype.name}
    item["type"] = getTypeItem(ctype.dtype)

    return item
200201

201202

202203
def _getDataset(self, dset):
203-
self.log.info("getDataset alias: [{dset.name}]")
204+
self.log.info(f"getDataset alias: [{dset.name}]")
204205

205206
item = {"alias": dset.name}
206207

src/h5json/h5reader.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
##############################################################################
1212
from abc import ABC, abstractmethod
1313

14-
14+
import logging
1515

1616

1717
class H5Reader(ABC):
@@ -23,9 +23,14 @@ class H5Reader(ABC):
2323

2424
def __init__(self, filepath, app_logger=None):
    """Record the source filepath and set up the instance logger.

    filepath: path of the file this reader will operate on
    app_logger: optional logger instance; falls back to the root logger
    """
    self._filepath = filepath
    # use the supplied logger when given, otherwise the root logger
    self.log = app_logger if app_logger else logging.getLogger()
2934

3035
@abstractmethod
3136
def get_root_id(self):

0 commit comments

Comments
 (0)