Skip to content

Commit c0a6cc3

Browse files
committed
update h5tojson script
1 parent c5c28a4 commit c0a6cc3

File tree

5 files changed

+252
-328
lines changed

5 files changed

+252
-328
lines changed

src/h5json/dset_util.py

Lines changed: 0 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -58,48 +58,6 @@
5858
_H5PY_COMPRESSION_FILTERS = ("gzip", "lzf", "szip")
5959
"""
6060

61-
def make_new_dset(
62-
shape=None,
63-
dtype=None,
64-
chunks=None,
65-
compression=None,
66-
shuffle=None,
67-
maxshape=None,
68-
compression_opts=None,
69-
fillvalue=None,
70-
cpl=None
71-
):
72-
73-
type_json = getTypeItem(dtype)
74-
if shape == "H5S_NULL":
75-
shape_json = {"class": "H5S_NULL"}
76-
else:
77-
shape_json = {"class": "H5S_SIMPLE"}
78-
shape_json["dims"] = list(shape)
79-
80-
if maxshape:
81-
shape_json["maxshape"] = maxshape
82-
if cpl is None:
83-
cpl = {}
84-
if chunks:
85-
cpl["chunks"] = chunks
86-
if compression:
87-
cpl["compression"] = compression
88-
if shuffle:
89-
cpl["shuffle"] = shuffle
90-
if compression_opts:
91-
cpl["compression_opts"] = compression_opts
92-
if fillvalue:
93-
cpl["fillvalue"] = fillvalue
94-
95-
96-
# TBD - other properties
97-
dset_json = {"shape": shape_json, "type": type_json, "cpl": cpl, "attributes": {}}
98-
dset_json["created"] = time.time()
99-
dset_json["modified"] = None
100-
101-
return dset_json
102-
10361
def resize_dataset(dset_json, shape):
10462
shape_json = dset_json["shape"]
10563
shape_class = shape_json["class"]

src/h5json/h5tojson/h5tojson.py

Lines changed: 13 additions & 202 deletions
Original file line numberDiff line numberDiff line change
@@ -10,216 +10,29 @@
1010
# request a copy from help@hdfgroup.org. #
1111
##############################################################################
1212
import sys
13-
import json
1413
import argparse
1514
import os.path as op
16-
import tempfile
1715
import logging
1816
import logging.handlers
19-
from h5json import Hdf5db
20-
from h5json import hdf5dtype
21-
22-
23-
class DumpJson:
24-
"""
25-
DumpJson - return json representation of all objects within the given file
26-
"""
27-
28-
def __init__(self, db, app_logger=None, options=None):
29-
self.options = options
30-
self.db = db
31-
if app_logger:
32-
self.log = app_logger
33-
else:
34-
self.log = logging.getLogger()
35-
self.json = {}
36-
37-
def dumpAttribute(self, col_name, uuid, attr_name):
38-
self.log.info("dumpAttribute: [" + attr_name + "]")
39-
item = self.db.getAttributeItem(col_name, uuid, attr_name)
40-
response = {"name": attr_name}
41-
typeItem = item["type"]
42-
response["type"] = hdf5dtype.getTypeResponse(typeItem)
43-
response["shape"] = item["shape"]
44-
if not self.options.D:
45-
if "value" not in item:
46-
self.log.warning("no value key in attribute: " + attr_name)
47-
else:
48-
response["value"] = item[
49-
"value"
50-
] # dump values unless header -D was passed
51-
return response
52-
53-
def dumpAttributes(self, col_name, uuid):
54-
attr_list = self.db.getAttributeItems(col_name, uuid)
55-
self.log.info("dumpAttributes: " + uuid)
56-
items = []
57-
for attr in attr_list:
58-
item = self.dumpAttribute(col_name, uuid, attr["name"])
59-
items.append(item)
60-
61-
return items
62-
63-
def dumpLink(self, uuid, name):
64-
item = self.db.getLinkItemByUuid(uuid, name)
65-
for key in ("ctime", "mtime", "href"):
66-
if key in item:
67-
del item[key]
68-
return item
69-
70-
def dumpLinks(self, uuid):
71-
link_list = self.db.getLinkItems(uuid)
72-
items = []
73-
for link in link_list:
74-
item = self.dumpLink(uuid, link["title"])
75-
items.append(item)
76-
return items
77-
78-
def dumpGroup(self, uuid):
79-
item = self.db.getGroupItemByUuid(uuid)
80-
if "alias" in item:
81-
alias = item["alias"]
82-
if alias:
83-
self.log.info("dumpGroup alias: [" + alias[0] + "]")
84-
for key in ("ctime", "mtime", "linkCount", "attributeCount", "id"):
85-
if key in item:
86-
del item[key]
87-
attributes = self.dumpAttributes("groups", uuid)
88-
if attributes:
89-
item["attributes"] = attributes
90-
links = self.dumpLinks(uuid)
91-
if links:
92-
item["links"] = links
93-
return item
94-
95-
def dumpGroups(self):
96-
groups = {}
97-
item = self.dumpGroup(self.root_uuid)
98-
groups[self.root_uuid] = item
99-
uuids = self.db.getCollection("groups")
100-
for uuid in uuids:
101-
item = self.dumpGroup(uuid)
102-
groups[uuid] = item
103-
104-
self.json["groups"] = groups
105-
106-
def dumpDataset(self, uuid):
107-
response = {}
108-
self.log.info("dumpDataset: " + uuid)
109-
item = self.db.getDatasetItemByUuid(uuid)
110-
if "alias" in item:
111-
alias = item["alias"]
112-
if alias:
113-
self.log.info("dumpDataset alias: [" + alias[0] + "]")
114-
response["alias"] = item["alias"]
115-
116-
typeItem = item["type"]
117-
response["type"] = hdf5dtype.getTypeResponse(typeItem)
118-
shapeItem = item["shape"]
119-
shape_rsp = {}
120-
num_elements = 1
121-
shape_rsp["class"] = shapeItem["class"]
122-
if "dims" in shapeItem:
123-
shape_rsp["dims"] = shapeItem["dims"]
124-
for dim in shapeItem["dims"]:
125-
num_elements *= dim
126-
if "maxdims" in shapeItem:
127-
maxdims = []
128-
for dim in shapeItem["maxdims"]:
129-
if dim == 0:
130-
maxdims.append("H5S_UNLIMITED")
131-
else:
132-
maxdims.append(dim)
133-
shape_rsp["maxdims"] = maxdims
134-
response["shape"] = shape_rsp
135-
136-
if "creationProperties" in item:
137-
response["creationProperties"] = item["creationProperties"]
138-
139-
attributes = self.dumpAttributes("datasets", uuid)
140-
if attributes:
141-
response["attributes"] = attributes
142-
143-
if not (self.options.D or self.options.d):
144-
if num_elements > 0:
145-
value = self.db.getDatasetValuesByUuid(uuid)
146-
response["value"] = value # dump values unless header flag was passed
147-
else:
148-
response["value"] = [] # empty list
149-
return response
150-
151-
def dumpDatasets(self):
152-
uuids = self.db.getCollection("datasets")
153-
if uuids:
154-
datasets = {}
155-
for uuid in uuids:
156-
item = self.dumpDataset(uuid)
157-
datasets[uuid] = item
158-
159-
self.json["datasets"] = datasets
160-
161-
def dumpDatatype(self, uuid):
162-
response = {}
163-
item = self.db.getCommittedTypeItemByUuid(uuid)
164-
response["alias"] = item["alias"]
165-
typeItem = item["type"]
166-
response["type"] = hdf5dtype.getTypeResponse(typeItem)
167-
attributes = self.dumpAttributes("datatypes", uuid)
168-
if attributes:
169-
response["attributes"] = attributes
170-
return response
171-
172-
def dumpDatatypes(self):
173-
uuids = self.db.getCollection("datatypes")
174-
if uuids:
175-
datatypes = {}
176-
for uuid in uuids:
177-
item = self.dumpDatatype(uuid)
178-
datatypes[uuid] = item
179-
180-
self.json["datatypes"] = datatypes
181-
182-
def dumpFile(self):
183-
184-
self.root_uuid = self.db.getUUIDByPath("/")
185-
186-
db_version_info = self.db.getVersionInfo()
187-
188-
self.json["apiVersion"] = db_version_info["hdf5-json-version"]
189-
self.json["root"] = self.root_uuid
190-
191-
self.dumpGroups()
192-
193-
self.dumpDatasets()
194-
195-
self.dumpDatatypes()
196-
197-
print(json.dumps(self.json, sort_keys=True, indent=4))
198-
199-
200-
def getTempFileName():
201-
"""
202-
Generate a temporary filename to avoid problems with trying to create a dbfile
203-
in a read-only directory. (See: https://github.com/HDFGroup/h5serv/issues/37)
204-
"""
205-
f = tempfile.NamedTemporaryFile(delete=False)
206-
f.close()
207-
return f.name
20817

18+
from h5json import Hdf5db
19+
from h5json.writer.h5json_writer import H5JsonWriter
20+
from h5json.reader.h5py_reader import H5pyReader
21+
20922

21023
def main():
21124
parser = argparse.ArgumentParser(usage="%(prog)s [-h] [-D|-d] <hdf5_file>")
212-
parser.add_argument("-D", action="store_true", help="surpress all data output")
25+
parser.add_argument("-D", action="store_true", help="suppress all data output")
21326
parser.add_argument(
21427
"-d",
21528
action="store_true",
216-
help="surpress data output for" + " datasets (but not attribute values)",
29+
help="suppress data output for" + " datasets (but not attribute values)",
21730
)
21831
parser.add_argument("filename", nargs="+", help="HDF5 to be converted to json")
21932
args = parser.parse_args()
22033

22134
# create logger
222-
log = logging.getLogger("h5serv")
35+
log = logging.getLogger("h5tojson")
22336
# log.setLevel(logging.WARN)
22437
log.setLevel(logging.INFO)
22538
# add log handler
@@ -230,16 +43,14 @@ def main():
23043

23144
filename = args.filename[0]
23245
if not op.isfile(filename):
233-
sys.exit("Cannot find file: %s" % filename)
234-
235-
log.info("h5tojson " + filename)
46+
sys.exit(f"Cannot find file: {filename}")
23647

237-
dbFilename = getTempFileName()
238-
log.info("Using dbFile: " + dbFilename)
239-
with Hdf5db(filename, dbFilePath=dbFilename, readonly=True, app_logger=log) as db:
240-
dumper = DumpJson(db, app_logger=log, options=args)
241-
dumper.dumpFile()
48+
log.info(f"h5tojson {filename}")
24249

50+
kwargs = {"app_logger": log}
51+
52+
with Hdf5db(h5_reader=H5pyReader(filename, **kwargs), h5_writer=H5JsonWriter("/tmp/foo.json", no_data=False, **kwargs), **kwargs) as db:
53+
pass
24354

24455
if __name__ == "__main__":
24556
main()

0 commit comments

Comments
 (0)