Skip to content

Commit 985a842

Browse files
committed
set dataset values in create if possible
1 parent 74d3a62 commit 985a842

File tree

3 files changed

+38
-24
lines changed

3 files changed

+38
-24
lines changed

src/h5json/dset_util.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,14 +43,24 @@ def resize_dataset(dset_json, shape):
4343
dset_json["modified"] = time.time()
4444

4545

46-
def getNumElements(dset_json):
46+
def getDims(dset_json):
47+
""" return extents of the dataset shape as a tuple """
4748
shape_json = dset_json["shape"]
4849
shape_class = shape_json["class"]
4950
if shape_class == "H5S_NULL":
50-
num_elements = 0
51+
dims = None
5152
elif shape_class == "H5S_SCALAR":
52-
num_elements = 1
53+
dims = ()
5354
elif shape_class == "H5S_SIMPLE":
54-
dims = shape_json["dims"]
55-
num_elements = int(np.prod(dims))
56-
return num_elements
55+
dims = tuple(shape_json["dims"])
56+
else:
57+
raise ValueError(f"Unexpected shape class: {shape_class}")
58+
return dims
59+
60+
61+
def getNumElements(dset_json):
62+
""" return the number of elements defined by the dataset's shape
63+
returns None for null shape, 1 for scalar shape, and product of
64+
extents otherwise """
65+
66+
return int(np.prod(getDims(dset_json)))

src/h5json/hsdsstore/hsds_writer.py

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
from ..hdf5dtype import isVlen
1818
from ..array_util import arrayToBytes, bytesArrayToList
19-
from ..dset_util import getNumElements
19+
from ..dset_util import getNumElements, getDims
2020
from .. import selections
2121
from ..h5writer import H5Writer
2222
from .httpconn import HttpConn
@@ -251,7 +251,7 @@ def multiPost(items):
251251
items.clear()
252252

253253
self.log.debug(f"hsds_writer> createObjects, {len(obj_ids)} objects")
254-
MAX_OBJECTS_PER_REQUEST = 3
254+
MAX_OBJECTS_PER_REQUEST = 300
255255
collections = ("groups", "datasets", "datatypes")
256256
col_items = {}
257257
dset_value_update_ids = set()
@@ -286,15 +286,25 @@ def multiPost(items):
286286
item[key] = obj_json[key]
287287

288288
# initialize dataset values if provided and not too large
289-
if "updates" in obj_json:
290-
updates = obj_json["updates"]
291-
if updates and len(updates) == 1 and self.getDatasetSize(obj_id) < MAX_INIT_SIZE:
289+
if collection == "datasets":
290+
dset_dims = getDims(obj_json) # will be None for null space datasets
291+
dset_size = self.getDatasetSize(obj_id) # number of bytes defined by the shape
292+
init_arr = None # data to be passed to post create method
293+
updates = obj_json.get("updates")
294+
if updates and len(updates) == 1 and dset_size < MAX_INIT_SIZE:
292295
sel, arr = updates[0]
293296
if sel.select_type == selections.H5S_SELECT_ALL:
294-
value = bytesArrayToList(arr)
295-
item["value"] = value
297+
init_arr = arr
296298
updates.clear() # reset the update list
297-
if updates:
299+
if self._init and init_arr is None and dset_dims is not None:
300+
# get all values from dataset if small enough
301+
if dset_size < MAX_INIT_SIZE:
302+
sel_all = selections.select(dset_dims, ...)
303+
init_arr = self.db.getDatasetValues(obj_id, sel_all)
304+
if init_arr is not None:
305+
value = bytesArrayToList(init_arr)
306+
item["value"] = value
307+
elif updates or self._init:
298308
dset_value_update_ids.add(obj_id) # will set dataset value below
299309

300310
# add to the list of new items for the given collection
@@ -436,18 +446,13 @@ def updateValues(self, dset_ids):
436446
if getCollectionForId(dset_id) != "datasets":
437447
continue # ignore groups and datatypes
438448
dset_json = self.db.getObjectById(dset_id)
439-
dset_shape = dset_json["shape"]
440-
dset_class = dset_shape['class']
441-
if dset_class == "H5S_NULL":
449+
dset_dims = getDims(dset_json)
450+
if dset_dims is None:
442451
# no data to update
443452
continue
444453
if self._init:
445454
# get all data for the dataset
446455
# TBD: do this by chunks
447-
if dset_class == "H5S_SCALAR":
448-
dset_dims = []
449-
else:
450-
dset_dims = dset_shape["dims"]
451456
sel_all = selections.select(dset_dims, ...)
452457
arr = self.db.getDatasetValues(dset_id, sel_all)
453458
if arr is not None:
@@ -491,7 +496,8 @@ def flush(self):
491496
dirty_ids.add(root_id) # add back root for attribute and link creation
492497
if not self._no_data:
493498
# initialize dataset values
494-
self.updateValues(obj_ids)
499+
pass
500+
# self.updateValues(obj_ids)
495501
self._init = False
496502
elif self.db.new_objects:
497503
self.log.debug(f"hsds_writer> {len(self.db.new_objects)} objects to create")

test/unit/h5py_reader_test.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,7 @@ def testSimple(self):
4141
db = Hdf5db(app_logger=self.log)
4242
db.reader = H5pyReader(filepath, app_logger=self.log)
4343
root_id = db.open()
44-
print("got root_id:", root_id)
4544
root_json = db.getObjectById(root_id)
46-
print("got root_json:", root_json)
4745
root_attrs = root_json["attributes"]
4846
self.assertEqual(len(root_attrs), 2)
4947
self.assertEqual(list(root_attrs.keys()), ["attr1", "attr2"])

0 commit comments

Comments (0)