Skip to content

Commit 7840ca4

Browse files
committed
add test for incremental updates
1 parent 4169d5c commit 7840ca4

File tree

4 files changed

+73
-7
lines changed

4 files changed

+73
-7
lines changed

src/h5json/hdf5db.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,19 @@ def reader(self):
8282
@reader.setter
8383
def reader(self, value: H5Reader):
8484
""" set the reader """
85+
if self._writer:
86+
self.flush()
8587
if self._reader:
8688
self._reader.close()
89+
root_id = value.get_root_id()
90+
if not root_id:
91+
raise ValueError(f"reader {type(value)} unable to return root_id")
92+
group_json = value.getObjectById(root_id)
93+
if not group_json:
94+
raise ValueError(f"reader {type(value)} unable to return group json")
8795
self._reader = value
96+
self._db[root_id] = group_json
97+
self._root_id = root_id
8898

8999
@property
90100
def writer(self):
@@ -411,15 +421,10 @@ def createAttribute(self, obj_id, name, value, shape=None, dtype=None):
411421

412422
obj_json = self.getObjectById(obj_id)
413423
attrs_json = obj_json["attributes"]
414-
if name in attrs_json:
415-
# replace, keep, created timestamp
416-
created = attrs_json["created"]
417-
else:
418-
created = time.time()
419424
type_json = getTypeItem(dtype)
420425
# finally put it all together...
421426
attr_json = {"shape": shape_json, "type": type_json, "value": value_json}
422-
attr_json["created"] = created
427+
attr_json["created"] = time.time()
423428

424429
# slot into obj_json["attributes"]
425430
attrs_json[name] = attr_json

src/h5json/writer/h5py_writer.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
##############################################################################
1212
import h5py
1313
import numpy as np
14+
import time
1415

1516
from ..objid import getCollectionForId, isValidUuid, getUuidFromId, isObjId
1617
from ..hdf5dtype import createDataType
@@ -39,6 +40,7 @@ def __init__(
3940
self._init = False
4041
else:
4142
self._init = True
43+
self._flush_time = 0.0
4244

4345
def _copy_element(self, val, src_dt, tgt_dt, fout=None):
4446
""" convert the given dataset or attribute element to h5py equivalent """
@@ -379,10 +381,14 @@ def updateAttributes(self, obj_id, obj):
379381
attrs = obj_json["attributes"]
380382
for name in attrs:
381383
attr_json = attrs[name]
384+
if "created" in attr_json and attr_json["created"] < self._flush_time:
385+
# attribute should already be saved
386+
continue
382387
self.createAttribute(obj, name, attr_json)
383388

384389
def flush(self):
385390
""" Write dirty items """
391+
386392
if not self.db:
387393
# no db set yet
388394
return False
@@ -393,6 +399,7 @@ def flush(self):
393399
with h5py.File(self._filepath, mode=mode) as f:
394400
if self.db.new_objects or self._init:
395401
root_json = self.db.getObjectById(root_id)
402+
396403
if "links" in root_json:
397404
root_links = root_json["links"]
398405
self._createObjects(f, root_links, visited=set((root_id,)))
@@ -408,6 +415,10 @@ def flush(self):
408415
self.initializeDatasetValues(obj_id, obj)
409416
else:
410417
self.updateDatasetValues(obj_id, obj)
418+
# record the time at which the write completed
419+
# updates before this time will not need to be written
420+
# TBD: possible race condition with multithreading
421+
self._flush_time = time.time()
411422

412423
self._init = False # done with init after first flush
413424
return True # all objects written successfully

test/unit/h5json_reader_test.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,9 @@ def __init__(self, *args, **kwargs):
3939
def testSimple(self):
4040
filepath = "data/json/tall.json"
4141
kwargs = {"app_logger": self.log}
42-
with Hdf5db(h5_reader=H5JsonReader(filepath, **kwargs), **kwargs) as db:
42+
with Hdf5db(**kwargs) as db:
43+
h5_reader = H5JsonReader(filepath, **kwargs)
44+
db.reader = h5_reader
4345
root_id = db.getObjectIdByPath("/")
4446
root_json = db.getObjectById(root_id)
4547

test/unit/h5py_writer_test.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import h5py
1616
import numpy as np
1717
from h5json import Hdf5db
18+
from h5json.reader.h5json_reader import H5JsonReader
1819
from h5json.writer.h5py_writer import H5pyWriter
1920
from h5json.hdf5dtype import special_dtype, Reference
2021
from h5json import selections
@@ -473,6 +474,53 @@ def testCommittedCompoundType(self):
473474
sub_dt = t1.dtype["field_4"]
474475
self.assertEqual(sub_dt, h5py.special_dtype(vlen=str))
475476

477+
def testReaderWithUpdate(self):
478+
479+
file_in = "data/json/tall.json"
480+
file_out = "test/unit/out/h5py_writer_test_testReaderWithUpdate.h5"
481+
482+
with Hdf5db(app_logger=self.log) as db:
483+
db.reader = H5JsonReader(file_in)
484+
db.writer = H5pyWriter(file_out, no_data=False)
485+
dset111_id = db.getObjectIdByPath("/g1/g1.1/dset1.1.1")
486+
db.flush()
487+
488+
with h5py.File(file_out) as f:
489+
self.assertTrue("/g1/g1.1/dset1.1.1" in f)
490+
dset111 = f["/g1/g1.1/dset1.1.1"]
491+
self.assertEqual(len(dset111.attrs), 2)
492+
493+
db.createAttribute(dset111_id, "attr3", "hello")
494+
dset_json = db.getObjectById(dset111_id)
495+
db.flush()
496+
497+
with h5py.File(file_out) as f:
498+
self.assertTrue("/g1/g1.1/dset1.1.1" in f)
499+
dset111 = f["/g1/g1.1/dset1.1.1"]
500+
self.assertEqual(len(dset111.attrs), 3)
501+
self.assertEqual(dset111.attrs["attr3"], b"hello")
502+
503+
db.createAttribute(dset111_id, "attr3", "bye-bye")
504+
db.flush()
505+
506+
with h5py.File(file_out) as f:
507+
self.assertTrue("/g1/g1.1/dset1.1.1" in f)
508+
dset111 = f["/g1/g1.1/dset1.1.1"]
509+
self.assertEqual(len(dset111.attrs), 3)
510+
self.assertEqual(dset111.attrs["attr3"], b"bye-bye")
511+
g1 = f["g1"]
512+
513+
# create a new link
514+
g13_id = db.createGroup()
515+
g1_id = db.getObjectIdByPath("/g1")
516+
db.createHardLink(g1_id, "g1.3", g13_id)
517+
db.flush()
518+
519+
with h5py.File(file_out) as f:
520+
g1 = f["g1"]
521+
self.assertEqual(len(g1), 3)
522+
self.assertTrue("g1.3" in g1)
523+
476524

477525
if __name__ == "__main__":
478526
# setup test files

0 commit comments

Comments
 (0)