Skip to content

Commit 5e25696

Browse files
committed
refactor dataset write code
1 parent acfd41a commit 5e25696

File tree

1 file changed

+91
-59
lines changed

1 file changed

+91
-59
lines changed

h5json/hdf5db.py

Lines changed: 91 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -2228,6 +2228,10 @@ def setDatasetValuesByUuid(self, obj_uuid, data, slices=None, format="json"):
22282228
dt = dset.dtype
22292229
typeItem = getTypeItem(dt)
22302230
itemSize = getItemSize(typeItem)
2231+
rank = len(dset.shape)
2232+
arraySize = 1
2233+
for extent in dset.shape:
2234+
arraySize *= arraySize
22312235

22322236
if itemSize == "H5T_VARIABLE" and format == "binary":
22332237
msg = "Only JSON is supported for for this data type"
@@ -2254,69 +2258,97 @@ def setDatasetValuesByUuid(self, obj_uuid, data, slices=None, format="json"):
22542258
data = self.listToRef(data)
22552259

22562260
if slices is None:
2257-
# write entire dataset
2258-
if format == "binary":
2259-
if len(data) != (dset.size * itemSize):
2260-
msg = "Expected " + (dset.size * itemSize) + " bytes, but got: " + len(data)
2261-
self.log.info(msg)
2262-
raise IOError(errno.EINVAL, msg)
2263-
arr = np.fromstring(data, dtype=dset.dtype)
2264-
arr.reshape(dset.shape)
2265-
dset[()] = arr
2266-
else:
2267-
# json data
2268-
try:
2269-
dset[()] = data
2270-
except TypeError as te:
2271-
raise IOError(errno.EINVAL, str(te))
2261+
slices = []
2262+
# create selection that covers entire dataset
2263+
for dim in range(rank):
2264+
s = slice(0, dset.shape[dim], 1)
2265+
slices.append(s)
2266+
slices = tuple(slices)
2267+
2268+
2269+
if type(slices) != tuple:
2270+
msg = "setDatasetValuesByUuid: bad type for dim parameter"
2271+
self.log.error(msg)
2272+
raise IOError(erno.EIO, msg)
2273+
2274+
2275+
if len(slices) != rank:
2276+
msg = "number of dims in selection not same as rank"
2277+
self.log.info(msg)
2278+
raise IOError(errno.EINVAL, msg)
2279+
2280+
npoints = 1
2281+
np_shape = []
2282+
for i in range(rank):
2283+
s = slices[i]
2284+
2285+
if s.start < 0 or s.step <= 0 or s.stop < s.start:
2286+
msg = "invalid slice specification"
2287+
self.log.info(msg)
2288+
raise IOError(errno.EINVAL, msg)
2289+
if s.stop > dset.shape[i]:
2290+
msg = "invalid slice specification"
2291+
self.log.info(msg)
2292+
raise IOError(errno.EINVAL, msg)
2293+
np_shape.append(s.stop - s.start)
2294+
2295+
count = (s.stop - s.start) // s.step
2296+
if count <= 0:
2297+
msg = "invalid slice specification"
2298+
self.log.info(msg)
2299+
raise IOError(errno.EINVAL, msg)
2300+
2301+
npoints *= count
2302+
2303+
np_shape = tuple(np_shape) # for comparison with ndarray shape
2304+
2305+
self.log.info("selection shape:" + str(np_shape))
22722306

2307+
if format == "binary":
2308+
if npoints*itemSize != len(data):
2309+
msg = "Expected: " + str(npoints*itemSize) + " bytes, but got: " + str(len(data))
2310+
self.log.info(msg)
2311+
raise IOError(errno.EINVAL, msg)
2312+
arr = np.fromstring(data, dtype=dset.dtype)
2313+
arr = arr.reshape(np_shape) # conform to selection shape
2314+
22732315
else:
2274-
if type(slices) != tuple:
2275-
msg = "setDatasetValuesByUuid: bad type for dim parameter"
2276-
self.log.error(msg)
2277-
return False
2278-
rank = len(dset.shape)
2279-
2280-
if len(slices) != rank:
2281-
self.log.error("setDatasetValuesByUuid: number of dims in selection not same as rank")
2282-
return False
2283-
else:
2284-
npoints = 1
2285-
for i in range(rank):
2286-
s = slices[i]
2287-
count = (s.stop - s.start) // s.step
2288-
npoints *= count
2289-
if count <= 0:
2290-
self.log.error("invalid slice specification")
2316+
# data is json
2317+
if npoints == 1 and len(dset.dtype) > 1:
2318+
# convert to tuple for compound singleton writes
2319+
data = [tuple(data),]
2320+
2321+
arr = np.array(data, dtype=dset.dtype)
2322+
# raise an exception if the array shape doesn't match the selection shape
2323+
# allow if the array is a scalar and the selection shape is one element,
2324+
# numpy is ok with this
2325+
if arr.shape == () and np_shape == (1,):
2326+
np_shape = ()
2327+
if arr.shape == (1,) and np_shape == ():
2328+
np_shape = (1,)
22912329

2292-
if format == "binary":
2293-
np_shape = []
2294-
for i in range(rank):
2295-
s = slices[i]
2296-
np_shape.append( (s.stop - s.start) )
2297-
arr = np.fromstring(data, dtype=dset.dtype)
2298-
arr = arr.reshape(np_shape)
2330+
if arr.shape != np_shape:
2331+
msg = "data shape doesn't match selection shape"
2332+
msg += "--data shape: " + str(arr.shape)
2333+
msg += "--selection shape: " + str(np_shape)
2334+
2335+
self.log.info(msg)
2336+
raise IOError(errno.EINVAL, msg)
22992337

2300-
if rank == 1:
2301-
s = slices[0]
2302-
dset[s] = arr
2303-
else:
2304-
dset[slices] = arr
2305-
else:
2306-
if count == 1 and len(dset.dtype) > 1:
2307-
# convert to tuple for compound singleton writes
2308-
data = tuple(data)
2309-
if rank == 1:
2310-
s = slices[0]
2311-
try:
2312-
dset[s] = data
2313-
except TypeError as te:
2314-
raise IOError(errno.EINVAL, str(te))
2315-
else:
2316-
try:
2317-
dset[slices] = data
2318-
except TypeError as te:
2319-
raise IOError(errno.EINVAL, str(te))
2338+
# write temp numpy array to dataset
2339+
if rank == 1:
2340+
s = slices[0]
2341+
try:
2342+
dset[s] = arr
2343+
except TypeError as te:
2344+
self.log.info("h5py setitem exception: " + str(te))
2345+
raise IOError(errno.EINVAL, str(te))
2346+
else:
2347+
try:
2348+
dset[slices] = arr
2349+
except TypeError as te:
2350+
self.log.info("h5py setitem exception: " + str(te))
2351+
raise IOError(errno.EINVAL, str(te))
23202352

23212353
# update modified time
23222354
self.setModifiedTime(obj_uuid)

0 commit comments

Comments
 (0)