Skip to content

Commit 313e8bf

Browse files
committed
Fix HDF5 export error raised when attribute metadata is too large
1 parent dcfee82 commit 313e8bf

File tree

1 file changed

+14
-3
lines changed

1 file changed

+14
-3
lines changed

dimspy/portals/hdf5_portal.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,11 @@
1111
"""
1212

1313

14-
import os, logging, zlib, h5py
14+
import os, logging, zlib, h5py, textwrap
1515
import cPickle as cp
1616
import numpy as np
1717
from ast import literal_eval
18+
from string import join
1819
from dimspy.models.peaklist_tags import Tag, PeakList_Tags
1920
from dimspy.models.peaklist import PeakList
2021
from dimspy.models.peak_matrix import PeakMatrix, unmask_all_peakmatrix
@@ -26,9 +27,16 @@ def _eval(v):
2627
except (ValueError, SyntaxError):
2728
return str(v)
2829

29-
_packMeta = lambda x: np.array(zlib.compress(cp.dumps(x)) + '\xFF') # numpy truncates right-side \x00
30+
def _packMeta(x):
    """Serialise ``x`` into a numpy array holding a compressed pickle.

    The object is pickled with ``cp.dumps``, compressed with zlib at
    level 9, and a single 0xFF sentinel byte is appended before the
    result is wrapped in ``np.array``.
    """
    compressed = zlib.compress(cp.dumps(x), 9)
    # numpy truncates right-side \x00, so terminate with a non-zero byte
    return np.array(compressed + '\xFF')
3031
def _unpackMeta(x):
    """Restore an object from a compressed pickle with a trailing sentinel.

    The last element of ``x`` is dropped, the remainder is
    zlib-decompressed and the result is unpickled with ``cp.loads``.
    """
    # NOTE(review): unpickling executes arbitrary code if the input is
    # untrusted -- confirm that only trusted files reach this helper.
    payload = x[:-1]
    return cp.loads(zlib.decompress(payload))
3132

33+
# Four-byte uint8 marker placed in front of bit-packed boolean arrays so
# that they can be told apart from ordinary uint8 data.
_BOOL_HEADERS = np.array((0xE5, 0xAD, 0x71, 0x47), dtype=np.uint8)
34+
def _encUInt8(x, b):
    """Encode a non-negative integer as ``b`` big-endian bytes.

    :param x: integer to encode; must satisfy ``0 <= x < 256 ** b``
    :param b: number of bytes to produce
    :return: 1-D ``np.uint8`` array of length ``b``, most significant
        byte first
    :raises ValueError: if ``x`` is negative or does not fit in ``b``
        bytes (previously this silently produced an array of the wrong
        length, corrupting any fixed-width trailer built from it)
    """
    x = int(x)
    if x < 0 or x >> (8 * b):
        raise ValueError('cannot encode %d in %d unsigned byte(s)' % (x, b))
    # Plain integer shifts: no hex-string round trip, and independent of
    # whether map() returns a list (Python 2) or an iterator (Python 3).
    return np.array([(x >> (8 * i)) & 0xFF for i in range(b - 1, -1, -1)],
                    dtype=np.uint8)
35+
def _decUInt8(x):
    """Decode a big-endian sequence of byte values into an integer.

    :param x: iterable of values in ``0..255`` (e.g. a ``np.uint8``
        array), most significant byte first
    :return: the decoded non-negative Python integer; an empty sequence
        decodes to 0
    """
    value = 0
    for byte in x:
        # int() lifts numpy uint8 scalars to Python ints so that the
        # accumulator cannot overflow a fixed-width numpy type.
        value = (value << 8) | int(byte)
    return value
36+
37+
def _packBool(x):
    """Bit-pack a boolean array into a self-describing uint8 array.

    Layout of the result: the ``_BOOL_HEADERS`` marker, the output of
    ``np.packbits(x)``, then ``x.shape[0]`` encoded in 4 bytes by
    ``_encUInt8`` so the padding bits added by packing can be dropped
    again when unpacking.
    """
    packed = np.packbits(x)
    length_trailer = _encUInt8(x.shape[0], 4)
    return np.concatenate((_BOOL_HEADERS, packed, length_trailer))
38+
def _unpackBool(x):
    """Recover a boolean array from the uint8 layout with header and trailer.

    The leading ``len(_BOOL_HEADERS)`` bytes and the trailing 4 bytes are
    stripped, the remainder is expanded with ``np.unpackbits``, and the
    result is cut to the element count decoded from the trailing 4 bytes.
    """
    payload = x[len(_BOOL_HEADERS):-4]
    bits = np.unpackbits(payload)
    count = _decUInt8(x[-4:])
    return bits[:count].astype(bool)
39+
3240

3341
# peaklists portals
3442
def save_peaklists_as_hdf5(pkls, filename):
@@ -149,7 +157,8 @@ def _saveattr(attr):
149157

150158
dset.attrs['flag_names'] = pm.flag_names
151159
for fn in pm.flag_names:
152-
dset.attrs[fn] = pm.flag_values(fn)
160+
fvals = pm.flag_values(fn)
161+
dset.attrs[fn] = fvals if fvals.nbytes < 64000 else _packBool(fvals) if fvals.dtype.kind == 'b' else _packMeta(fvals)
153162

154163

155164
def load_peak_matrix_from_hdf5(filename):
@@ -180,6 +189,8 @@ def load_peak_matrix_from_hdf5(filename):
180189
ptgs = [PeakList_Tags(*[Tag(_eval(v), None if t == 'None' else t) for t,v in tags]) for tags in map(lambda x: dset.attrs[x], tatt)]
181190

182191
flgs = [(fn, dset.attrs[fn]) for fn in dset.attrs['flag_names']]
192+
flgs = [(fn, _unpackBool(fv) if fv.dtype.kind == 'u' and np.all(fv[:len(_BOOL_HEADERS)] == _BOOL_HEADERS) else \
193+
_unpackMeta(fv) if fv.dtype.kind == 's' and fv[-1] == '\xFF' else fv) for fn,fv in flgs]
183194
alst = [(attr, np.array(f[attr]).astype(f[attr].attrs['dtype'])) for attr in attl]
184195

185196
pm = PeakMatrix(pids, ptgs, alst)

0 commit comments

Comments
 (0)