Skip to content

Commit 4d648da

Browse files
committed
Got NXazint1d reader done
1 parent 6a1e0a5 commit 4d648da

File tree

1 file changed

+166
-56
lines changed

1 file changed

+166
-56
lines changed

GSASII/imports/G2pwd_HDF5.py

Lines changed: 166 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414
from .. import GSASIIobj as G2obj
1515
from .. import GSASIIfiles as G2fil
1616

17+
#from .. import GSASIIpath
18+
#breakpoint = GSASIIpath.IPyBreak_base
19+
1720
class HDF5_Reader(G2obj.ImportPowderData):
1821
'''Routine to read multiple powder patterns from an HDF5 file.
1922
@@ -125,6 +128,11 @@ def HDF5list(self, filename):
125128
126129
:param filename:
127130
'''
131+
def ShowH5NeXusName(obj,keylist):
132+
key = '/'.join(keylist)
133+
if "NX_class" in obj[key].attrs:
134+
return obj[key].attrs["NX_class"]
135+
128136
fp = h5py.File(filename, 'r')
129137
#print(f'Contents of {filename}')
130138
HDF5entries = self.RecurseH5Element(fp)
@@ -136,6 +144,7 @@ def HDF5list(self, filename):
136144
for k in j:
137145
m = max(m,len('/'.join(k)))
138146
for k in j:
147+
nxname = ShowH5NeXusName(fp,k)
139148
lbl = self.ShowH5Element(fp,k)
140149
if '\n' in lbl:
141150
lbl = '; '.join(lbl.split('\n'))
@@ -144,6 +153,7 @@ def HDF5list(self, filename):
144153
# if '\n' in lbl:
145154
# lbl = lbl.split()[0] + '...'
146155
if lbl != '(group)': strings.append(f"{'/'.join(k):{m}s} {lbl}")
156+
if nxname: print(f"{'/'.join(k):{m}s} {lbl} {nxname}")
147157
with open(filename+'_contents.txt', 'w') as fp:
148158
for i in strings: fp.write(f'{i}\n')
149159

@@ -153,18 +163,23 @@ def ContentsValidator(self, filename):
153163
'''
154164
try:
155165
fp = h5py.File(filename, 'r')
156-
if 'entry' in fp: # NeXus
157-
#self.HDF5entries = []
158-
#self.HDF5list(filename)
159-
if 'definition' in fp['/entry']:
160-
# MAX IV NXazint1d file
161-
if fp['/entry/definition'][()].decode() == 'NXazint1d':
162-
return True
163-
except IOError:
166+
# test for MaxIV NeXus/NXazint1d & NXazint2d
167+
test = True
168+
while test:
169+
test = False
170+
entry = getNeXusBase(fp)
171+
if entry is None: break # not NeXus
172+
if 'definition' not in fp[entry]: break # not MaxIV NXazint*
173+
definition = fp[entry+'/definition'][()].decode()
174+
if definition == 'NXazint1d': return True
175+
if definition == 'NXazint2d': return True
176+
# test for next HDF5 type here
177+
#
178+
except IOError: # not HDF5
164179
return False
165180
finally:
166181
fp.close()
167-
return False
182+
return False # nothing passed -- not valid
168183

169184
def Reader(self, filename, ParentFrame=None, **kwarg):
170185
'''Scan file for sections needed by defined file types (currently
@@ -182,19 +197,26 @@ def Reader(self, filename, ParentFrame=None, **kwarg):
182197
self.blknum = 0
183198
else:
184199
self.blknum = min(self.selections)
200+
# was file read into buffer? If so skip opening file to save time
201+
definition = fpbuffer.get('definition','')
202+
if definition == 'NXazint1d':
203+
return self.readNXazint1d(filename, fpbuffer)
204+
elif definition == 'NXazint2d':
205+
return self.readNXazint2d(filename, fpbuffer)
206+
# first or non-buffered read
185207
try:
186208
fp = h5py.File(filename, 'r')
187-
if 'entry' in fp: # NeXus
188-
if 'definition' in fp['/entry']:
189-
# MAX IV NXazint1d file
190-
if fp['/entry/definition'][()].decode() == 'NXazint1d':
209+
entry = getNeXusBase(fp)
210+
if entry: # NeXus
211+
if 'definition' in fp[entry]: # MaxIV NXazint*
212+
definition = fp[entry+'/definition'][()].decode()
213+
fpbuffer['definition'] = definition
214+
if definition == 'NXazint1d':
191215
return self.readNXazint1d(filename, fpbuffer)
192-
193-
# MAX IV NXazint1d file
194-
#if fp['/entry/definition'][()].decode() == 'NXazint2d':
195-
# return self.readNXazint2d(filename, fpbuffer)
196-
# return True
197-
# https://nxazint-hdf5-nexus-3229ecbd09ba8a773fbbd8beb72cace6216dfd5063e1.gitlab-pages.esrf.fr/classes/contributed_definitions/NXazint2d.html
216+
elif definition == 'NXazint2d':
217+
return self.readNXazint2d(filename, fpbuffer)
218+
# not a supported file type
219+
return False
198220
except IOError:
199221
print ('cannot open file '+ filename)
200222
return False
@@ -211,67 +233,95 @@ def readNXazint1d(self, filename, fpbuffer={}):
211233
#self.instmsg = 'HDF file'
212234
self.comments = []
213235
doread = False # has the file already been read into a buffer?
214-
arrays = ('entry/data/radial_axis','entry/data/I','entry/data/I_errors')
215-
floats = ('entry/instrument/monochromator/wavelength',
216-
'entry/reduction/input/polarization_factor')
217-
strings = ('entry/instrument/name','entry/reduction/input/unit',
218-
'entry/sample/name','entry/instrument/source/name')
219-
for i in arrays+floats+strings:
220-
if i not in fpbuffer:
236+
fileItems = {
237+
# arrays
238+
'radial_axis':('NXdata','radial_axis'),
239+
'I':('NXdata','I'),
240+
'I_errors':('NXdata','I_errors'),
241+
# floats
242+
'wavelength':('NXmonochromator','wavelength'),
243+
'polarization_factor':('NXparameters','polarization_factor'),
244+
# strings
245+
'instrument/name':('NXinstrument','name'),
246+
'unit':('NXparameters','unit'),
247+
'sample/name':('NXsample','name'),
248+
'source/name':('NXsource','name'),
249+
}
250+
# test if we have what we need in the buffer
251+
for k in fileItems:
252+
if k not in fpbuffer:
221253
doread = True
222254
break
223-
if doread: # read into buffer
255+
if doread:
256+
# Nope, need to fill the buffer
224257
try:
225258
fp = h5py.File(filename, 'r')
226-
for i in arrays:
227-
fpbuffer[i] = np.array(fp.get(i))
228-
self.numbanks = len(fpbuffer['entry/data/I']) # number of scans
229-
for i in floats:
230-
fpbuffer[i] = float(fp[i][()])
231-
for i in strings:
232-
try:
233-
fpbuffer[i] = fp[i][()].decode()
234-
self.comments.append(f'{i}={fpbuffer[i]}')
235-
except:
236-
fpbuffer[i] = None
237-
if fpbuffer['entry/reduction/input/unit'] != '2th':
238-
print('NXazint1d HDF5 file has units',fpbuffer['entry/reduction/input/unit'])
239-
self.errors = 'NXazint1d only can be read with 2th units'
240-
return False
259+
entry = getNeXusBase(fp)
260+
# lookup NeXus locations
261+
nexusDict = {i:None for i in set([i[0] for i in fileItems.values()])}
262+
recurseNeXusEntries(fp,entry,nexusDict)
263+
# save selected items from file in buffer
264+
savedKeys = []
265+
for k,loc in fileItems.items():
266+
if nexusDict[loc[0]] is None:
267+
fpbuffer[k] = None
268+
continue
269+
key = '/'.join((nexusDict[loc[0]],)+loc[1:])
270+
savedKeys.append(key)
271+
if key not in fp:
272+
fpbuffer[k] = None
273+
continue
274+
val = fp[key]
275+
if val.shape:
276+
fpbuffer[k] = np.array(val)
277+
elif 'float' in str(val.dtype):
278+
fpbuffer[k] = float(val[()])
279+
self.comments.append(f'{k}={val[()]}')
280+
else:
281+
fpbuffer[k] = val[()].decode()
282+
self.comments.append(f'{k}={fpbuffer[k]}')
283+
self.numbanks = len(fpbuffer['I'])
241284
# save arrays that are potentially tracking the parametric conditions
285+
# into ParamTrackingVars.
242286
# e.g. variables with the same length as the humber of datasets
243-
paramItems = self.RecurseH5Element(fp,length=self.numbanks)
244287
fpbuffer['ParamTrackingVars'] = {}
288+
paramItems = self.RecurseH5Element(fp,length=self.numbanks)
245289
for i in paramItems:
246290
for j in i:
247291
key = '/'.join(j)
248-
if key in arrays: continue
292+
if key in savedKeys: continue # standard data array
249293
obj = fp.get(key)
250294
if obj is None: continue
251295
if len(obj[()].shape) != 1: continue
252296
# are all values the same? If so, put them into the comments
253-
# for the first histogram. If they are changing, note that and
254-
# later they will be put into every histogram.
297+
# for the first histogram only. If they are changing, note that
298+
# here and later they will be put into every histogram.
255299
if all(obj[0] == obj):
256300
self.comments.append(f'{key.split("/")[-1]}={obj[0]}')
257301
else:
258302
fpbuffer['ParamTrackingVars'][key] = np.array(obj[()])
259-
if self.selections is None or len(self.selections) == 0:
260-
self.blknum = 0
261-
else:
262-
self.blknum = min(self.selections)
263303
except IOError:
264304
print (f'Can not open or read file {filename}')
265305
return False
266306
finally:
267307
fp.close()
268-
x = fpbuffer['entry/data/radial_axis']
269-
y = fpbuffer['entry/data/I'][self.blknum]
308+
if fpbuffer['unit'] != '2th':
309+
print('NXazint1d HDF5 file has units',fpbuffer['entry/reduction/input/unit'])
310+
self.errors = 'NXazint1d only can be read with 2th units'
311+
return False
312+
# initialize the block selection
313+
if self.selections is None or len(self.selections) == 0:
314+
self.blknum = 0
315+
else:
316+
self.blknum = min(self.selections)
317+
# now pull the selected dataset from the buffer
318+
x = fpbuffer['radial_axis']
319+
y = fpbuffer['I'][self.blknum]
270320
try:
271-
esd = fpbuffer['entry/data/I_errors'][self.blknum]
321+
esd = fpbuffer['I_errors'][self.blknum]
272322
w = np.where(esd==0,0,np.nan_to_num(1/esd**2))
273323
except:
274-
w = np.nan_to_num(1/y) # best we can do, alas
324+
w = np.nan_to_num(1/y) # best we can do, alas w/o reported s.u.'s
275325
self.powderdata = [x,y,w,np.zeros_like(x),np.zeros_like(x),np.zeros_like(x)]
276326
# add parametric var as a comment
277327
for key,arr in fpbuffer['ParamTrackingVars'].items():
@@ -287,12 +337,11 @@ def readNXazint1d(self, filename, fpbuffer={}):
287337
self.Sample['Phi'] = val
288338
elif 'omega' in key:
289339
self.Sample['Omega'] = val
290-
#self.comments = comments[selblk]
291340
self.powderentry[0] = filename
292341
#self.powderentry[1] = Pos # position offset (never used, I hope)
293342
self.powderentry[2] = self.blknum # bank number
294343
self.idstring = f'#{self.blknum} {os.path.split(filename)[1][:60]}'
295-
self.instdict['wave'] = fpbuffer['entry/instrument/monochromator/wavelength']
344+
self.instdict['wave'] = fpbuffer['wavelength']
296345
# if not, are there more [selected] images that after this to be read?
297346
self.repeat = False
298347
if self.blknum < self.numbanks-1:
@@ -307,3 +356,64 @@ def readNXazint1d(self, filename, fpbuffer={}):
307356
except IndexError: # last selected image has been read
308357
self.repeat = False
309358
return True
359+
360+
# NeXus support routines. These were influenced heavily by Frederik Holm Gjørup
361+
# Also see NeXus support in plaid (https://github.com/fgjorup/plaid/blob/main/plaid/nexus.py)
362+
363+
def getNeXusBase(fp):
364+
'''This returns the base entry in a NeXus compilant HDF5 file
365+
(usually "/entry" for MaxIV files) or None if this is not a valid
366+
NeXus file.
367+
'''
368+
for key in fp:
369+
if ("NX_class" in fp[key].attrs and
370+
fp[key].attrs["NX_class"] == "NXentry"):
371+
return key
372+
373+
def getNeXusEntry(fp,base,target):
374+
'''This returns the entry in a NeXus compilant HDF5 file matching
375+
the name target, or None, if this is not found as a child of the key `base`.
376+
Not in use as it is more practical to use :func:`recurseNeXusEntries`.
377+
'''
378+
for key in fp[base]:
379+
subkey = '/'.join([base,key])
380+
if "NX_class" in fp[subkey].attrs:
381+
#print(key, list(fp[subkey].attrs),fp[subkey].attrs["NX_class"])
382+
if ("NX_class" in fp[subkey].attrs and
383+
fp[subkey].attrs["NX_class"] == target):
384+
return subkey
385+
else:
386+
print(key)
387+
388+
def recurseNeXusEntry(fp,node,target):
389+
'''Recurse through the HDF5 tree looking for NeXus class `target`.
390+
Not in use, as :func:`recurseNeXusEntries` is used to get all
391+
targets in a single pass through the tree.
392+
'''
393+
if node is None: return # needed?
394+
val = fp[node]
395+
if ("NX_class" in val.attrs and
396+
val.attrs["NX_class"] == target):
397+
return node
398+
if not isinstance(val, h5py.Group): return
399+
for key in val:
400+
subkey = '/'.join([node,key])
401+
res = recurseNeXusEntry(fp,subkey,target)
402+
if res: return res
403+
404+
def recurseNeXusEntries(fp,node,targetdict):
405+
'''recurse through the HDF5 tree looking for the NeXus classes
406+
in `targetdict`, storing the HDF5 key for each class in the dict
407+
408+
:param fp: HDF5 file pointer
409+
:param str node: name of current HDF5 key
410+
:param dict targetdict: dict to place HDF5 keys corresponding to
411+
the desired NeXus classes. As input this has the NeXus classes
412+
is the dict keys and the
413+
'''
414+
val = fp[node]
415+
if ("NX_class" in val.attrs and val.attrs["NX_class"] in targetdict):
416+
targetdict[val.attrs["NX_class"]] = node
417+
if isinstance(val, h5py.Group):
418+
for key in val:
419+
recurseNeXusEntries(fp,'/'.join([node,key]),targetdict)

0 commit comments

Comments
 (0)