1414from .. import GSASIIobj as G2obj
1515from .. import GSASIIfiles as G2fil
1616
17+ #from .. import GSASIIpath
18+ #breakpoint = GSASIIpath.IPyBreak_base
19+
1720class HDF5_Reader (G2obj .ImportPowderData ):
1821 '''Routine to read multiple powder patterns from an HDF5 file.
1922
@@ -125,6 +128,11 @@ def HDF5list(self, filename):
125128
126129 :param filename:
127130 '''
131+ def ShowH5NeXusName (obj ,keylist ):
132+ key = '/' .join (keylist )
133+ if "NX_class" in obj [key ].attrs :
134+ return obj [key ].attrs ["NX_class" ]
135+
128136 fp = h5py .File (filename , 'r' )
129137 #print(f'Contents of {filename}')
130138 HDF5entries = self .RecurseH5Element (fp )
@@ -136,6 +144,7 @@ def HDF5list(self, filename):
136144 for k in j :
137145 m = max (m ,len ('/' .join (k )))
138146 for k in j :
147+ nxname = ShowH5NeXusName (fp ,k )
139148 lbl = self .ShowH5Element (fp ,k )
140149 if '\n ' in lbl :
141150 lbl = '; ' .join (lbl .split ('\n ' ))
@@ -144,6 +153,7 @@ def HDF5list(self, filename):
144153 # if '\n' in lbl:
145154 # lbl = lbl.split()[0] + '...'
146155 if lbl != '(group)' : strings .append (f"{ '/' .join (k ):{m }s} { lbl } " )
156+ if nxname : print (f"{ '/' .join (k ):{m }s} { lbl } { nxname } " )
147157 with open (filename + '_contents.txt' , 'w' ) as fp :
148158 for i in strings : fp .write (f'{ i } \n ' )
149159
@@ -153,18 +163,23 @@ def ContentsValidator(self, filename):
153163 '''
154164 try :
155165 fp = h5py .File (filename , 'r' )
156- if 'entry' in fp : # NeXus
157- #self.HDF5entries = []
158- #self.HDF5list(filename)
159- if 'definition' in fp ['/entry' ]:
160- # MAX IV NXazint1d file
161- if fp ['/entry/definition' ][()].decode () == 'NXazint1d' :
162- return True
163- except IOError :
166+ # test for MaxIV NeXus/NXazint1d & NXazint2d
167+ test = True
168+ while test :
169+ test = False
170+ entry = getNeXusBase (fp )
171+ if entry is None : break # not NeXus
172+ if 'definition' not in fp [entry ]: break # not MaxIV NXazint*
173+ definition = fp [entry + '/definition' ][()].decode ()
174+ if definition == 'NXazint1d' : return True
175+ if definition == 'NXazint2d' : return True
176+ # test for next HDF5 type here
177+ #
178+ except IOError : # not HDF5
164179 return False
165180 finally :
166181 fp .close ()
167- return False
182+ return False # nothing passed -- not valid
168183
169184 def Reader (self , filename , ParentFrame = None , ** kwarg ):
170185 '''Scan file for sections needed by defined file types (currently
@@ -182,19 +197,26 @@ def Reader(self, filename, ParentFrame=None, **kwarg):
182197 self .blknum = 0
183198 else :
184199 self .blknum = min (self .selections )
200+ # was file read into buffer? If so skip opening file to save time
201+ definition = fpbuffer .get ('definition' ,'' )
202+ if definition == 'NXazint1d' :
203+ return self .readNXazint1d (filename , fpbuffer )
204+ elif definition == 'NXazint2d' :
205+ return self .readNXazint2d (filename , fpbuffer )
206+ # first or non-buffered read
185207 try :
186208 fp = h5py .File (filename , 'r' )
187- if 'entry' in fp : # NeXus
188- if 'definition' in fp ['/entry' ]:
189- # MAX IV NXazint1d file
190- if fp ['/entry/definition' ][()].decode () == 'NXazint1d' :
209+ entry = getNeXusBase (fp )
210+ if entry : # NeXus
211+ if 'definition' in fp [entry ]: # MaxIV NXazint*
212+ definition = fp [entry + '/definition' ][()].decode ()
213+ fpbuffer ['definition' ] = definition
214+ if definition == 'NXazint1d' :
191215 return self .readNXazint1d (filename , fpbuffer )
192-
193- # MAX IV NXazint1d file
194- #if fp['/entry/definition'][()].decode() == 'NXazint2d':
195- # return self.readNXazint2d(filename, fpbuffer)
196- # return True
197- # https://nxazint-hdf5-nexus-3229ecbd09ba8a773fbbd8beb72cace6216dfd5063e1.gitlab-pages.esrf.fr/classes/contributed_definitions/NXazint2d.html
216+ elif definition == 'NXazint2d' :
217+ return self .readNXazint2d (filename , fpbuffer )
218+ # not a supported file type
219+ return False
198220 except IOError :
199221 print ('cannot open file ' + filename )
200222 return False
@@ -211,67 +233,95 @@ def readNXazint1d(self, filename, fpbuffer={}):
211233 #self.instmsg = 'HDF file'
212234 self .comments = []
213235 doread = False # has the file already been read into a buffer?
214- arrays = ('entry/data/radial_axis' ,'entry/data/I' ,'entry/data/I_errors' )
215- floats = ('entry/instrument/monochromator/wavelength' ,
216- 'entry/reduction/input/polarization_factor' )
217- strings = ('entry/instrument/name' ,'entry/reduction/input/unit' ,
218- 'entry/sample/name' ,'entry/instrument/source/name' )
219- for i in arrays + floats + strings :
220- if i not in fpbuffer :
236+ fileItems = {
237+ # arrays
238+ 'radial_axis' :('NXdata' ,'radial_axis' ),
239+ 'I' :('NXdata' ,'I' ),
240+ 'I_errors' :('NXdata' ,'I_errors' ),
241+ # floats
242+ 'wavelength' :('NXmonochromator' ,'wavelength' ),
243+ 'polarization_factor' :('NXparameters' ,'polarization_factor' ),
244+ # strings
245+ 'instrument/name' :('NXinstrument' ,'name' ),
246+ 'unit' :('NXparameters' ,'unit' ),
247+ 'sample/name' :('NXsample' ,'name' ),
248+ 'source/name' :('NXsource' ,'name' ),
249+ }
250+ # test if we have what we need in the buffer
251+ for k in fileItems :
252+ if k not in fpbuffer :
221253 doread = True
222254 break
223- if doread : # read into buffer
255+ if doread :
256+ # Nope, need to fill the buffer
224257 try :
225258 fp = h5py .File (filename , 'r' )
226- for i in arrays :
227- fpbuffer [i ] = np .array (fp .get (i ))
228- self .numbanks = len (fpbuffer ['entry/data/I' ]) # number of scans
229- for i in floats :
230- fpbuffer [i ] = float (fp [i ][()])
231- for i in strings :
232- try :
233- fpbuffer [i ] = fp [i ][()].decode ()
234- self .comments .append (f'{ i } ={ fpbuffer [i ]} ' )
235- except :
236- fpbuffer [i ] = None
237- if fpbuffer ['entry/reduction/input/unit' ] != '2th' :
238- print ('NXazint1d HDF5 file has units' ,fpbuffer ['entry/reduction/input/unit' ])
239- self .errors = 'NXazint1d only can be read with 2th units'
240- return False
259+ entry = getNeXusBase (fp )
260+ # lookup NeXus locations
261+ nexusDict = {i :None for i in set ([i [0 ] for i in fileItems .values ()])}
262+ recurseNeXusEntries (fp ,entry ,nexusDict )
263+ # save selected items from file in buffer
264+ savedKeys = []
265+ for k ,loc in fileItems .items ():
266+ if nexusDict [loc [0 ]] is None :
267+ fpbuffer [k ] = None
268+ continue
269+ key = '/' .join ((nexusDict [loc [0 ]],)+ loc [1 :])
270+ savedKeys .append (key )
271+ if key not in fp :
272+ fpbuffer [k ] = None
273+ continue
274+ val = fp [key ]
275+ if val .shape :
276+ fpbuffer [k ] = np .array (val )
277+ elif 'float' in str (val .dtype ):
278+ fpbuffer [k ] = float (val [()])
279+ self .comments .append (f'{ k } ={ val [()]} ' )
280+ else :
281+ fpbuffer [k ] = val [()].decode ()
282+ self .comments .append (f'{ k } ={ fpbuffer [k ]} ' )
283+ self .numbanks = len (fpbuffer ['I' ])
241284 # save arrays that are potentially tracking the parametric conditions
285+ # into ParamTrackingVars.
242286 # e.g. variables with the same length as the humber of datasets
243- paramItems = self .RecurseH5Element (fp ,length = self .numbanks )
244287 fpbuffer ['ParamTrackingVars' ] = {}
288+ paramItems = self .RecurseH5Element (fp ,length = self .numbanks )
245289 for i in paramItems :
246290 for j in i :
247291 key = '/' .join (j )
248- if key in arrays : continue
292+ if key in savedKeys : continue # standard data array
249293 obj = fp .get (key )
250294 if obj is None : continue
251295 if len (obj [()].shape ) != 1 : continue
252296 # are all values the same? If so, put them into the comments
253- # for the first histogram. If they are changing, note that and
254- # later they will be put into every histogram.
297+ # for the first histogram only . If they are changing, note that
298+ # here and later they will be put into every histogram.
255299 if all (obj [0 ] == obj ):
256300 self .comments .append (f'{ key .split ("/" )[- 1 ]} ={ obj [0 ]} ' )
257301 else :
258302 fpbuffer ['ParamTrackingVars' ][key ] = np .array (obj [()])
259- if self .selections is None or len (self .selections ) == 0 :
260- self .blknum = 0
261- else :
262- self .blknum = min (self .selections )
263303 except IOError :
264304 print (f'Can not open or read file { filename } ' )
265305 return False
266306 finally :
267307 fp .close ()
268- x = fpbuffer ['entry/data/radial_axis' ]
269- y = fpbuffer ['entry/data/I' ][self .blknum ]
308+ if fpbuffer ['unit' ] != '2th' :
309+ print ('NXazint1d HDF5 file has units' ,fpbuffer ['entry/reduction/input/unit' ])
310+ self .errors = 'NXazint1d only can be read with 2th units'
311+ return False
312+ # initialize the block selection
313+ if self .selections is None or len (self .selections ) == 0 :
314+ self .blknum = 0
315+ else :
316+ self .blknum = min (self .selections )
317+ # now pull the selected dataset from the buffer
318+ x = fpbuffer ['radial_axis' ]
319+ y = fpbuffer ['I' ][self .blknum ]
270320 try :
271- esd = fpbuffer ['entry/data/ I_errors' ][self .blknum ]
321+ esd = fpbuffer ['I_errors' ][self .blknum ]
272322 w = np .where (esd == 0 ,0 ,np .nan_to_num (1 / esd ** 2 ))
273323 except :
274- w = np .nan_to_num (1 / y ) # best we can do, alas
324+ w = np .nan_to_num (1 / y ) # best we can do, alas w/o reported s.u.'s
275325 self .powderdata = [x ,y ,w ,np .zeros_like (x ),np .zeros_like (x ),np .zeros_like (x )]
276326 # add parametric var as a comment
277327 for key ,arr in fpbuffer ['ParamTrackingVars' ].items ():
@@ -287,12 +337,11 @@ def readNXazint1d(self, filename, fpbuffer={}):
287337 self .Sample ['Phi' ] = val
288338 elif 'omega' in key :
289339 self .Sample ['Omega' ] = val
290- #self.comments = comments[selblk]
291340 self .powderentry [0 ] = filename
292341 #self.powderentry[1] = Pos # position offset (never used, I hope)
293342 self .powderentry [2 ] = self .blknum # bank number
294343 self .idstring = f'#{ self .blknum } { os .path .split (filename )[1 ][:60 ]} '
295- self .instdict ['wave' ] = fpbuffer ['entry/instrument/monochromator/ wavelength' ]
344+ self .instdict ['wave' ] = fpbuffer ['wavelength' ]
296345 # if not, are there more [selected] images that after this to be read?
297346 self .repeat = False
298347 if self .blknum < self .numbanks - 1 :
@@ -307,3 +356,64 @@ def readNXazint1d(self, filename, fpbuffer={}):
307356 except IndexError : # last selected image has been read
308357 self .repeat = False
309358 return True
359+
360+ # NeXus support routines. These were influenced heavily by Frederik Holm Gjørup
361+ # Also see NeXus support in plaid (https://github.com/fgjorup/plaid/blob/main/plaid/nexus.py)
362+
363+ def getNeXusBase (fp ):
364+ '''This returns the base entry in a NeXus compilant HDF5 file
365+ (usually "/entry" for MaxIV files) or None if this is not a valid
366+ NeXus file.
367+ '''
368+ for key in fp :
369+ if ("NX_class" in fp [key ].attrs and
370+ fp [key ].attrs ["NX_class" ] == "NXentry" ):
371+ return key
372+
373+ def getNeXusEntry (fp ,base ,target ):
374+ '''This returns the entry in a NeXus compilant HDF5 file matching
375+ the name target, or None, if this is not found as a child of the key `base`.
376+ Not in use as it is more practical to use :func:`recurseNeXusEntries`.
377+ '''
378+ for key in fp [base ]:
379+ subkey = '/' .join ([base ,key ])
380+ if "NX_class" in fp [subkey ].attrs :
381+ #print(key, list(fp[subkey].attrs),fp[subkey].attrs["NX_class"])
382+ if ("NX_class" in fp [subkey ].attrs and
383+ fp [subkey ].attrs ["NX_class" ] == target ):
384+ return subkey
385+ else :
386+ print (key )
387+
388+ def recurseNeXusEntry (fp ,node ,target ):
389+ '''Recurse through the HDF5 tree looking for NeXus class `target`.
390+ Not in use, as :func:`recurseNeXusEntries` is used to get all
391+ targets in a single pass through the tree.
392+ '''
393+ if node is None : return # needed?
394+ val = fp [node ]
395+ if ("NX_class" in val .attrs and
396+ val .attrs ["NX_class" ] == target ):
397+ return node
398+ if not isinstance (val , h5py .Group ): return
399+ for key in val :
400+ subkey = '/' .join ([node ,key ])
401+ res = recurseNeXusEntry (fp ,subkey ,target )
402+ if res : return res
403+
404+ def recurseNeXusEntries (fp ,node ,targetdict ):
405+ '''recurse through the HDF5 tree looking for the NeXus classes
406+ in `targetdict`, storing the HDF5 key for each class in the dict
407+
408+ :param fp: HDF5 file pointer
409+ :param str node: name of current HDF5 key
410+ :param dict targetdict: dict to place HDF5 keys corresponding to
411+ the desired NeXus classes. As input this has the NeXus classes
412+ is the dict keys and the
413+ '''
414+ val = fp [node ]
415+ if ("NX_class" in val .attrs and val .attrs ["NX_class" ] in targetdict ):
416+ targetdict [val .attrs ["NX_class" ]] = node
417+ if isinstance (val , h5py .Group ):
418+ for key in val :
419+ recurseNeXusEntries (fp ,'/' .join ([node ,key ]),targetdict )
0 commit comments