Got NXazint1d reader done

briantoby · briantoby · commit 4d648dab1ce4 · 2025-10-16T16:22:23.000-05:00
diff --git a/GSASII/imports/G2pwd_HDF5.py b/GSASII/imports/G2pwd_HDF5.py
@@ -14,6 +14,9 @@
 from .. import GSASIIobj as G2obj
 from .. import GSASIIfiles as G2fil
 
+#from .. import GSASIIpath
+#breakpoint = GSASIIpath.IPyBreak_base
+
 class HDF5_Reader(G2obj.ImportPowderData):
     '''Routine to read multiple powder patterns from an HDF5 file. 
 
@@ -125,6 +128,11 @@ def HDF5list(self, filename):
 
         :param filename: 
         '''
+        def ShowH5NeXusName(obj,keylist):
+            key = '/'.join(keylist)
+            if "NX_class" in obj[key].attrs:
+                return obj[key].attrs["NX_class"]
+
         fp = h5py.File(filename, 'r')
         #print(f'Contents of {filename}')
         HDF5entries = self.RecurseH5Element(fp)
@@ -136,6 +144,7 @@ def HDF5list(self, filename):
             for k in j:
                 m = max(m,len('/'.join(k)))
             for k in j:
+                nxname = ShowH5NeXusName(fp,k)
                 lbl = self.ShowH5Element(fp,k)
                 if '\n' in lbl:
                     lbl = '; '.join(lbl.split('\n'))
@@ -144,6 +153,7 @@ def HDF5list(self, filename):
                 # if '\n' in lbl:
                 #     lbl = lbl.split()[0] + '...'
                 if lbl != '(group)': strings.append(f"{'/'.join(k):{m}s} {lbl}")
+                if nxname: print(f"{'/'.join(k):{m}s} {lbl} {nxname}")
         with open(filename+'_contents.txt', 'w') as fp:
             for i in strings: fp.write(f'{i}\n')
                     
@@ -153,18 +163,23 @@ def ContentsValidator(self, filename):
         '''
         try:
             fp = h5py.File(filename, 'r')
-            if 'entry' in fp: # NeXus
-                #self.HDF5entries = []
-                #self.HDF5list(filename)
-                if 'definition' in fp['/entry']:
-                    # MAX IV NXazint1d file
-                    if fp['/entry/definition'][()].decode() == 'NXazint1d':
-                        return True
-        except IOError:
+            # test for MaxIV NeXus/NXazint1d & NXazint2d
+            test = True
+            while test:
+                test = False
+                entry = getNeXusBase(fp)
+                if entry is None: break # not NeXus
+                if 'definition' not in fp[entry]: break # not MaxIV NXazint*
+                definition = fp[entry+'/definition'][()].decode()
+                if definition == 'NXazint1d': return True
+                if definition == 'NXazint2d': return True
+            # test for next HDF5 type here
+            #
+        except IOError: # not HDF5
             return False
         finally:
             fp.close()
-        return False
+        return False        # nothing passed -- not valid
 
     def Reader(self, filename, ParentFrame=None, **kwarg):
         '''Scan file for sections needed by defined file types (currently 
@@ -182,19 +197,26 @@ def Reader(self, filename, ParentFrame=None, **kwarg):
                 self.blknum = 0
             else:
                 self.blknum = min(self.selections)
+        # was file read into buffer? If so skip opening file to save time
+        definition = fpbuffer.get('definition','')
+        if definition == 'NXazint1d':
+            return self.readNXazint1d(filename, fpbuffer)
+        elif definition == 'NXazint2d':
+            return self.readNXazint2d(filename, fpbuffer)
+        # first or non-buffered read
         try:
             fp = h5py.File(filename, 'r')
-            if 'entry' in fp: # NeXus
-                if 'definition' in fp['/entry']:
-                    # MAX IV NXazint1d file
-                    if fp['/entry/definition'][()].decode() == 'NXazint1d':
+            entry = getNeXusBase(fp)
+            if entry: # NeXus
+                if 'definition' in fp[entry]: # MaxIV NXazint*
+                    definition = fp[entry+'/definition'][()].decode()
+                    fpbuffer['definition'] = definition
+                    if definition == 'NXazint1d':
                         return self.readNXazint1d(filename, fpbuffer)
-
-                    # MAX IV NXazint1d file
-                    #if fp['/entry/definition'][()].decode() == 'NXazint2d':
-                    #    return self.readNXazint2d(filename, fpbuffer)
-                    #    return True
-                    # https://nxazint-hdf5-nexus-3229ecbd09ba8a773fbbd8beb72cace6216dfd5063e1.gitlab-pages.esrf.fr/classes/contributed_definitions/NXazint2d.html
+                    elif definition == 'NXazint2d':
+                        return self.readNXazint2d(filename, fpbuffer)
+            # not a supported file type
+            return False
         except IOError:
             print ('cannot open file '+ filename)
             return False
@@ -211,67 +233,95 @@ def readNXazint1d(self, filename, fpbuffer={}):
         #self.instmsg = 'HDF file'
         self.comments = []
         doread = False # has the file already been read into a buffer?
-        arrays = ('entry/data/radial_axis','entry/data/I','entry/data/I_errors')
-        floats = ('entry/instrument/monochromator/wavelength',
-                  'entry/reduction/input/polarization_factor')
-        strings = ('entry/instrument/name','entry/reduction/input/unit',
-                   'entry/sample/name','entry/instrument/source/name')
-        for i in arrays+floats+strings:
-            if i not in fpbuffer:
+        fileItems = {
+            # arrays
+            'radial_axis':('NXdata','radial_axis'),
+            'I':('NXdata','I'),
+            'I_errors':('NXdata','I_errors'),
+            # floats
+            'wavelength':('NXmonochromator','wavelength'),
+            'polarization_factor':('NXparameters','polarization_factor'),
+            # strings
+            'instrument/name':('NXinstrument','name'),
+            'unit':('NXparameters','unit'),
+            'sample/name':('NXsample','name'),
+            'source/name':('NXsource','name'),
+        }
+        # test if we have what we need in the buffer
+        for k in fileItems:
+            if k not in fpbuffer:                
                 doread = True
                 break
-        if doread:   # read into buffer
+        if doread:
+            # Nope, need to fill the buffer
             try:
                 fp = h5py.File(filename, 'r')
-                for i in arrays:
-                    fpbuffer[i] = np.array(fp.get(i))
-                self.numbanks = len(fpbuffer['entry/data/I']) # number of scans
-                for i in floats:
-                    fpbuffer[i] = float(fp[i][()])
-                for i in strings:
-                    try:
-                        fpbuffer[i] = fp[i][()].decode()
-                        self.comments.append(f'{i}={fpbuffer[i]}')
-                    except:
-                        fpbuffer[i] = None
-                if fpbuffer['entry/reduction/input/unit'] != '2th':
-                    print('NXazint1d HDF5 file has units',fpbuffer['entry/reduction/input/unit'])
-                    self.errors = 'NXazint1d only can be read with 2th units'
-                    return False
+                entry = getNeXusBase(fp)
+                # lookup NeXus locations
+                nexusDict = {i:None for i in set([i[0] for i in fileItems.values()])}
+                recurseNeXusEntries(fp,entry,nexusDict)
+                # save selected items from file in buffer
+                savedKeys = []
+                for k,loc in fileItems.items():
+                    if nexusDict[loc[0]] is None:
+                        fpbuffer[k] = None
+                        continue
+                    key = '/'.join((nexusDict[loc[0]],)+loc[1:])
+                    savedKeys.append(key)
+                    if key not in fp:
+                        fpbuffer[k] = None
+                        continue
+                    val = fp[key]
+                    if val.shape:
+                        fpbuffer[k] = np.array(val)
+                    elif 'float' in str(val.dtype):
+                        fpbuffer[k] = float(val[()])
+                        self.comments.append(f'{k}={val[()]}')
+                    else:
+                        fpbuffer[k] = val[()].decode()
+                        self.comments.append(f'{k}={fpbuffer[k]}')
+                self.numbanks = len(fpbuffer['I'])
                 # save arrays that are potentially tracking the parametric conditions
+                # into ParamTrackingVars.
                 # e.g. variables with the same length as the humber of datasets
-                paramItems = self.RecurseH5Element(fp,length=self.numbanks)
                 fpbuffer['ParamTrackingVars'] = {}
+                paramItems = self.RecurseH5Element(fp,length=self.numbanks)
                 for i in paramItems:
                     for j in i:
                         key = '/'.join(j)
-                        if key in arrays: continue
+                        if key in savedKeys: continue # standard data array
                         obj = fp.get(key)
                         if obj is None: continue
                         if len(obj[()].shape) != 1: continue
                         # are all values the same? If so, put them into the comments
-                        # for the first histogram. If they are changing, note that and
-                        # later they will be put into every histogram.
+                        # for the first histogram only. If they are changing, note that 
+                        # here and later they will be put into every histogram.
                         if all(obj[0] == obj):
                             self.comments.append(f'{key.split("/")[-1]}={obj[0]}')
                         else:
                             fpbuffer['ParamTrackingVars'][key] = np.array(obj[()])
-                if self.selections is None or len(self.selections) == 0:
-                    self.blknum = 0
-                else:
-                    self.blknum = min(self.selections)
             except IOError:
                 print (f'Can not open or read file {filename}')
                 return False
             finally:
                 fp.close()
-        x = fpbuffer['entry/data/radial_axis']
-        y = fpbuffer['entry/data/I'][self.blknum]
+            if fpbuffer['unit'] != '2th':
+                print('NXazint1d HDF5 file has units',fpbuffer['entry/reduction/input/unit'])
+                self.errors = 'NXazint1d only can be read with 2th units'
+                return False
+            # initialize the block selection
+            if self.selections is None or len(self.selections) == 0:
+                self.blknum = 0
+            else:
+                self.blknum = min(self.selections)
+        # now pull the selected dataset from the buffer
+        x = fpbuffer['radial_axis']
+        y = fpbuffer['I'][self.blknum]
         try:
-            esd = fpbuffer['entry/data/I_errors'][self.blknum]
+            esd = fpbuffer['I_errors'][self.blknum]
             w = np.where(esd==0,0,np.nan_to_num(1/esd**2))
         except:
-            w = np.nan_to_num(1/y)    # best we can do, alas
+            w = np.nan_to_num(1/y)    # best we can do, alas w/o reported s.u.'s
         self.powderdata = [x,y,w,np.zeros_like(x),np.zeros_like(x),np.zeros_like(x)]
         # add parametric var as a comment
         for key,arr in fpbuffer['ParamTrackingVars'].items():
@@ -287,12 +337,11 @@ def readNXazint1d(self, filename, fpbuffer={}):
                 self.Sample['Phi'] = val
             elif 'omega' in key:
                 self.Sample['Omega'] = val
-        #self.comments = comments[selblk]
         self.powderentry[0] = filename
         #self.powderentry[1] = Pos # position offset (never used, I hope)
         self.powderentry[2] = self.blknum  # bank number
         self.idstring = f'#{self.blknum} {os.path.split(filename)[1][:60]}'
-        self.instdict['wave'] = fpbuffer['entry/instrument/monochromator/wavelength']
+        self.instdict['wave'] = fpbuffer['wavelength']
         # if not, are there more [selected] images that after this to be read?
         self.repeat = False
         if self.blknum < self.numbanks-1:
@@ -307,3 +356,64 @@ def readNXazint1d(self, filename, fpbuffer={}):
                 except IndexError:   # last selected image has been read
                     self.repeat = False
         return True
+
+# NeXus support routines. These were influenced heavily by Frederik Holm Gjørup
+# Also see NeXus support in plaid (https://github.com/fgjorup/plaid/blob/main/plaid/nexus.py)
+
+def getNeXusBase(fp):
+    '''This returns the base entry in a NeXus compilant HDF5 file
+    (usually "/entry" for MaxIV files) or None if this is not a valid 
+    NeXus file. 
+    '''
+    for key in fp:
+        if ("NX_class" in fp[key].attrs and
+                fp[key].attrs["NX_class"] == "NXentry"):
+            return key
+
+def getNeXusEntry(fp,base,target):
+    '''This returns the entry in a NeXus compilant HDF5 file matching 
+    the name target, or None, if this is not found as a child of the key `base`.
+    Not in use as it is more practical to use :func:`recurseNeXusEntries`.
+    '''
+    for key in fp[base]:
+        subkey = '/'.join([base,key])
+        if "NX_class" in fp[subkey].attrs:
+            #print(key, list(fp[subkey].attrs),fp[subkey].attrs["NX_class"])
+            if ("NX_class" in fp[subkey].attrs and
+                    fp[subkey].attrs["NX_class"] == target):
+                return subkey
+        else:
+            print(key)
+
+def recurseNeXusEntry(fp,node,target):
+    '''Recurse through the HDF5 tree looking for NeXus class `target`. 
+    Not in use, as :func:`recurseNeXusEntries` is used to get all 
+    targets in a single pass through the tree.
+    '''
+    if node is None: return  # needed?
+    val = fp[node]
+    if ("NX_class" in val.attrs and
+                val.attrs["NX_class"] == target):
+        return node
+    if not isinstance(val, h5py.Group): return
+    for key in val:
+        subkey = '/'.join([node,key])
+        res = recurseNeXusEntry(fp,subkey,target)
+        if res: return res
+
+def recurseNeXusEntries(fp,node,targetdict):
+    '''recurse through the HDF5 tree looking for the NeXus classes
+    in `targetdict`, storing the HDF5 key for each class in the dict
+
+    :param fp: HDF5 file pointer
+    :param str node: name of current HDF5 key
+    :param dict targetdict: dict to place HDF5 keys corresponding to
+       the desired NeXus classes. As input this has the NeXus classes
+       is the dict keys and the 
+    '''
+    val = fp[node]
+    if ("NX_class" in val.attrs and val.attrs["NX_class"] in targetdict):
+        targetdict[val.attrs["NX_class"]] = node
+    if isinstance(val, h5py.Group): 
+        for key in val:
+            recurseNeXusEntries(fp,'/'.join([node,key]),targetdict)