From c0fc5584bdb952aa21e9a31b13236bc09606f7be Mon Sep 17 00:00:00 2001
From: BHT <toby@anl.gov>
Date: Thu, 3 Jul 2025 16:03:49 -0500
Subject: [PATCH 1/7] start work on NeXus/HDF5 importer for MAX IV

---
 GSASII/imports/G2pwd_HDF5.py | 322 +++++++++++++++++++++++++++++++++++
 GSASII/imports/__init__.py   |   2 +
 2 files changed, 324 insertions(+)
 create mode 100644 GSASII/imports/G2pwd_HDF5.py
diff --git a/GSASII/imports/G2pwd_HDF5.py b/GSASII/imports/G2pwd_HDF5.py
new file mode 100644
index 000000000..1b1a304af
--- /dev/null
+++ b/GSASII/imports/G2pwd_HDF5.py
@@ -0,0 +1,322 @@
+# -*- coding: utf-8 -*-
+'''
+'''
+
+from __future__ import division, print_function
+import os
+import sys
+try:
+    import h5py
+except ImportError:
+    h5py = None
+import numpy as np
+from .. import GSASIIobj as G2obj
+from .. import GSASIIfiles as G2fil
+#from .. import GSASIIpath
+
+# things to do:
+#   uncertainties
+#   instr. parms
+#instprmList = [('Bank',1.0), ('Lam',0.413263), ('Polariz.',0.99), 
+#            ('SH/L',0.002), ('Type','PXC'), ('U',1.163), ('V',-0.126), 
+#            ('W',0.063), ('X',0.0), ('Y',0.0), ('Z',0.0), ('Zero',0.0)]
+#   comments
+#   dataset naming
+#   sample parameters
+#sampleprmList = [('InstrName','APS 1-ID'), ('Temperature', 295.0)]
+#  'Scale': [1.0, True], 'Type': 'Debye-Scherrer',
+# 'Absorption': [0.0, False], 'DisplaceX': [0.0, False], 'DisplaceY': [0.0, False], 'Pressure': 0.1, 'Time': 0.0, 'FreePrm1': 0.0,
+# 'FreePrm2': 0.0, 'FreePrm3': 0.0, 'Gonio. radius': 200.0, 'Omega': 0.0,
+# 'Chi': 0.0, 'Phi': 180.0, 'Azimuth': 0.0,
+# 'Materials': [{'Name': 'vacuum', 'VolFrac': 1.0}, {'Name': 'vacuum', 'VolFrac': 0.0}],
+# 'Thick': 1.0, 'Contrast': [0.0, 0.0], 'Trans': 1.0, 'SlitLen': 0.0}
+
+
+class HDF5_Reader(G2obj.ImportPowderData):
+    '''Routine to read multiple powder patterns from an HDF5 file. 
+
+    This importer targets NXazint1d and NXazint2d NeXus files from 
+    MAX IV. 
+    Other types of HDF5 powder data sources may be supported in the future.
+
+    The main file is <file>.hdf or <file>.h5, but optionally sample and 
+    instrument parameters can be placed in <file>.samprm and <file>.instprm. 
+    Any parameters placed in that file will override values set in the HDF5
+    file. 
+    '''
+    mode = None
+    def __init__(self):
+        if h5py is None:
+            self.UseReader = False
+            msg = 'HDF5 Reader skipped because h5py module is not installed'
+            G2fil.ImportErrorMsg(msg,{'HDF5 importer':['h5py','hdf5']})
+        super(self.__class__,self).__init__( # fancy way to self-reference
+            extensionlist=('.hdf','.h5'),strictExtension=True,
+            formatName = 'MAX IV HDF5',longFormatName = 'HDF5 integrated scans')
+        self.scriptable = True
+        #self.Iparm = {} #only filled for EDS data
+
+    def ShowH5Element(self,obj,keylist):
+        '''Format the contents of an HDF5 entry as a single line. Not used for 
+        reading files, only for use in :meth:`HDF5list`
+        '''
+        k = '/'.join(keylist)
+        try:
+            typ = str(type(obj[k]))
+        except:
+            return f'**Error** with key {k}'
+            
+        if ".Dataset'" in typ:
+            datfmt = obj[k].dtype
+            if datfmt == 'O' or str(datfmt).startswith('|S'):
+                # byte string
+                return f'value={obj[k][()].decode()}'
+            elif datfmt == 'bool': # Bool
+                return f'value={bool(obj[k][()])}'
+            elif datfmt in ('<f8', 'uint8', 'int64', '<f4'): # scalar value or array of values
+                try:
+                    return f'length {len(obj[k][()])}'
+                except:
+                    return f'value={obj[k][()]}'
+            else:
+                return f'dataset of type {repr(datfmt)}'
+        elif ".Group'" in typ:
+            return "(group)"
+        else:
+            return f'{prefix}/{i} is {type(obj[k])}'
+
+    def RecurseH5Element(self,obj,prefix=[]):
+        '''Returns a list of entries of all keys in the HDF5 file
+        (or group) in `obj`. Note that `obj` can be a file object, created by 
+        `h5py.File` or can be a subset `fp['key/subkey']`.
+
+        The returned list is organized where: 
+          * entry 0 is the top-level keys (/a, /b,...),
+          * entry 1 has the first level keys (/a/c /a/d, /b/d, /b/e,...)
+          * ...
+        Not used for reading files, only for use in :meth:`HDF5list`
+        '''
+        try:
+            self.HDF5entries
+        except AttributeError:
+            self.HDF5entries = []
+        depth = len(prefix)
+        if len(self.HDF5entries) < depth+1:
+            self.HDF5entries.append([])
+        for i in obj:
+            nextprefix = prefix+[i]
+            self.HDF5entries[depth].append(nextprefix)
+            try:
+                typ = str(type(obj[i]))
+            except:
+                print(f'**Error** with key {prefix}/{i}')
+                continue
+            if ".Group'" in typ:
+                t = f'{prefix}/{i}'
+                #print(f'\n{nextprefix} contents {(60-len(t))*'='}')
+                self.RecurseH5Element(obj[i],nextprefix)
+        return self.HDF5entries
+        
+                
+    def HDF5list(self, filename):
+        '''Shows the contents of an HDF5 file as a short listing. 
+        This is not used for HDF5 reading, but is of help with a new
+        type of HDF5 file to see what is present.
+
+        :param str filename: HDF5 file to list; output goes to <filename>_contents.txt
+        '''
+        fp = h5py.File(filename, 'r')
+        #print(f'Contents of {filename}')
+        HDF5entries = self.RecurseH5Element(fp)
+        strings = []
+        for i,j in enumerate(HDF5entries):
+            if not strings or strings[-1] != 60*'=': 
+                strings.append(60*'=')
+            m = 0
+            for k in j:
+                m = max(m,len('/'.join(k)))
+            for k in j:
+                lbl = self.ShowH5Element(fp,k)
+                if len(lbl) > 50:
+                    lbl = lbl[:50] + '...'
+                if '\n' in lbl:
+                    lbl = lbl.split()[0] + '...'
+                if lbl != '(group)': strings.append(f"{'/'.join(k):{m}s} {lbl}")
+        with open(filename+'_contents.txt', 'w') as fp:
+            for i in strings: fp.write(f'{i}\n')
+                    
+    def ContentsValidator(self, filename):
+        '''Test if valid by seeing if the HDF5 library recognizes the file. 
+        Then get file type (currently MAX IV NeXus/NXazint[12]d only)
+        '''
+        from .. import GSASIIpath
+        try:
+            fp = h5py.File(filename, 'r')
+            if 'entry' in fp: # NeXus
+                if 'definition' in fp['/entry']:
+                    # MAX IV NXazint1d file
+                    if fp['/entry/definition'][()].decode() == 'NXazint1d':
+                        return True
+                    # MAX IV NXazint1d file
+                    if fp['/entry/definition'][()].decode() == 'NXazint2d':
+                        return True
+        except IOError:
+            return False
+        finally:
+            fp.close()
+        return False
+
+    def Reader(self, filename, ParentFrame=None, **kwarg):
+        '''Scan file for sections needed by defined file types (currently 
+        MAX IV NeXus/NXazint[12]d only) 
+        and then use appropriate routine to read the file.
+
+        Since usually there will be lots of scans in a single file, 
+        the goal is that the first pass should read the file into 
+        a buffer (if available) and subsequent calls will not 
+        need to access the file. 
+        '''
+        fpbuffer = kwarg.get('buffer',{})
+        if not hasattr(self,'blknum'):
+            if self.selections is None or len(self.selections) == 0:
+                self.blknum = 0
+            else:
+                self.blknum = min(self.selections)
+        try:
+            self.mode = None
+            fp = h5py.File(filename, 'r')
+        try:
+            fp = h5py.File(filename, 'r')
+            if 'entry' in fp: # NeXus
+                if 'definition' in fp['/entry']:
+                    # MAX IV NXazint1d file
+                    if fp['/entry/definition'][()].decode() == 'NXazint1d':
+                        return self.readNXazint1d(filename, fpbuffer)
+
+                    # MAX IV NXazint2d file (reader not yet implemented)
+                    #if fp['/entry/definition'][()].decode() == 'NXazint2d':
+                    #    return self.readNXazint2d(filename, fpbuffer)
+                    #    return True
+                    # https://nxazint-hdf5-nexus-3229ecbd09ba8a773fbbd8beb72cace6216dfd5063e1.gitlab-pages.esrf.fr/classes/contributed_definitions/NXazint2d.html
+        except IOError:
+            print ('cannot open file '+ filename)
+            return False
+        finally:
+            fp.close()
+
+        print (f'Unknown type of HDF5 powder file {filename}')
+        return False
+
+    def readNXazint1d(self, filename, fpbuffer={}):
+        '''Read HDF5 file in NeXus as produced by MAX IV as a NXazint1d
+        see https://nxazint-hdf5-nexus-3229ecbd09ba8a773fbbd8beb72cace6216dfd5063e1.gitlab-pages.esrf.fr/classes/contributed_definitions/NXazint1d.html
+        '''
+        self.instmsg = 'HDF file'
+        doread = False # has the file already been read into a buffer?
+        for i in ('blkmap','intenArr_blknum')+self.midassections:
+            if i not in fpbuffer:
+                doread = True
+                break
+        else: # do we have the right section buffered?
+            doread = fpbuffer['intenArr_blknum'] != self.blknum
+        
+        if doread:   # read into buffer
+            try:
+                fp = h5py.File(filename, 'r')
+                if 'blkmap' not in fpbuffer:
+                    fpbuffer['blkmap'] = list(fp.get('OmegaSumFrame').keys())
+                if 'REtaMap' not in fpbuffer:
+                    fpbuffer['REtaMap'] = np.array(fp.get('REtaMap'))
+                if 'intenArr' not in fpbuffer or fpbuffer.get('intenArr_blknum',-1) != self.blknum:
+                    fpbuffer['intenArr'] = np.array(fp.get('OmegaSumFrame').get(
+                            fpbuffer['blkmap'][self.blknum]))
+                    fpbuffer['intenArr_blknum'] = self.blknum
+                    self.azmcnt = -1
+                if 'Omegas' not in fpbuffer:
+                    fpbuffer['Omegas'] = np.array(fp.get('Omegas'))
+            except IOError:
+                print ('cannot open file '+ filename)
+                return False
+            finally:
+                fp.close()
+            # get overriding sample & instrument parameters 
+            fpbuffer['sampleprm'] = {}
+            samplefile = os.path.splitext(filename)[0] + '.samprm'
+            if os.path.exists(samplefile):
+                fp = open(samplefile,'r')
+                S = fp.readline()
+                while S:
+                    if not S.strip().startswith('#'):
+                        [item,val] = S[:-1].split(':')
+                        fpbuffer['sampleprm'][item.strip("'")] = eval(val)
+                    S = fp.readline()
+                fp.close()
+            fpbuffer['instprm'] = {}
+            instfile = os.path.splitext(filename)[0] + '.instprm'
+            if os.path.exists(instfile):
+                self.instmsg = 'HDF and .instprm files'
+                fp = open(instfile,'r')
+                S = fp.readline()
+                while S:
+                    if not S.strip().startswith('#'):
+                        [item,val] = S[:-1].split(':')
+                        fpbuffer['instprm'][item.strip("'")] = eval(val)
+                    S = fp.readline()
+                fp.close()
+        # look for a non-empty scan (lineout)
+        use = [0]
+        while sum(use) == 0 and self.azmcnt < fpbuffer['intenArr'].shape[1]:
+            self.azmcnt += 1
+            if self.azmcnt >= fpbuffer['intenArr'].shape[1]:
+                return False
+            use = fpbuffer['REtaMap'][3,:,self.azmcnt] != 0
+
+        # now transfer information into current histogram 
+        self.pwdparms['Instrument Parameters'] = [
+            {'Type': ['PXC', 'PXC', False]},
+            {}]
+        inst = {}
+        inst.update(instprmList)
+        inst.update(fpbuffer['instprm'])
+        for key,val in inst.items():
+            self.pwdparms['Instrument Parameters'][0][key] = [val,val,False]
+        samp = {}
+        samp.update(sampleprmList)
+        samp.update(fpbuffer['sampleprm'])
+        for key,val in samp.items():
+            self.Sample[key] = val
+        self.numbanks=len(fpbuffer['blkmap'])
+        x = fpbuffer['REtaMap'][1,:,self.azmcnt][use]
+        y = fpbuffer['intenArr'][:,self.azmcnt][use]
+        w = np.nan_to_num(1/y)    # this is probably not correct
+        eta = np.average(fpbuffer['REtaMap'][2,:,self.azmcnt][use])
+        self.pwdparms['Instrument Parameters'][0]['Azimuth'] = [90-eta,90-eta,False]
+        self.pwdparms['Instrument Parameters'][0]['Bank'] = [self.azmcnt,self.azmcnt,False]
+#        self.Sample['Gonio. radius'] = float(S.split('=')[1])
+#        self.Sample['Omega'] = float(S.split('=')[1])
+#        self.Sample['Chi'] = float(S.split('=')[1])
+        self.Sample['Phi'] = Omega = fpbuffer['Omegas'][self.blknum]
+        self.powderdata = [x,y,w,np.zeros_like(x),np.zeros_like(x),np.zeros_like(x)]
+        #self.comments = comments[selblk]
+        self.powderentry[0] = filename
+        #self.powderentry[1] = Pos # position offset (never used, I hope)
+        self.powderentry[2] = self.blknum  # bank number
+        self.idstring = f'{os.path.split(filename)[1][:10]} omega={Omega} eta={eta}'
+#        if GSASIIpath.GetConfigValue('debug'): print(
+#                f'Read entry #{self.azmcnt} img# {self.blknum} from file {filename}')
+        # are there more lineouts after this one in current image to read?
+        self.repeat = sum(sum(fpbuffer['REtaMap'][3,:,self.azmcnt+1:])) != 0
+        if self.repeat: return True
+        # if not, are there more [selected] images that after this to be read?
+        if self.blknum < self.numbanks-1:
+            if self.selections is None or len(self.selections) == 0:
+                self.blknum += 1
+                self.repeat = True
+            else:
+                try:
+                    s = sorted(self.selections)
+                    self.blknum = s[s.index(self.blknum)+1]
+                    self.repeat = True
+                except IndexError:   # last selected image has been read
+                    self.repeat = False
+        return True
diff --git a/GSASII/imports/__init__.py b/GSASII/imports/__init__.py
index 3d2b8ba1c..b2288eb08 100644
--- a/GSASII/imports/__init__.py
+++ b/GSASII/imports/__init__.py
@@ -24,6 +24,7 @@
 from . import G2pwd_FP
 from . import G2pwd_GPX
 from . import G2pwd_MIDAS
+from . import G2pwd_HDF5
 from . import G2pwd_Panalytical
 from . import G2pwd_csv
 from . import G2pwd_fxye
@@ -63,6 +64,7 @@
     "G2pwd_FP",
     "G2pwd_GPX",
     "G2pwd_MIDAS",
+    "G2pwd_HDF5",
     "G2pwd_Panalytical",
     "G2pwd_csv",
     "G2pwd_fxye",

From 69cab021041828d145dfe73f14474fc7353be5de Mon Sep 17 00:00:00 2001
From: BHT <toby@anl.gov>
Date: Wed, 30 Jul 2025 19:52:45 -0500
Subject: [PATCH 2/7] first working HDF5-MAXIV importer

---
 GSASII/imports/G2pwd_HDF5.py | 178 +++++++++++++++++------------------
 1 file changed, 89 insertions(+), 89 deletions(-)

diff --git a/GSASII/imports/G2pwd_HDF5.py b/GSASII/imports/G2pwd_HDF5.py
index 1b1a304af..b8312061f 100644
--- a/GSASII/imports/G2pwd_HDF5.py
+++ b/GSASII/imports/G2pwd_HDF5.py
@@ -44,7 +44,7 @@ class HDF5_Reader(G2obj.ImportPowderData):
     Any parameters placed in that file will override values set in the HDF5
     file. 
     '''
-    mode = None
+    #mode = None
     def __init__(self):
         if h5py is None:
             self.UseReader = False
@@ -58,9 +58,12 @@ def __init__(self):
 
     def ShowH5Element(self,obj,keylist):
         '''Format the contents of an HDF5 entry as a single line. Not used for 
-        reading files, only for use in :meth:`HDF5list`
+        reading files, only used in :meth:`HDF5list`
         '''
         k = '/'.join(keylist)
+        l = obj.get(k, getlink=True)
+        if isinstance(l, h5py.ExternalLink): 
+            return f'link to file {l.filename}'
         try:
             typ = str(type(obj[k]))
         except:
@@ -75,7 +78,8 @@ def ShowH5Element(self,obj,keylist):
                 return f'value={bool(obj[k][()])}'
             elif datfmt in ('<f8', 'uint8', 'int64', '<f4'): # scalar value or array of values
                 try:
-                    return f'length {len(obj[k][()])}'
+                    len(obj[k][()])
+                    return f'array {obj[k].shape}'
                 except:
                     return f'value={obj[k][()]}'
             else:
@@ -83,7 +87,7 @@ def ShowH5Element(self,obj,keylist):
         elif ".Group'" in typ:
             return "(group)"
         else:
-            return f'{prefix}/{i} is {type(obj[k])}'
+            return f'type is {type(obj[k])}'
 
     def RecurseH5Element(self,obj,prefix=[]):
         '''Returns a list of entries of all keys in the HDF5 file
@@ -94,7 +98,7 @@ def RecurseH5Element(self,obj,prefix=[]):
           * entry 0 is the top-level keys (/a, /b,...),
           * entry 1 has the first level keys (/a/c /a/d, /b/d, /b/e,...)
           * ...
-        Not used for reading files, only for use in :meth:`HDF5list`
+        Not used for reading files, used only in :meth:`HDF5list`
         '''
         try:
             self.HDF5entries
@@ -106,13 +110,16 @@ def RecurseH5Element(self,obj,prefix=[]):
         for i in obj:
             nextprefix = prefix+[i]
             self.HDF5entries[depth].append(nextprefix)
+            # check for link objects
+            l = obj.get(i, getlink=True)
+            if isinstance(l, h5py.ExternalLink): continue
             try:
                 typ = str(type(obj[i]))
             except:
                 print(f'**Error** with key {prefix}/{i}')
                 continue
             if ".Group'" in typ:
-                t = f'{prefix}/{i}'
+                #t = f'{prefix}/{i}'
                 #print(f'\n{nextprefix} contents {(60-len(t))*'='}')
                 self.RecurseH5Element(obj[i],nextprefix)
         return self.HDF5entries
@@ -137,10 +144,12 @@ def HDF5list(self, filename):
                 m = max(m,len('/'.join(k)))
             for k in j:
                 lbl = self.ShowH5Element(fp,k)
+                if '\n' in lbl:
+                    lbl = '; '.join(lbl.split('\n'))
                 if len(lbl) > 50:
                     lbl = lbl[:50] + '...'
-                if '\n' in lbl:
-                    lbl = lbl.split()[0] + '...'
+                # if '\n' in lbl:
+                #     lbl = lbl.split()[0] + '...'
                 if lbl != '(group)': strings.append(f"{'/'.join(k):{m}s} {lbl}")
         with open(filename+'_contents.txt', 'w') as fp:
             for i in strings: fp.write(f'{i}\n')
@@ -149,17 +158,19 @@ def ContentsValidator(self, filename):
         '''Test if valid by seeing if the HDF5 library recognizes the file. 
         Then get file type (currently MAX IV NeXus/NXazint[12]d only)
         '''
-        from .. import GSASIIpath
+        #from .. import GSASIIpath
         try:
             fp = h5py.File(filename, 'r')
             if 'entry' in fp: # NeXus
+                #self.HDF5entries = []
+                #self.HDF5list(filename)
                 if 'definition' in fp['/entry']:
                     # MAX IV NXazint1d file
                     if fp['/entry/definition'][()].decode() == 'NXazint1d':
                         return True
                     # MAX IV NXazint1d file
-                    if fp['/entry/definition'][()].decode() == 'NXazint2d':
-                        return True
+                    #if fp['/entry/definition'][()].decode() == 'NXazint2d':
+                    #    return True
         except IOError:
             return False
         finally:
@@ -182,9 +193,6 @@ def Reader(self, filename, ParentFrame=None, **kwarg):
                 self.blknum = 0
             else:
                 self.blknum = min(self.selections)
-        try:
-            self.mode = None
-            fp = h5py.File(filename, 'r')
         try:
             fp = h5py.File(filename, 'r')
             if 'entry' in fp: # NeXus
@@ -211,103 +219,95 @@ def readNXazint1d(self, filename, fpbuffer={}):
         '''Read HDF5 file in NeXus as produced by MAX IV as a NXazint1d
         see https://nxazint-hdf5-nexus-3229ecbd09ba8a773fbbd8beb72cace6216dfd5063e1.gitlab-pages.esrf.fr/classes/contributed_definitions/NXazint1d.html
         '''
-        self.instmsg = 'HDF file'
+        #self.instmsg = 'HDF file'
         doread = False # has the file already been read into a buffer?
-        for i in ('blkmap','intenArr_blknum')+self.midassections:
+        arrays = ('entry/data/radial_axis','entry/data/I')
+        floats = ('entry/instrument/monochromator/wavelength',
+                  'entry/reduction/input/polarization_factor')
+        strings = ('entry/instrument/source/name','entry/reduction/input/unit')
+        for i in arrays+floats+strings:
             if i not in fpbuffer:
                 doread = True
                 break
-        else: # do we have the right section buffered?
-            doread = fpbuffer['intenArr_blknum'] != self.blknum
-        
         if doread:   # read into buffer
             try:
                 fp = h5py.File(filename, 'r')
-                if 'blkmap' not in fpbuffer:
-                    fpbuffer['blkmap'] = list(fp.get('OmegaSumFrame').keys())
-                if 'REtaMap' not in fpbuffer:
-                    fpbuffer['REtaMap'] = np.array(fp.get('REtaMap'))
-                if 'intenArr' not in fpbuffer or fpbuffer.get('intenArr_blknum',-1) != self.blknum:
-                    fpbuffer['intenArr'] = np.array(fp.get('OmegaSumFrame').get(
-                            fpbuffer['blkmap'][self.blknum]))
-                    fpbuffer['intenArr_blknum'] = self.blknum
-                    self.azmcnt = -1
-                if 'Omegas' not in fpbuffer:
-                    fpbuffer['Omegas'] = np.array(fp.get('Omegas'))
+                for i in arrays:
+                    fpbuffer[i] = np.array(fp.get(i))
+                for i in floats:
+                    fpbuffer[i] = float(fp[i][()])
+                for i in strings:
+                    fpbuffer[i] = fp[i][()].decode()
+                if fpbuffer['entry/reduction/input/unit'] != '2th':
+                    print('NXazint1d HDF5 file has units',fpbuffer['entry/reduction/input/unit'])
+                    self.errors = 'NXazint1d only can be read with 2th units'
+                    return False
+                if self.selections is None or len(self.selections) == 0:
+                    self.blknum = 0
+                else:
+                    self.blknum = min(self.selections)
             except IOError:
                 print ('cannot open file '+ filename)
                 return False
             finally:
                 fp.close()
-            # get overriding sample & instrument parameters 
-            fpbuffer['sampleprm'] = {}
-            samplefile = os.path.splitext(filename)[0] + '.samprm'
-            if os.path.exists(samplefile):
-                fp = open(samplefile,'r')
-                S = fp.readline()
-                while S:
-                    if not S.strip().startswith('#'):
-                        [item,val] = S[:-1].split(':')
-                        fpbuffer['sampleprm'][item.strip("'")] = eval(val)
-                    S = fp.readline()
-                fp.close()
-            fpbuffer['instprm'] = {}
-            instfile = os.path.splitext(filename)[0] + '.instprm'
-            if os.path.exists(instfile):
-                self.instmsg = 'HDF and .instprm files'
-                fp = open(instfile,'r')
-                S = fp.readline()
-                while S:
-                    if not S.strip().startswith('#'):
-                        [item,val] = S[:-1].split(':')
-                        fpbuffer['instprm'][item.strip("'")] = eval(val)
-                    S = fp.readline()
-                fp.close()
-        # look for a non-empty scan (lineout)
-        use = [0]
-        while sum(use) == 0 and self.azmcnt < fpbuffer['intenArr'].shape[1]:
-            self.azmcnt += 1
-            if self.azmcnt >= fpbuffer['intenArr'].shape[1]:
-                return False
-            use = fpbuffer['REtaMap'][3,:,self.azmcnt] != 0
-
+            self.numbanks=len(fpbuffer['entry/data/I'])
+            # # get overriding sample & instrument parameters
+            # fpbuffer['sampleprm'] = {}
+            # samplefile = os.path.splitext(filename)[0] + '.samprm'
+            # if os.path.exists(samplefile):
+            #     fp = open(samplefile,'r')
+            #     S = fp.readline()
+            #     while S:
+            #         if not S.strip().startswith('#'):
+            #             [item,val] = S[:-1].split(':')
+            #             fpbuffer['sampleprm'][item.strip("'")] = eval(val)
+            #         S = fp.readline()
+            #     fp.close()
+            # fpbuffer['instprm'] = {}
+            # instfile = os.path.splitext(filename)[0] + '.instprm'
+            # if os.path.exists(instfile):
+            #     self.instmsg = 'HDF and .instprm files'
+            #     fp = open(instfile,'r')
+            #     S = fp.readline()
+            #     while S:
+            #         if not S.strip().startswith('#'):
+            #             [item,val] = S[:-1].split(':')
+            #             fpbuffer['instprm'][item.strip("'")] = eval(val)
+            #         S = fp.readline()
+            #     fp.close()
         # now transfer information into current histogram 
-        self.pwdparms['Instrument Parameters'] = [
-            {'Type': ['PXC', 'PXC', False]},
-            {}]
-        inst = {}
-        inst.update(instprmList)
-        inst.update(fpbuffer['instprm'])
-        for key,val in inst.items():
-            self.pwdparms['Instrument Parameters'][0][key] = [val,val,False]
-        samp = {}
-        samp.update(sampleprmList)
-        samp.update(fpbuffer['sampleprm'])
-        for key,val in samp.items():
-            self.Sample[key] = val
-        self.numbanks=len(fpbuffer['blkmap'])
-        x = fpbuffer['REtaMap'][1,:,self.azmcnt][use]
-        y = fpbuffer['intenArr'][:,self.azmcnt][use]
-        w = np.nan_to_num(1/y)    # this is probably not correct
-        eta = np.average(fpbuffer['REtaMap'][2,:,self.azmcnt][use])
-        self.pwdparms['Instrument Parameters'][0]['Azimuth'] = [90-eta,90-eta,False]
-        self.pwdparms['Instrument Parameters'][0]['Bank'] = [self.azmcnt,self.azmcnt,False]
+        #self.pwdparms['Instrument Parameters'] = [
+        #    {'Type': ['PXC', 'PXC', False]},
+        #    {}]
+        # inst = {}
+        # inst.update(instprmList)
+        # inst.update(fpbuffer['instprm'])
+        # for key,val in inst.items():
+        #     self.pwdparms['Instrument Parameters'][0][key] = [val,val,False]
+        # samp = {}
+        # samp.update(sampleprmList)
+        # samp.update(fpbuffer['sampleprm'])
+        # for key,val in samp.items():
+        #     self.Sample[key] = val
+        x = fpbuffer['entry/data/radial_axis']
+        y = fpbuffer['entry/data/I'][self.blknum]
+        w = np.nan_to_num(1/y)    # this is not correct
+        #self.pwdparms['Instrument Parameters'][0]['Azimuth'] = [90-eta,90-eta,False]
+        #self.pwdparms['Instrument Parameters'][0]['Bank'] = [self.blknum,self.blknum,False]
 #        self.Sample['Gonio. radius'] = float(S.split('=')[1])
 #        self.Sample['Omega'] = float(S.split('=')[1])
 #        self.Sample['Chi'] = float(S.split('=')[1])
-        self.Sample['Phi'] = Omega = fpbuffer['Omegas'][self.blknum]
+        #self.Sample['Phi'] = Omega = fpbuffer['Omegas'][self.blknum]
         self.powderdata = [x,y,w,np.zeros_like(x),np.zeros_like(x),np.zeros_like(x)]
         #self.comments = comments[selblk]
         self.powderentry[0] = filename
         #self.powderentry[1] = Pos # position offset (never used, I hope)
         self.powderentry[2] = self.blknum  # bank number
-        self.idstring = f'{os.path.split(filename)[1][:10]} omega={Omega} eta={eta}'
-#        if GSASIIpath.GetConfigValue('debug'): print(
-#                f'Read entry #{self.azmcnt} img# {self.blknum} from file {filename}')
-        # are there more lineouts after this one in current image to read?
-        self.repeat = sum(sum(fpbuffer['REtaMap'][3,:,self.azmcnt+1:])) != 0
-        if self.repeat: return True
+        self.idstring = f'#{self.blknum} {os.path.split(filename)[1][:60]}'
+        self.instdict['wave'] = fpbuffer['entry/instrument/monochromator/wavelength']
         # if not, are there more [selected] images that after this to be read?
+        self.repeat = False
         if self.blknum < self.numbanks-1:
             if self.selections is None or len(self.selections) == 0:
                 self.blknum += 1

From c7ff0e46ef3ca0d6a56210c9cac617dc93e307b6 Mon Sep 17 00:00:00 2001
From: BHT <toby@anl.gov>
Date: Thu, 2 Oct 2025 19:09:39 -0500
Subject: [PATCH 3/7] rework HDF5 importers for MaxIV

---
 GSASII/GSASIIdataGUI.py      |   2 +-
 GSASII/imports/G2img_HDF5.py |  57 +++++++------
 GSASII/imports/G2pwd_HDF5.py | 155 ++++++++++++++++-------------------
 GSASII/imports/__init__.py   |   2 +-
 GSASII/imports/meson.build   |   1 +
 5 files changed, 108 insertions(+), 109 deletions(-)

diff --git a/GSASII/GSASIIdataGUI.py b/GSASII/GSASIIdataGUI.py
index ebdf296d7..a05f03561 100644
--- a/GSASII/GSASIIdataGUI.py
+++ b/GSASII/GSASIIdataGUI.py
@@ -1616,7 +1616,7 @@ def GetDefaultParms(self,rd):
                 else:
                     rd.instmsg = 'default: '+dI.defaultIparm_lbl[res]
                     inst1,inst2 = self.ReadPowderInstprm(dI.defaultIparms[res],bank,rd)
-                    if rd.instdict.get('wave'):
+                    if rd.instdict.get('wave') and 'Lam' in inst1:
                         inst1['Lam'][0] = rd.instdict.get('wave')
                         inst1['Lam'][1] = rd.instdict.get('wave')
                     return [inst1,inst2]
diff --git a/GSASII/imports/G2img_HDF5.py b/GSASII/imports/G2img_HDF5.py
index d4f237a3d..388c673a7 100644
--- a/GSASII/imports/G2img_HDF5.py
+++ b/GSASII/imports/G2img_HDF5.py
@@ -54,12 +54,17 @@ def Reader(self, filename, ParentFrame=None, **kwarg):
             return False
         imagenum = kwarg.get('blocknum')
         if imagenum is None: imagenum = 1
+        quick = False
         # do we have a image number or a map to the section with the image?
         try:
-            int(imagenum)
-            # set up an index as to where images are found
-            self.buffer = kwarg.get('buffer',{})
-            if not self.buffer.get('imagemap'):
+            int(imagenum) # test if image # is a tuple
+        except: # pull the section name and number out from the imagenum value
+            kwargs = {'name':imagenum[0],'num':imagenum[1]}
+            quick = True
+        # set up an index as to where images are found
+        self.buffer = kwarg.get('buffer',{})
+        if not quick and not self.buffer.get('imagemap'):
+            try:
                 if GSASIIpath.GetConfigValue('debug'): print('Scanning for image map')
                 self.buffer['imagemap'] = []
                 self.Comments = self.visit(fp)
@@ -93,9 +98,13 @@ def Reader(self, filename, ParentFrame=None, **kwarg):
                     self.errors = 'No images selected from file'
                     fp.close()
                     return False
+            except Exception as msg:
+                print(f'Error mapping file:\n{msg}')
+                return False
+        if not quick: 
             self.buffer['selectedImages'] = self.buffer.get('selectedImages',
                                                 list(range(len(self.buffer['imagemap']))))
-            # get the first selected image
+            # get the next selected image
             while imagenum <= len(self.buffer['imagemap']):
                 if imagenum-1 in self.buffer['selectedImages']:
                     del self.buffer['selectedImages'][self.buffer['selectedImages'].index(imagenum-1)]
@@ -107,11 +116,6 @@ def Reader(self, filename, ParentFrame=None, **kwarg):
                 fp.close()
                 return False
             kwargs = {'imagenum':imagenum}
-            quick = False
-        except:
-            kwargs = {'name':imagenum[0],'num':imagenum[1]}
-            quick = True
-        # we have been passed a map to images
         self.Data,self.Npix,self.Image = self.readDataset(fp,**kwargs)
         if quick:
             fp.close()
@@ -153,19 +157,26 @@ def func(name, dset):
             if not hasattr(dset,'shape'): return # not array, can't be image
             if isinstance(dset, h5py.Dataset):
                 dims = dset.shape
-                if len(dims) < 2:
-                    head.append('%s: %s'%(dset.name,str(dset[()][0])))
-                elif len(dims) == 4:
-                    size = dims[2:]
-                    self.buffer['imagemap'] += [(dset.name,i,size) for i in range(dims[1])]
-                elif len(dims) == 3:
-                    size = dims[1:]
-                    self.buffer['imagemap'] += [(dset.name,i,size) for i in range(dims[0])]
-                elif len(dims) == 2:
-                    size = dims
-                    self.buffer['imagemap'] += [(dset.name,None,size)]
-                else:
-                    print('Skipping entry '+str(dset.name)+'. Shape is '+str(dims))
+                try:
+                    if len(dims) == 0:
+                        val = dset[()]
+                        if type(val) is bytes: val = val.decode()
+                        head.append(f'{dset.name}: {val}')
+                    elif len(dims) < 2:
+                        head.append(f'{dset.name}: {dset[()][0]}')
+                    elif len(dims) == 4:
+                        size = dims[2:]
+                        self.buffer['imagemap'] += [(dset.name,i,size) for i in range(dims[1])]
+                    elif len(dims) == 3:
+                        size = dims[1:]
+                        self.buffer['imagemap'] += [(dset.name,i,size) for i in range(dims[0])]
+                    elif len(dims) == 2:
+                        size = dims
+                        self.buffer['imagemap'] += [(dset.name,None,size)]
+                    else:
+                        print(f'Skipping entry {dset.name}. Shape is {dims}')
+                except Exception as msg:
+                    print(f'Skipping entry {dset.name} Error getting shape\n{msg}')
         fp.visititems(func)
         return head
 
diff --git a/GSASII/imports/G2pwd_HDF5.py b/GSASII/imports/G2pwd_HDF5.py
index b8312061f..88bbf6b2a 100644
--- a/GSASII/imports/G2pwd_HDF5.py
+++ b/GSASII/imports/G2pwd_HDF5.py
@@ -1,10 +1,11 @@
 # -*- coding: utf-8 -*-
-'''
+'''Use to read powder patterns from HDF5 files. At present the only supported 
+format is a NeXus variant named NXazint1d. 
 '''
 
 from __future__ import division, print_function
 import os
-import sys
+
 try:
     import h5py
 except ImportError:
@@ -12,25 +13,6 @@
 import numpy as np
 from .. import GSASIIobj as G2obj
 from .. import GSASIIfiles as G2fil
-#from .. import GSASIIpath
-
-# things to do:
-#   uncertainties
-#   instr. parms
-#instprmList = [('Bank',1.0), ('Lam',0.413263), ('Polariz.',0.99), 
-#            ('SH/L',0.002), ('Type','PXC'), ('U',1.163), ('V',-0.126), 
-#            ('W',0.063), ('X',0.0), ('Y',0.0), ('Z',0.0), ('Zero',0.0)]
-#   comments
-#   dataset naming
-#   sample parameters
-#sampleprmList = [('InstrName','APS 1-ID'), ('Temperature', 295.0)]
-#  'Scale': [1.0, True], 'Type': 'Debye-Scherrer',
-# 'Absorption': [0.0, False], 'DisplaceX': [0.0, False], 'DisplaceY': [0.0, False]# 'Pressure': 0.1, 'Time': 0.0, 'FreePrm1': 0.0,
-# 'FreePrm2': 0.0, 'FreePrm3': 0.0, 'Gonio. radius': 200.0, 'Omega': 0.0,
-# 'Chi': 0.0, 'Phi': 180.0, 'Azimuth': 0.0,
-# 'Materials': [{'Name': 'vacuum', 'VolFrac': 1.0}, {'Name': 'vacuum', 'VolFrac': 0.0}],
-# 'Thick': 1.0, 'Contrast': [0.0, 0.0], 'Trans': 1.0, 'SlitLen': 0.0}
-
 
 class HDF5_Reader(G2obj.ImportPowderData):
     '''Routine to read multiple powder patterns from an HDF5 file. 
@@ -52,13 +34,14 @@ def __init__(self):
             G2fil.ImportErrorMsg(msg,{'HDF5 importer':['h5py','hdf5']})
         super(self.__class__,self).__init__( # fancy way to self-reference
             extensionlist=('.hdf','.h5'),strictExtension=True,
-            formatName = 'MAX IV HDF5',longFormatName = 'HDF5 integrated scans')
+            formatName = 'MAX IV HDF5',longFormatName = 'MaxIV NXazint1d HDF5 integrated scans')
         self.scriptable = True
         #self.Iparm = {} #only filled for EDS data
 
     def ShowH5Element(self,obj,keylist):
         '''Format the contents of an HDF5 entry as a single line. Not used for 
-        reading files, only used in :meth:`HDF5list`
+        reading files, only used in :meth:`HDF5list` which is here for software
+        development. 
         '''
         k = '/'.join(keylist)
         l = obj.get(k, getlink=True)
@@ -89,16 +72,18 @@ def ShowH5Element(self,obj,keylist):
         else:
             return f'type is {type(obj[k])}'
 
-    def RecurseH5Element(self,obj,prefix=[]):
+    def RecurseH5Element(self,obj,prefix=[],length=None):
         '''Returns a list of entries of all keys in the HDF5 file
         (or group) in `obj`. Note that `obj` can be a file object, created by 
         `h5py.File` or can be a subset `fp['key/subkey']`.
+        
+        If length is specified, only the entries with arrays of that
+        length are returned.
 
         The returned list is organized where: 
           * entry 0 is the top-level keys (/a, /b,...),
           * entry 1 has the first level keys (/a/c /a/d, /b/d, /b/e,...)
           * ...
-        Not used for reading files, used only in :meth:`HDF5list`
         '''
         try:
             self.HDF5entries
@@ -109,19 +94,27 @@ def RecurseH5Element(self,obj,prefix=[]):
             self.HDF5entries.append([])
         for i in obj:
             nextprefix = prefix+[i]
-            self.HDF5entries[depth].append(nextprefix)
-            # check for link objects
-            l = obj.get(i, getlink=True)
-            if isinstance(l, h5py.ExternalLink): continue
+            if length is None:
+                self.HDF5entries[depth].append(nextprefix)
             try:
                 typ = str(type(obj[i]))
             except:
                 print(f'**Error** with key {prefix}/{i}')
                 continue
+            if length is not None and ".Group'" not in typ:
+                # get length of this obj[i]
+                try:
+                    if len(obj[i]) == length:
+                        self.HDF5entries[depth].append(nextprefix)
+                except TypeError:
+                    continue
+            # check for link objects
+            l = obj.get(i, getlink=True)
+            if isinstance(l, h5py.ExternalLink): continue
             if ".Group'" in typ:
                 #t = f'{prefix}/{i}'
                 #print(f'\n{nextprefix} contents {(60-len(t))*'='}')
-                self.RecurseH5Element(obj[i],nextprefix)
+                self.RecurseH5Element(obj[i],nextprefix,length)
         return self.HDF5entries
         
                 
@@ -158,7 +151,6 @@ def ContentsValidator(self, filename):
         '''Test if valid by seeing if the HDF5 library recognizes the file. 
         Then get file type (currently MAX IV NeXus/NXazint[12]d only)
         '''
-        #from .. import GSASIIpath
         try:
             fp = h5py.File(filename, 'r')
             if 'entry' in fp: # NeXus
@@ -168,9 +160,6 @@ def ContentsValidator(self, filename):
                     # MAX IV NXazint1d file
                     if fp['/entry/definition'][()].decode() == 'NXazint1d':
                         return True
-                    # MAX IV NXazint1d file
-                    #if fp['/entry/definition'][()].decode() == 'NXazint2d':
-                    #    return True
         except IOError:
             return False
         finally:
@@ -220,11 +209,13 @@ def readNXazint1d(self, filename, fpbuffer={}):
         see https://nxazint-hdf5-nexus-3229ecbd09ba8a773fbbd8beb72cace6216dfd5063e1.gitlab-pages.esrf.fr/classes/contributed_definitions/NXazint1d.html
         '''
         #self.instmsg = 'HDF file'
+        self.comments = []
         doread = False # has the file already been read into a buffer?
-        arrays = ('entry/data/radial_axis','entry/data/I')
+        arrays = ('entry/data/radial_axis','entry/data/I','entry/data/I_errors')
         floats = ('entry/instrument/monochromator/wavelength',
                   'entry/reduction/input/polarization_factor')
-        strings = ('entry/instrument/source/name','entry/reduction/input/unit')
+        strings = ('entry/instrument/name','entry/reduction/input/unit',
+                   'entry/sample/name','entry/instrument/source/name')
         for i in arrays+floats+strings:
             if i not in fpbuffer:
                 doread = True
@@ -234,72 +225,68 @@ def readNXazint1d(self, filename, fpbuffer={}):
                 fp = h5py.File(filename, 'r')
                 for i in arrays:
                     fpbuffer[i] = np.array(fp.get(i))
+                self.numbanks = len(fpbuffer['entry/data/I']) # number of scans
                 for i in floats:
                     fpbuffer[i] = float(fp[i][()])
                 for i in strings:
-                    fpbuffer[i] = fp[i][()].decode()
+                    try:
+                        fpbuffer[i] = fp[i][()].decode()
+                        self.comments.append(f'{i}={fpbuffer[i]}')
+                    except:
+                        fpbuffer[i] = None
                 if fpbuffer['entry/reduction/input/unit'] != '2th':
                     print('NXazint1d HDF5 file has units',fpbuffer['entry/reduction/input/unit'])
                     self.errors = 'NXazint1d only can be read with 2th units'
                     return False
+                # save arrays that are potentially tracking the parametric conditions
+                # e.g. variables with the same length as the humber of datasets
+                paramItems = self.RecurseH5Element(fp,length=self.numbanks)
+                fpbuffer['ParamTrackingVars'] = {}
+                for i in paramItems:
+                    for j in i:
+                        key = '/'.join(j)
+                        if key in arrays: continue
+                        obj = fp.get(key)
+                        if obj is None: continue
+                        if len(obj[()].shape) != 1: continue
+                        # are all values the same? If so, put them into the comments
+                        # for the first histogram. If they are changing, note that and
+                        # later they will be put into every histogram.
+                        if all(obj[0] == obj):
+                            self.comments.append(f'{key.split("/")[-1]}={obj[0]}')
+                        else:
+                            fpbuffer['ParamTrackingVars'][key] = np.array(obj[()])
                 if self.selections is None or len(self.selections) == 0:
                     self.blknum = 0
                 else:
                     self.blknum = min(self.selections)
             except IOError:
-                print ('cannot open file '+ filename)
+                print (f'Can not open or read file {filename}')
                 return False
             finally:
                 fp.close()
-            self.numbanks=len(fpbuffer['entry/data/I'])
-            # # get overriding sample & instrument parameters
-            # fpbuffer['sampleprm'] = {}
-            # samplefile = os.path.splitext(filename)[0] + '.samprm'
-            # if os.path.exists(samplefile):
-            #     fp = open(samplefile,'r')
-            #     S = fp.readline()
-            #     while S:
-            #         if not S.strip().startswith('#'):
-            #             [item,val] = S[:-1].split(':')
-            #             fpbuffer['sampleprm'][item.strip("'")] = eval(val)
-            #         S = fp.readline()
-            #     fp.close()
-            # fpbuffer['instprm'] = {}
-            # instfile = os.path.splitext(filename)[0] + '.instprm'
-            # if os.path.exists(instfile):
-            #     self.instmsg = 'HDF and .instprm files'
-            #     fp = open(instfile,'r')
-            #     S = fp.readline()
-            #     while S:
-            #         if not S.strip().startswith('#'):
-            #             [item,val] = S[:-1].split(':')
-            #             fpbuffer['instprm'][item.strip("'")] = eval(val)
-            #         S = fp.readline()
-            #     fp.close()
-        # now transfer information into current histogram 
-        #self.pwdparms['Instrument Parameters'] = [
-        #    {'Type': ['PXC', 'PXC', False]},
-        #    {}]
-        # inst = {}
-        # inst.update(instprmList)
-        # inst.update(fpbuffer['instprm'])
-        # for key,val in inst.items():
-        #     self.pwdparms['Instrument Parameters'][0][key] = [val,val,False]
-        # samp = {}
-        # samp.update(sampleprmList)
-        # samp.update(fpbuffer['sampleprm'])
-        # for key,val in samp.items():
-        #     self.Sample[key] = val
         x = fpbuffer['entry/data/radial_axis']
         y = fpbuffer['entry/data/I'][self.blknum]
-        w = np.nan_to_num(1/y)    # this is not correct
-        #self.pwdparms['Instrument Parameters'][0]['Azimuth'] = [90-eta,90-eta,False]
-        #self.pwdparms['Instrument Parameters'][0]['Bank'] = [self.blknum,self.blknum,False]
-#        self.Sample['Gonio. radius'] = float(S.split('=')[1])
-#        self.Sample['Omega'] = float(S.split('=')[1])
-#        self.Sample['Chi'] = float(S.split('=')[1])
-        #self.Sample['Phi'] = Omega = fpbuffer['Omegas'][self.blknum]
+        try:
+            esd = fpbuffer['entry/data/I_errors'][self.blknum]
+            w = np.where(esd==0,0,np.nan_to_num(1/esd**2))
+        except:
+            w = np.nan_to_num(1/y)    # best we can do, alas
         self.powderdata = [x,y,w,np.zeros_like(x),np.zeros_like(x),np.zeros_like(x)]
+        # add parametric var as a comment
+        for key,arr in fpbuffer['ParamTrackingVars'].items():
+            val = arr[self.blknum]
+            self.comments.append(f'{key.split("/")[-1]}={val}')
+            if 'temperature' in key:
+                self.Sample['Temperature'] = val # in K already
+            elif 'time' in key:
+                self.Sample['Time'] = val # should be seconds
+            elif 'chi' in key:
+                self.Sample['Chi'] = val # not sure if correct mapping
+            elif 'phi' in key:
+                self.Sample['Phi'] = val
+            elif 'omega' in key:
+                self.Sample['Omega'] = val
         #self.comments = comments[selblk]
         self.powderentry[0] = filename
         #self.powderentry[1] = Pos # position offset (never used, I hope)
diff --git a/GSASII/imports/__init__.py b/GSASII/imports/__init__.py
index 9549992e2..c585dbec3 100644
--- a/GSASII/imports/__init__.py
+++ b/GSASII/imports/__init__.py
@@ -25,8 +25,8 @@
 from . import G2pwd_CIF
 from . import G2pwd_FP
 from . import G2pwd_GPX
-from . import G2pwd_MIDAS
 from . import G2pwd_HDF5
+from . import G2pwd_MIDAS
 from . import G2pwd_Panalytical
 from . import G2pwd_csv
 from . import G2pwd_fxye
diff --git a/GSASII/imports/meson.build b/GSASII/imports/meson.build
index eb1269c4e..60b955942 100644
--- a/GSASII/imports/meson.build
+++ b/GSASII/imports/meson.build
@@ -26,6 +26,7 @@ py.install_sources([
     'G2pwd_CIF.py',
     'G2pwd_FP.py',
     'G2pwd_GPX.py',
+    'G2pwd_HDF5.py',
     'G2pwd_MIDAS.py',
     'G2pwd_Panalytical.py',
     'G2pwd_csv.py',

From 4d648dab1ce46c0f07722ad96b3277349257f67c Mon Sep 17 00:00:00 2001
From: BHT <toby@anl.gov>
Date: Thu, 16 Oct 2025 16:22:23 -0500
Subject: [PATCH 4/7] Got NXazint1d reader done

---
 GSASII/imports/G2pwd_HDF5.py | 222 ++++++++++++++++++++++++++---------
 1 file changed, 166 insertions(+), 56 deletions(-)

diff --git a/GSASII/imports/G2pwd_HDF5.py b/GSASII/imports/G2pwd_HDF5.py
index 88bbf6b2a..0789b7c4e 100644
--- a/GSASII/imports/G2pwd_HDF5.py
+++ b/GSASII/imports/G2pwd_HDF5.py
@@ -14,6 +14,9 @@
 from .. import GSASIIobj as G2obj
 from .. import GSASIIfiles as G2fil
 
+#from .. import GSASIIpath
+#breakpoint = GSASIIpath.IPyBreak_base
+
 class HDF5_Reader(G2obj.ImportPowderData):
     '''Routine to read multiple powder patterns from an HDF5 file. 
 
@@ -125,6 +128,11 @@ def HDF5list(self, filename):
 
         :param filename: 
         '''
+        def ShowH5NeXusName(obj,keylist):
+            key = '/'.join(keylist)
+            if "NX_class" in obj[key].attrs:
+                return obj[key].attrs["NX_class"]
+
         fp = h5py.File(filename, 'r')
         #print(f'Contents of {filename}')
         HDF5entries = self.RecurseH5Element(fp)
@@ -136,6 +144,7 @@ def HDF5list(self, filename):
             for k in j:
                 m = max(m,len('/'.join(k)))
             for k in j:
+                nxname = ShowH5NeXusName(fp,k)
                 lbl = self.ShowH5Element(fp,k)
                 if '\n' in lbl:
                     lbl = '; '.join(lbl.split('\n'))
@@ -144,6 +153,7 @@ def HDF5list(self, filename):
                 # if '\n' in lbl:
                 #     lbl = lbl.split()[0] + '...'
                 if lbl != '(group)': strings.append(f"{'/'.join(k):{m}s} {lbl}")
+                if nxname: print(f"{'/'.join(k):{m}s} {lbl} {nxname}")
         with open(filename+'_contents.txt', 'w') as fp:
             for i in strings: fp.write(f'{i}\n')
                     
@@ -153,18 +163,23 @@ def ContentsValidator(self, filename):
         '''
         try:
             fp = h5py.File(filename, 'r')
-            if 'entry' in fp: # NeXus
-                #self.HDF5entries = []
-                #self.HDF5list(filename)
-                if 'definition' in fp['/entry']:
-                    # MAX IV NXazint1d file
-                    if fp['/entry/definition'][()].decode() == 'NXazint1d':
-                        return True
-        except IOError:
+            # test for MaxIV NeXus/NXazint1d & NXazint2d
+            test = True
+            while test:
+                test = False
+                entry = getNeXusBase(fp)
+                if entry is None: break # not NeXus
+                if 'definition' not in fp[entry]: break # not MaxIV NXazint*
+                definition = fp[entry+'/definition'][()].decode()
+                if definition == 'NXazint1d': return True
+                if definition == 'NXazint2d': return True
+            # test for next HDF5 type here
+            #
+        except IOError: # not HDF5
             return False
         finally:
             fp.close()
-        return False
+        return False        # nothing passed -- not valid
 
     def Reader(self, filename, ParentFrame=None, **kwarg):
         '''Scan file for sections needed by defined file types (currently 
@@ -182,19 +197,26 @@ def Reader(self, filename, ParentFrame=None, **kwarg):
                 self.blknum = 0
             else:
                 self.blknum = min(self.selections)
+        # was file read into buffer? If so skip opening file to save time
+        definition = fpbuffer.get('definition','')
+        if definition == 'NXazint1d':
+            return self.readNXazint1d(filename, fpbuffer)
+        elif definition == 'NXazint2d':
+            return self.readNXazint2d(filename, fpbuffer)
+        # first or non-buffered read
         try:
             fp = h5py.File(filename, 'r')
-            if 'entry' in fp: # NeXus
-                if 'definition' in fp['/entry']:
-                    # MAX IV NXazint1d file
-                    if fp['/entry/definition'][()].decode() == 'NXazint1d':
+            entry = getNeXusBase(fp)
+            if entry: # NeXus
+                if 'definition' in fp[entry]: # MaxIV NXazint*
+                    definition = fp[entry+'/definition'][()].decode()
+                    fpbuffer['definition'] = definition
+                    if definition == 'NXazint1d':
                         return self.readNXazint1d(filename, fpbuffer)
-
-                    # MAX IV NXazint1d file
-                    #if fp['/entry/definition'][()].decode() == 'NXazint2d':
-                    #    return self.readNXazint2d(filename, fpbuffer)
-                    #    return True
-                    # https://nxazint-hdf5-nexus-3229ecbd09ba8a773fbbd8beb72cace6216dfd5063e1.gitlab-pages.esrf.fr/classes/contributed_definitions/NXazint2d.html
+                    elif definition == 'NXazint2d':
+                        return self.readNXazint2d(filename, fpbuffer)
+            # not a supported file type
+            return False
         except IOError:
             print ('cannot open file '+ filename)
             return False
@@ -211,67 +233,95 @@ def readNXazint1d(self, filename, fpbuffer={}):
         #self.instmsg = 'HDF file'
         self.comments = []
         doread = False # has the file already been read into a buffer?
-        arrays = ('entry/data/radial_axis','entry/data/I','entry/data/I_errors')
-        floats = ('entry/instrument/monochromator/wavelength',
-                  'entry/reduction/input/polarization_factor')
-        strings = ('entry/instrument/name','entry/reduction/input/unit',
-                   'entry/sample/name','entry/instrument/source/name')
-        for i in arrays+floats+strings:
-            if i not in fpbuffer:
+        fileItems = {
+            # arrays
+            'radial_axis':('NXdata','radial_axis'),
+            'I':('NXdata','I'),
+            'I_errors':('NXdata','I_errors'),
+            # floats
+            'wavelength':('NXmonochromator','wavelength'),
+            'polarization_factor':('NXparameters','polarization_factor'),
+            # strings
+            'instrument/name':('NXinstrument','name'),
+            'unit':('NXparameters','unit'),
+            'sample/name':('NXsample','name'),
+            'source/name':('NXsource','name'),
+        }
+        # test if we have what we need in the buffer
+        for k in fileItems:
+            if k not in fpbuffer:                
                 doread = True
                 break
-        if doread:   # read into buffer
+        if doread:
+            # Nope, need to fill the buffer
             try:
                 fp = h5py.File(filename, 'r')
-                for i in arrays:
-                    fpbuffer[i] = np.array(fp.get(i))
-                self.numbanks = len(fpbuffer['entry/data/I']) # number of scans
-                for i in floats:
-                    fpbuffer[i] = float(fp[i][()])
-                for i in strings:
-                    try:
-                        fpbuffer[i] = fp[i][()].decode()
-                        self.comments.append(f'{i}={fpbuffer[i]}')
-                    except:
-                        fpbuffer[i] = None
-                if fpbuffer['entry/reduction/input/unit'] != '2th':
-                    print('NXazint1d HDF5 file has units',fpbuffer['entry/reduction/input/unit'])
-                    self.errors = 'NXazint1d only can be read with 2th units'
-                    return False
+                entry = getNeXusBase(fp)
+                # lookup NeXus locations
+                nexusDict = {i:None for i in set([i[0] for i in fileItems.values()])}
+                recurseNeXusEntries(fp,entry,nexusDict)
+                # save selected items from file in buffer
+                savedKeys = []
+                for k,loc in fileItems.items():
+                    if nexusDict[loc[0]] is None:
+                        fpbuffer[k] = None
+                        continue
+                    key = '/'.join((nexusDict[loc[0]],)+loc[1:])
+                    savedKeys.append(key)
+                    if key not in fp:
+                        fpbuffer[k] = None
+                        continue
+                    val = fp[key]
+                    if val.shape:
+                        fpbuffer[k] = np.array(val)
+                    elif 'float' in str(val.dtype):
+                        fpbuffer[k] = float(val[()])
+                        self.comments.append(f'{k}={val[()]}')
+                    else:
+                        fpbuffer[k] = val[()].decode()
+                        self.comments.append(f'{k}={fpbuffer[k]}')
+                self.numbanks = len(fpbuffer['I'])
                 # save arrays that are potentially tracking the parametric conditions
+                # into ParamTrackingVars.
                 # e.g. variables with the same length as the humber of datasets
-                paramItems = self.RecurseH5Element(fp,length=self.numbanks)
                 fpbuffer['ParamTrackingVars'] = {}
+                paramItems = self.RecurseH5Element(fp,length=self.numbanks)
                 for i in paramItems:
                     for j in i:
                         key = '/'.join(j)
-                        if key in arrays: continue
+                        if key in savedKeys: continue # standard data array
                         obj = fp.get(key)
                         if obj is None: continue
                         if len(obj[()].shape) != 1: continue
                         # are all values the same? If so, put them into the comments
-                        # for the first histogram. If they are changing, note that and
-                        # later they will be put into every histogram.
+                        # for the first histogram only. If they are changing, note that 
+                        # here and later they will be put into every histogram.
                         if all(obj[0] == obj):
                             self.comments.append(f'{key.split("/")[-1]}={obj[0]}')
                         else:
                             fpbuffer['ParamTrackingVars'][key] = np.array(obj[()])
-                if self.selections is None or len(self.selections) == 0:
-                    self.blknum = 0
-                else:
-                    self.blknum = min(self.selections)
             except IOError:
                 print (f'Can not open or read file {filename}')
                 return False
             finally:
                 fp.close()
-        x = fpbuffer['entry/data/radial_axis']
-        y = fpbuffer['entry/data/I'][self.blknum]
+            if fpbuffer['unit'] != '2th':
+                print('NXazint1d HDF5 file has units',fpbuffer['entry/reduction/input/unit'])
+                self.errors = 'NXazint1d only can be read with 2th units'
+                return False
+            # initialize the block selection
+            if self.selections is None or len(self.selections) == 0:
+                self.blknum = 0
+            else:
+                self.blknum = min(self.selections)
+        # now pull the selected dataset from the buffer
+        x = fpbuffer['radial_axis']
+        y = fpbuffer['I'][self.blknum]
         try:
-            esd = fpbuffer['entry/data/I_errors'][self.blknum]
+            esd = fpbuffer['I_errors'][self.blknum]
             w = np.where(esd==0,0,np.nan_to_num(1/esd**2))
         except:
-            w = np.nan_to_num(1/y)    # best we can do, alas
+            w = np.nan_to_num(1/y)    # best we can do, alas w/o reported s.u.'s
         self.powderdata = [x,y,w,np.zeros_like(x),np.zeros_like(x),np.zeros_like(x)]
         # add parametric var as a comment
         for key,arr in fpbuffer['ParamTrackingVars'].items():
@@ -287,12 +337,11 @@ def readNXazint1d(self, filename, fpbuffer={}):
                 self.Sample['Phi'] = val
             elif 'omega' in key:
                 self.Sample['Omega'] = val
-        #self.comments = comments[selblk]
         self.powderentry[0] = filename
         #self.powderentry[1] = Pos # position offset (never used, I hope)
         self.powderentry[2] = self.blknum  # bank number
         self.idstring = f'#{self.blknum} {os.path.split(filename)[1][:60]}'
-        self.instdict['wave'] = fpbuffer['entry/instrument/monochromator/wavelength']
+        self.instdict['wave'] = fpbuffer['wavelength']
         # if not, are there more [selected] images that after this to be read?
         self.repeat = False
         if self.blknum < self.numbanks-1:
@@ -307,3 +356,64 @@ def readNXazint1d(self, filename, fpbuffer={}):
                 except IndexError:   # last selected image has been read
                     self.repeat = False
         return True
+
+# NeXus support routines. These were influenced heavily by Frederik Holm Gjørup
+# Also see NeXus support in plaid (https://github.com/fgjorup/plaid/blob/main/plaid/nexus.py)
+
+def getNeXusBase(fp):
+    '''This returns the base entry in a NeXus compliant HDF5 file
+    (usually "/entry" for MaxIV files) or None if this is not a valid 
+    NeXus file. 
+    '''
+    for key in fp:
+        if ("NX_class" in fp[key].attrs and
+                fp[key].attrs["NX_class"] == "NXentry"):
+            return key
+
+def getNeXusEntry(fp,base,target):
+    '''This returns the entry in a NeXus compliant HDF5 file matching 
+    the name target, or None, if this is not found as a child of the key `base`.
+    Not in use as it is more practical to use :func:`recurseNeXusEntries`.
+    '''
+    for key in fp[base]:
+        subkey = '/'.join([base,key])
+        if "NX_class" in fp[subkey].attrs:
+            #print(key, list(fp[subkey].attrs),fp[subkey].attrs["NX_class"])
+            if ("NX_class" in fp[subkey].attrs and
+                    fp[subkey].attrs["NX_class"] == target):
+                return subkey
+        else:
+            print(key)
+
+def recurseNeXusEntry(fp,node,target):
+    '''Recurse through the HDF5 tree looking for NeXus class `target`. 
+    Not in use, as :func:`recurseNeXusEntries` is used to get all 
+    targets in a single pass through the tree.
+    '''
+    if node is None: return  # needed?
+    val = fp[node]
+    if ("NX_class" in val.attrs and
+                val.attrs["NX_class"] == target):
+        return node
+    if not isinstance(val, h5py.Group): return
+    for key in val:
+        subkey = '/'.join([node,key])
+        res = recurseNeXusEntry(fp,subkey,target)
+        if res: return res
+
+def recurseNeXusEntries(fp,node,targetdict):
+    '''recurse through the HDF5 tree looking for the NeXus classes
+    in `targetdict`, storing the HDF5 key for each class in the dict
+
+    :param fp: HDF5 file pointer
+    :param str node: name of current HDF5 key
+    :param dict targetdict: dict to place HDF5 keys corresponding to
+       the desired NeXus classes. On input, the NeXus classes are the dict
+       keys; each value is set to the HDF5 key where that class is found.
+    '''
+    val = fp[node]
+    if ("NX_class" in val.attrs and val.attrs["NX_class"] in targetdict):
+        targetdict[val.attrs["NX_class"]] = node
+    if isinstance(val, h5py.Group): 
+        for key in val:
+            recurseNeXusEntries(fp,'/'.join([node,key]),targetdict)

From 22e9365ce94fe9f197591f502de848a97b468947 Mon Sep 17 00:00:00 2001
From: BHT <toby@anl.gov>
Date: Sat, 18 Oct 2025 11:47:41 -0500
Subject: [PATCH 5/7] save a working snapshot, prior to cleanup

---
 GSASII/GSASIIobj.py          |   2 +
 GSASII/imports/G2pwd_HDF5.py | 360 ++++++++++++++++++++++++++++-------
 2 files changed, 291 insertions(+), 71 deletions(-)

diff --git a/GSASII/GSASIIobj.py b/GSASII/GSASIIobj.py
index dc5cd7038..8f544e1e1 100644
--- a/GSASII/GSASIIobj.py
+++ b/GSASII/GSASIIobj.py
@@ -1433,6 +1433,8 @@ def ReInitialize(self):
         self.instdict = {} # place items here that will be transferred to the instrument parameters
         self.pwdparms = {} # place parameters that are transferred directly to the tree
                            # here (typically from an existing GPX file)
+        self.selections = []
+        self.dnames = []
 ######################################################################
 class ImportSmallAngleData(ImportBaseclass):
     '''Defines a base class for the reading of files with small angle data.
diff --git a/GSASII/imports/G2pwd_HDF5.py b/GSASII/imports/G2pwd_HDF5.py
index 0789b7c4e..03fcc3f4e 100644
--- a/GSASII/imports/G2pwd_HDF5.py
+++ b/GSASII/imports/G2pwd_HDF5.py
@@ -162,6 +162,7 @@ def ContentsValidator(self, filename):
         Then get file type (currently MAX IV NeXus/NXazint[12]d only)
         '''
         try:
+            definition = ''
             fp = h5py.File(filename, 'r')
             # test for MaxIV NeXus/NXazint1d & NXazint2d
             test = True
@@ -171,8 +172,53 @@ def ContentsValidator(self, filename):
                 if entry is None: break # not NeXus
                 if 'definition' not in fp[entry]: break # not MaxIV NXazint*
                 definition = fp[entry+'/definition'][()].decode()
-                if definition == 'NXazint1d': return True
-                if definition == 'NXazint2d': return True
+                # get names for datasets so we can select them
+                if definition == 'NXazint1d':
+                    fileItems = {
+                        'I':('NXdata','I'),
+                        'unit':('NXparameters','unit'),
+                        }
+                    buffer = {}
+                    if not self.readInNeXus(filename,buffer,fileItems,'NXazint1d-validate'):
+                        return False
+                    nhist = len(buffer['I'])
+                    self.selections = list(range(nhist))
+                    for i in range(nhist):
+                        self.dnames.append(f'#{i} {os.path.split(filename)[1][:60]}')
+                    return True
+                if definition == 'NXazint2d':
+                    fileItems = {
+                        'I':('NXdata','I'),
+                        'unit':('NXparameters','unit'),
+                        'azimuthal_axis':('NXdata','azimuthal_axis'),
+                    }
+                    buffer = {}
+                    if not self.readInNeXus(filename,buffer,fileItems,'NXazint2d-validate'):
+                        return False
+                    #numazimuth = buffer['azimuth_bins']
+                    numazimuth = len(buffer['azimuthal_axis'])
+                    numbanks = len(buffer['I'])
+                    nhist = numbanks * numazimuth
+                    self.selections = list(range(nhist))
+                    for i in range(nhist):
+                        # group by parametric variable
+                        numScan = i // numazimuth
+                        numAzim = i - (numScan * numazimuth)
+                        Azimuth = buffer['azimuthal_axis'][numAzim]
+                        self.dnames.append(f'#{numScan} Azm={Azimuth} {os.path.split(filename)[1][:60]}')
+                    return True
+            # test for MaxIV NeXus combined NXazint1d & NXazint2d
+            test = True
+            while test:
+                test = False
+                entry = getNeXusBase(fp)
+                subentry = getNeXusEntry(fp,entry,'NXsubentry')
+                if len(subentry) == 0:
+                    break # nothing to read
+                for entry in subentry:
+                    definition = fp[entry+'/definition'][()].decode()
+                    if definition == 'NXazint1d' or definition == 'NXazint2d':
+                        return True
             # test for next HDF5 type here
             #
         except IOError: # not HDF5
@@ -210,44 +256,109 @@ def Reader(self, filename, ParentFrame=None, **kwarg):
             if entry: # NeXus
                 if 'definition' in fp[entry]: # MaxIV NXazint*
                     definition = fp[entry+'/definition'][()].decode()
-                    fpbuffer['definition'] = definition
-                    if definition == 'NXazint1d':
-                        return self.readNXazint1d(filename, fpbuffer)
-                    elif definition == 'NXazint2d':
-                        return self.readNXazint2d(filename, fpbuffer)
-            # not a supported file type
-            return False
+                else:
+                    subentry = getNeXusEntry(fp,entry,'NXsubentry')
+                    if len(subentry) == 0:
+                        return False
+                    elif len(subentry) == 1:
+                        entry = subentry[0]
+                    elif ParentFrame:
+                        from .. import GSASIIctrlGUI as G2G
+                        choices = ('NXazint1d 1D file','NXazint1d 2D file')
+                        sel = G2G.ItemSelector(choices, ParentFrame=ParentFrame,
+                                                   header='Select file section',
+                                                   title='Select the section of the file to read')
+                        if sel is None: return False
+                        entry = subentry[sel]
+                    else:
+                        entry = subentry[1]
+                    if 'definition' not in fp[entry]: return False
+                    definition = fp[entry+'/definition'][()].decode()
+                fpbuffer['definition'] = definition
+                if definition == 'NXazint1d':
+                    return self.readNXazint1d(filename, fpbuffer, entry)
+                elif definition == 'NXazint2d':
+                    return self.readNXazint2d(filename, fpbuffer, entry)
+            return False # not a supported file type
         except IOError:
-            print ('cannot open file '+ filename)
+            print (f'cannot open file {filename}')
             return False
         finally:
             fp.close()
-
         print (f'Unknown type of HDF5 powder file {filename}')
         return False
+    
+    # def FillBuffer(self,fp,fileItems,fpbuffer,nexusDict):
+    #     '''save selected items from file in buffer
+    #     '''
+    #     savedKeys = []
+    #     for k,loc in fileItems.items():
+    #         if nexusDict[loc[0]] is None:
+    #             fpbuffer[k] = None
+    #             continue
+    #         key = '/'.join((nexusDict[loc[0]],)+loc[1:])
+    #         savedKeys.append(key)
+    #         if key not in fp:
+    #             fpbuffer[k] = None
+    #             continue
+    #         val = fp[key]
+    #         if val.shape:
+    #             fpbuffer[k] = np.array(val)
+    #         elif 'float' in str(val.dtype):
+    #             fpbuffer[k] = float(val[()])
+    #             self.comments.append(f'{k}={val[()]}')
+    #         elif 'int' in str(val.dtype):
+    #             fpbuffer[k] = int(val[()])
+    #         else:
+    #             fpbuffer[k] = val[()].decode()
+    #             self.comments.append(f'{k}={fpbuffer[k]}')
+    #     self.numparams = len(fpbuffer['I'])
+    #     # save arrays that are potentially tracking the parametric conditions
+    #     # into ParamTrackingVars.
+    #     # e.g. variables with the same length as the humber of datasets
+    #     fpbuffer['ParamTrackingVars'] = {}
+    #     paramItems = []
+    #     for loc in nexusDict.values():
+    #         self.HDF5entries = []
+    #         paramItems = self.RecurseH5Element(fp[loc],length=self.numparams)
+    #         for i in paramItems:
+    #             for j in i:
+    #                 key = loc+'/'+'/'.join(j)
+    #                 print(key)
+    #                 obj = fp.get(key)
+    #                 if obj is None: continue
+    #                 if len(obj[()].shape) != 1: continue
+    #                 # are all values the same? If so, put them into the comments
+    #                 # for the first histogram only. If they are changing, note that 
+    #                 # here and later they will be put into every histogram.
+    #                 if all(obj[0] == obj):
+    #                     self.comments.append(f'{key.split("/")[-1]}={obj[0]}')
+    #                 else:
+    #                     fpbuffer['ParamTrackingVars'][key] = np.array(obj[()])
 
-    def readNXazint1d(self, filename, fpbuffer={}):
-        '''Read HDF5 file in NeXus as produced by MAX IV as a NXazint1d
-        see https://nxazint-hdf5-nexus-3229ecbd09ba8a773fbbd8beb72cace6216dfd5063e1.gitlab-pages.esrf.fr/classes/contributed_definitions/NXazint1d.html
+    #     breakpoint()
+    
+    #     paramItems = self.RecurseH5Element(fp,node=loc,length=self.numparams)
+    #     for i in paramItems:
+    #         for j in i:
+    #             key = '/'.join(j)
+    #             if key in savedKeys: continue # standard data array
+    #             obj = fp.get(key)
+    #             if obj is None: continue
+    #             if len(obj[()].shape) != 1: continue
+    #             # are all values the same? If so, put them into the comments
+    #             # for the first histogram only. If they are changing, note that 
+    #             # here and later they will be put into every histogram.
+    #             if all(obj[0] == obj):
+    #                 self.comments.append(f'{key.split("/")[-1]}={obj[0]}')
+    #             else:
+    #                 fpbuffer['ParamTrackingVars'][key] = np.array(obj[()])
+                    
+    def readInNeXus(self,filename,fpbuffer,fileItems,fmt,entry=None):
+        '''Read in items from NeXus labeled sections of the HDF5 file
         '''
-        #self.instmsg = 'HDF file'
         self.comments = []
         doread = False # has the file already been read into a buffer?
-        fileItems = {
-            # arrays
-            'radial_axis':('NXdata','radial_axis'),
-            'I':('NXdata','I'),
-            'I_errors':('NXdata','I_errors'),
-            # floats
-            'wavelength':('NXmonochromator','wavelength'),
-            'polarization_factor':('NXparameters','polarization_factor'),
-            # strings
-            'instrument/name':('NXinstrument','name'),
-            'unit':('NXparameters','unit'),
-            'sample/name':('NXsample','name'),
-            'source/name':('NXsource','name'),
-        }
-        # test if we have what we need in the buffer
         for k in fileItems:
             if k not in fpbuffer:                
                 doread = True
@@ -256,12 +367,14 @@ def readNXazint1d(self, filename, fpbuffer={}):
             # Nope, need to fill the buffer
             try:
                 fp = h5py.File(filename, 'r')
-                entry = getNeXusBase(fp)
-                # lookup NeXus locations
+                if entry is None: entry = getNeXusBase(fp)
+                # lookup keys for NeXus labels we will use
                 nexusDict = {i:None for i in set([i[0] for i in fileItems.values()])}
                 recurseNeXusEntries(fp,entry,nexusDict)
-                # save selected items from file in buffer
-                savedKeys = []
+                # save selected items from file into buffer
+                # convert all objects into values or non-HDF5 objects so file
+                # be closed
+                savedKeys = [] # things we will not need to save in the 2nd scan
                 for k,loc in fileItems.items():
                     if nexusDict[loc[0]] is None:
                         fpbuffer[k] = None
@@ -277,55 +390,55 @@ def readNXazint1d(self, filename, fpbuffer={}):
                     elif 'float' in str(val.dtype):
                         fpbuffer[k] = float(val[()])
                         self.comments.append(f'{k}={val[()]}')
+                    elif 'int' in str(val.dtype):
+                        fpbuffer[k] = int(val[()])
                     else:
                         fpbuffer[k] = val[()].decode()
                         self.comments.append(f'{k}={fpbuffer[k]}')
-                self.numbanks = len(fpbuffer['I'])
+                if fpbuffer['unit'] != '2th':
+                    print(f'{fmt} HDF5 file has units',fpbuffer['unit'])
+                    self.errors = f'{fmt} only can be read with 2theta units'
+                    return False
+                self.numparams = len(fpbuffer['I'])
                 # save arrays that are potentially tracking the parametric conditions
                 # into ParamTrackingVars.
                 # e.g. variables with the same length as the humber of datasets
-                fpbuffer['ParamTrackingVars'] = {}
-                paramItems = self.RecurseH5Element(fp,length=self.numbanks)
-                for i in paramItems:
-                    for j in i:
-                        key = '/'.join(j)
-                        if key in savedKeys: continue # standard data array
-                        obj = fp.get(key)
-                        if obj is None: continue
-                        if len(obj[()].shape) != 1: continue
-                        # are all values the same? If so, put them into the comments
-                        # for the first histogram only. If they are changing, note that 
-                        # here and later they will be put into every histogram.
-                        if all(obj[0] == obj):
-                            self.comments.append(f'{key.split("/")[-1]}={obj[0]}')
-                        else:
-                            fpbuffer['ParamTrackingVars'][key] = np.array(obj[()])
+                if 'validate' not in fmt:
+                    fpbuffer['ParamTrackingVars'] = {}
+                    paramItems = []
+                    for loc in nexusDict.values():
+                        self.HDF5entries = []
+                        paramItems = self.RecurseH5Element(fp[loc],length=self.numparams)
+                        for i in paramItems:
+                            for j in i:
+                                key = loc+'/'+'/'.join(j)
+                                print(key)
+                                obj = fp.get(key)
+                                if obj is None: continue
+                                if len(obj[()].shape) != 1: continue
+                                # are all values the same? If so, put them into the comments
+                                # for the first histogram only. If they are changing, note that 
+                                # here and later they will be put into every histogram.
+                                if all(obj[0] == obj):
+                                    self.comments.append(f'{key.split("/")[-1]}={obj[0]}')
+                                else:
+                                    fpbuffer['ParamTrackingVars'][key] = np.array(obj[()])
             except IOError:
                 print (f'Can not open or read file {filename}')
                 return False
             finally:
                 fp.close()
-            if fpbuffer['unit'] != '2th':
-                print('NXazint1d HDF5 file has units',fpbuffer['entry/reduction/input/unit'])
-                self.errors = 'NXazint1d only can be read with 2th units'
-                return False
             # initialize the block selection
             if self.selections is None or len(self.selections) == 0:
                 self.blknum = 0
             else:
                 self.blknum = min(self.selections)
-        # now pull the selected dataset from the buffer
-        x = fpbuffer['radial_axis']
-        y = fpbuffer['I'][self.blknum]
-        try:
-            esd = fpbuffer['I_errors'][self.blknum]
-            w = np.where(esd==0,0,np.nan_to_num(1/esd**2))
-        except:
-            w = np.nan_to_num(1/y)    # best we can do, alas w/o reported s.u.'s
-        self.powderdata = [x,y,w,np.zeros_like(x),np.zeros_like(x),np.zeros_like(x)]
+        return True
+
+    def FillInParametics(self,fpbuffer,count):
         # add parametric var as a comment
         for key,arr in fpbuffer['ParamTrackingVars'].items():
-            val = arr[self.blknum]
+            val = arr[count]
             self.comments.append(f'{key.split("/")[-1]}={val}')
             if 'temperature' in key:
                 self.Sample['Temperature'] = val # in K already
@@ -337,6 +450,43 @@ def readNXazint1d(self, filename, fpbuffer={}):
                 self.Sample['Phi'] = val
             elif 'omega' in key:
                 self.Sample['Omega'] = val
+
+    def readNXazint1d(self, filename, fpbuffer={}, entry=None):
+        '''Read HDF5 file in NeXus as produced by MAX IV as a NXazint1d.
+        In this file, multiple scans are placed in a 2-D array (I and I_errors in 
+        section NXdata), where one dimension is 2-theta and the other is a parametric 
+        value such as temperature, time, etc. 
+
+        see https://nxazint-hdf5-nexus-3229ecbd09ba8a773fbbd8beb72cace6216dfd5063e1.gitlab-pages.esrf.fr/classes/contributed_definitions/NXazint1d.html
+        '''
+        #self.instmsg = 'HDF file'
+        fileItems = {
+            # arrays
+            'radial_axis':('NXdata','radial_axis'),
+            'I':('NXdata','I'),
+            'I_errors':('NXdata','I_errors'),
+            # floats
+            'wavelength':('NXmonochromator','wavelength'),
+            'polarization_factor':('NXparameters','polarization_factor'),
+            # strings
+            'instrument/name':('NXinstrument','name'),
+            'unit':('NXparameters','unit'),
+            'sample/name':('NXsample','name'),
+            'source/name':('NXsource','name'),
+        }
+        # test if we have what we need in the buffer and if not read it in
+        if not self.readInNeXus(filename,fpbuffer,fileItems,'NXazint1d',entry): return False
+        # now pull the selected dataset from the buffer
+        self.numbanks = self.numparams 
+        x = fpbuffer['radial_axis']
+        y = fpbuffer['I'][self.blknum]
+        try:
+            esd = fpbuffer['I_errors'][self.blknum]
+            w = np.where(esd==0,0,np.nan_to_num(1/esd**2))
+        except:
+            w = np.nan_to_num(1/y)    # best we can do, alas w/o reported s.u.'s
+        self.powderdata = [x,y,w,np.zeros_like(x),np.zeros_like(x),np.zeros_like(x)]
+        self.FillInParametics(fpbuffer,self.blknum)
         self.powderentry[0] = filename
         #self.powderentry[1] = Pos # position offset (never used, I hope)
         self.powderentry[2] = self.blknum  # bank number
@@ -357,6 +507,71 @@ def readNXazint1d(self, filename, fpbuffer={}):
                     self.repeat = False
         return True
 
+    def readNXazint2d(self, filename, fpbuffer={}, entry=None):
+        '''Read HDF5 file in NeXus as produced by MAX IV as a NXazint2d
+
+        In this file, multiple scans are placed in a 3-D array (I and I_errors in 
+        section NXdata), where one dimension is 2-theta and another is the azimuthal value
+        and the third are a parametric value(s) such as temperature, time, etc.
+
+        see https://nxazint-hdf5-nexus-3229ecbd09ba8a773fbbd8beb72cace6216dfd5063e1.gitlab-pages.esrf.fr/classes/contributed_definitions/NXazint2d.html
+        '''
+        self.comments = []
+        fileItems = {
+            # arrays
+            'radial_axis':('NXdata','radial_axis'),
+            'azimuthal_axis':('NXdata','azimuthal_axis'),
+            'I':('NXdata','I'),
+            'I_errors':('NXdata','I_errors'),
+            # floats
+            'wavelength':('NXmonochromator','wavelength'),
+            'polarization_factor':('NXparameters','polarization_factor'),
+            # strings
+            'instrument/name':('NXinstrument','name'),
+            'unit':('NXparameters','unit'),
+            'azimuth_bins':('NXparameters','azimuth_bins'),
+            'sample/name':('NXsample','name'),
+            'source/name':('NXsource','name'),
+        }
+        # test if we have what we need in the buffer and if not read it in
+        if not self.readInNeXus(filename,fpbuffer,fileItems,'NXazint2d',entry): return False
+        # now pull the selected dataset from the buffer
+        self.numazimuth = fpbuffer['azimuth_bins']
+        self.numbanks = self.numparams * self.numazimuth
+        # group by parametric variable
+        numScan = self.blknum // self.numazimuth
+        numAzim = self.blknum - (numScan * self.numazimuth)
+        x = fpbuffer['radial_axis']
+        y = fpbuffer['I'][numScan][numAzim]
+        try:
+            esd = fpbuffer['I_errors'][numScan][numAzim]
+            w = np.where(esd==0,0,np.nan_to_num(1/esd**2))
+        except:
+            w = np.nan_to_num(1/y)    # best we can do, alas w/o reported s.u.'s
+        self.powderdata = [x,y,w,np.zeros_like(x),np.zeros_like(x),np.zeros_like(x)]
+        self.Sample['Azimuth'] = fpbuffer['azimuthal_axis'][numAzim]
+        # add parametric var as a comment
+        self.FillInParametics(fpbuffer,numScan)
+        self.powderentry[0] = filename
+        #self.powderentry[1] = Pos # position offset (never used, I hope)
+        self.powderentry[2] = self.blknum  # bank number
+        self.idstring = f'#{numScan} Azm={self.Sample["Azimuth"]} {os.path.split(filename)[1][:60]}'
+        self.instdict['wave'] = fpbuffer['wavelength']
+        # if not, are there more [selected] images that after this to be read?
+        self.repeat = False
+        if self.blknum < self.numbanks-1:
+            if self.selections is None or len(self.selections) == 0:
+                self.blknum += 1
+                self.repeat = True
+            else:
+                try:
+                    s = sorted(self.selections)
+                    self.blknum = s[s.index(self.blknum)+1]
+                    self.repeat = True
+                except IndexError:   # last selected image has been read
+                    self.repeat = False
+        return True
+    
 # NeXus support routines. These were influenced heavily by Frederik Holm Gjørup
 # Also see NeXus support in plaid (https://github.com/fgjorup/plaid/blob/main/plaid/nexus.py)
 
@@ -371,19 +586,22 @@ def getNeXusBase(fp):
             return key
 
 def getNeXusEntry(fp,base,target):
-    '''This returns the entry in a NeXus compilant HDF5 file matching 
-    the name target, or None, if this is not found as a child of the key `base`.
-    Not in use as it is more practical to use :func:`recurseNeXusEntries`.
+    '''This returns a list of entries in a NeXus compliant HDF5 file matching 
+    the name target, or an empty list, if this is not found. This only
+    looks for the direct children of the key `base`.
     '''
+    keyList = []
     for key in fp[base]:
         subkey = '/'.join([base,key])
         if "NX_class" in fp[subkey].attrs:
             #print(key, list(fp[subkey].attrs),fp[subkey].attrs["NX_class"])
             if ("NX_class" in fp[subkey].attrs and
                     fp[subkey].attrs["NX_class"] == target):
-                return subkey
-        else:
-            print(key)
+                keyList.append(subkey)
+    return keyList
+#        else:
+#            print(key)
+
 
 def recurseNeXusEntry(fp,node,target):
     '''Recurse through the HDF5 tree looking for NeXus class `target`. 

From d41d4b46744b2990a8cc9a68374536c187356dc3 Mon Sep 17 00:00:00 2001
From: BHT <toby@anl.gov>
Date: Sat, 18 Oct 2025 14:04:52 -0500
Subject: [PATCH 6/7] clean up and test against files

---
 GSASII/imports/G2pwd_HDF5.py | 571 ++++++++++++++++-------------------
 1 file changed, 255 insertions(+), 316 deletions(-)

diff --git a/GSASII/imports/G2pwd_HDF5.py b/GSASII/imports/G2pwd_HDF5.py
index 03fcc3f4e..c3d2a5d88 100644
--- a/GSASII/imports/G2pwd_HDF5.py
+++ b/GSASII/imports/G2pwd_HDF5.py
@@ -1,11 +1,9 @@
 # -*- coding: utf-8 -*-
 '''Use to read powder patterns from HDF5 files. At present the only supported 
-format is a NeXus variant named NXazint1d. 
+formats are two NeXus variants from MaxIV named NXazint1d and NXazint2d,
+but this can be expanded to handle more HDF5/NeXus formats.
 '''
-
-from __future__ import division, print_function
 import os
-
 try:
     import h5py
 except ImportError:
@@ -14,9 +12,6 @@
 from .. import GSASIIobj as G2obj
 from .. import GSASIIfiles as G2fil
 
-#from .. import GSASIIpath
-#breakpoint = GSASIIpath.IPyBreak_base
-
 class HDF5_Reader(G2obj.ImportPowderData):
     '''Routine to read multiple powder patterns from an HDF5 file. 
 
@@ -29,7 +24,6 @@ class HDF5_Reader(G2obj.ImportPowderData):
     Any parameters placed in that file will override values set in the HDF5
     file. 
     '''
-    #mode = None
     def __init__(self):
         if h5py is None:
             self.UseReader = False
@@ -37,136 +31,20 @@ def __init__(self):
             G2fil.ImportErrorMsg(msg,{'HDF5 importer':['h5py','hdf5']})
         super(self.__class__,self).__init__( # fancy way to self-reference
             extensionlist=('.hdf','.h5'),strictExtension=True,
-            formatName = 'MAX IV HDF5',longFormatName = 'MaxIV NXazint1d HDF5 integrated scans')
+            formatName = 'MAXIV NeXus',longFormatName = 'Max IV NXazintXd NeXus integrated scans')
         self.scriptable = True
-        #self.Iparm = {} #only filled for EDS data
-
-    def ShowH5Element(self,obj,keylist):
-        '''Format the contents of an HDF5 entry as a single line. Not used for 
-        reading files, only used in :meth:`HDF5list` which is here for software
-        development. 
-        '''
-        k = '/'.join(keylist)
-        l = obj.get(k, getlink=True)
-        if isinstance(l, h5py.ExternalLink): 
-            return f'link to file {l.filename}'
-        try:
-            typ = str(type(obj[k]))
-        except:
-            return f'**Error** with key {k}'
-            
-        if ".Dataset'" in typ:
-            datfmt = obj[k].dtype
-            if datfmt == 'O' or str(datfmt).startswith('|S'):
-                # byte string
-                return f'value={obj[k][()].decode()}'
-            elif datfmt == 'bool': # Bool
-                return f'value={bool(obj[k][()])}'
-            elif datfmt in ('<f8', 'uint8', 'int64', '<f4'): # scalar value or array of values
-                try:
-                    len(obj[k][()])
-                    return f'array {obj[k].shape}'
-                except:
-                    return f'value={obj[k][()]}'
-            else:
-                return f'dataset of type {repr(datfmt)}'
-        elif ".Group'" in typ:
-            return "(group)"
-        else:
-            return f'type is {type(obj[k])}'
-
-    def RecurseH5Element(self,obj,prefix=[],length=None):
-        '''Returns a list of entries of all keys in the HDF5 file
-        (or group) in `obj`. Note that `obj` can be a file object, created by 
-        `h5py.File` or can be a subset `fp['key/subkey']`.
-        
-        If length is specified, only the entries with arrays of that
-        length are returned.
 
-        The returned list is organized where: 
-          * entry 0 is the top-level keys (/a, /b,...),
-          * entry 1 has the first level keys (/a/c /a/d, /b/d, /b/e,...)
-          * ...
-        '''
-        try:
-            self.HDF5entries
-        except AttributeError:
-            self.HDF5entries = []
-        depth = len(prefix)
-        if len(self.HDF5entries) < depth+1:
-            self.HDF5entries.append([])
-        for i in obj:
-            nextprefix = prefix+[i]
-            if length is None:
-                self.HDF5entries[depth].append(nextprefix)
-            try:
-                typ = str(type(obj[i]))
-            except:
-                print(f'**Error** with key {prefix}/{i}')
-                continue
-            if length is not None and ".Group'" not in typ:
-                # get length of this obj[i]
-                try:
-                    if len(obj[i]) == length:
-                        self.HDF5entries[depth].append(nextprefix)
-                except TypeError:
-                    continue
-            # check for link objects
-            l = obj.get(i, getlink=True)
-            if isinstance(l, h5py.ExternalLink): continue
-            if ".Group'" in typ:
-                #t = f'{prefix}/{i}'
-                #print(f'\n{nextprefix} contents {(60-len(t))*'='}')
-                self.RecurseH5Element(obj[i],nextprefix,length)
-        return self.HDF5entries
-        
-                
-    def HDF5list(self, filename):
-        '''Shows the contents of an HDF5 file as a short listing. 
-        This is not used for HDF5 reading, but is of help with a new
-        type of HDF5 file to see what is present.
-
-        :param filename: 
-        '''
-        def ShowH5NeXusName(obj,keylist):
-            key = '/'.join(keylist)
-            if "NX_class" in obj[key].attrs:
-                return obj[key].attrs["NX_class"]
-
-        fp = h5py.File(filename, 'r')
-        #print(f'Contents of {filename}')
-        HDF5entries = self.RecurseH5Element(fp)
-        strings = []
-        for i,j in enumerate(HDF5entries):
-            if not strings or strings[-1] != 60*'=': 
-                strings.append(60*'=')
-            m = 0
-            for k in j:
-                m = max(m,len('/'.join(k)))
-            for k in j:
-                nxname = ShowH5NeXusName(fp,k)
-                lbl = self.ShowH5Element(fp,k)
-                if '\n' in lbl:
-                    lbl = '; '.join(lbl.split('\n'))
-                if len(lbl) > 50:
-                    lbl = lbl[:50] + '...'
-                # if '\n' in lbl:
-                #     lbl = lbl.split()[0] + '...'
-                if lbl != '(group)': strings.append(f"{'/'.join(k):{m}s} {lbl}")
-                if nxname: print(f"{'/'.join(k):{m}s} {lbl} {nxname}")
-        with open(filename+'_contents.txt', 'w') as fp:
-            for i in strings: fp.write(f'{i}\n')
-                    
     def ContentsValidator(self, filename):
         '''Test if valid by seeing if the HDF5 library recognizes the file. 
-        Then get file type (currently MAX IV NeXus/NXazint[12]d only)
+        Then get file type (currently MAX IV NXazint[12]d (NeXus) only)
         '''
         try:
             definition = ''
             fp = h5py.File(filename, 'r')
             # test for MaxIV NeXus/NXazint1d & NXazint2d
             test = True
-            while test:
+            while test: # block for standard NXazint1d and NXazint2d files,
+                        # use break to bail out and try next block
                 test = False
                 entry = getNeXusBase(fp)
                 if entry is None: break # not NeXus
@@ -209,7 +87,8 @@ def ContentsValidator(self, filename):
                     return True
             # test for MaxIV NeXus combined NXazint1d & NXazint2d
             test = True
-            while test:
+            while test:  # block for combined NXazint1d and NXazint2d files,
+                         # use break to bail out and try next block
                 test = False
                 entry = getNeXusBase(fp)
                 subentry = getNeXusEntry(fp,entry,'NXsubentry')
@@ -234,8 +113,8 @@ def Reader(self, filename, ParentFrame=None, **kwarg):
 
         Since usually there will be lots of scans in a single file, 
         the goal is that the first pass should read the file into 
-        a buffer (if available) and subsequent calls will not 
-        need to access the file. 
+        a buffer (if available) and subsequent calls can use the 
+        buffer and will not need to access the file. 
         '''
         fpbuffer = kwarg.get('buffer',{})
         if not hasattr(self,'blknum'):
@@ -243,26 +122,26 @@ def Reader(self, filename, ParentFrame=None, **kwarg):
                 self.blknum = 0
             else:
                 self.blknum = min(self.selections)
-        # was file read into buffer? If so skip opening file to save time
+        # was file already read into buffer? If so, skip opening file to save time
         definition = fpbuffer.get('definition','')
         if definition == 'NXazint1d':
             return self.readNXazint1d(filename, fpbuffer)
         elif definition == 'NXazint2d':
             return self.readNXazint2d(filename, fpbuffer)
-        # first or non-buffered read
-        try:
+
+        try:        # first or non-buffered read
             fp = h5py.File(filename, 'r')
-            entry = getNeXusBase(fp)
-            if entry: # NeXus
+            entry = getNeXusBase(fp) # test for NeXus
+            if entry:   # This is NeXus
                 if 'definition' in fp[entry]: # MaxIV NXazint*
                     definition = fp[entry+'/definition'][()].decode()
-                else:
+                else: # is this a combined NXazint1d/NXazint2d file?
                     subentry = getNeXusEntry(fp,entry,'NXsubentry')
                     if len(subentry) == 0:
                         return False
                     elif len(subentry) == 1:
                         entry = subentry[0]
-                    elif ParentFrame:
+                    elif ParentFrame: # interactive, let the user decide
                         from .. import GSASIIctrlGUI as G2G
                         choices = ('NXazint1d 1D file','NXazint1d 2D file')
                         sel = G2G.ItemSelector(choices, ParentFrame=ParentFrame,
@@ -270,186 +149,24 @@ def Reader(self, filename, ParentFrame=None, **kwarg):
                                                    title='Select the section of the file to read')
                         if sel is None: return False
                         entry = subentry[sel]
-                    else:
+                    else:   # scripted, assume if 2D is present, that is what is wanted
                         entry = subentry[1]
                     if 'definition' not in fp[entry]: return False
                     definition = fp[entry+'/definition'][()].decode()
+                # got a file type, save it and if recognized, read it
                 fpbuffer['definition'] = definition
                 if definition == 'NXazint1d':
                     return self.readNXazint1d(filename, fpbuffer, entry)
                 elif definition == 'NXazint2d':
                     return self.readNXazint2d(filename, fpbuffer, entry)
             return False # not a supported file type
-        except IOError:
+        except IOError:  # unexpected since this was validated
             print (f'cannot open file {filename}')
             return False
         finally:
             fp.close()
         print (f'Unknown type of HDF5 powder file {filename}')
         return False
-    
-    # def FillBuffer(self,fp,fileItems,fpbuffer,nexusDict):
-    #     '''save selected items from file in buffer
-    #     '''
-    #     savedKeys = []
-    #     for k,loc in fileItems.items():
-    #         if nexusDict[loc[0]] is None:
-    #             fpbuffer[k] = None
-    #             continue
-    #         key = '/'.join((nexusDict[loc[0]],)+loc[1:])
-    #         savedKeys.append(key)
-    #         if key not in fp:
-    #             fpbuffer[k] = None
-    #             continue
-    #         val = fp[key]
-    #         if val.shape:
-    #             fpbuffer[k] = np.array(val)
-    #         elif 'float' in str(val.dtype):
-    #             fpbuffer[k] = float(val[()])
-    #             self.comments.append(f'{k}={val[()]}')
-    #         elif 'int' in str(val.dtype):
-    #             fpbuffer[k] = int(val[()])
-    #         else:
-    #             fpbuffer[k] = val[()].decode()
-    #             self.comments.append(f'{k}={fpbuffer[k]}')
-    #     self.numparams = len(fpbuffer['I'])
-    #     # save arrays that are potentially tracking the parametric conditions
-    #     # into ParamTrackingVars.
-    #     # e.g. variables with the same length as the humber of datasets
-    #     fpbuffer['ParamTrackingVars'] = {}
-    #     paramItems = []
-    #     for loc in nexusDict.values():
-    #         self.HDF5entries = []
-    #         paramItems = self.RecurseH5Element(fp[loc],length=self.numparams)
-    #         for i in paramItems:
-    #             for j in i:
-    #                 key = loc+'/'+'/'.join(j)
-    #                 print(key)
-    #                 obj = fp.get(key)
-    #                 if obj is None: continue
-    #                 if len(obj[()].shape) != 1: continue
-    #                 # are all values the same? If so, put them into the comments
-    #                 # for the first histogram only. If they are changing, note that 
-    #                 # here and later they will be put into every histogram.
-    #                 if all(obj[0] == obj):
-    #                     self.comments.append(f'{key.split("/")[-1]}={obj[0]}')
-    #                 else:
-    #                     fpbuffer['ParamTrackingVars'][key] = np.array(obj[()])
-
-    #     breakpoint()
-    
-    #     paramItems = self.RecurseH5Element(fp,node=loc,length=self.numparams)
-    #     for i in paramItems:
-    #         for j in i:
-    #             key = '/'.join(j)
-    #             if key in savedKeys: continue # standard data array
-    #             obj = fp.get(key)
-    #             if obj is None: continue
-    #             if len(obj[()].shape) != 1: continue
-    #             # are all values the same? If so, put them into the comments
-    #             # for the first histogram only. If they are changing, note that 
-    #             # here and later they will be put into every histogram.
-    #             if all(obj[0] == obj):
-    #                 self.comments.append(f'{key.split("/")[-1]}={obj[0]}')
-    #             else:
-    #                 fpbuffer['ParamTrackingVars'][key] = np.array(obj[()])
-                    
-    def readInNeXus(self,filename,fpbuffer,fileItems,fmt,entry=None):
-        '''Read in items from NeXus labeled sections of the HDF5 file
-        '''
-        self.comments = []
-        doread = False # has the file already been read into a buffer?
-        for k in fileItems:
-            if k not in fpbuffer:                
-                doread = True
-                break
-        if doread:
-            # Nope, need to fill the buffer
-            try:
-                fp = h5py.File(filename, 'r')
-                if entry is None: entry = getNeXusBase(fp)
-                # lookup keys for NeXus labels we will use
-                nexusDict = {i:None for i in set([i[0] for i in fileItems.values()])}
-                recurseNeXusEntries(fp,entry,nexusDict)
-                # save selected items from file into buffer
-                # convert all objects into values or non-HDF5 objects so file
-                # be closed
-                savedKeys = [] # things we will not need to save in the 2nd scan
-                for k,loc in fileItems.items():
-                    if nexusDict[loc[0]] is None:
-                        fpbuffer[k] = None
-                        continue
-                    key = '/'.join((nexusDict[loc[0]],)+loc[1:])
-                    savedKeys.append(key)
-                    if key not in fp:
-                        fpbuffer[k] = None
-                        continue
-                    val = fp[key]
-                    if val.shape:
-                        fpbuffer[k] = np.array(val)
-                    elif 'float' in str(val.dtype):
-                        fpbuffer[k] = float(val[()])
-                        self.comments.append(f'{k}={val[()]}')
-                    elif 'int' in str(val.dtype):
-                        fpbuffer[k] = int(val[()])
-                    else:
-                        fpbuffer[k] = val[()].decode()
-                        self.comments.append(f'{k}={fpbuffer[k]}')
-                if fpbuffer['unit'] != '2th':
-                    print(f'{fmt} HDF5 file has units',fpbuffer['unit'])
-                    self.errors = f'{fmt} only can be read with 2theta units'
-                    return False
-                self.numparams = len(fpbuffer['I'])
-                # save arrays that are potentially tracking the parametric conditions
-                # into ParamTrackingVars.
-                # e.g. variables with the same length as the humber of datasets
-                if 'validate' not in fmt:
-                    fpbuffer['ParamTrackingVars'] = {}
-                    paramItems = []
-                    for loc in nexusDict.values():
-                        self.HDF5entries = []
-                        paramItems = self.RecurseH5Element(fp[loc],length=self.numparams)
-                        for i in paramItems:
-                            for j in i:
-                                key = loc+'/'+'/'.join(j)
-                                print(key)
-                                obj = fp.get(key)
-                                if obj is None: continue
-                                if len(obj[()].shape) != 1: continue
-                                # are all values the same? If so, put them into the comments
-                                # for the first histogram only. If they are changing, note that 
-                                # here and later they will be put into every histogram.
-                                if all(obj[0] == obj):
-                                    self.comments.append(f'{key.split("/")[-1]}={obj[0]}')
-                                else:
-                                    fpbuffer['ParamTrackingVars'][key] = np.array(obj[()])
-            except IOError:
-                print (f'Can not open or read file {filename}')
-                return False
-            finally:
-                fp.close()
-            # initialize the block selection
-            if self.selections is None or len(self.selections) == 0:
-                self.blknum = 0
-            else:
-                self.blknum = min(self.selections)
-        return True
-
-    def FillInParametics(self,fpbuffer,count):
-        # add parametric var as a comment
-        for key,arr in fpbuffer['ParamTrackingVars'].items():
-            val = arr[count]
-            self.comments.append(f'{key.split("/")[-1]}={val}')
-            if 'temperature' in key:
-                self.Sample['Temperature'] = val # in K already
-            elif 'time' in key:
-                self.Sample['Time'] = val # should be seconds
-            elif 'chi' in key:
-                self.Sample['Chi'] = val # not sure if correct mapping
-            elif 'phi' in key:
-                self.Sample['Phi'] = val
-            elif 'omega' in key:
-                self.Sample['Omega'] = val
 
     def readNXazint1d(self, filename, fpbuffer={}, entry=None):
         '''Read HDF5 file in NeXus as produced by MAX IV as a NXazint1d.
@@ -459,7 +176,6 @@ def readNXazint1d(self, filename, fpbuffer={}, entry=None):
 
         see https://nxazint-hdf5-nexus-3229ecbd09ba8a773fbbd8beb72cace6216dfd5063e1.gitlab-pages.esrf.fr/classes/contributed_definitions/NXazint1d.html
         '''
-        #self.instmsg = 'HDF file'
         fileItems = {
             # arrays
             'radial_axis':('NXdata','radial_axis'),
@@ -484,11 +200,10 @@ def readNXazint1d(self, filename, fpbuffer={}, entry=None):
             esd = fpbuffer['I_errors'][self.blknum]
             w = np.where(esd==0,0,np.nan_to_num(1/esd**2))
         except:
-            w = np.nan_to_num(1/y)    # best we can do, alas w/o reported s.u.'s
+            w = np.nan_to_num(1/y)    # best we can do, alas. W/o reported s.u.'s
         self.powderdata = [x,y,w,np.zeros_like(x),np.zeros_like(x),np.zeros_like(x)]
         self.FillInParametics(fpbuffer,self.blknum)
         self.powderentry[0] = filename
-        #self.powderentry[1] = Pos # position offset (never used, I hope)
         self.powderentry[2] = self.blknum  # bank number
         self.idstring = f'#{self.blknum} {os.path.split(filename)[1][:60]}'
         self.instdict['wave'] = fpbuffer['wavelength']
@@ -547,13 +262,12 @@ def readNXazint2d(self, filename, fpbuffer={}, entry=None):
             esd = fpbuffer['I_errors'][numScan][numAzim]
             w = np.where(esd==0,0,np.nan_to_num(1/esd**2))
         except:
-            w = np.nan_to_num(1/y)    # best we can do, alas w/o reported s.u.'s
+            w = np.nan_to_num(1/y)    # best we can do, alas. W/o reported s.u.'s
         self.powderdata = [x,y,w,np.zeros_like(x),np.zeros_like(x),np.zeros_like(x)]
         self.Sample['Azimuth'] = fpbuffer['azimuthal_axis'][numAzim]
         # add parametric var as a comment
         self.FillInParametics(fpbuffer,numScan)
         self.powderentry[0] = filename
-        #self.powderentry[1] = Pos # position offset (never used, I hope)
         self.powderentry[2] = self.blknum  # bank number
         self.idstring = f'#{numScan} Azm={self.Sample["Azimuth"]} {os.path.split(filename)[1][:60]}'
         self.instdict['wave'] = fpbuffer['wavelength']
@@ -572,9 +286,232 @@ def readNXazint2d(self, filename, fpbuffer={}, entry=None):
                     self.repeat = False
         return True
     
+    def readInNeXus(self,filename,fpbuffer,fileItems,fmt,entry=None):
+        '''Read in items from NeXus labeled sections of the HDF5 file.
+
+        For files where we are reading from a NXsubentry section
+        rather than NXentry, variable `entry` is a pointer to the
+        selected NXsubentry section, so only that portion of the
+        tree is used. If `entry` is None, the NXentry is located
+        with :func:`getNeXusBase`.
+        '''
+        self.comments = []
+        doread = False # has the file already been read into a buffer?
+        for k in fileItems:
+            if k not in fpbuffer:                
+                doread = True
+                break
+        if doread:
+            # Nope, need to fill the buffer
+            try:
+                fp = h5py.File(filename, 'r')
+                if entry is None: entry = getNeXusBase(fp)
+                # assemble list of used NeXus labels
+                nexusDict = {i:None for i in set([i[0] for i in fileItems.values()])}
+                # lookup keys for NeXus labels we will use
+                recurseNeXusEntries(fp,entry,nexusDict)
+                # save selected items from file into buffer
+                # Convert all entries read into values or non-HDF5 objects so file
+                # can be closed.
+                savedKeys = [] # things that have already been read
+                for k,loc in fileItems.items():
+                    if nexusDict[loc[0]] is None:
+                        fpbuffer[k] = None
+                        continue
+                    key = '/'.join((nexusDict[loc[0]],)+loc[1:])
+                    savedKeys.append(key)
+                    if key not in fp:
+                        fpbuffer[k] = None
+                        continue
+                    val = fp[key]
+                    if val.shape:
+                        fpbuffer[k] = np.array(val)
+                    elif 'float' in str(val.dtype):
+                        fpbuffer[k] = float(val[()])
+                        self.comments.append(f'{k}={val[()]}')
+                    elif 'int' in str(val.dtype):
+                        fpbuffer[k] = int(val[()])
+                    else:
+                        fpbuffer[k] = val[()].decode()
+                        self.comments.append(f'{k}={fpbuffer[k]}')
+                if fpbuffer['unit'] != '2th':
+                    print(f'{fmt} HDF5 file has units',fpbuffer['unit'])
+                    self.errors = f'{fmt} only can be read with 2theta units'
+                    return False
+                self.numparams = len(fpbuffer['I'])
+                # save arrays that are potentially tracking the parametric 
+                # conditions into ParamTrackingVars. These arrays will have 
+                # the same length as the number of datasets (self.numparams)
+                if 'validate' not in fmt: # skip if we are validating the file rather than reading it
+                    fpbuffer['ParamTrackingVars'] = {}
+                    paramItems = []
+                    for loc in nexusDict.values():
+                        if loc is None: continue  # a NeXus label is not present
+                        self.HDF5entries = []
+                        paramItems = self.RecurseH5Element(fp[loc],length=self.numparams)
+                        for i in paramItems:
+                            for j in i:
+                                key = loc+'/'+'/'.join(j)
+                                if key in savedKeys: continue
+                                savedKeys.append(key)
+                                obj = fp.get(key)
+                                if obj is None: continue
+                                if len(obj[()].shape) != 1: continue
+                                # are all values the same? If so, put them into the comments
+                                # for the first histogram only. If they are changing, note that 
+                                # here and later they will be put into every histogram.
+                                if all(obj[0] == obj):
+                                    self.comments.append(f'{key.split("/")[-1]}={obj[0]}')
+                                else:
+                                    fpbuffer['ParamTrackingVars'][key] = np.array(obj[()])
+            except IOError:
+                print (f'Cannot open or read file {filename}')
+                self.errors = f'{fmt} Can not open or read file {filename}'
+                return False
+            finally:
+                fp.close()
+            # initialize the block selection
+            if self.selections is None or len(self.selections) == 0:
+                self.blknum = 0
+            else:
+                self.blknum = min(self.selections)
+        return True
+
+    def FillInParametics(self,fpbuffer,count):
+        '''Put changing parametric variables into the comments; recognized
+        ones (temperature, time, chi, phi, omega) are also put in self.Sample'''
+        for key,arr in fpbuffer['ParamTrackingVars'].items():
+            val = arr[count]
+            self.comments.append(f'{key.split("/")[-1]}={val}')
+            if 'temperature' in key:
+                self.Sample['Temperature'] = val # in K already
+            elif 'time' in key:
+                self.Sample['Time'] = val # should be seconds
+            elif 'chi' in key:
+                self.Sample['Chi'] = val # not sure if correct mapping
+            elif 'phi' in key:
+                self.Sample['Phi'] = val
+            elif 'omega' in key:
+                self.Sample['Omega'] = val
+    
+    # HDF5 support routines.
+    def RecurseH5Element(self,obj,prefix=[],length=None):
+        '''Returns a list of entries of all keys in the HDF5 file
+        (or group) in `obj`. Note that `obj` can be a file object, created by 
+        `h5py.File`, or can be a group within the file, e.g. `fp['key/subkey']`.
+        
+        If length is specified, only the entries with arrays of that
+        length are returned.
+
+        The returned list is organized where: 
+          * entry 0 is the top-level keys (/a, /b,...),
+          * entry 1 has the first level keys (/a/c /a/d, /b/d, /b/e,...)
+          * ...
+        '''
+        try:
+            self.HDF5entries
+        except AttributeError:
+            self.HDF5entries = []
+        depth = len(prefix)
+        if len(self.HDF5entries) < depth+1:
+            self.HDF5entries.append([])
+        for i in obj:
+            nextprefix = prefix+[i]
+            if length is None:
+                self.HDF5entries[depth].append(nextprefix)
+            try:
+                typ = str(type(obj[i]))
+            except:
+                print(f'**Error** with key {prefix}/{i}')
+                continue
+            if length is not None and ".Group'" not in typ:
+                # get length of this obj[i]
+                try:
+                    if len(obj[i]) == length:
+                        self.HDF5entries[depth].append(nextprefix)
+                except TypeError:
+                    continue
+            # check for link objects
+            l = obj.get(i, getlink=True)
+            if isinstance(l, h5py.ExternalLink): continue
+            if ".Group'" in typ:
+                #t = f'{prefix}/{i}'
+                #print(f'\n{nextprefix} contents {(60-len(t))*'='}')
+                self.RecurseH5Element(obj[i],nextprefix,length)
+        return self.HDF5entries
+
+    def HDF5list(self, filename):
+        '''Shows the contents of an HDF5 file as a short listing. 
+        This is not used for HDF5 reading, but is of help with a new
+        type of HDF5 file to see what is present.
+
+        :param str filename: name of the HDF5 file to be listed
+        '''
+        def ShowH5NeXusName(obj,keylist):
+            key = '/'.join(keylist)
+            if "NX_class" in obj[key].attrs:
+                return obj[key].attrs["NX_class"]
+
+        fp = h5py.File(filename, 'r')
+        #print(f'Contents of {filename}')
+        HDF5entries = self.RecurseH5Element(fp)
+        strings = []
+        for i,j in enumerate(HDF5entries):
+            if not strings or strings[-1] != 60*'=': 
+                strings.append(60*'=')
+            m = 0
+            for k in j:
+                m = max(m,len('/'.join(k)))
+            for k in j:
+                nxname = ShowH5NeXusName(fp,k)
+                lbl = self.ShowH5Element(fp,k)
+                if '\n' in lbl:
+                    lbl = '; '.join(lbl.split('\n'))
+                if len(lbl) > 50:
+                    lbl = lbl[:50] + '...'
+                # if '\n' in lbl:
+                #     lbl = lbl.split()[0] + '...'
+                if lbl != '(group)': strings.append(f"{'/'.join(k):{m}s} {lbl}")
+                if nxname: print(f"{'/'.join(k):{m}s} {lbl} {nxname}")
+        with open(filename+'_contents.txt', 'w') as fp:
+            for i in strings: fp.write(f'{i}\n')
+
+    def ShowH5Element(self,obj,keylist):
+        '''Format the contents of an HDF5 entry as a single line. Not used for 
+        reading files, only used in :meth:`HDF5list`, which is here for software
+        development. 
+        '''
+        k = '/'.join(keylist)
+        l = obj.get(k, getlink=True)
+        if isinstance(l, h5py.ExternalLink): 
+            return f'link to file {l.filename}'
+        try:
+            typ = str(type(obj[k]))
+        except:
+            return f'**Error** with key {k}'
+            
+        if ".Dataset'" in typ:
+            datfmt = obj[k].dtype
+            if datfmt == 'O' or str(datfmt).startswith('|S'):
+                # byte string
+                return f'value={obj[k][()].decode()}'
+            elif datfmt == 'bool': # Bool
+                return f'value={bool(obj[k][()])}'
+            elif datfmt in ('<f8', 'uint8', 'int64', '<f4'): # scalar value or array of values
+                try:
+                    len(obj[k][()])
+                    return f'array {obj[k].shape}'
+                except:
+                    return f'value={obj[k][()]}'
+            else:
+                return f'dataset of type {repr(datfmt)}'
+        elif ".Group'" in typ:
+            return "(group)"
+        else:
+            return f'type is {type(obj[k])}'
+
 # NeXus support routines. These were influenced heavily by Frederik Holm Gjørup
 # Also see NeXus support in plaid (https://github.com/fgjorup/plaid/blob/main/plaid/nexus.py)
-
 def getNeXusBase(fp):
     '''This returns the base entry in a NeXus compilant HDF5 file
     (usually "/entry" for MaxIV files) or None if this is not a valid 
@@ -599,19 +536,18 @@ def getNeXusEntry(fp,base,target):
                     fp[subkey].attrs["NX_class"] == target):
                 keyList.append(subkey)
     return keyList
-#        else:
-#            print(key)
-
 
 def recurseNeXusEntry(fp,node,target):
-    '''Recurse through the HDF5 tree looking for NeXus class `target`. 
+    '''Recurse through the HDF5 tree looking for NeXus class `target`.
+    This stops after the first entry is found, and might be more useful
+    if it returned a list when multiple definitions are present. 
+
     Not in use, as :func:`recurseNeXusEntries` is used to get all 
     targets in a single pass through the tree.
     '''
     if node is None: return  # needed?
     val = fp[node]
-    if ("NX_class" in val.attrs and
-                val.attrs["NX_class"] == target):
+    if ("NX_class" in val.attrs and val.attrs["NX_class"] == target):
         return node
     if not isinstance(val, h5py.Group): return
     for key in val:
@@ -621,7 +557,10 @@ def recurseNeXusEntry(fp,node,target):
 
 def recurseNeXusEntries(fp,node,targetdict):
     '''recurse through the HDF5 tree looking for the NeXus classes
-    in `targetdict`, storing the HDF5 key for each class in the dict
+    in `targetdict`, storing the HDF5 key for each class in the dict.
+    Note that if a NeXus class is used more than once, only the
+    last encountered location will be saved. Use :func:`getNeXusEntry`
+    when one needs to process all entries tagged with a class.
 
     :param fp: HDF5 file pointer
     :param str node: name of current HDF5 key

From 68798d036f1bfd81fd7e529851f7d3a73e75ea81 Mon Sep 17 00:00:00 2001
From: BHT <toby@anl.gov>
Date: Sat, 18 Oct 2025 14:29:01 -0500
Subject: [PATCH 7/7] typo

---
 GSASII/imports/G2pwd_HDF5.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GSASII/imports/G2pwd_HDF5.py b/GSASII/imports/G2pwd_HDF5.py
index c3d2a5d88..004146af6 100644
--- a/GSASII/imports/G2pwd_HDF5.py
+++ b/GSASII/imports/G2pwd_HDF5.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 '''Use to read powder patterns from HDF5 files. At present the only supported 
-format are two NeXus variants from MaxIV named NXazint1d and NXazint1d,
+format are two NeXus variants from MaxIV named NXazint1d and NXazint2d,
 but this can be expanded to handle more HDF5/NeXus formats
 '''
 import os