rework HDF5 importers for MaxIV

briantoby · briantoby · commit c7ff0e46ef3c · 2025-10-02T19:09:39.000-05:00
diff --git a/GSASII/GSASIIdataGUI.py b/GSASII/GSASIIdataGUI.py
@@ -1616,7 +1616,7 @@ def GetDefaultParms(self,rd):
                 else:
                     rd.instmsg = 'default: '+dI.defaultIparm_lbl[res]
                     inst1,inst2 = self.ReadPowderInstprm(dI.defaultIparms[res],bank,rd)
-                    if rd.instdict.get('wave'):
+                    if rd.instdict.get('wave') and 'Lam' in inst1:
                         inst1['Lam'][0] = rd.instdict.get('wave')
                         inst1['Lam'][1] = rd.instdict.get('wave')
                     return [inst1,inst2]
diff --git a/GSASII/imports/G2img_HDF5.py b/GSASII/imports/G2img_HDF5.py
@@ -54,12 +54,17 @@ def Reader(self, filename, ParentFrame=None, **kwarg):
             return False
         imagenum = kwarg.get('blocknum')
         if imagenum is None: imagenum = 1
+        quick = False
         # do we have a image number or a map to the section with the image?
         try:
-            int(imagenum)
-            # set up an index as to where images are found
-            self.buffer = kwarg.get('buffer',{})
-            if not self.buffer.get('imagemap'):
+            int(imagenum) # test if image # is a tuple
+        except: # pull the section name and number out from the imagenum value
+            kwargs = {'name':imagenum[0],'num':imagenum[1]}
+            quick = True
+        # set up an index as to where images are found
+        self.buffer = kwarg.get('buffer',{})
+        if not quick and not self.buffer.get('imagemap'):
+            try:
                 if GSASIIpath.GetConfigValue('debug'): print('Scanning for image map')
                 self.buffer['imagemap'] = []
                 self.Comments = self.visit(fp)
@@ -93,9 +98,13 @@ def Reader(self, filename, ParentFrame=None, **kwarg):
                     self.errors = 'No images selected from file'
                     fp.close()
                     return False
+            except Exception as msg:
+                print(f'Error mapping file:\n{msg}')
+                return False
+        if not quick: 
             self.buffer['selectedImages'] = self.buffer.get('selectedImages',
                                                 list(range(len(self.buffer['imagemap']))))
-            # get the first selected image
+            # get the next selected image
             while imagenum <= len(self.buffer['imagemap']):
                 if imagenum-1 in self.buffer['selectedImages']:
                     del self.buffer['selectedImages'][self.buffer['selectedImages'].index(imagenum-1)]
@@ -107,11 +116,6 @@ def Reader(self, filename, ParentFrame=None, **kwarg):
                 fp.close()
                 return False
             kwargs = {'imagenum':imagenum}
-            quick = False
-        except:
-            kwargs = {'name':imagenum[0],'num':imagenum[1]}
-            quick = True
-        # we have been passed a map to images
         self.Data,self.Npix,self.Image = self.readDataset(fp,**kwargs)
         if quick:
             fp.close()
@@ -153,19 +157,26 @@ def func(name, dset):
             if not hasattr(dset,'shape'): return # not array, can't be image
             if isinstance(dset, h5py.Dataset):
                 dims = dset.shape
-                if len(dims) < 2:
-                    head.append('%s: %s'%(dset.name,str(dset[()][0])))
-                elif len(dims) == 4:
-                    size = dims[2:]
-                    self.buffer['imagemap'] += [(dset.name,i,size) for i in range(dims[1])]
-                elif len(dims) == 3:
-                    size = dims[1:]
-                    self.buffer['imagemap'] += [(dset.name,i,size) for i in range(dims[0])]
-                elif len(dims) == 2:
-                    size = dims
-                    self.buffer['imagemap'] += [(dset.name,None,size)]
-                else:
-                    print('Skipping entry '+str(dset.name)+'. Shape is '+str(dims))
+                try:
+                    if len(dims) == 0:
+                        val = dset[()]
+                        if type(val) is bytes: val = val.decode()
+                        head.append(f'{dset.name}: {val}')
+                    elif len(dims) < 2:
+                        head.append(f'{dset.name}: {dset[()][0]}')
+                    elif len(dims) == 4:
+                        size = dims[2:]
+                        self.buffer['imagemap'] += [(dset.name,i,size) for i in range(dims[1])]
+                    elif len(dims) == 3:
+                        size = dims[1:]
+                        self.buffer['imagemap'] += [(dset.name,i,size) for i in range(dims[0])]
+                    elif len(dims) == 2:
+                        size = dims
+                        self.buffer['imagemap'] += [(dset.name,None,size)]
+                    else:
+                        print(f'Skipping entry {dset.name}. Shape is {dims}')
+                except Exception as msg:
+                    print(f'Skipping entry {dset.name} Error getting shape\n{msg}')
         fp.visititems(func)
         return head
 
diff --git a/GSASII/imports/G2pwd_HDF5.py b/GSASII/imports/G2pwd_HDF5.py
@@ -1,36 +1,18 @@
 # -*- coding: utf-8 -*-
-'''
+'''Used to read powder patterns from HDF5 files. At present the only supported
+format is a NeXus variant named NXazint1d.
 '''
 
 from __future__ import division, print_function
 import os
-import sys
+
 try:
     import h5py
 except ImportError:
     h5py = None
 import numpy as np
 from .. import GSASIIobj as G2obj
 from .. import GSASIIfiles as G2fil
-#from .. import GSASIIpath
-
-# things to do:
-#   uncertainties
-#   instr. parms
-#instprmList = [('Bank',1.0), ('Lam',0.413263), ('Polariz.',0.99), 
-#            ('SH/L',0.002), ('Type','PXC'), ('U',1.163), ('V',-0.126), 
-#            ('W',0.063), ('X',0.0), ('Y',0.0), ('Z',0.0), ('Zero',0.0)]
-#   comments
-#   dataset naming
-#   sample parameters
-#sampleprmList = [('InstrName','APS 1-ID'), ('Temperature', 295.0)]
-#  'Scale': [1.0, True], 'Type': 'Debye-Scherrer',
-# 'Absorption': [0.0, False], 'DisplaceX': [0.0, False], 'DisplaceY': [0.0, False]# 'Pressure': 0.1, 'Time': 0.0, 'FreePrm1': 0.0,
-# 'FreePrm2': 0.0, 'FreePrm3': 0.0, 'Gonio. radius': 200.0, 'Omega': 0.0,
-# 'Chi': 0.0, 'Phi': 180.0, 'Azimuth': 0.0,
-# 'Materials': [{'Name': 'vacuum', 'VolFrac': 1.0}, {'Name': 'vacuum', 'VolFrac': 0.0}],
-# 'Thick': 1.0, 'Contrast': [0.0, 0.0], 'Trans': 1.0, 'SlitLen': 0.0}
-
 
 class HDF5_Reader(G2obj.ImportPowderData):
     '''Routine to read multiple powder patterns from an HDF5 file. 
@@ -52,13 +34,14 @@ def __init__(self):
             G2fil.ImportErrorMsg(msg,{'HDF5 importer':['h5py','hdf5']})
         super(self.__class__,self).__init__( # fancy way to self-reference
             extensionlist=('.hdf','.h5'),strictExtension=True,
-            formatName = 'MAX IV HDF5',longFormatName = 'HDF5 integrated scans')
+            formatName = 'MAX IV HDF5',longFormatName = 'MaxIV NXazint1d HDF5 integrated scans')
         self.scriptable = True
         #self.Iparm = {} #only filled for EDS data
 
     def ShowH5Element(self,obj,keylist):
         '''Format the contents of an HDF5 entry as a single line. Not used for 
-        reading files, only used in :meth:`HDF5list`
+        reading files, only used in :meth:`HDF5list` which is here for software
+        development. 
         '''
         k = '/'.join(keylist)
         l = obj.get(k, getlink=True)
@@ -89,16 +72,18 @@ def ShowH5Element(self,obj,keylist):
         else:
             return f'type is {type(obj[k])}'
 
-    def RecurseH5Element(self,obj,prefix=[]):
+    def RecurseH5Element(self,obj,prefix=[],length=None):
         '''Returns a list of entries of all keys in the HDF5 file
         (or group) in `obj`. Note that `obj` can be a file object, created by 
         `h5py.File` or can be a subset `fp['key/subkey']`.
+        
+        If length is specified, only the entries with arrays of that
+        length are returned.
 
         The returned list is organized where: 
           * entry 0 is the top-level keys (/a, /b,...),
           * entry 1 has the first level keys (/a/c /a/d, /b/d, /b/e,...)
           * ...
-        Not used for reading files, used only in :meth:`HDF5list`
         '''
         try:
             self.HDF5entries
@@ -109,19 +94,27 @@ def RecurseH5Element(self,obj,prefix=[]):
             self.HDF5entries.append([])
         for i in obj:
             nextprefix = prefix+[i]
-            self.HDF5entries[depth].append(nextprefix)
-            # check for link objects
-            l = obj.get(i, getlink=True)
-            if isinstance(l, h5py.ExternalLink): continue
+            if length is None:
+                self.HDF5entries[depth].append(nextprefix)
             try:
                 typ = str(type(obj[i]))
             except:
                 print(f'**Error** with key {prefix}/{i}')
                 continue
+            if length is not None and ".Group'" not in typ:
+                # get length of this obj[i]
+                try:
+                    if len(obj[i]) == length:
+                        self.HDF5entries[depth].append(nextprefix)
+                except TypeError:
+                    continue
+            # check for link objects
+            l = obj.get(i, getlink=True)
+            if isinstance(l, h5py.ExternalLink): continue
             if ".Group'" in typ:
                 #t = f'{prefix}/{i}'
                 #print(f'\n{nextprefix} contents {(60-len(t))*'='}')
-                self.RecurseH5Element(obj[i],nextprefix)
+                self.RecurseH5Element(obj[i],nextprefix,length)
         return self.HDF5entries
         
                 
@@ -158,7 +151,6 @@ def ContentsValidator(self, filename):
         '''Test if valid by seeing if the HDF5 library recognizes the file. 
         Then get file type (currently MAX IV NeXus/NXazint[12]d only)
         '''
-        #from .. import GSASIIpath
         try:
             fp = h5py.File(filename, 'r')
             if 'entry' in fp: # NeXus
@@ -168,9 +160,6 @@ def ContentsValidator(self, filename):
                     # MAX IV NXazint1d file
                     if fp['/entry/definition'][()].decode() == 'NXazint1d':
                         return True
-                    # MAX IV NXazint1d file
-                    #if fp['/entry/definition'][()].decode() == 'NXazint2d':
-                    #    return True
         except IOError:
             return False
         finally:
@@ -220,11 +209,13 @@ def readNXazint1d(self, filename, fpbuffer={}):
         see https://nxazint-hdf5-nexus-3229ecbd09ba8a773fbbd8beb72cace6216dfd5063e1.gitlab-pages.esrf.fr/classes/contributed_definitions/NXazint1d.html
         '''
         #self.instmsg = 'HDF file'
+        self.comments = []
         doread = False # has the file already been read into a buffer?
-        arrays = ('entry/data/radial_axis','entry/data/I')
+        arrays = ('entry/data/radial_axis','entry/data/I','entry/data/I_errors')
         floats = ('entry/instrument/monochromator/wavelength',
                   'entry/reduction/input/polarization_factor')
-        strings = ('entry/instrument/source/name','entry/reduction/input/unit')
+        strings = ('entry/instrument/name','entry/reduction/input/unit',
+                   'entry/sample/name','entry/instrument/source/name')
         for i in arrays+floats+strings:
             if i not in fpbuffer:
                 doread = True
@@ -234,72 +225,68 @@ def readNXazint1d(self, filename, fpbuffer={}):
                 fp = h5py.File(filename, 'r')
                 for i in arrays:
                     fpbuffer[i] = np.array(fp.get(i))
+                self.numbanks = len(fpbuffer['entry/data/I']) # number of scans
                 for i in floats:
                     fpbuffer[i] = float(fp[i][()])
                 for i in strings:
-                    fpbuffer[i] = fp[i][()].decode()
+                    try:
+                        fpbuffer[i] = fp[i][()].decode()
+                        self.comments.append(f'{i}={fpbuffer[i]}')
+                    except:
+                        fpbuffer[i] = None
                 if fpbuffer['entry/reduction/input/unit'] != '2th':
                     print('NXazint1d HDF5 file has units',fpbuffer['entry/reduction/input/unit'])
                     self.errors = 'NXazint1d only can be read with 2th units'
                     return False
+                # save arrays that are potentially tracking the parametric conditions
+                # e.g. variables with the same length as the number of datasets
+                paramItems = self.RecurseH5Element(fp,length=self.numbanks)
+                fpbuffer['ParamTrackingVars'] = {}
+                for i in paramItems:
+                    for j in i:
+                        key = '/'.join(j)
+                        if key in arrays: continue
+                        obj = fp.get(key)
+                        if obj is None: continue
+                        if len(obj[()].shape) != 1: continue
+                        # are all values the same? If so, put them into the comments
+                        # for the first histogram. If they are changing, note that and
+                        # later they will be put into every histogram.
+                        if all(obj[0] == obj):
+                            self.comments.append(f'{key.split("/")[-1]}={obj[0]}')
+                        else:
+                            fpbuffer['ParamTrackingVars'][key] = np.array(obj[()])
                 if self.selections is None or len(self.selections) == 0:
                     self.blknum = 0
                 else:
                     self.blknum = min(self.selections)
             except IOError:
-                print ('cannot open file '+ filename)
+                print (f'Can not open or read file {filename}')
                 return False
             finally:
                 fp.close()
-            self.numbanks=len(fpbuffer['entry/data/I'])
-            # # get overriding sample & instrument parameters
-            # fpbuffer['sampleprm'] = {}
-            # samplefile = os.path.splitext(filename)[0] + '.samprm'
-            # if os.path.exists(samplefile):
-            #     fp = open(samplefile,'r')
-            #     S = fp.readline()
-            #     while S:
-            #         if not S.strip().startswith('#'):
-            #             [item,val] = S[:-1].split(':')
-            #             fpbuffer['sampleprm'][item.strip("'")] = eval(val)
-            #         S = fp.readline()
-            #     fp.close()
-            # fpbuffer['instprm'] = {}
-            # instfile = os.path.splitext(filename)[0] + '.instprm'
-            # if os.path.exists(instfile):
-            #     self.instmsg = 'HDF and .instprm files'
-            #     fp = open(instfile,'r')
-            #     S = fp.readline()
-            #     while S:
-            #         if not S.strip().startswith('#'):
-            #             [item,val] = S[:-1].split(':')
-            #             fpbuffer['instprm'][item.strip("'")] = eval(val)
-            #         S = fp.readline()
-            #     fp.close()
-        # now transfer information into current histogram 
-        #self.pwdparms['Instrument Parameters'] = [
-        #    {'Type': ['PXC', 'PXC', False]},
-        #    {}]
-        # inst = {}
-        # inst.update(instprmList)
-        # inst.update(fpbuffer['instprm'])
-        # for key,val in inst.items():
-        #     self.pwdparms['Instrument Parameters'][0][key] = [val,val,False]
-        # samp = {}
-        # samp.update(sampleprmList)
-        # samp.update(fpbuffer['sampleprm'])
-        # for key,val in samp.items():
-        #     self.Sample[key] = val
         x = fpbuffer['entry/data/radial_axis']
         y = fpbuffer['entry/data/I'][self.blknum]
-        w = np.nan_to_num(1/y)    # this is not correct
-        #self.pwdparms['Instrument Parameters'][0]['Azimuth'] = [90-eta,90-eta,False]
-        #self.pwdparms['Instrument Parameters'][0]['Bank'] = [self.blknum,self.blknum,False]
-#        self.Sample['Gonio. radius'] = float(S.split('=')[1])
-#        self.Sample['Omega'] = float(S.split('=')[1])
-#        self.Sample['Chi'] = float(S.split('=')[1])
-        #self.Sample['Phi'] = Omega = fpbuffer['Omegas'][self.blknum]
+        try:
+            esd = fpbuffer['entry/data/I_errors'][self.blknum]
+            w = np.where(esd==0,0,np.nan_to_num(1/esd**2))
+        except:
+            w = np.nan_to_num(1/y)    # best we can do, alas
         self.powderdata = [x,y,w,np.zeros_like(x),np.zeros_like(x),np.zeros_like(x)]
+        # add parametric var as a comment
+        for key,arr in fpbuffer['ParamTrackingVars'].items():
+            val = arr[self.blknum]
+            self.comments.append(f'{key.split("/")[-1]}={val}')
+            if 'temperature' in key:
+                self.Sample['Temperature'] = val # in K already
+            elif 'time' in key:
+                self.Sample['Time'] = val # should be seconds
+            elif 'chi' in key:
+                self.Sample['Chi'] = val # not sure if correct mapping
+            elif 'phi' in key:
+                self.Sample['Phi'] = val
+            elif 'omega' in key:
+                self.Sample['Omega'] = val
         #self.comments = comments[selblk]
         self.powderentry[0] = filename
         #self.powderentry[1] = Pos # position offset (never used, I hope)
diff --git a/GSASII/imports/__init__.py b/GSASII/imports/__init__.py
@@ -25,8 +25,8 @@
 from . import G2pwd_CIF
 from . import G2pwd_FP
 from . import G2pwd_GPX
-from . import G2pwd_MIDAS
 from . import G2pwd_HDF5
+from . import G2pwd_MIDAS
 from . import G2pwd_Panalytical
 from . import G2pwd_csv
 from . import G2pwd_fxye
diff --git a/GSASII/imports/meson.build b/GSASII/imports/meson.build
@@ -26,6 +26,7 @@ py.install_sources([
     'G2pwd_CIF.py',
     'G2pwd_FP.py',
     'G2pwd_GPX.py',
+    'G2pwd_HDF5.py',
     'G2pwd_MIDAS.py',
     'G2pwd_Panalytical.py',
     'G2pwd_csv.py',