From c0fc5584bdb952aa21e9a31b13236bc09606f7be Mon Sep 17 00:00:00 2001 From: BHT Date: Thu, 3 Jul 2025 16:03:49 -0500 Subject: [PATCH 1/7] start work on NeXus/HDF5 importer for MAX IV --- GSASII/imports/G2pwd_HDF5.py | 322 +++++++++++++++++++++++++++++++++++ GSASII/imports/__init__.py | 2 + 2 files changed, 324 insertions(+) create mode 100644 GSASII/imports/G2pwd_HDF5.py diff --git a/GSASII/imports/G2pwd_HDF5.py b/GSASII/imports/G2pwd_HDF5.py new file mode 100644 index 000000000..1b1a304af --- /dev/null +++ b/GSASII/imports/G2pwd_HDF5.py @@ -0,0 +1,322 @@ +# -*- coding: utf-8 -*- +''' +''' + +from __future__ import division, print_function +import os +import sys +try: + import h5py +except ImportError: + h5py = None +import numpy as np +from .. import GSASIIobj as G2obj +from .. import GSASIIfiles as G2fil +#from .. import GSASIIpath + +# things to do: +# uncertainties +# instr. parms +#instprmList = [('Bank',1.0), ('Lam',0.413263), ('Polariz.',0.99), +# ('SH/L',0.002), ('Type','PXC'), ('U',1.163), ('V',-0.126), +# ('W',0.063), ('X',0.0), ('Y',0.0), ('Z',0.0), ('Zero',0.0)] +# comments +# dataset naming +# sample parameters +#sampleprmList = [('InstrName','APS 1-ID'), ('Temperature', 295.0)] +# 'Scale': [1.0, True], 'Type': 'Debye-Scherrer', +# 'Absorption': [0.0, False], 'DisplaceX': [0.0, False], 'DisplaceY': [0.0, False]# 'Pressure': 0.1, 'Time': 0.0, 'FreePrm1': 0.0, +# 'FreePrm2': 0.0, 'FreePrm3': 0.0, 'Gonio. radius': 200.0, 'Omega': 0.0, +# 'Chi': 0.0, 'Phi': 180.0, 'Azimuth': 0.0, +# 'Materials': [{'Name': 'vacuum', 'VolFrac': 1.0}, {'Name': 'vacuum', 'VolFrac': 0.0}], +# 'Thick': 1.0, 'Contrast': [0.0, 0.0], 'Trans': 1.0, 'SlitLen': 0.0} + + +class HDF5_Reader(G2obj.ImportPowderData): + '''Routine to read multiple powder patterns from an HDF5 file. + + This importer targets NXazint1d and NXazint2d NeXus files from + MAX IV. + Perhaps in the future, other types of HDF5 powder data sources as well. 
+ + The main file is .hdf or .h5, but optionally sample and + instrument parameters can be placed in .samprm and .instprm. + Any parameters placed in that file will override values set in the HDF5 + file. + ''' + mode = None + def __init__(self): + if h5py is None: + self.UseReader = False + msg = 'HDF5 Reader skipped because h5py module is not installed' + G2fil.ImportErrorMsg(msg,{'HDF5 importer':['h5py','hdf5']}) + super(self.__class__,self).__init__( # fancy way to self-reference + extensionlist=('.hdf','.h5'),strictExtension=True, + formatName = 'MAX IV HDF5',longFormatName = 'HDF5 integrated scans') + self.scriptable = True + #self.Iparm = {} #only filled for EDS data + + def ShowH5Element(self,obj,keylist): + '''Format the contents of an HDF5 entry as a single line. Not used for + reading files, only for use in :meth:`HDF5list` + ''' + k = '/'.join(keylist) + try: + typ = str(type(obj[k])) + except: + return f'**Error** with key {k}' + + if ".Dataset'" in typ: + datfmt = obj[k].dtype + if datfmt == 'O' or str(datfmt).startswith('|S'): + # byte string + return f'value={obj[k][()].decode()}' + elif datfmt == 'bool': # Bool + return f'value={bool(obj[k][()])}' + elif datfmt in (' 50: + lbl = lbl[:50] + '...' + if '\n' in lbl: + lbl = lbl.split()[0] + '...' + if lbl != '(group)': strings.append(f"{'/'.join(k):{m}s} {lbl}") + with open(filename+'_contents.txt', 'w') as fp: + for i in strings: fp.write(f'{i}\n') + + def ContentsValidator(self, filename): + '''Test if valid by seeing if the HDF5 library recognizes the file. + Then get file type (currently MAX IV NeXus/NXazint[12]d only) + ''' + from .. 
import GSASIIpath + try: + fp = h5py.File(filename, 'r') + if 'entry' in fp: # NeXus + if 'definition' in fp['/entry']: + # MAX IV NXazint1d file + if fp['/entry/definition'][()].decode() == 'NXazint1d': + return True + # MAX IV NXazint1d file + if fp['/entry/definition'][()].decode() == 'NXazint2d': + return True + except IOError: + return False + finally: + fp.close() + return False + + def Reader(self, filename, ParentFrame=None, **kwarg): + '''Scan file for sections needed by defined file types (currently + MAX IV NeXus/NXazint[12]d only) + and then use appropriate routine to read the file. + + Since usually there will be lots of scans in a single file, + the goal is that the first pass should read the file into + a buffer (if available) and subsequent calls will not + need to access the file. + ''' + fpbuffer = kwarg.get('buffer',{}) + if not hasattr(self,'blknum'): + if self.selections is None or len(self.selections) == 0: + self.blknum = 0 + else: + self.blknum = min(self.selections) + try: + self.mode = None + fp = h5py.File(filename, 'r') + try: + fp = h5py.File(filename, 'r') + if 'entry' in fp: # NeXus + if 'definition' in fp['/entry']: + # MAX IV NXazint1d file + if fp['/entry/definition'][()].decode() == 'NXazint1d': + return self.readNXazint1d(filename, fpbuffer) + + # MAX IV NXazint1d file + #if fp['/entry/definition'][()].decode() == 'NXazint2d': + # return self.readNXazint2d(filename, fpbuffer) + # return True + # https://nxazint-hdf5-nexus-3229ecbd09ba8a773fbbd8beb72cace6216dfd5063e1.gitlab-pages.esrf.fr/classes/contributed_definitions/NXazint2d.html + except IOError: + print ('cannot open file '+ filename) + return False + finally: + fp.close() + + print (f'Unknown type of HDF5 powder file {filename}') + return False + + def readNXazint1d(self, filename, fpbuffer={}): + '''Read HDF5 file in NeXus as produced by MAX IV as a NXazint1d + see 
https://nxazint-hdf5-nexus-3229ecbd09ba8a773fbbd8beb72cace6216dfd5063e1.gitlab-pages.esrf.fr/classes/contributed_definitions/NXazint1d.html + ''' + self.instmsg = 'HDF file' + doread = False # has the file already been read into a buffer? + for i in ('blkmap','intenArr_blknum')+self.midassections: + if i not in fpbuffer: + doread = True + break + else: # do we have the right section buffered? + doread = fpbuffer['intenArr_blknum'] != self.blknum + + if doread: # read into buffer + try: + fp = h5py.File(filename, 'r') + if 'blkmap' not in fpbuffer: + fpbuffer['blkmap'] = list(fp.get('OmegaSumFrame').keys()) + if 'REtaMap' not in fpbuffer: + fpbuffer['REtaMap'] = np.array(fp.get('REtaMap')) + if 'intenArr' not in fpbuffer or fpbuffer.get('intenArr_blknum',-1) != self.blknum: + fpbuffer['intenArr'] = np.array(fp.get('OmegaSumFrame').get( + fpbuffer['blkmap'][self.blknum])) + fpbuffer['intenArr_blknum'] = self.blknum + self.azmcnt = -1 + if 'Omegas' not in fpbuffer: + fpbuffer['Omegas'] = np.array(fp.get('Omegas')) + except IOError: + print ('cannot open file '+ filename) + return False + finally: + fp.close() + # get overriding sample & instrument parameters + fpbuffer['sampleprm'] = {} + samplefile = os.path.splitext(filename)[0] + '.samprm' + if os.path.exists(samplefile): + fp = open(samplefile,'r') + S = fp.readline() + while S: + if not S.strip().startswith('#'): + [item,val] = S[:-1].split(':') + fpbuffer['sampleprm'][item.strip("'")] = eval(val) + S = fp.readline() + fp.close() + fpbuffer['instprm'] = {} + instfile = os.path.splitext(filename)[0] + '.instprm' + if os.path.exists(instfile): + self.instmsg = 'HDF and .instprm files' + fp = open(instfile,'r') + S = fp.readline() + while S: + if not S.strip().startswith('#'): + [item,val] = S[:-1].split(':') + fpbuffer['instprm'][item.strip("'")] = eval(val) + S = fp.readline() + fp.close() + # look for a non-empty scan (lineout) + use = [0] + while sum(use) == 0 and self.azmcnt < fpbuffer['intenArr'].shape[1]: + 
self.azmcnt += 1 + if self.azmcnt >= fpbuffer['intenArr'].shape[1]: + return False + use = fpbuffer['REtaMap'][3,:,self.azmcnt] != 0 + + # now transfer information into current histogram + self.pwdparms['Instrument Parameters'] = [ + {'Type': ['PXC', 'PXC', False]}, + {}] + inst = {} + inst.update(instprmList) + inst.update(fpbuffer['instprm']) + for key,val in inst.items(): + self.pwdparms['Instrument Parameters'][0][key] = [val,val,False] + samp = {} + samp.update(sampleprmList) + samp.update(fpbuffer['sampleprm']) + for key,val in samp.items(): + self.Sample[key] = val + self.numbanks=len(fpbuffer['blkmap']) + x = fpbuffer['REtaMap'][1,:,self.azmcnt][use] + y = fpbuffer['intenArr'][:,self.azmcnt][use] + w = np.nan_to_num(1/y) # this is probably not correct + eta = np.average(fpbuffer['REtaMap'][2,:,self.azmcnt][use]) + self.pwdparms['Instrument Parameters'][0]['Azimuth'] = [90-eta,90-eta,False] + self.pwdparms['Instrument Parameters'][0]['Bank'] = [self.azmcnt,self.azmcnt,False] +# self.Sample['Gonio. radius'] = float(S.split('=')[1]) +# self.Sample['Omega'] = float(S.split('=')[1]) +# self.Sample['Chi'] = float(S.split('=')[1]) + self.Sample['Phi'] = Omega = fpbuffer['Omegas'][self.blknum] + self.powderdata = [x,y,w,np.zeros_like(x),np.zeros_like(x),np.zeros_like(x)] + #self.comments = comments[selblk] + self.powderentry[0] = filename + #self.powderentry[1] = Pos # position offset (never used, I hope) + self.powderentry[2] = self.blknum # bank number + self.idstring = f'{os.path.split(filename)[1][:10]} omega={Omega} eta={eta}' +# if GSASIIpath.GetConfigValue('debug'): print( +# f'Read entry #{self.azmcnt} img# {self.blknum} from file {filename}') + # are there more lineouts after this one in current image to read? + self.repeat = sum(sum(fpbuffer['REtaMap'][3,:,self.azmcnt+1:])) != 0 + if self.repeat: return True + # if not, are there more [selected] images that after this to be read? 
+ if self.blknum < self.numbanks-1: + if self.selections is None or len(self.selections) == 0: + self.blknum += 1 + self.repeat = True + else: + try: + s = sorted(self.selections) + self.blknum = s[s.index(self.blknum)+1] + self.repeat = True + except IndexError: # last selected image has been read + self.repeat = False + return True diff --git a/GSASII/imports/__init__.py b/GSASII/imports/__init__.py index 3d2b8ba1c..b2288eb08 100644 --- a/GSASII/imports/__init__.py +++ b/GSASII/imports/__init__.py @@ -24,6 +24,7 @@ from . import G2pwd_FP from . import G2pwd_GPX from . import G2pwd_MIDAS +from . import G2pwd_HDF5 from . import G2pwd_Panalytical from . import G2pwd_csv from . import G2pwd_fxye @@ -63,6 +64,7 @@ "G2pwd_FP", "G2pwd_GPX", "G2pwd_MIDAS", + "G2pwd_HDF5", "G2pwd_Panalytical", "G2pwd_csv", "G2pwd_fxye", From 69cab021041828d145dfe73f14474fc7353be5de Mon Sep 17 00:00:00 2001 From: BHT Date: Wed, 30 Jul 2025 19:52:45 -0500 Subject: [PATCH 2/7] first working HDF5-MAXIV importer --- GSASII/imports/G2pwd_HDF5.py | 178 +++++++++++++++++------------------ 1 file changed, 89 insertions(+), 89 deletions(-) diff --git a/GSASII/imports/G2pwd_HDF5.py b/GSASII/imports/G2pwd_HDF5.py index 1b1a304af..b8312061f 100644 --- a/GSASII/imports/G2pwd_HDF5.py +++ b/GSASII/imports/G2pwd_HDF5.py @@ -44,7 +44,7 @@ class HDF5_Reader(G2obj.ImportPowderData): Any parameters placed in that file will override values set in the HDF5 file. ''' - mode = None + #mode = None def __init__(self): if h5py is None: self.UseReader = False @@ -58,9 +58,12 @@ def __init__(self): def ShowH5Element(self,obj,keylist): '''Format the contents of an HDF5 entry as a single line. 
Not used for - reading files, only for use in :meth:`HDF5list` + reading files, only used in :meth:`HDF5list` ''' k = '/'.join(keylist) + l = obj.get(k, getlink=True) + if isinstance(l, h5py.ExternalLink): + return f'link to file {l.filename}' try: typ = str(type(obj[k])) except: @@ -75,7 +78,8 @@ def ShowH5Element(self,obj,keylist): return f'value={bool(obj[k][()])}' elif datfmt in (' 50: lbl = lbl[:50] + '...' - if '\n' in lbl: - lbl = lbl.split()[0] + '...' + # if '\n' in lbl: + # lbl = lbl.split()[0] + '...' if lbl != '(group)': strings.append(f"{'/'.join(k):{m}s} {lbl}") with open(filename+'_contents.txt', 'w') as fp: for i in strings: fp.write(f'{i}\n') @@ -149,17 +158,19 @@ def ContentsValidator(self, filename): '''Test if valid by seeing if the HDF5 library recognizes the file. Then get file type (currently MAX IV NeXus/NXazint[12]d only) ''' - from .. import GSASIIpath + #from .. import GSASIIpath try: fp = h5py.File(filename, 'r') if 'entry' in fp: # NeXus + #self.HDF5entries = [] + #self.HDF5list(filename) if 'definition' in fp['/entry']: # MAX IV NXazint1d file if fp['/entry/definition'][()].decode() == 'NXazint1d': return True # MAX IV NXazint1d file - if fp['/entry/definition'][()].decode() == 'NXazint2d': - return True + #if fp['/entry/definition'][()].decode() == 'NXazint2d': + # return True except IOError: return False finally: @@ -182,9 +193,6 @@ def Reader(self, filename, ParentFrame=None, **kwarg): self.blknum = 0 else: self.blknum = min(self.selections) - try: - self.mode = None - fp = h5py.File(filename, 'r') try: fp = h5py.File(filename, 'r') if 'entry' in fp: # NeXus @@ -211,103 +219,95 @@ def readNXazint1d(self, filename, fpbuffer={}): '''Read HDF5 file in NeXus as produced by MAX IV as a NXazint1d see https://nxazint-hdf5-nexus-3229ecbd09ba8a773fbbd8beb72cace6216dfd5063e1.gitlab-pages.esrf.fr/classes/contributed_definitions/NXazint1d.html ''' - self.instmsg = 'HDF file' + #self.instmsg = 'HDF file' doread = False # has the file already 
been read into a buffer? - for i in ('blkmap','intenArr_blknum')+self.midassections: + arrays = ('entry/data/radial_axis','entry/data/I') + floats = ('entry/instrument/monochromator/wavelength', + 'entry/reduction/input/polarization_factor') + strings = ('entry/instrument/source/name','entry/reduction/input/unit') + for i in arrays+floats+strings: if i not in fpbuffer: doread = True break - else: # do we have the right section buffered? - doread = fpbuffer['intenArr_blknum'] != self.blknum - if doread: # read into buffer try: fp = h5py.File(filename, 'r') - if 'blkmap' not in fpbuffer: - fpbuffer['blkmap'] = list(fp.get('OmegaSumFrame').keys()) - if 'REtaMap' not in fpbuffer: - fpbuffer['REtaMap'] = np.array(fp.get('REtaMap')) - if 'intenArr' not in fpbuffer or fpbuffer.get('intenArr_blknum',-1) != self.blknum: - fpbuffer['intenArr'] = np.array(fp.get('OmegaSumFrame').get( - fpbuffer['blkmap'][self.blknum])) - fpbuffer['intenArr_blknum'] = self.blknum - self.azmcnt = -1 - if 'Omegas' not in fpbuffer: - fpbuffer['Omegas'] = np.array(fp.get('Omegas')) + for i in arrays: + fpbuffer[i] = np.array(fp.get(i)) + for i in floats: + fpbuffer[i] = float(fp[i][()]) + for i in strings: + fpbuffer[i] = fp[i][()].decode() + if fpbuffer['entry/reduction/input/unit'] != '2th': + print('NXazint1d HDF5 file has units',fpbuffer['entry/reduction/input/unit']) + self.errors = 'NXazint1d only can be read with 2th units' + return False + if self.selections is None or len(self.selections) == 0: + self.blknum = 0 + else: + self.blknum = min(self.selections) except IOError: print ('cannot open file '+ filename) return False finally: fp.close() - # get overriding sample & instrument parameters - fpbuffer['sampleprm'] = {} - samplefile = os.path.splitext(filename)[0] + '.samprm' - if os.path.exists(samplefile): - fp = open(samplefile,'r') - S = fp.readline() - while S: - if not S.strip().startswith('#'): - [item,val] = S[:-1].split(':') - fpbuffer['sampleprm'][item.strip("'")] = eval(val) - S 
= fp.readline() - fp.close() - fpbuffer['instprm'] = {} - instfile = os.path.splitext(filename)[0] + '.instprm' - if os.path.exists(instfile): - self.instmsg = 'HDF and .instprm files' - fp = open(instfile,'r') - S = fp.readline() - while S: - if not S.strip().startswith('#'): - [item,val] = S[:-1].split(':') - fpbuffer['instprm'][item.strip("'")] = eval(val) - S = fp.readline() - fp.close() - # look for a non-empty scan (lineout) - use = [0] - while sum(use) == 0 and self.azmcnt < fpbuffer['intenArr'].shape[1]: - self.azmcnt += 1 - if self.azmcnt >= fpbuffer['intenArr'].shape[1]: - return False - use = fpbuffer['REtaMap'][3,:,self.azmcnt] != 0 - + self.numbanks=len(fpbuffer['entry/data/I']) + # # get overriding sample & instrument parameters + # fpbuffer['sampleprm'] = {} + # samplefile = os.path.splitext(filename)[0] + '.samprm' + # if os.path.exists(samplefile): + # fp = open(samplefile,'r') + # S = fp.readline() + # while S: + # if not S.strip().startswith('#'): + # [item,val] = S[:-1].split(':') + # fpbuffer['sampleprm'][item.strip("'")] = eval(val) + # S = fp.readline() + # fp.close() + # fpbuffer['instprm'] = {} + # instfile = os.path.splitext(filename)[0] + '.instprm' + # if os.path.exists(instfile): + # self.instmsg = 'HDF and .instprm files' + # fp = open(instfile,'r') + # S = fp.readline() + # while S: + # if not S.strip().startswith('#'): + # [item,val] = S[:-1].split(':') + # fpbuffer['instprm'][item.strip("'")] = eval(val) + # S = fp.readline() + # fp.close() # now transfer information into current histogram - self.pwdparms['Instrument Parameters'] = [ - {'Type': ['PXC', 'PXC', False]}, - {}] - inst = {} - inst.update(instprmList) - inst.update(fpbuffer['instprm']) - for key,val in inst.items(): - self.pwdparms['Instrument Parameters'][0][key] = [val,val,False] - samp = {} - samp.update(sampleprmList) - samp.update(fpbuffer['sampleprm']) - for key,val in samp.items(): - self.Sample[key] = val - self.numbanks=len(fpbuffer['blkmap']) - x = 
fpbuffer['REtaMap'][1,:,self.azmcnt][use] - y = fpbuffer['intenArr'][:,self.azmcnt][use] - w = np.nan_to_num(1/y) # this is probably not correct - eta = np.average(fpbuffer['REtaMap'][2,:,self.azmcnt][use]) - self.pwdparms['Instrument Parameters'][0]['Azimuth'] = [90-eta,90-eta,False] - self.pwdparms['Instrument Parameters'][0]['Bank'] = [self.azmcnt,self.azmcnt,False] + #self.pwdparms['Instrument Parameters'] = [ + # {'Type': ['PXC', 'PXC', False]}, + # {}] + # inst = {} + # inst.update(instprmList) + # inst.update(fpbuffer['instprm']) + # for key,val in inst.items(): + # self.pwdparms['Instrument Parameters'][0][key] = [val,val,False] + # samp = {} + # samp.update(sampleprmList) + # samp.update(fpbuffer['sampleprm']) + # for key,val in samp.items(): + # self.Sample[key] = val + x = fpbuffer['entry/data/radial_axis'] + y = fpbuffer['entry/data/I'][self.blknum] + w = np.nan_to_num(1/y) # this is not correct + #self.pwdparms['Instrument Parameters'][0]['Azimuth'] = [90-eta,90-eta,False] + #self.pwdparms['Instrument Parameters'][0]['Bank'] = [self.blknum,self.blknum,False] # self.Sample['Gonio. radius'] = float(S.split('=')[1]) # self.Sample['Omega'] = float(S.split('=')[1]) # self.Sample['Chi'] = float(S.split('=')[1]) - self.Sample['Phi'] = Omega = fpbuffer['Omegas'][self.blknum] + #self.Sample['Phi'] = Omega = fpbuffer['Omegas'][self.blknum] self.powderdata = [x,y,w,np.zeros_like(x),np.zeros_like(x),np.zeros_like(x)] #self.comments = comments[selblk] self.powderentry[0] = filename #self.powderentry[1] = Pos # position offset (never used, I hope) self.powderentry[2] = self.blknum # bank number - self.idstring = f'{os.path.split(filename)[1][:10]} omega={Omega} eta={eta}' -# if GSASIIpath.GetConfigValue('debug'): print( -# f'Read entry #{self.azmcnt} img# {self.blknum} from file {filename}') - # are there more lineouts after this one in current image to read? 
- self.repeat = sum(sum(fpbuffer['REtaMap'][3,:,self.azmcnt+1:])) != 0 - if self.repeat: return True + self.idstring = f'#{self.blknum} {os.path.split(filename)[1][:60]}' + self.instdict['wave'] = fpbuffer['entry/instrument/monochromator/wavelength'] # if not, are there more [selected] images that after this to be read? + self.repeat = False if self.blknum < self.numbanks-1: if self.selections is None or len(self.selections) == 0: self.blknum += 1 From c7ff0e46ef3ca0d6a56210c9cac617dc93e307b6 Mon Sep 17 00:00:00 2001 From: BHT Date: Thu, 2 Oct 2025 19:09:39 -0500 Subject: [PATCH 3/7] rework HDF5 importers for MaxIV --- GSASII/GSASIIdataGUI.py | 2 +- GSASII/imports/G2img_HDF5.py | 57 +++++++------ GSASII/imports/G2pwd_HDF5.py | 155 ++++++++++++++++------------------- GSASII/imports/__init__.py | 2 +- GSASII/imports/meson.build | 1 + 5 files changed, 108 insertions(+), 109 deletions(-) diff --git a/GSASII/GSASIIdataGUI.py b/GSASII/GSASIIdataGUI.py index ebdf296d7..a05f03561 100644 --- a/GSASII/GSASIIdataGUI.py +++ b/GSASII/GSASIIdataGUI.py @@ -1616,7 +1616,7 @@ def GetDefaultParms(self,rd): else: rd.instmsg = 'default: '+dI.defaultIparm_lbl[res] inst1,inst2 = self.ReadPowderInstprm(dI.defaultIparms[res],bank,rd) - if rd.instdict.get('wave'): + if rd.instdict.get('wave') and 'Lam' in inst1: inst1['Lam'][0] = rd.instdict.get('wave') inst1['Lam'][1] = rd.instdict.get('wave') return [inst1,inst2] diff --git a/GSASII/imports/G2img_HDF5.py b/GSASII/imports/G2img_HDF5.py index d4f237a3d..388c673a7 100644 --- a/GSASII/imports/G2img_HDF5.py +++ b/GSASII/imports/G2img_HDF5.py @@ -54,12 +54,17 @@ def Reader(self, filename, ParentFrame=None, **kwarg): return False imagenum = kwarg.get('blocknum') if imagenum is None: imagenum = 1 + quick = False # do we have a image number or a map to the section with the image? 
try: - int(imagenum) - # set up an index as to where images are found - self.buffer = kwarg.get('buffer',{}) - if not self.buffer.get('imagemap'): + int(imagenum) # test if image # is a tuple + except: # pull the section name and number out from the imagenum value + kwargs = {'name':imagenum[0],'num':imagenum[1]} + quick = True + # set up an index as to where images are found + self.buffer = kwarg.get('buffer',{}) + if not quick and not self.buffer.get('imagemap'): + try: if GSASIIpath.GetConfigValue('debug'): print('Scanning for image map') self.buffer['imagemap'] = [] self.Comments = self.visit(fp) @@ -93,9 +98,13 @@ def Reader(self, filename, ParentFrame=None, **kwarg): self.errors = 'No images selected from file' fp.close() return False + except Exception as msg: + print(f'Error mapping file:\n{msg}') + return False + if not quick: self.buffer['selectedImages'] = self.buffer.get('selectedImages', list(range(len(self.buffer['imagemap'])))) - # get the first selected image + # get the next selected image while imagenum <= len(self.buffer['imagemap']): if imagenum-1 in self.buffer['selectedImages']: del self.buffer['selectedImages'][self.buffer['selectedImages'].index(imagenum-1)] @@ -107,11 +116,6 @@ def Reader(self, filename, ParentFrame=None, **kwarg): fp.close() return False kwargs = {'imagenum':imagenum} - quick = False - except: - kwargs = {'name':imagenum[0],'num':imagenum[1]} - quick = True - # we have been passed a map to images self.Data,self.Npix,self.Image = self.readDataset(fp,**kwargs) if quick: fp.close() @@ -153,19 +157,26 @@ def func(name, dset): if not hasattr(dset,'shape'): return # not array, can't be image if isinstance(dset, h5py.Dataset): dims = dset.shape - if len(dims) < 2: - head.append('%s: %s'%(dset.name,str(dset[()][0]))) - elif len(dims) == 4: - size = dims[2:] - self.buffer['imagemap'] += [(dset.name,i,size) for i in range(dims[1])] - elif len(dims) == 3: - size = dims[1:] - self.buffer['imagemap'] += [(dset.name,i,size) for i in 
range(dims[0])] - elif len(dims) == 2: - size = dims - self.buffer['imagemap'] += [(dset.name,None,size)] - else: - print('Skipping entry '+str(dset.name)+'. Shape is '+str(dims)) + try: + if len(dims) == 0: + val = dset[()] + if type(val) is bytes: val = val.decode() + head.append(f'{dset.name}: {val}') + elif len(dims) < 2: + head.append(f'{dset.name}: {dset[()][0]}') + elif len(dims) == 4: + size = dims[2:] + self.buffer['imagemap'] += [(dset.name,i,size) for i in range(dims[1])] + elif len(dims) == 3: + size = dims[1:] + self.buffer['imagemap'] += [(dset.name,i,size) for i in range(dims[0])] + elif len(dims) == 2: + size = dims + self.buffer['imagemap'] += [(dset.name,None,size)] + else: + print(f'Skipping entry {dset.name}. Shape is {dims}') + except Exception as msg: + print(f'Skipping entry {dset.name} Error getting shape\n{msg}') fp.visititems(func) return head diff --git a/GSASII/imports/G2pwd_HDF5.py b/GSASII/imports/G2pwd_HDF5.py index b8312061f..88bbf6b2a 100644 --- a/GSASII/imports/G2pwd_HDF5.py +++ b/GSASII/imports/G2pwd_HDF5.py @@ -1,10 +1,11 @@ # -*- coding: utf-8 -*- -''' +'''Use to read powder patterns from HDF5 files. At present the only supported +format is a NeXus variant named NXazint1d. ''' from __future__ import division, print_function import os -import sys + try: import h5py except ImportError: @@ -12,25 +13,6 @@ import numpy as np from .. import GSASIIobj as G2obj from .. import GSASIIfiles as G2fil -#from .. import GSASIIpath - -# things to do: -# uncertainties -# instr. 
parms -#instprmList = [('Bank',1.0), ('Lam',0.413263), ('Polariz.',0.99), -# ('SH/L',0.002), ('Type','PXC'), ('U',1.163), ('V',-0.126), -# ('W',0.063), ('X',0.0), ('Y',0.0), ('Z',0.0), ('Zero',0.0)] -# comments -# dataset naming -# sample parameters -#sampleprmList = [('InstrName','APS 1-ID'), ('Temperature', 295.0)] -# 'Scale': [1.0, True], 'Type': 'Debye-Scherrer', -# 'Absorption': [0.0, False], 'DisplaceX': [0.0, False], 'DisplaceY': [0.0, False]# 'Pressure': 0.1, 'Time': 0.0, 'FreePrm1': 0.0, -# 'FreePrm2': 0.0, 'FreePrm3': 0.0, 'Gonio. radius': 200.0, 'Omega': 0.0, -# 'Chi': 0.0, 'Phi': 180.0, 'Azimuth': 0.0, -# 'Materials': [{'Name': 'vacuum', 'VolFrac': 1.0}, {'Name': 'vacuum', 'VolFrac': 0.0}], -# 'Thick': 1.0, 'Contrast': [0.0, 0.0], 'Trans': 1.0, 'SlitLen': 0.0} - class HDF5_Reader(G2obj.ImportPowderData): '''Routine to read multiple powder patterns from an HDF5 file. @@ -52,13 +34,14 @@ def __init__(self): G2fil.ImportErrorMsg(msg,{'HDF5 importer':['h5py','hdf5']}) super(self.__class__,self).__init__( # fancy way to self-reference extensionlist=('.hdf','.h5'),strictExtension=True, - formatName = 'MAX IV HDF5',longFormatName = 'HDF5 integrated scans') + formatName = 'MAX IV HDF5',longFormatName = 'MaxIV NXazint1d HDF5 integrated scans') self.scriptable = True #self.Iparm = {} #only filled for EDS data def ShowH5Element(self,obj,keylist): '''Format the contents of an HDF5 entry as a single line. Not used for - reading files, only used in :meth:`HDF5list` + reading files, only used in :meth:`HDF5list` which is here for software + development. ''' k = '/'.join(keylist) l = obj.get(k, getlink=True) @@ -89,16 +72,18 @@ def ShowH5Element(self,obj,keylist): else: return f'type is {type(obj[k])}' - def RecurseH5Element(self,obj,prefix=[]): + def RecurseH5Element(self,obj,prefix=[],length=None): '''Returns a list of entries of all keys in the HDF5 file (or group) in `obj`. 
Note that `obj` can be a file object, created by `h5py.File` or can be a subset `fp['key/subkey']`. + + If length is specified, only the entries with arrays of that + length are returned. The returned list is organized where: * entry 0 is the top-level keys (/a, /b,...), * entry 1 has the first level keys (/a/c /a/d, /b/d, /b/e,...) * ... - Not used for reading files, used only in :meth:`HDF5list` ''' try: self.HDF5entries @@ -109,19 +94,27 @@ def RecurseH5Element(self,obj,prefix=[]): self.HDF5entries.append([]) for i in obj: nextprefix = prefix+[i] - self.HDF5entries[depth].append(nextprefix) - # check for link objects - l = obj.get(i, getlink=True) - if isinstance(l, h5py.ExternalLink): continue + if length is None: + self.HDF5entries[depth].append(nextprefix) try: typ = str(type(obj[i])) except: print(f'**Error** with key {prefix}/{i}') continue + if length is not None and ".Group'" not in typ: + # get length of this obj[i] + try: + if len(obj[i]) == length: + self.HDF5entries[depth].append(nextprefix) + except TypeError: + continue + # check for link objects + l = obj.get(i, getlink=True) + if isinstance(l, h5py.ExternalLink): continue if ".Group'" in typ: #t = f'{prefix}/{i}' #print(f'\n{nextprefix} contents {(60-len(t))*'='}') - self.RecurseH5Element(obj[i],nextprefix) + self.RecurseH5Element(obj[i],nextprefix,length) return self.HDF5entries @@ -158,7 +151,6 @@ def ContentsValidator(self, filename): '''Test if valid by seeing if the HDF5 library recognizes the file. Then get file type (currently MAX IV NeXus/NXazint[12]d only) ''' - #from .. 
import GSASIIpath try: fp = h5py.File(filename, 'r') if 'entry' in fp: # NeXus @@ -168,9 +160,6 @@ def ContentsValidator(self, filename): # MAX IV NXazint1d file if fp['/entry/definition'][()].decode() == 'NXazint1d': return True - # MAX IV NXazint1d file - #if fp['/entry/definition'][()].decode() == 'NXazint2d': - # return True except IOError: return False finally: @@ -220,11 +209,13 @@ def readNXazint1d(self, filename, fpbuffer={}): see https://nxazint-hdf5-nexus-3229ecbd09ba8a773fbbd8beb72cace6216dfd5063e1.gitlab-pages.esrf.fr/classes/contributed_definitions/NXazint1d.html ''' #self.instmsg = 'HDF file' + self.comments = [] doread = False # has the file already been read into a buffer? - arrays = ('entry/data/radial_axis','entry/data/I') + arrays = ('entry/data/radial_axis','entry/data/I','entry/data/I_errors') floats = ('entry/instrument/monochromator/wavelength', 'entry/reduction/input/polarization_factor') - strings = ('entry/instrument/source/name','entry/reduction/input/unit') + strings = ('entry/instrument/name','entry/reduction/input/unit', + 'entry/sample/name','entry/instrument/source/name') for i in arrays+floats+strings: if i not in fpbuffer: doread = True @@ -234,72 +225,68 @@ def readNXazint1d(self, filename, fpbuffer={}): fp = h5py.File(filename, 'r') for i in arrays: fpbuffer[i] = np.array(fp.get(i)) + self.numbanks = len(fpbuffer['entry/data/I']) # number of scans for i in floats: fpbuffer[i] = float(fp[i][()]) for i in strings: - fpbuffer[i] = fp[i][()].decode() + try: + fpbuffer[i] = fp[i][()].decode() + self.comments.append(f'{i}={fpbuffer[i]}') + except: + fpbuffer[i] = None if fpbuffer['entry/reduction/input/unit'] != '2th': print('NXazint1d HDF5 file has units',fpbuffer['entry/reduction/input/unit']) self.errors = 'NXazint1d only can be read with 2th units' return False + # save arrays that are potentially tracking the parametric conditions + # e.g. 
variables with the same length as the humber of datasets + paramItems = self.RecurseH5Element(fp,length=self.numbanks) + fpbuffer['ParamTrackingVars'] = {} + for i in paramItems: + for j in i: + key = '/'.join(j) + if key in arrays: continue + obj = fp.get(key) + if obj is None: continue + if len(obj[()].shape) != 1: continue + # are all values the same? If so, put them into the comments + # for the first histogram. If they are changing, note that and + # later they will be put into every histogram. + if all(obj[0] == obj): + self.comments.append(f'{key.split("/")[-1]}={obj[0]}') + else: + fpbuffer['ParamTrackingVars'][key] = np.array(obj[()]) if self.selections is None or len(self.selections) == 0: self.blknum = 0 else: self.blknum = min(self.selections) except IOError: - print ('cannot open file '+ filename) + print (f'Can not open or read file {filename}') return False finally: fp.close() - self.numbanks=len(fpbuffer['entry/data/I']) - # # get overriding sample & instrument parameters - # fpbuffer['sampleprm'] = {} - # samplefile = os.path.splitext(filename)[0] + '.samprm' - # if os.path.exists(samplefile): - # fp = open(samplefile,'r') - # S = fp.readline() - # while S: - # if not S.strip().startswith('#'): - # [item,val] = S[:-1].split(':') - # fpbuffer['sampleprm'][item.strip("'")] = eval(val) - # S = fp.readline() - # fp.close() - # fpbuffer['instprm'] = {} - # instfile = os.path.splitext(filename)[0] + '.instprm' - # if os.path.exists(instfile): - # self.instmsg = 'HDF and .instprm files' - # fp = open(instfile,'r') - # S = fp.readline() - # while S: - # if not S.strip().startswith('#'): - # [item,val] = S[:-1].split(':') - # fpbuffer['instprm'][item.strip("'")] = eval(val) - # S = fp.readline() - # fp.close() - # now transfer information into current histogram - #self.pwdparms['Instrument Parameters'] = [ - # {'Type': ['PXC', 'PXC', False]}, - # {}] - # inst = {} - # inst.update(instprmList) - # inst.update(fpbuffer['instprm']) - # for key,val in 
inst.items(): - # self.pwdparms['Instrument Parameters'][0][key] = [val,val,False] - # samp = {} - # samp.update(sampleprmList) - # samp.update(fpbuffer['sampleprm']) - # for key,val in samp.items(): - # self.Sample[key] = val x = fpbuffer['entry/data/radial_axis'] y = fpbuffer['entry/data/I'][self.blknum] - w = np.nan_to_num(1/y) # this is not correct - #self.pwdparms['Instrument Parameters'][0]['Azimuth'] = [90-eta,90-eta,False] - #self.pwdparms['Instrument Parameters'][0]['Bank'] = [self.blknum,self.blknum,False] -# self.Sample['Gonio. radius'] = float(S.split('=')[1]) -# self.Sample['Omega'] = float(S.split('=')[1]) -# self.Sample['Chi'] = float(S.split('=')[1]) - #self.Sample['Phi'] = Omega = fpbuffer['Omegas'][self.blknum] + try: + esd = fpbuffer['entry/data/I_errors'][self.blknum] + w = np.where(esd==0,0,np.nan_to_num(1/esd**2)) + except: + w = np.nan_to_num(1/y) # best we can do, alas self.powderdata = [x,y,w,np.zeros_like(x),np.zeros_like(x),np.zeros_like(x)] + # add parametric var as a comment + for key,arr in fpbuffer['ParamTrackingVars'].items(): + val = arr[self.blknum] + self.comments.append(f'{key.split("/")[-1]}={val}') + if 'temperature' in key: + self.Sample['Temperature'] = val # in K already + elif 'time' in key: + self.Sample['Time'] = val # should be seconds + elif 'chi' in key: + self.Sample['Chi'] = val # not sure if correct mapping + elif 'phi' in key: + self.Sample['Phi'] = val + elif 'omega' in key: + self.Sample['Omega'] = val #self.comments = comments[selblk] self.powderentry[0] = filename #self.powderentry[1] = Pos # position offset (never used, I hope) diff --git a/GSASII/imports/__init__.py b/GSASII/imports/__init__.py index 9549992e2..c585dbec3 100644 --- a/GSASII/imports/__init__.py +++ b/GSASII/imports/__init__.py @@ -25,8 +25,8 @@ from . import G2pwd_CIF from . import G2pwd_FP from . import G2pwd_GPX -from . import G2pwd_MIDAS from . import G2pwd_HDF5 +from . import G2pwd_MIDAS from . import G2pwd_Panalytical from . 
import G2pwd_csv from . import G2pwd_fxye diff --git a/GSASII/imports/meson.build b/GSASII/imports/meson.build index eb1269c4e..60b955942 100644 --- a/GSASII/imports/meson.build +++ b/GSASII/imports/meson.build @@ -26,6 +26,7 @@ py.install_sources([ 'G2pwd_CIF.py', 'G2pwd_FP.py', 'G2pwd_GPX.py', + 'G2pwd_HDF5.py', 'G2pwd_MIDAS.py', 'G2pwd_Panalytical.py', 'G2pwd_csv.py', From 4d648dab1ce46c0f07722ad96b3277349257f67c Mon Sep 17 00:00:00 2001 From: BHT Date: Thu, 16 Oct 2025 16:22:23 -0500 Subject: [PATCH 4/7] Got NXazint1d reader done --- GSASII/imports/G2pwd_HDF5.py | 222 ++++++++++++++++++++++++++--------- 1 file changed, 166 insertions(+), 56 deletions(-) diff --git a/GSASII/imports/G2pwd_HDF5.py b/GSASII/imports/G2pwd_HDF5.py index 88bbf6b2a..0789b7c4e 100644 --- a/GSASII/imports/G2pwd_HDF5.py +++ b/GSASII/imports/G2pwd_HDF5.py @@ -14,6 +14,9 @@ from .. import GSASIIobj as G2obj from .. import GSASIIfiles as G2fil +#from .. import GSASIIpath +#breakpoint = GSASIIpath.IPyBreak_base + class HDF5_Reader(G2obj.ImportPowderData): '''Routine to read multiple powder patterns from an HDF5 file. @@ -125,6 +128,11 @@ def HDF5list(self, filename): :param filename: ''' + def ShowH5NeXusName(obj,keylist): + key = '/'.join(keylist) + if "NX_class" in obj[key].attrs: + return obj[key].attrs["NX_class"] + fp = h5py.File(filename, 'r') #print(f'Contents of {filename}') HDF5entries = self.RecurseH5Element(fp) @@ -136,6 +144,7 @@ def HDF5list(self, filename): for k in j: m = max(m,len('/'.join(k))) for k in j: + nxname = ShowH5NeXusName(fp,k) lbl = self.ShowH5Element(fp,k) if '\n' in lbl: lbl = '; '.join(lbl.split('\n')) @@ -144,6 +153,7 @@ def HDF5list(self, filename): # if '\n' in lbl: # lbl = lbl.split()[0] + '...' 
if lbl != '(group)': strings.append(f"{'/'.join(k):{m}s} {lbl}") + if nxname: print(f"{'/'.join(k):{m}s} {lbl} {nxname}") with open(filename+'_contents.txt', 'w') as fp: for i in strings: fp.write(f'{i}\n') @@ -153,18 +163,23 @@ def ContentsValidator(self, filename): ''' try: fp = h5py.File(filename, 'r') - if 'entry' in fp: # NeXus - #self.HDF5entries = [] - #self.HDF5list(filename) - if 'definition' in fp['/entry']: - # MAX IV NXazint1d file - if fp['/entry/definition'][()].decode() == 'NXazint1d': - return True - except IOError: + # test for MaxIV NeXus/NXazint1d & NXazint2d + test = True + while test: + test = False + entry = getNeXusBase(fp) + if entry is None: break # not NeXus + if 'definition' not in fp[entry]: break # not MaxIV NXazint* + definition = fp[entry+'/definition'][()].decode() + if definition == 'NXazint1d': return True + if definition == 'NXazint2d': return True + # test for next HDF5 type here + # + except IOError: # not HDF5 return False finally: fp.close() - return False + return False # nothing passed -- not valid def Reader(self, filename, ParentFrame=None, **kwarg): '''Scan file for sections needed by defined file types (currently @@ -182,19 +197,26 @@ def Reader(self, filename, ParentFrame=None, **kwarg): self.blknum = 0 else: self.blknum = min(self.selections) + # was file read into buffer? 
If so skip opening file to save time + definition = fpbuffer.get('definition','') + if definition == 'NXazint1d': + return self.readNXazint1d(filename, fpbuffer) + elif definition == 'NXazint2d': + return self.readNXazint2d(filename, fpbuffer) + # first or non-buffered read try: fp = h5py.File(filename, 'r') - if 'entry' in fp: # NeXus - if 'definition' in fp['/entry']: - # MAX IV NXazint1d file - if fp['/entry/definition'][()].decode() == 'NXazint1d': + entry = getNeXusBase(fp) + if entry: # NeXus + if 'definition' in fp[entry]: # MaxIV NXazint* + definition = fp[entry+'/definition'][()].decode() + fpbuffer['definition'] = definition + if definition == 'NXazint1d': return self.readNXazint1d(filename, fpbuffer) - - # MAX IV NXazint1d file - #if fp['/entry/definition'][()].decode() == 'NXazint2d': - # return self.readNXazint2d(filename, fpbuffer) - # return True - # https://nxazint-hdf5-nexus-3229ecbd09ba8a773fbbd8beb72cace6216dfd5063e1.gitlab-pages.esrf.fr/classes/contributed_definitions/NXazint2d.html + elif definition == 'NXazint2d': + return self.readNXazint2d(filename, fpbuffer) + # not a supported file type + return False except IOError: print ('cannot open file '+ filename) return False @@ -211,67 +233,95 @@ def readNXazint1d(self, filename, fpbuffer={}): #self.instmsg = 'HDF file' self.comments = [] doread = False # has the file already been read into a buffer? 
- arrays = ('entry/data/radial_axis','entry/data/I','entry/data/I_errors') - floats = ('entry/instrument/monochromator/wavelength', - 'entry/reduction/input/polarization_factor') - strings = ('entry/instrument/name','entry/reduction/input/unit', - 'entry/sample/name','entry/instrument/source/name') - for i in arrays+floats+strings: - if i not in fpbuffer: + fileItems = { + # arrays + 'radial_axis':('NXdata','radial_axis'), + 'I':('NXdata','I'), + 'I_errors':('NXdata','I_errors'), + # floats + 'wavelength':('NXmonochromator','wavelength'), + 'polarization_factor':('NXparameters','polarization_factor'), + # strings + 'instrument/name':('NXinstrument','name'), + 'unit':('NXparameters','unit'), + 'sample/name':('NXsample','name'), + 'source/name':('NXsource','name'), + } + # test if we have what we need in the buffer + for k in fileItems: + if k not in fpbuffer: doread = True break - if doread: # read into buffer + if doread: + # Nope, need to fill the buffer try: fp = h5py.File(filename, 'r') - for i in arrays: - fpbuffer[i] = np.array(fp.get(i)) - self.numbanks = len(fpbuffer['entry/data/I']) # number of scans - for i in floats: - fpbuffer[i] = float(fp[i][()]) - for i in strings: - try: - fpbuffer[i] = fp[i][()].decode() - self.comments.append(f'{i}={fpbuffer[i]}') - except: - fpbuffer[i] = None - if fpbuffer['entry/reduction/input/unit'] != '2th': - print('NXazint1d HDF5 file has units',fpbuffer['entry/reduction/input/unit']) - self.errors = 'NXazint1d only can be read with 2th units' - return False + entry = getNeXusBase(fp) + # lookup NeXus locations + nexusDict = {i:None for i in set([i[0] for i in fileItems.values()])} + recurseNeXusEntries(fp,entry,nexusDict) + # save selected items from file in buffer + savedKeys = [] + for k,loc in fileItems.items(): + if nexusDict[loc[0]] is None: + fpbuffer[k] = None + continue + key = '/'.join((nexusDict[loc[0]],)+loc[1:]) + savedKeys.append(key) + if key not in fp: + fpbuffer[k] = None + continue + val = fp[key] + if 
val.shape: + fpbuffer[k] = np.array(val) + elif 'float' in str(val.dtype): + fpbuffer[k] = float(val[()]) + self.comments.append(f'{k}={val[()]}') + else: + fpbuffer[k] = val[()].decode() + self.comments.append(f'{k}={fpbuffer[k]}') + self.numbanks = len(fpbuffer['I']) # save arrays that are potentially tracking the parametric conditions + # into ParamTrackingVars. # e.g. variables with the same length as the humber of datasets - paramItems = self.RecurseH5Element(fp,length=self.numbanks) fpbuffer['ParamTrackingVars'] = {} + paramItems = self.RecurseH5Element(fp,length=self.numbanks) for i in paramItems: for j in i: key = '/'.join(j) - if key in arrays: continue + if key in savedKeys: continue # standard data array obj = fp.get(key) if obj is None: continue if len(obj[()].shape) != 1: continue # are all values the same? If so, put them into the comments - # for the first histogram. If they are changing, note that and - # later they will be put into every histogram. + # for the first histogram only. If they are changing, note that + # here and later they will be put into every histogram. 
if all(obj[0] == obj): self.comments.append(f'{key.split("/")[-1]}={obj[0]}') else: fpbuffer['ParamTrackingVars'][key] = np.array(obj[()]) - if self.selections is None or len(self.selections) == 0: - self.blknum = 0 - else: - self.blknum = min(self.selections) except IOError: print (f'Can not open or read file {filename}') return False finally: fp.close() - x = fpbuffer['entry/data/radial_axis'] - y = fpbuffer['entry/data/I'][self.blknum] + if fpbuffer['unit'] != '2th': + print('NXazint1d HDF5 file has units',fpbuffer['entry/reduction/input/unit']) + self.errors = 'NXazint1d only can be read with 2th units' + return False + # initialize the block selection + if self.selections is None or len(self.selections) == 0: + self.blknum = 0 + else: + self.blknum = min(self.selections) + # now pull the selected dataset from the buffer + x = fpbuffer['radial_axis'] + y = fpbuffer['I'][self.blknum] try: - esd = fpbuffer['entry/data/I_errors'][self.blknum] + esd = fpbuffer['I_errors'][self.blknum] w = np.where(esd==0,0,np.nan_to_num(1/esd**2)) except: - w = np.nan_to_num(1/y) # best we can do, alas + w = np.nan_to_num(1/y) # best we can do, alas w/o reported s.u.'s self.powderdata = [x,y,w,np.zeros_like(x),np.zeros_like(x),np.zeros_like(x)] # add parametric var as a comment for key,arr in fpbuffer['ParamTrackingVars'].items(): @@ -287,12 +337,11 @@ def readNXazint1d(self, filename, fpbuffer={}): self.Sample['Phi'] = val elif 'omega' in key: self.Sample['Omega'] = val - #self.comments = comments[selblk] self.powderentry[0] = filename #self.powderentry[1] = Pos # position offset (never used, I hope) self.powderentry[2] = self.blknum # bank number self.idstring = f'#{self.blknum} {os.path.split(filename)[1][:60]}' - self.instdict['wave'] = fpbuffer['entry/instrument/monochromator/wavelength'] + self.instdict['wave'] = fpbuffer['wavelength'] # if not, are there more [selected] images that after this to be read? 
self.repeat = False if self.blknum < self.numbanks-1: @@ -307,3 +356,64 @@ def readNXazint1d(self, filename, fpbuffer={}): except IndexError: # last selected image has been read self.repeat = False return True + +# NeXus support routines. These were influenced heavily by Frederik Holm Gjørup +# Also see NeXus support in plaid (https://github.com/fgjorup/plaid/blob/main/plaid/nexus.py) + +def getNeXusBase(fp): + '''This returns the base entry in a NeXus compilant HDF5 file + (usually "/entry" for MaxIV files) or None if this is not a valid + NeXus file. + ''' + for key in fp: + if ("NX_class" in fp[key].attrs and + fp[key].attrs["NX_class"] == "NXentry"): + return key + +def getNeXusEntry(fp,base,target): + '''This returns the entry in a NeXus compilant HDF5 file matching + the name target, or None, if this is not found as a child of the key `base`. + Not in use as it is more practical to use :func:`recurseNeXusEntries`. + ''' + for key in fp[base]: + subkey = '/'.join([base,key]) + if "NX_class" in fp[subkey].attrs: + #print(key, list(fp[subkey].attrs),fp[subkey].attrs["NX_class"]) + if ("NX_class" in fp[subkey].attrs and + fp[subkey].attrs["NX_class"] == target): + return subkey + else: + print(key) + +def recurseNeXusEntry(fp,node,target): + '''Recurse through the HDF5 tree looking for NeXus class `target`. + Not in use, as :func:`recurseNeXusEntries` is used to get all + targets in a single pass through the tree. + ''' + if node is None: return # needed? 
+ val = fp[node] + if ("NX_class" in val.attrs and + val.attrs["NX_class"] == target): + return node + if not isinstance(val, h5py.Group): return + for key in val: + subkey = '/'.join([node,key]) + res = recurseNeXusEntry(fp,subkey,target) + if res: return res + +def recurseNeXusEntries(fp,node,targetdict): + '''recurse through the HDF5 tree looking for the NeXus classes + in `targetdict`, storing the HDF5 key for each class in the dict + + :param fp: HDF5 file pointer + :param str node: name of current HDF5 key + :param dict targetdict: dict to place HDF5 keys corresponding to + the desired NeXus classes. As input this has the NeXus classes + is the dict keys and the + ''' + val = fp[node] + if ("NX_class" in val.attrs and val.attrs["NX_class"] in targetdict): + targetdict[val.attrs["NX_class"]] = node + if isinstance(val, h5py.Group): + for key in val: + recurseNeXusEntries(fp,'/'.join([node,key]),targetdict) From 22e9365ce94fe9f197591f502de848a97b468947 Mon Sep 17 00:00:00 2001 From: BHT Date: Sat, 18 Oct 2025 11:47:41 -0500 Subject: [PATCH 5/7] save a working snapshot, prior to cleanup --- GSASII/GSASIIobj.py | 2 + GSASII/imports/G2pwd_HDF5.py | 360 ++++++++++++++++++++++++++++------- 2 files changed, 291 insertions(+), 71 deletions(-) diff --git a/GSASII/GSASIIobj.py b/GSASII/GSASIIobj.py index dc5cd7038..8f544e1e1 100644 --- a/GSASII/GSASIIobj.py +++ b/GSASII/GSASIIobj.py @@ -1433,6 +1433,8 @@ def ReInitialize(self): self.instdict = {} # place items here that will be transferred to the instrument parameters self.pwdparms = {} # place parameters that are transferred directly to the tree # here (typically from an existing GPX file) + self.selections = [] + self.dnames = [] ###################################################################### class ImportSmallAngleData(ImportBaseclass): '''Defines a base class for the reading of files with small angle data. 
diff --git a/GSASII/imports/G2pwd_HDF5.py b/GSASII/imports/G2pwd_HDF5.py index 0789b7c4e..03fcc3f4e 100644 --- a/GSASII/imports/G2pwd_HDF5.py +++ b/GSASII/imports/G2pwd_HDF5.py @@ -162,6 +162,7 @@ def ContentsValidator(self, filename): Then get file type (currently MAX IV NeXus/NXazint[12]d only) ''' try: + definition = '' fp = h5py.File(filename, 'r') # test for MaxIV NeXus/NXazint1d & NXazint2d test = True @@ -171,8 +172,53 @@ def ContentsValidator(self, filename): if entry is None: break # not NeXus if 'definition' not in fp[entry]: break # not MaxIV NXazint* definition = fp[entry+'/definition'][()].decode() - if definition == 'NXazint1d': return True - if definition == 'NXazint2d': return True + # get names for datasets so we can select them + if definition == 'NXazint1d': + fileItems = { + 'I':('NXdata','I'), + 'unit':('NXparameters','unit'), + } + buffer = {} + if not self.readInNeXus(filename,buffer,fileItems,'NXazint1d-validate'): + return False + nhist = len(buffer['I']) + self.selections = list(range(nhist)) + for i in range(nhist): + self.dnames.append(f'#{i} {os.path.split(filename)[1][:60]}') + return True + if definition == 'NXazint2d': + fileItems = { + 'I':('NXdata','I'), + 'unit':('NXparameters','unit'), + 'azimuthal_axis':('NXdata','azimuthal_axis'), + } + buffer = {} + if not self.readInNeXus(filename,buffer,fileItems,'NXazint2d-validate'): + return False + #numazimuth = buffer['azimuth_bins'] + numazimuth = len(buffer['azimuthal_axis']) + numbanks = len(buffer['I']) + nhist = numbanks * numazimuth + self.selections = list(range(nhist)) + for i in range(nhist): + # group by parametric variable + numScan = i // numazimuth + numAzim = i - (numScan * numazimuth) + Azimuth = buffer['azimuthal_axis'][numAzim] + self.dnames.append(f'#{numScan} Azm={Azimuth} {os.path.split(filename)[1][:60]}') + return True + # test for MaxIV NeXus combined NXazint1d & NXazint2d + test = True + while test: + test = False + entry = getNeXusBase(fp) + subentry = 
getNeXusEntry(fp,entry,'NXsubentry') + if len(subentry) == 0: + break # nothing to read + for entry in subentry: + definition = fp[entry+'/definition'][()].decode() + if definition == 'NXazint1d' or definition == 'NXazint2d': + return True # test for next HDF5 type here # except IOError: # not HDF5 @@ -210,44 +256,109 @@ def Reader(self, filename, ParentFrame=None, **kwarg): if entry: # NeXus if 'definition' in fp[entry]: # MaxIV NXazint* definition = fp[entry+'/definition'][()].decode() - fpbuffer['definition'] = definition - if definition == 'NXazint1d': - return self.readNXazint1d(filename, fpbuffer) - elif definition == 'NXazint2d': - return self.readNXazint2d(filename, fpbuffer) - # not a supported file type - return False + else: + subentry = getNeXusEntry(fp,entry,'NXsubentry') + if len(subentry) == 0: + return False + elif len(subentry) == 1: + entry = subentry[0] + elif ParentFrame: + from .. import GSASIIctrlGUI as G2G + choices = ('NXazint1d 1D file','NXazint1d 2D file') + sel = G2G.ItemSelector(choices, ParentFrame=ParentFrame, + header='Select file section', + title='Select the section of the file to read') + if sel is None: return False + entry = subentry[sel] + else: + entry = subentry[1] + if 'definition' not in fp[entry]: return False + definition = fp[entry+'/definition'][()].decode() + fpbuffer['definition'] = definition + if definition == 'NXazint1d': + return self.readNXazint1d(filename, fpbuffer, entry) + elif definition == 'NXazint2d': + return self.readNXazint2d(filename, fpbuffer, entry) + return False # not a supported file type except IOError: - print ('cannot open file '+ filename) + print (f'cannot open file {filename}') return False finally: fp.close() - print (f'Unknown type of HDF5 powder file {filename}') return False + + # def FillBuffer(self,fp,fileItems,fpbuffer,nexusDict): + # '''save selected items from file in buffer + # ''' + # savedKeys = [] + # for k,loc in fileItems.items(): + # if nexusDict[loc[0]] is None: + # 
fpbuffer[k] = None + # continue + # key = '/'.join((nexusDict[loc[0]],)+loc[1:]) + # savedKeys.append(key) + # if key not in fp: + # fpbuffer[k] = None + # continue + # val = fp[key] + # if val.shape: + # fpbuffer[k] = np.array(val) + # elif 'float' in str(val.dtype): + # fpbuffer[k] = float(val[()]) + # self.comments.append(f'{k}={val[()]}') + # elif 'int' in str(val.dtype): + # fpbuffer[k] = int(val[()]) + # else: + # fpbuffer[k] = val[()].decode() + # self.comments.append(f'{k}={fpbuffer[k]}') + # self.numparams = len(fpbuffer['I']) + # # save arrays that are potentially tracking the parametric conditions + # # into ParamTrackingVars. + # # e.g. variables with the same length as the humber of datasets + # fpbuffer['ParamTrackingVars'] = {} + # paramItems = [] + # for loc in nexusDict.values(): + # self.HDF5entries = [] + # paramItems = self.RecurseH5Element(fp[loc],length=self.numparams) + # for i in paramItems: + # for j in i: + # key = loc+'/'+'/'.join(j) + # print(key) + # obj = fp.get(key) + # if obj is None: continue + # if len(obj[()].shape) != 1: continue + # # are all values the same? If so, put them into the comments + # # for the first histogram only. If they are changing, note that + # # here and later they will be put into every histogram. 
+ # if all(obj[0] == obj): + # self.comments.append(f'{key.split("/")[-1]}={obj[0]}') + # else: + # fpbuffer['ParamTrackingVars'][key] = np.array(obj[()]) - def readNXazint1d(self, filename, fpbuffer={}): - '''Read HDF5 file in NeXus as produced by MAX IV as a NXazint1d - see https://nxazint-hdf5-nexus-3229ecbd09ba8a773fbbd8beb72cace6216dfd5063e1.gitlab-pages.esrf.fr/classes/contributed_definitions/NXazint1d.html + # breakpoint() + + # paramItems = self.RecurseH5Element(fp,node=loc,length=self.numparams) + # for i in paramItems: + # for j in i: + # key = '/'.join(j) + # if key in savedKeys: continue # standard data array + # obj = fp.get(key) + # if obj is None: continue + # if len(obj[()].shape) != 1: continue + # # are all values the same? If so, put them into the comments + # # for the first histogram only. If they are changing, note that + # # here and later they will be put into every histogram. + # if all(obj[0] == obj): + # self.comments.append(f'{key.split("/")[-1]}={obj[0]}') + # else: + # fpbuffer['ParamTrackingVars'][key] = np.array(obj[()]) + + def readInNeXus(self,filename,fpbuffer,fileItems,fmt,entry=None): + '''Read in items from NeXus labeled sections of the HDF5 file ''' - #self.instmsg = 'HDF file' self.comments = [] doread = False # has the file already been read into a buffer? 
- fileItems = { - # arrays - 'radial_axis':('NXdata','radial_axis'), - 'I':('NXdata','I'), - 'I_errors':('NXdata','I_errors'), - # floats - 'wavelength':('NXmonochromator','wavelength'), - 'polarization_factor':('NXparameters','polarization_factor'), - # strings - 'instrument/name':('NXinstrument','name'), - 'unit':('NXparameters','unit'), - 'sample/name':('NXsample','name'), - 'source/name':('NXsource','name'), - } - # test if we have what we need in the buffer for k in fileItems: if k not in fpbuffer: doread = True @@ -256,12 +367,14 @@ def readNXazint1d(self, filename, fpbuffer={}): # Nope, need to fill the buffer try: fp = h5py.File(filename, 'r') - entry = getNeXusBase(fp) - # lookup NeXus locations + if entry is None: entry = getNeXusBase(fp) + # lookup keys for NeXus labels we will use nexusDict = {i:None for i in set([i[0] for i in fileItems.values()])} recurseNeXusEntries(fp,entry,nexusDict) - # save selected items from file in buffer - savedKeys = [] + # save selected items from file into buffer + # convert all objects into values or non-HDF5 objects so file + # be closed + savedKeys = [] # things we will not need to save in the 2nd scan for k,loc in fileItems.items(): if nexusDict[loc[0]] is None: fpbuffer[k] = None @@ -277,55 +390,55 @@ def readNXazint1d(self, filename, fpbuffer={}): elif 'float' in str(val.dtype): fpbuffer[k] = float(val[()]) self.comments.append(f'{k}={val[()]}') + elif 'int' in str(val.dtype): + fpbuffer[k] = int(val[()]) else: fpbuffer[k] = val[()].decode() self.comments.append(f'{k}={fpbuffer[k]}') - self.numbanks = len(fpbuffer['I']) + if fpbuffer['unit'] != '2th': + print(f'{fmt} HDF5 file has units',fpbuffer['unit']) + self.errors = f'{fmt} only can be read with 2theta units' + return False + self.numparams = len(fpbuffer['I']) # save arrays that are potentially tracking the parametric conditions # into ParamTrackingVars. # e.g. 
variables with the same length as the humber of datasets - fpbuffer['ParamTrackingVars'] = {} - paramItems = self.RecurseH5Element(fp,length=self.numbanks) - for i in paramItems: - for j in i: - key = '/'.join(j) - if key in savedKeys: continue # standard data array - obj = fp.get(key) - if obj is None: continue - if len(obj[()].shape) != 1: continue - # are all values the same? If so, put them into the comments - # for the first histogram only. If they are changing, note that - # here and later they will be put into every histogram. - if all(obj[0] == obj): - self.comments.append(f'{key.split("/")[-1]}={obj[0]}') - else: - fpbuffer['ParamTrackingVars'][key] = np.array(obj[()]) + if 'validate' not in fmt: + fpbuffer['ParamTrackingVars'] = {} + paramItems = [] + for loc in nexusDict.values(): + self.HDF5entries = [] + paramItems = self.RecurseH5Element(fp[loc],length=self.numparams) + for i in paramItems: + for j in i: + key = loc+'/'+'/'.join(j) + print(key) + obj = fp.get(key) + if obj is None: continue + if len(obj[()].shape) != 1: continue + # are all values the same? If so, put them into the comments + # for the first histogram only. If they are changing, note that + # here and later they will be put into every histogram. 
+ if all(obj[0] == obj): + self.comments.append(f'{key.split("/")[-1]}={obj[0]}') + else: + fpbuffer['ParamTrackingVars'][key] = np.array(obj[()]) except IOError: print (f'Can not open or read file {filename}') return False finally: fp.close() - if fpbuffer['unit'] != '2th': - print('NXazint1d HDF5 file has units',fpbuffer['entry/reduction/input/unit']) - self.errors = 'NXazint1d only can be read with 2th units' - return False # initialize the block selection if self.selections is None or len(self.selections) == 0: self.blknum = 0 else: self.blknum = min(self.selections) - # now pull the selected dataset from the buffer - x = fpbuffer['radial_axis'] - y = fpbuffer['I'][self.blknum] - try: - esd = fpbuffer['I_errors'][self.blknum] - w = np.where(esd==0,0,np.nan_to_num(1/esd**2)) - except: - w = np.nan_to_num(1/y) # best we can do, alas w/o reported s.u.'s - self.powderdata = [x,y,w,np.zeros_like(x),np.zeros_like(x),np.zeros_like(x)] + return True + + def FillInParametics(self,fpbuffer,count): # add parametric var as a comment for key,arr in fpbuffer['ParamTrackingVars'].items(): - val = arr[self.blknum] + val = arr[count] self.comments.append(f'{key.split("/")[-1]}={val}') if 'temperature' in key: self.Sample['Temperature'] = val # in K already @@ -337,6 +450,43 @@ def readNXazint1d(self, filename, fpbuffer={}): self.Sample['Phi'] = val elif 'omega' in key: self.Sample['Omega'] = val + + def readNXazint1d(self, filename, fpbuffer={}, entry=None): + '''Read HDF5 file in NeXus as produced by MAX IV as a NXazint1d. + In this file, multiple scans are placed in a 2-D array (I and I_errors in + section NXdata), where one dimension is 2-theta and the other is a parametric + value such as temperature, time, etc. 
+ + see https://nxazint-hdf5-nexus-3229ecbd09ba8a773fbbd8beb72cace6216dfd5063e1.gitlab-pages.esrf.fr/classes/contributed_definitions/NXazint1d.html + ''' + #self.instmsg = 'HDF file' + fileItems = { + # arrays + 'radial_axis':('NXdata','radial_axis'), + 'I':('NXdata','I'), + 'I_errors':('NXdata','I_errors'), + # floats + 'wavelength':('NXmonochromator','wavelength'), + 'polarization_factor':('NXparameters','polarization_factor'), + # strings + 'instrument/name':('NXinstrument','name'), + 'unit':('NXparameters','unit'), + 'sample/name':('NXsample','name'), + 'source/name':('NXsource','name'), + } + # test if we have what we need in the buffer and if not read it in + if not self.readInNeXus(filename,fpbuffer,fileItems,'NXazint1d',entry): return False + # now pull the selected dataset from the buffer + self.numbanks = self.numparams + x = fpbuffer['radial_axis'] + y = fpbuffer['I'][self.blknum] + try: + esd = fpbuffer['I_errors'][self.blknum] + w = np.where(esd==0,0,np.nan_to_num(1/esd**2)) + except: + w = np.nan_to_num(1/y) # best we can do, alas w/o reported s.u.'s + self.powderdata = [x,y,w,np.zeros_like(x),np.zeros_like(x),np.zeros_like(x)] + self.FillInParametics(fpbuffer,self.blknum) self.powderentry[0] = filename #self.powderentry[1] = Pos # position offset (never used, I hope) self.powderentry[2] = self.blknum # bank number @@ -357,6 +507,71 @@ def readNXazint1d(self, filename, fpbuffer={}): self.repeat = False return True + def readNXazint2d(self, filename, fpbuffer={}, entry=None): + '''Read HDF5 file in NeXus as produced by MAX IV as a NXazint2d + + In this file, multiple scans are placed in a 3-D array (I and I_errors in + section NXdata), where one dimension is 2-theta and another is the azimuthal value + and the third are a parametric value(s) such as temperature, time, etc. 
+ + see https://nxazint-hdf5-nexus-3229ecbd09ba8a773fbbd8beb72cace6216dfd5063e1.gitlab-pages.esrf.fr/classes/contributed_definitions/NXazint2d.html + ''' + self.comments = [] + fileItems = { + # arrays + 'radial_axis':('NXdata','radial_axis'), + 'azimuthal_axis':('NXdata','azimuthal_axis'), + 'I':('NXdata','I'), + 'I_errors':('NXdata','I_errors'), + # floats + 'wavelength':('NXmonochromator','wavelength'), + 'polarization_factor':('NXparameters','polarization_factor'), + # strings + 'instrument/name':('NXinstrument','name'), + 'unit':('NXparameters','unit'), + 'azimuth_bins':('NXparameters','azimuth_bins'), + 'sample/name':('NXsample','name'), + 'source/name':('NXsource','name'), + } + # test if we have what we need in the buffer and if not read it in + if not self.readInNeXus(filename,fpbuffer,fileItems,'NXazint2d',entry): return False + # now pull the selected dataset from the buffer + self.numazimuth = fpbuffer['azimuth_bins'] + self.numbanks = self.numparams * self.numazimuth + # group by parametric variable + numScan = self.blknum // self.numazimuth + numAzim = self.blknum - (numScan * self.numazimuth) + x = fpbuffer['radial_axis'] + y = fpbuffer['I'][numScan][numAzim] + try: + esd = fpbuffer['I_errors'][numScan][numAzim] + w = np.where(esd==0,0,np.nan_to_num(1/esd**2)) + except: + w = np.nan_to_num(1/y) # best we can do, alas w/o reported s.u.'s + self.powderdata = [x,y,w,np.zeros_like(x),np.zeros_like(x),np.zeros_like(x)] + self.Sample['Azimuth'] = fpbuffer['azimuthal_axis'][numAzim] + # add parametric var as a comment + self.FillInParametics(fpbuffer,numScan) + self.powderentry[0] = filename + #self.powderentry[1] = Pos # position offset (never used, I hope) + self.powderentry[2] = self.blknum # bank number + self.idstring = f'#{numScan} Azm={self.Sample["Azimuth"]} {os.path.split(filename)[1][:60]}' + self.instdict['wave'] = fpbuffer['wavelength'] + # if not, are there more [selected] images that after this to be read? 
+ self.repeat = False + if self.blknum < self.numbanks-1: + if self.selections is None or len(self.selections) == 0: + self.blknum += 1 + self.repeat = True + else: + try: + s = sorted(self.selections) + self.blknum = s[s.index(self.blknum)+1] + self.repeat = True + except IndexError: # last selected image has been read + self.repeat = False + return True + # NeXus support routines. These were influenced heavily by Frederik Holm Gjørup # Also see NeXus support in plaid (https://github.com/fgjorup/plaid/blob/main/plaid/nexus.py) @@ -371,19 +586,22 @@ def getNeXusBase(fp): return key def getNeXusEntry(fp,base,target): - '''This returns the entry in a NeXus compilant HDF5 file matching - the name target, or None, if this is not found as a child of the key `base`. - Not in use as it is more practical to use :func:`recurseNeXusEntries`. + '''This returns a list of entries in a NeXus compilant HDF5 file matching + the name target, or an empty list, if this is not found. This only + looks for the direct children of the key `base`. ''' + keyList = [] for key in fp[base]: subkey = '/'.join([base,key]) if "NX_class" in fp[subkey].attrs: #print(key, list(fp[subkey].attrs),fp[subkey].attrs["NX_class"]) if ("NX_class" in fp[subkey].attrs and fp[subkey].attrs["NX_class"] == target): - return subkey - else: - print(key) + keyList.append(subkey) + return keyList +# else: +# print(key) + def recurseNeXusEntry(fp,node,target): '''Recurse through the HDF5 tree looking for NeXus class `target`. 
From d41d4b46744b2990a8cc9a68374536c187356dc3 Mon Sep 17 00:00:00 2001 From: BHT Date: Sat, 18 Oct 2025 14:04:52 -0500 Subject: [PATCH 6/7] clean up and test against files --- GSASII/imports/G2pwd_HDF5.py | 571 ++++++++++++++++------------------- 1 file changed, 255 insertions(+), 316 deletions(-) diff --git a/GSASII/imports/G2pwd_HDF5.py b/GSASII/imports/G2pwd_HDF5.py index 03fcc3f4e..c3d2a5d88 100644 --- a/GSASII/imports/G2pwd_HDF5.py +++ b/GSASII/imports/G2pwd_HDF5.py @@ -1,11 +1,9 @@ # -*- coding: utf-8 -*- '''Use to read powder patterns from HDF5 files. At present the only supported -format is a NeXus variant named NXazint1d. +format are two NeXus variants from MaxIV named NXazint1d and NXazint1d, +but this can be expanded to handle more HDF5/NeXus formats ''' - -from __future__ import division, print_function import os - try: import h5py except ImportError: @@ -14,9 +12,6 @@ from .. import GSASIIobj as G2obj from .. import GSASIIfiles as G2fil -#from .. import GSASIIpath -#breakpoint = GSASIIpath.IPyBreak_base - class HDF5_Reader(G2obj.ImportPowderData): '''Routine to read multiple powder patterns from an HDF5 file. @@ -29,7 +24,6 @@ class HDF5_Reader(G2obj.ImportPowderData): Any parameters placed in that file will override values set in the HDF5 file. ''' - #mode = None def __init__(self): if h5py is None: self.UseReader = False @@ -37,136 +31,20 @@ def __init__(self): G2fil.ImportErrorMsg(msg,{'HDF5 importer':['h5py','hdf5']}) super(self.__class__,self).__init__( # fancy way to self-reference extensionlist=('.hdf','.h5'),strictExtension=True, - formatName = 'MAX IV HDF5',longFormatName = 'MaxIV NXazint1d HDF5 integrated scans') + formatName = 'MAXIV NeXus',longFormatName = 'Max IV NXazintXd NeXus integrated scans') self.scriptable = True - #self.Iparm = {} #only filled for EDS data - - def ShowH5Element(self,obj,keylist): - '''Format the contents of an HDF5 entry as a single line. 
Not used for - reading files, only used in :meth:`HDF5list` which is here for software - development. - ''' - k = '/'.join(keylist) - l = obj.get(k, getlink=True) - if isinstance(l, h5py.ExternalLink): - return f'link to file {l.filename}' - try: - typ = str(type(obj[k])) - except: - return f'**Error** with key {k}' - - if ".Dataset'" in typ: - datfmt = obj[k].dtype - if datfmt == 'O' or str(datfmt).startswith('|S'): - # byte string - return f'value={obj[k][()].decode()}' - elif datfmt == 'bool': # Bool - return f'value={bool(obj[k][()])}' - elif datfmt in (' 50: - lbl = lbl[:50] + '...' - # if '\n' in lbl: - # lbl = lbl.split()[0] + '...' - if lbl != '(group)': strings.append(f"{'/'.join(k):{m}s} {lbl}") - if nxname: print(f"{'/'.join(k):{m}s} {lbl} {nxname}") - with open(filename+'_contents.txt', 'w') as fp: - for i in strings: fp.write(f'{i}\n') - def ContentsValidator(self, filename): '''Test if valid by seeing if the HDF5 library recognizes the file. - Then get file type (currently MAX IV NeXus/NXazint[12]d only) + Then get file type (currently MAX IV NXazint[12]d (NeXus) only) ''' try: definition = '' fp = h5py.File(filename, 'r') # test for MaxIV NeXus/NXazint1d & NXazint2d test = True - while test: + while test: # block for standard NXazint1d and NXazint2d files, + # use break to bail out and try next block test = False entry = getNeXusBase(fp) if entry is None: break # not NeXus @@ -209,7 +87,8 @@ def ContentsValidator(self, filename): return True # test for MaxIV NeXus combined NXazint1d & NXazint2d test = True - while test: + while test: # block for combined NXazint1d and NXazint2d files, + # use break to bail out and try next block test = False entry = getNeXusBase(fp) subentry = getNeXusEntry(fp,entry,'NXsubentry') @@ -234,8 +113,8 @@ def Reader(self, filename, ParentFrame=None, **kwarg): Since usually there will be lots of scans in a single file, the goal is that the first pass should read the file into - a buffer (if available) and subsequent calls 
will not - need to access the file. + a buffer (if available) and subsequent calls can use the + buffer and will not need to access the file. ''' fpbuffer = kwarg.get('buffer',{}) if not hasattr(self,'blknum'): @@ -243,26 +122,26 @@ def Reader(self, filename, ParentFrame=None, **kwarg): self.blknum = 0 else: self.blknum = min(self.selections) - # was file read into buffer? If so skip opening file to save time + # was file already read into buffer? If so, skip opening file to save time definition = fpbuffer.get('definition','') if definition == 'NXazint1d': return self.readNXazint1d(filename, fpbuffer) elif definition == 'NXazint2d': return self.readNXazint2d(filename, fpbuffer) - # first or non-buffered read - try: + + try: # first or non-buffered read fp = h5py.File(filename, 'r') - entry = getNeXusBase(fp) - if entry: # NeXus + entry = getNeXusBase(fp) # test for NeXus + if entry: # This is NeXus if 'definition' in fp[entry]: # MaxIV NXazint* definition = fp[entry+'/definition'][()].decode() - else: + else: # is this a combined NXazint1d/NXazint2d file? subentry = getNeXusEntry(fp,entry,'NXsubentry') if len(subentry) == 0: return False elif len(subentry) == 1: entry = subentry[0] - elif ParentFrame: + elif ParentFrame: # interactive, let the user decide from .. 
import GSASIIctrlGUI as G2G choices = ('NXazint1d 1D file','NXazint1d 2D file') sel = G2G.ItemSelector(choices, ParentFrame=ParentFrame, @@ -270,186 +149,24 @@ def Reader(self, filename, ParentFrame=None, **kwarg): title='Select the section of the file to read') if sel is None: return False entry = subentry[sel] - else: + else: # scripted, assume if 2D is present, that is what is wanted entry = subentry[1] if 'definition' not in fp[entry]: return False definition = fp[entry+'/definition'][()].decode() + # got a file type, save it and if recognized, read it fpbuffer['definition'] = definition if definition == 'NXazint1d': return self.readNXazint1d(filename, fpbuffer, entry) elif definition == 'NXazint2d': return self.readNXazint2d(filename, fpbuffer, entry) return False # not a supported file type - except IOError: + except IOError: # unexpected since this was validated print (f'cannot open file {filename}') return False finally: fp.close() print (f'Unknown type of HDF5 powder file {filename}') return False - - # def FillBuffer(self,fp,fileItems,fpbuffer,nexusDict): - # '''save selected items from file in buffer - # ''' - # savedKeys = [] - # for k,loc in fileItems.items(): - # if nexusDict[loc[0]] is None: - # fpbuffer[k] = None - # continue - # key = '/'.join((nexusDict[loc[0]],)+loc[1:]) - # savedKeys.append(key) - # if key not in fp: - # fpbuffer[k] = None - # continue - # val = fp[key] - # if val.shape: - # fpbuffer[k] = np.array(val) - # elif 'float' in str(val.dtype): - # fpbuffer[k] = float(val[()]) - # self.comments.append(f'{k}={val[()]}') - # elif 'int' in str(val.dtype): - # fpbuffer[k] = int(val[()]) - # else: - # fpbuffer[k] = val[()].decode() - # self.comments.append(f'{k}={fpbuffer[k]}') - # self.numparams = len(fpbuffer['I']) - # # save arrays that are potentially tracking the parametric conditions - # # into ParamTrackingVars. - # # e.g. 
variables with the same length as the humber of datasets - # fpbuffer['ParamTrackingVars'] = {} - # paramItems = [] - # for loc in nexusDict.values(): - # self.HDF5entries = [] - # paramItems = self.RecurseH5Element(fp[loc],length=self.numparams) - # for i in paramItems: - # for j in i: - # key = loc+'/'+'/'.join(j) - # print(key) - # obj = fp.get(key) - # if obj is None: continue - # if len(obj[()].shape) != 1: continue - # # are all values the same? If so, put them into the comments - # # for the first histogram only. If they are changing, note that - # # here and later they will be put into every histogram. - # if all(obj[0] == obj): - # self.comments.append(f'{key.split("/")[-1]}={obj[0]}') - # else: - # fpbuffer['ParamTrackingVars'][key] = np.array(obj[()]) - - # breakpoint() - - # paramItems = self.RecurseH5Element(fp,node=loc,length=self.numparams) - # for i in paramItems: - # for j in i: - # key = '/'.join(j) - # if key in savedKeys: continue # standard data array - # obj = fp.get(key) - # if obj is None: continue - # if len(obj[()].shape) != 1: continue - # # are all values the same? If so, put them into the comments - # # for the first histogram only. If they are changing, note that - # # here and later they will be put into every histogram. - # if all(obj[0] == obj): - # self.comments.append(f'{key.split("/")[-1]}={obj[0]}') - # else: - # fpbuffer['ParamTrackingVars'][key] = np.array(obj[()]) - - def readInNeXus(self,filename,fpbuffer,fileItems,fmt,entry=None): - '''Read in items from NeXus labeled sections of the HDF5 file - ''' - self.comments = [] - doread = False # has the file already been read into a buffer? 
- for k in fileItems: - if k not in fpbuffer: - doread = True - break - if doread: - # Nope, need to fill the buffer - try: - fp = h5py.File(filename, 'r') - if entry is None: entry = getNeXusBase(fp) - # lookup keys for NeXus labels we will use - nexusDict = {i:None for i in set([i[0] for i in fileItems.values()])} - recurseNeXusEntries(fp,entry,nexusDict) - # save selected items from file into buffer - # convert all objects into values or non-HDF5 objects so file - # be closed - savedKeys = [] # things we will not need to save in the 2nd scan - for k,loc in fileItems.items(): - if nexusDict[loc[0]] is None: - fpbuffer[k] = None - continue - key = '/'.join((nexusDict[loc[0]],)+loc[1:]) - savedKeys.append(key) - if key not in fp: - fpbuffer[k] = None - continue - val = fp[key] - if val.shape: - fpbuffer[k] = np.array(val) - elif 'float' in str(val.dtype): - fpbuffer[k] = float(val[()]) - self.comments.append(f'{k}={val[()]}') - elif 'int' in str(val.dtype): - fpbuffer[k] = int(val[()]) - else: - fpbuffer[k] = val[()].decode() - self.comments.append(f'{k}={fpbuffer[k]}') - if fpbuffer['unit'] != '2th': - print(f'{fmt} HDF5 file has units',fpbuffer['unit']) - self.errors = f'{fmt} only can be read with 2theta units' - return False - self.numparams = len(fpbuffer['I']) - # save arrays that are potentially tracking the parametric conditions - # into ParamTrackingVars. - # e.g. variables with the same length as the humber of datasets - if 'validate' not in fmt: - fpbuffer['ParamTrackingVars'] = {} - paramItems = [] - for loc in nexusDict.values(): - self.HDF5entries = [] - paramItems = self.RecurseH5Element(fp[loc],length=self.numparams) - for i in paramItems: - for j in i: - key = loc+'/'+'/'.join(j) - print(key) - obj = fp.get(key) - if obj is None: continue - if len(obj[()].shape) != 1: continue - # are all values the same? If so, put them into the comments - # for the first histogram only. 
If they are changing, note that - # here and later they will be put into every histogram. - if all(obj[0] == obj): - self.comments.append(f'{key.split("/")[-1]}={obj[0]}') - else: - fpbuffer['ParamTrackingVars'][key] = np.array(obj[()]) - except IOError: - print (f'Can not open or read file {filename}') - return False - finally: - fp.close() - # initialize the block selection - if self.selections is None or len(self.selections) == 0: - self.blknum = 0 - else: - self.blknum = min(self.selections) - return True - - def FillInParametics(self,fpbuffer,count): - # add parametric var as a comment - for key,arr in fpbuffer['ParamTrackingVars'].items(): - val = arr[count] - self.comments.append(f'{key.split("/")[-1]}={val}') - if 'temperature' in key: - self.Sample['Temperature'] = val # in K already - elif 'time' in key: - self.Sample['Time'] = val # should be seconds - elif 'chi' in key: - self.Sample['Chi'] = val # not sure if correct mapping - elif 'phi' in key: - self.Sample['Phi'] = val - elif 'omega' in key: - self.Sample['Omega'] = val def readNXazint1d(self, filename, fpbuffer={}, entry=None): '''Read HDF5 file in NeXus as produced by MAX IV as a NXazint1d. @@ -459,7 +176,6 @@ def readNXazint1d(self, filename, fpbuffer={}, entry=None): see https://nxazint-hdf5-nexus-3229ecbd09ba8a773fbbd8beb72cace6216dfd5063e1.gitlab-pages.esrf.fr/classes/contributed_definitions/NXazint1d.html ''' - #self.instmsg = 'HDF file' fileItems = { # arrays 'radial_axis':('NXdata','radial_axis'), @@ -484,11 +200,10 @@ def readNXazint1d(self, filename, fpbuffer={}, entry=None): esd = fpbuffer['I_errors'][self.blknum] w = np.where(esd==0,0,np.nan_to_num(1/esd**2)) except: - w = np.nan_to_num(1/y) # best we can do, alas w/o reported s.u.'s + w = np.nan_to_num(1/y) # best we can do, alas. 
W/o reported s.u.'s self.powderdata = [x,y,w,np.zeros_like(x),np.zeros_like(x),np.zeros_like(x)] self.FillInParametics(fpbuffer,self.blknum) self.powderentry[0] = filename - #self.powderentry[1] = Pos # position offset (never used, I hope) self.powderentry[2] = self.blknum # bank number self.idstring = f'#{self.blknum} {os.path.split(filename)[1][:60]}' self.instdict['wave'] = fpbuffer['wavelength'] @@ -547,13 +262,12 @@ def readNXazint2d(self, filename, fpbuffer={}, entry=None): esd = fpbuffer['I_errors'][numScan][numAzim] w = np.where(esd==0,0,np.nan_to_num(1/esd**2)) except: - w = np.nan_to_num(1/y) # best we can do, alas w/o reported s.u.'s + w = np.nan_to_num(1/y) # best we can do, alas. W/o reported s.u.'s self.powderdata = [x,y,w,np.zeros_like(x),np.zeros_like(x),np.zeros_like(x)] self.Sample['Azimuth'] = fpbuffer['azimuthal_axis'][numAzim] # add parametric var as a comment self.FillInParametics(fpbuffer,numScan) self.powderentry[0] = filename - #self.powderentry[1] = Pos # position offset (never used, I hope) self.powderentry[2] = self.blknum # bank number self.idstring = f'#{numScan} Azm={self.Sample["Azimuth"]} {os.path.split(filename)[1][:60]}' self.instdict['wave'] = fpbuffer['wavelength'] @@ -572,9 +286,232 @@ def readNXazint2d(self, filename, fpbuffer={}, entry=None): self.repeat = False return True + def readInNeXus(self,filename,fpbuffer,fileItems,fmt,entry=None): + '''Read in items from NeXus labeled sections of the HDF5 file. + + For files where we are reading from a NXsubentry section + rather than NXentry, variable `entry` is a pointer to the + selected NXsubentry section. If None, the NXentry + is found. Otherwise `entry` points to the NXsubentry + location, so only that portion of the tree is used. + ''' + self.comments = [] + doread = False # has the file already been read into a buffer?
+ for k in fileItems: + if k not in fpbuffer: + doread = True + break + if doread: + # Nope, need to fill the buffer + try: + fp = h5py.File(filename, 'r') + if entry is None: entry = getNeXusBase(fp) + # assemble list of used NeXus labels + nexusDict = {i:None for i in set([i[0] for i in fileItems.values()])} + # lookup keys for NeXus labels we will use + recurseNeXusEntries(fp,entry,nexusDict) + # save selected items from file into buffer + # Convert all entries read into values or non-HDF5 objects so file + # can be closed. + savedKeys = [] # things that have already been read + for k,loc in fileItems.items(): + if nexusDict[loc[0]] is None: + fpbuffer[k] = None + continue + key = '/'.join((nexusDict[loc[0]],)+loc[1:]) + savedKeys.append(key) + if key not in fp: + fpbuffer[k] = None + continue + val = fp[key] + if val.shape: + fpbuffer[k] = np.array(val) + elif 'float' in str(val.dtype): + fpbuffer[k] = float(val[()]) + self.comments.append(f'{k}={val[()]}') + elif 'int' in str(val.dtype): + fpbuffer[k] = int(val[()]) + else: + fpbuffer[k] = val[()].decode() + self.comments.append(f'{k}={fpbuffer[k]}') + if fpbuffer['unit'] != '2th': + print(f'{fmt} HDF5 file has units',fpbuffer['unit']) + self.errors = f'{fmt} only can be read with 2theta units' + return False + self.numparams = len(fpbuffer['I']) + # save arrays that are potentially tracking the parametric + # conditions into ParamTrackingVars. 
These arrays will have + # the same length as the number of datasets (self.numparams) + if 'validate' not in fmt: # skip if we are validating the file rather than reading it + fpbuffer['ParamTrackingVars'] = {} + paramItems = [] + for loc in nexusDict.values(): + if loc is None: continue # a NeXus label is not present + self.HDF5entries = [] + paramItems = self.RecurseH5Element(fp[loc],length=self.numparams) + for i in paramItems: + for j in i: + key = loc+'/'+'/'.join(j) + if key in savedKeys: continue + savedKeys.append(key) + obj = fp.get(key) + if obj is None: continue + if len(obj[()].shape) != 1: continue + # are all values the same? If so, put them into the comments + # for the first histogram only. If they are changing, note that + # here and later they will be put into every histogram. + if all(obj[0] == obj): + self.comments.append(f'{key.split("/")[-1]}={obj[0]}') + else: + fpbuffer['ParamTrackingVars'][key] = np.array(obj[()]) + except IOError: + print (f'Cannot open or read file {filename}') + self.errors = f'{fmt} Can not open or read file {filename}' + return False + finally: + fp.close() + # initialize the block selection + if self.selections is None or len(self.selections) == 0: + self.blknum = 0 + else: + self.blknum = min(self.selections) + return True + + def FillInParametics(self,fpbuffer,count): + '''put changing parametric variables into the comments + ''' + for key,arr in fpbuffer['ParamTrackingVars'].items(): + val = arr[count] + self.comments.append(f'{key.split("/")[-1]}={val}') + if 'temperature' in key: + self.Sample['Temperature'] = val # in K already + elif 'time' in key: + self.Sample['Time'] = val # should be seconds + elif 'chi' in key: + self.Sample['Chi'] = val # not sure if correct mapping + elif 'phi' in key: + self.Sample['Phi'] = val + elif 'omega' in key: + self.Sample['Omega'] = val + + # HDF5 support routines. 
+ def RecurseH5Element(self,obj,prefix=[],length=None): + '''Returns a list of entries of all keys in the HDF5 file + (or group) in `obj`. Note that `obj` can be a file object, created by + `h5py.File` or can be a subsetgroup `fp['key/subkey']`. + + If length is specified, only the entries with arrays of that + length are returned. + + The returned list is organized where: + * entry 0 is the top-level keys (/a, /b,...), + * entry 1 has the first level keys (/a/c /a/d, /b/d, /b/e,...) + * ... + ''' + try: + self.HDF5entries + except AttributeError: + self.HDF5entries = [] + depth = len(prefix) + if len(self.HDF5entries) < depth+1: + self.HDF5entries.append([]) + for i in obj: + nextprefix = prefix+[i] + if length is None: + self.HDF5entries[depth].append(nextprefix) + try: + typ = str(type(obj[i])) + except: + print(f'**Error** with key {prefix}/{i}') + continue + if length is not None and ".Group'" not in typ: + # get length of this obj[i] + try: + if len(obj[i]) == length: + self.HDF5entries[depth].append(nextprefix) + except TypeError: + continue + # check for link objects + l = obj.get(i, getlink=True) + if isinstance(l, h5py.ExternalLink): continue + if ".Group'" in typ: + #t = f'{prefix}/{i}' + #print(f'\n{nextprefix} contents {(60-len(t))*'='}') + self.RecurseH5Element(obj[i],nextprefix,length) + return self.HDF5entries + + def HDF5list(self, filename): + '''Shows the contents of an HDF5 file as a short listing. + This is not used for HDF5 reading, but is of help with a new + type of HDF5 file to see what is present. 
+ + :param filename: + ''' + def ShowH5NeXusName(obj,keylist): + key = '/'.join(keylist) + if "NX_class" in obj[key].attrs: + return obj[key].attrs["NX_class"] + + fp = h5py.File(filename, 'r') + #print(f'Contents of {filename}') + HDF5entries = self.RecurseH5Element(fp) + strings = [] + for i,j in enumerate(HDF5entries): + if not strings or strings[-1] != 60*'=': + strings.append(60*'=') + m = 0 + for k in j: + m = max(m,len('/'.join(k))) + for k in j: + nxname = ShowH5NeXusName(fp,k) + lbl = self.ShowH5Element(fp,k) + if '\n' in lbl: + lbl = '; '.join(lbl.split('\n')) + if len(lbl) > 50: + lbl = lbl[:50] + '...' + # if '\n' in lbl: + # lbl = lbl.split()[0] + '...' + if lbl != '(group)': strings.append(f"{'/'.join(k):{m}s} {lbl}") + if nxname: print(f"{'/'.join(k):{m}s} {lbl} {nxname}") + with open(filename+'_contents.txt', 'w') as fp: + for i in strings: fp.write(f'{i}\n') + + def ShowH5Element(self,obj,keylist): + '''Format the contents of an HDF5 entry as a single line. Not used for + reading files, only used in :meth:`HDF5list`, which is here for software + development. + ''' + k = '/'.join(keylist) + l = obj.get(k, getlink=True) + if isinstance(l, h5py.ExternalLink): + return f'link to file {l.filename}' + try: + typ = str(type(obj[k])) + except: + return f'**Error** with key {k}' + + if ".Dataset'" in typ: + datfmt = obj[k].dtype + if datfmt == 'O' or str(datfmt).startswith('|S'): + # byte string + return f'value={obj[k][()].decode()}' + elif datfmt == 'bool': # Bool + return f'value={bool(obj[k][()])}' + elif datfmt in (' Date: Sat, 18 Oct 2025 14:29:01 -0500 Subject: [PATCH 7/7] typo --- GSASII/imports/G2pwd_HDF5.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GSASII/imports/G2pwd_HDF5.py b/GSASII/imports/G2pwd_HDF5.py index c3d2a5d88..004146af6 100644 --- a/GSASII/imports/G2pwd_HDF5.py +++ b/GSASII/imports/G2pwd_HDF5.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- '''Use to read powder patterns from HDF5 files. 
At present the only supported -format are two NeXus variants from MaxIV named NXazint1d and NXazint1d, +format are two NeXus variants from MaxIV named NXazint1d and NXazint2d, but this can be expanded to handle more HDF5/NeXus formats ''' import os