Skip to content

Commit c0fc558

Browse files
committed
start work on NeXus/HDF5 importer for MAX IV
1 parent 0842796 commit c0fc558

File tree

2 files changed

+324
-0
lines changed

2 files changed

+324
-0
lines changed

GSASII/imports/G2pwd_HDF5.py

Lines changed: 322 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,322 @@
1+
# -*- coding: utf-8 -*-
2+
'''
3+
'''
4+
5+
from __future__ import division, print_function
6+
import os
7+
import sys
8+
try:
9+
import h5py
10+
except ImportError:
11+
h5py = None
12+
import numpy as np
13+
from .. import GSASIIobj as G2obj
14+
from .. import GSASIIfiles as G2fil
15+
#from .. import GSASIIpath
16+
17+
# things to do:
18+
# uncertainties
19+
# instr. parms
20+
#instprmList = [('Bank',1.0), ('Lam',0.413263), ('Polariz.',0.99),
21+
# ('SH/L',0.002), ('Type','PXC'), ('U',1.163), ('V',-0.126),
22+
# ('W',0.063), ('X',0.0), ('Y',0.0), ('Z',0.0), ('Zero',0.0)]
23+
# comments
24+
# dataset naming
25+
# sample parameters
26+
#sampleprmList = [('InstrName','APS 1-ID'), ('Temperature', 295.0)]
27+
# 'Scale': [1.0, True], 'Type': 'Debye-Scherrer',
28+
# 'Absorption': [0.0, False], 'DisplaceX': [0.0, False], 'DisplaceY': [0.0, False]# 'Pressure': 0.1, 'Time': 0.0, 'FreePrm1': 0.0,
29+
# 'FreePrm2': 0.0, 'FreePrm3': 0.0, 'Gonio. radius': 200.0, 'Omega': 0.0,
30+
# 'Chi': 0.0, 'Phi': 180.0, 'Azimuth': 0.0,
31+
# 'Materials': [{'Name': 'vacuum', 'VolFrac': 1.0}, {'Name': 'vacuum', 'VolFrac': 0.0}],
32+
# 'Thick': 1.0, 'Contrast': [0.0, 0.0], 'Trans': 1.0, 'SlitLen': 0.0}
33+
34+
35+
class HDF5_Reader(G2obj.ImportPowderData):
36+
'''Routine to read multiple powder patterns from an HDF5 file.
37+
38+
This importer targets NXazint1d and NXazint2d NeXus files from
39+
MAX IV.
40+
Perhaps in the future, other types of HDF5 powder data sources as well.
41+
42+
The main file is <file>.hdf or <file>.h5, but optionally sample and
43+
instrument parameters can be placed in <file>.samprm and <file>.instprm.
44+
Any parameters placed in that file will override values set in the HDF5
45+
file.
46+
'''
47+
mode = None
48+
def __init__(self):
49+
if h5py is None:
50+
self.UseReader = False
51+
msg = 'HDF5 Reader skipped because h5py module is not installed'
52+
G2fil.ImportErrorMsg(msg,{'HDF5 importer':['h5py','hdf5']})
53+
super(self.__class__,self).__init__( # fancy way to self-reference
54+
extensionlist=('.hdf','.h5'),strictExtension=True,
55+
formatName = 'MAX IV HDF5',longFormatName = 'HDF5 integrated scans')
56+
self.scriptable = True
57+
#self.Iparm = {} #only filled for EDS data
58+
59+
def ShowH5Element(self,obj,keylist):
60+
'''Format the contents of an HDF5 entry as a single line. Not used for
61+
reading files, only for use in :meth:`HDF5list`
62+
'''
63+
k = '/'.join(keylist)
64+
try:
65+
typ = str(type(obj[k]))
66+
except:
67+
return f'**Error** with key {k}'
68+
69+
if ".Dataset'" in typ:
70+
datfmt = obj[k].dtype
71+
if datfmt == 'O' or str(datfmt).startswith('|S'):
72+
# byte string
73+
return f'value={obj[k][()].decode()}'
74+
elif datfmt == 'bool': # Bool
75+
return f'value={bool(obj[k][()])}'
76+
elif datfmt in ('<f8', 'uint8', 'int64', '<f4'): # scalar value or array of values
77+
try:
78+
return f'length {len(obj[k][()])}'
79+
except:
80+
return f'value={obj[k][()]}'
81+
else:
82+
return f'dataset of type {repr(datfmt)}'
83+
elif ".Group'" in typ:
84+
return "(group)"
85+
else:
86+
return f'{prefix}/{i} is {type(obj[k])}'
87+
88+
def RecurseH5Element(self,obj,prefix=[]):
89+
'''Returns a list of entries of all keys in the HDF5 file
90+
(or group) in `obj`. Note that `obj` can be a file object, created by
91+
`h5py.File` or can be a subset `fp['key/subkey']`.
92+
93+
The returned list is organized where:
94+
* entry 0 is the top-level keys (/a, /b,...),
95+
* entry 1 has the first level keys (/a/c /a/d, /b/d, /b/e,...)
96+
* ...
97+
Not used for reading files, only for use in :meth:`HDF5list`
98+
'''
99+
try:
100+
self.HDF5entries
101+
except AttributeError:
102+
self.HDF5entries = []
103+
depth = len(prefix)
104+
if len(self.HDF5entries) < depth+1:
105+
self.HDF5entries.append([])
106+
for i in obj:
107+
nextprefix = prefix+[i]
108+
self.HDF5entries[depth].append(nextprefix)
109+
try:
110+
typ = str(type(obj[i]))
111+
except:
112+
print(f'**Error** with key {prefix}/{i}')
113+
continue
114+
if ".Group'" in typ:
115+
t = f'{prefix}/{i}'
116+
#print(f'\n{nextprefix} contents {(60-len(t))*'='}')
117+
self.RecurseH5Element(obj[i],nextprefix)
118+
return self.HDF5entries
119+
120+
121+
def HDF5list(self, filename):
122+
'''Shows the contents of an HDF5 file as a short listing.
123+
This is not used for HDF5 reading, but is of help with a new
124+
type of HDF5 file to see what is present.
125+
126+
:param filename:
127+
'''
128+
fp = h5py.File(filename, 'r')
129+
#print(f'Contents of {filename}')
130+
HDF5entries = self.RecurseH5Element(fp)
131+
strings = []
132+
for i,j in enumerate(HDF5entries):
133+
if not strings or strings[-1] != 60*'=':
134+
strings.append(60*'=')
135+
m = 0
136+
for k in j:
137+
m = max(m,len('/'.join(k)))
138+
for k in j:
139+
lbl = self.ShowH5Element(fp,k)
140+
if len(lbl) > 50:
141+
lbl = lbl[:50] + '...'
142+
if '\n' in lbl:
143+
lbl = lbl.split()[0] + '...'
144+
if lbl != '(group)': strings.append(f"{'/'.join(k):{m}s} {lbl}")
145+
with open(filename+'_contents.txt', 'w') as fp:
146+
for i in strings: fp.write(f'{i}\n')
147+
148+
def ContentsValidator(self, filename):
149+
'''Test if valid by seeing if the HDF5 library recognizes the file.
150+
Then get file type (currently MAX IV NeXus/NXazint[12]d only)
151+
'''
152+
from .. import GSASIIpath
153+
try:
154+
fp = h5py.File(filename, 'r')
155+
if 'entry' in fp: # NeXus
156+
if 'definition' in fp['/entry']:
157+
# MAX IV NXazint1d file
158+
if fp['/entry/definition'][()].decode() == 'NXazint1d':
159+
return True
160+
# MAX IV NXazint1d file
161+
if fp['/entry/definition'][()].decode() == 'NXazint2d':
162+
return True
163+
except IOError:
164+
return False
165+
finally:
166+
fp.close()
167+
return False
168+
169+
def Reader(self, filename, ParentFrame=None, **kwarg):
170+
'''Scan file for sections needed by defined file types (currently
171+
MAX IV NeXus/NXazint[12]d only)
172+
and then use appropriate routine to read the file.
173+
174+
Since usually there will be lots of scans in a single file,
175+
the goal is that the first pass should read the file into
176+
a buffer (if available) and subsequent calls will not
177+
need to access the file.
178+
'''
179+
fpbuffer = kwarg.get('buffer',{})
180+
if not hasattr(self,'blknum'):
181+
if self.selections is None or len(self.selections) == 0:
182+
self.blknum = 0
183+
else:
184+
self.blknum = min(self.selections)
185+
try:
186+
self.mode = None
187+
fp = h5py.File(filename, 'r')
188+
try:
189+
fp = h5py.File(filename, 'r')
190+
if 'entry' in fp: # NeXus
191+
if 'definition' in fp['/entry']:
192+
# MAX IV NXazint1d file
193+
if fp['/entry/definition'][()].decode() == 'NXazint1d':
194+
return self.readNXazint1d(filename, fpbuffer)
195+
196+
# MAX IV NXazint1d file
197+
#if fp['/entry/definition'][()].decode() == 'NXazint2d':
198+
# return self.readNXazint2d(filename, fpbuffer)
199+
# return True
200+
# https://nxazint-hdf5-nexus-3229ecbd09ba8a773fbbd8beb72cace6216dfd5063e1.gitlab-pages.esrf.fr/classes/contributed_definitions/NXazint2d.html
201+
except IOError:
202+
print ('cannot open file '+ filename)
203+
return False
204+
finally:
205+
fp.close()
206+
207+
print (f'Unknown type of HDF5 powder file {filename}')
208+
return False
209+
210+
def readNXazint1d(self, filename, fpbuffer={}):
211+
'''Read HDF5 file in NeXus as produced by MAX IV as a NXazint1d
212+
see https://nxazint-hdf5-nexus-3229ecbd09ba8a773fbbd8beb72cace6216dfd5063e1.gitlab-pages.esrf.fr/classes/contributed_definitions/NXazint1d.html
213+
'''
214+
self.instmsg = 'HDF file'
215+
doread = False # has the file already been read into a buffer?
216+
for i in ('blkmap','intenArr_blknum')+self.midassections:
217+
if i not in fpbuffer:
218+
doread = True
219+
break
220+
else: # do we have the right section buffered?
221+
doread = fpbuffer['intenArr_blknum'] != self.blknum
222+
223+
if doread: # read into buffer
224+
try:
225+
fp = h5py.File(filename, 'r')
226+
if 'blkmap' not in fpbuffer:
227+
fpbuffer['blkmap'] = list(fp.get('OmegaSumFrame').keys())
228+
if 'REtaMap' not in fpbuffer:
229+
fpbuffer['REtaMap'] = np.array(fp.get('REtaMap'))
230+
if 'intenArr' not in fpbuffer or fpbuffer.get('intenArr_blknum',-1) != self.blknum:
231+
fpbuffer['intenArr'] = np.array(fp.get('OmegaSumFrame').get(
232+
fpbuffer['blkmap'][self.blknum]))
233+
fpbuffer['intenArr_blknum'] = self.blknum
234+
self.azmcnt = -1
235+
if 'Omegas' not in fpbuffer:
236+
fpbuffer['Omegas'] = np.array(fp.get('Omegas'))
237+
except IOError:
238+
print ('cannot open file '+ filename)
239+
return False
240+
finally:
241+
fp.close()
242+
# get overriding sample & instrument parameters
243+
fpbuffer['sampleprm'] = {}
244+
samplefile = os.path.splitext(filename)[0] + '.samprm'
245+
if os.path.exists(samplefile):
246+
fp = open(samplefile,'r')
247+
S = fp.readline()
248+
while S:
249+
if not S.strip().startswith('#'):
250+
[item,val] = S[:-1].split(':')
251+
fpbuffer['sampleprm'][item.strip("'")] = eval(val)
252+
S = fp.readline()
253+
fp.close()
254+
fpbuffer['instprm'] = {}
255+
instfile = os.path.splitext(filename)[0] + '.instprm'
256+
if os.path.exists(instfile):
257+
self.instmsg = 'HDF and .instprm files'
258+
fp = open(instfile,'r')
259+
S = fp.readline()
260+
while S:
261+
if not S.strip().startswith('#'):
262+
[item,val] = S[:-1].split(':')
263+
fpbuffer['instprm'][item.strip("'")] = eval(val)
264+
S = fp.readline()
265+
fp.close()
266+
# look for a non-empty scan (lineout)
267+
use = [0]
268+
while sum(use) == 0 and self.azmcnt < fpbuffer['intenArr'].shape[1]:
269+
self.azmcnt += 1
270+
if self.azmcnt >= fpbuffer['intenArr'].shape[1]:
271+
return False
272+
use = fpbuffer['REtaMap'][3,:,self.azmcnt] != 0
273+
274+
# now transfer information into current histogram
275+
self.pwdparms['Instrument Parameters'] = [
276+
{'Type': ['PXC', 'PXC', False]},
277+
{}]
278+
inst = {}
279+
inst.update(instprmList)
280+
inst.update(fpbuffer['instprm'])
281+
for key,val in inst.items():
282+
self.pwdparms['Instrument Parameters'][0][key] = [val,val,False]
283+
samp = {}
284+
samp.update(sampleprmList)
285+
samp.update(fpbuffer['sampleprm'])
286+
for key,val in samp.items():
287+
self.Sample[key] = val
288+
self.numbanks=len(fpbuffer['blkmap'])
289+
x = fpbuffer['REtaMap'][1,:,self.azmcnt][use]
290+
y = fpbuffer['intenArr'][:,self.azmcnt][use]
291+
w = np.nan_to_num(1/y) # this is probably not correct
292+
eta = np.average(fpbuffer['REtaMap'][2,:,self.azmcnt][use])
293+
self.pwdparms['Instrument Parameters'][0]['Azimuth'] = [90-eta,90-eta,False]
294+
self.pwdparms['Instrument Parameters'][0]['Bank'] = [self.azmcnt,self.azmcnt,False]
295+
# self.Sample['Gonio. radius'] = float(S.split('=')[1])
296+
# self.Sample['Omega'] = float(S.split('=')[1])
297+
# self.Sample['Chi'] = float(S.split('=')[1])
298+
self.Sample['Phi'] = Omega = fpbuffer['Omegas'][self.blknum]
299+
self.powderdata = [x,y,w,np.zeros_like(x),np.zeros_like(x),np.zeros_like(x)]
300+
#self.comments = comments[selblk]
301+
self.powderentry[0] = filename
302+
#self.powderentry[1] = Pos # position offset (never used, I hope)
303+
self.powderentry[2] = self.blknum # bank number
304+
self.idstring = f'{os.path.split(filename)[1][:10]} omega={Omega} eta={eta}'
305+
# if GSASIIpath.GetConfigValue('debug'): print(
306+
# f'Read entry #{self.azmcnt} img# {self.blknum} from file {filename}')
307+
# are there more lineouts after this one in current image to read?
308+
self.repeat = sum(sum(fpbuffer['REtaMap'][3,:,self.azmcnt+1:])) != 0
309+
if self.repeat: return True
310+
# if not, are there more [selected] images that after this to be read?
311+
if self.blknum < self.numbanks-1:
312+
if self.selections is None or len(self.selections) == 0:
313+
self.blknum += 1
314+
self.repeat = True
315+
else:
316+
try:
317+
s = sorted(self.selections)
318+
self.blknum = s[s.index(self.blknum)+1]
319+
self.repeat = True
320+
except IndexError: # last selected image has been read
321+
self.repeat = False
322+
return True

GSASII/imports/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from . import G2pwd_FP
2525
from . import G2pwd_GPX
2626
from . import G2pwd_MIDAS
27+
from . import G2pwd_HDF5
2728
from . import G2pwd_Panalytical
2829
from . import G2pwd_csv
2930
from . import G2pwd_fxye
@@ -63,6 +64,7 @@
6364
"G2pwd_FP",
6465
"G2pwd_GPX",
6566
"G2pwd_MIDAS",
67+
"G2pwd_HDF5",
6668
"G2pwd_Panalytical",
6769
"G2pwd_csv",
6870
"G2pwd_fxye",

0 commit comments

Comments
 (0)