10
10
from astroquery .utils import async_to_sync
11
11
# import configurable items declared in __init__.py
12
12
from astroquery .linelists .cdms import conf
13
- from astroquery .jplspec import lookup_table
14
13
from astroquery .exceptions import InvalidQueryError , EmptyResponseError
15
14
15
+ import re
16
+ import string
16
17
17
18
__all__ = ['CDMS' , 'CDMSClass' ]
18
19
@@ -52,6 +53,13 @@ def query_lines_async(self, min_frequency, max_frequency, *,
52
53
molecule : list, string of regex if parse_name_locally=True, optional
53
54
Identifiers of the molecules to search for. If this parameter
54
55
is not provided the search will match any species. Default is 'All'.
56
+ As a first pass, the molecule will be searched for with a direct
57
+ string match. If no string match is found, a regular expression
58
+ match is attempted. Note that if the molecule name regex contains
59
+ parentheses, they must be escaped. For example, 'H2C(CN)2.*' must be
60
+ specified as 'H2C\\(CN\\)2.*' (but because of the first-attempt
61
+ full-string matching, 'H2C(CN)2' will match that molecule
62
+ successfully).
55
63
56
64
temperature_for_intensity : float
57
65
The temperature to use when computing the intensity Smu^2. Set
@@ -126,12 +134,12 @@ def query_lines_async(self, min_frequency, max_frequency, *,
126
134
if parse_name_locally :
127
135
self .lookup_ids = build_lookup ()
128
136
luts = self .lookup_ids .find (molecule , flags )
129
- payload ['Molecules' ] = tuple (f"{ val :06d} { key } "
130
- for key , val in luts .items ())[0 ]
131
- if len (molecule ) == 0 :
137
+ if len (luts ) == 0 :
132
138
raise InvalidQueryError ('No matching species found. Please '
133
139
'refine your search or read the Docs '
134
140
'for pointers on how to search.' )
141
+ payload ['Molecules' ] = tuple (f"{ val :06d} { key } "
142
+ for key , val in luts .items ())[0 ]
135
143
else :
136
144
payload ['Molecules' ] = molecule
137
145
@@ -187,12 +195,14 @@ def _parse_result(self, response, verbose=False):
187
195
188
196
ELO: Lower state energy in cm^{-1} relative to the ground state.
189
197
GUP: Upper state degeneracy.
190
- TAG: Species tag or molecular identifier.
191
- A negative value flags that the line frequency has
192
- been measured in the laboratory. The absolute value of TAG is then the
193
- species tag and ERR is the reported experimental error. The three most
194
- significant digits of the species tag are coded as the mass number of
195
- the species.
198
+ MOLWT: The first half of the molecular weight tag, which is the mass in atomic
199
+ mass units (Daltons).
200
+ TAG: Species tag or molecular identifier. This only includes the
201
+ last 3 digits of the CDMS tag
202
+
203
+ A negative value of MOLWT flags that the line frequency has been
204
+ measured in the laboratory. We record this boolean in the 'Lab'
205
+ column. ERR is the reported experimental error.
196
206
197
207
QNFMT: Identifies the format of the quantum numbers
198
208
Ju/Ku/vu and Jl/Kl/vl are the upper/lower QNs
@@ -215,15 +225,21 @@ def _parse_result(self, response, verbose=False):
215
225
'DR' : 36 ,
216
226
'ELO' : 38 ,
217
227
'GUP' : 48 ,
218
- 'TAG' : 51 ,
219
- 'QNFMT' : 57 ,
228
+ 'MOLWT' : 51 ,
229
+ 'TAG' : 54 ,
230
+ 'QNFMT' : 58 ,
220
231
'Ju' : 61 ,
221
232
'Ku' : 63 ,
222
233
'vu' : 65 ,
223
- 'Jl' : 67 ,
224
- 'Kl' : 69 ,
225
- 'vl' : 71 ,
226
- 'F' : 73 ,
234
+ 'F1u' : 67 ,
235
+ 'F2u' : 69 ,
236
+ 'F3u' : 71 ,
237
+ 'Jl' : 73 ,
238
+ 'Kl' : 75 ,
239
+ 'vl' : 77 ,
240
+ 'F1l' : 79 ,
241
+ 'F2l' : 81 ,
242
+ 'F3l' : 83 ,
227
243
'name' : 89 }
228
244
229
245
result = ascii .read (text , header_start = None , data_start = 0 ,
@@ -235,6 +251,18 @@ def _parse_result(self, response, verbose=False):
235
251
result ['FREQ' ].unit = u .MHz
236
252
result ['ERR' ].unit = u .MHz
237
253
254
+ result ['Lab' ] = result ['MOLWT' ] < 0
255
+ result ['MOLWT' ] = np .abs (result ['MOLWT' ])
256
+ result ['MOLWT' ].unit = u .Da
257
+
258
+ for suf in 'ul' :
259
+ for qn in ('J' , 'v' , 'K' , 'F1' , 'F2' , 'F3' ):
260
+ qnind = qn + suf
261
+ if result [qnind ].dtype != int :
262
+ intcol = np .array (list (map (parse_letternumber , result [qnind ])),
263
+ dtype = int )
264
+ result [qnind ] = intcol
265
+
238
266
# if there is a crash at this step, something went wrong with the query
239
267
# and the _last_query_temperature was not set. This shouldn't ever
240
268
# happen, but, well, I anticipate it will.
@@ -303,12 +331,66 @@ def tryfloat(x):
303
331
CDMS = CDMSClass ()
304
332
305
333
334
def parse_letternumber(st):
    """
    Convert one of CDMS's two-character quantum numbers to an integer.

    Each quantum number occupies exactly two characters in a CDMS catalog
    line, so values that do not fit in two digits are letter-encoded:

    * Capital letters encode values above 99: the letter supplies the
      leading digits starting at 10, so 'A0' is 100 and 'Z9' is 359.
    * Lowercase letters encode negative values below -9 (-1 through -9
      already fit as a sign plus a digit): the letter supplies the leading
      digits starting at 1, so 'a0' is -10, 'b0' is -20 and 'z9' is -269.

    Plain numeric strings (e.g. '12', '-3') are passed through unchanged.
    Half-integer quanta are rounded up by CDMS before encoding, so the
    result is always an integer.

    Parameters
    ----------
    st : str
        The raw quantum-number field.

    Returns
    -------
    int
        The decoded quantum number.

    Raises
    ------
    ValueError
        If the expanded string is not a valid integer literal.
    """
    lowercase = string.ascii_lowercase
    uppercase = string.ascii_uppercase

    def expand(char):
        # Capital letter -> two leading digits: A=10, B=11, ..., Z=35.
        if char in uppercase:
            return str(uppercase.index(char) + 10)
        # Lowercase letter -> sign plus leading digits: a=-1, ..., z=-26.
        # The offset is 1 rather than 10 because the negative range resumes
        # directly below -9.
        if char in lowercase:
            return '-' + str(lowercase.index(char) + 1)
        # Digits, signs and padding are kept verbatim.
        return char

    return int(''.join(expand(char) for char in st))
350
+
351
+
352
class Lookuptable(dict):
    """
    Dictionary with a helper for searching its keys by substring or regex.
    """

    def find(self, st, flags):
        """
        Return the entries whose key matches ``st``.

        A key matches if ``st`` occurs in it as a plain substring or, failing
        that, if ``st`` interpreted as a regular expression is found anywhere
        in it. The plain-substring attempt is what lets names containing
        regex metacharacters (e.g. parentheses) match without escaping.

        Parameters
        ----------
        st : str
            String to search for; also compiled as a regular expression.
            Can be entered non-specific for broader results
            ('H2O' yields 'H2O' but will also yield 'HCCCH2OD')
            or as the specific desired regular expression for
            catered results, for example: ('H2O$' yields only 'H2O')

        flags : int
            Regular expression flags.

        Returns
        -------
        dict
            The ``{key: value}`` pairs of this table whose key matched.
            Empty if nothing matched.

        Raises
        ------
        re.error
            If a regex attempt is needed and ``st`` is not a valid pattern.
        """
        matches = {}
        # Compiled on first use only: when every key already matches as a
        # plain substring, ``st`` never has to be a valid regular expression.
        pattern = None

        for key, value in self.items():
            # Coerce once so that both tests see the same text and
            # non-string keys cannot break the substring test.
            name = str(key)

            # note that the string-match attempt here differs from the
            # jplspec implementation
            if st in name:
                matches[key] = value
                continue

            if pattern is None:
                pattern = re.compile(st, flags)
            if pattern.search(name):
                matches[key] = value

        return matches
386
+
387
+
306
388
def build_lookup():
    """
    Build a `Lookuptable` from the CDMS species table.

    Returns
    -------
    Lookuptable
        Mapping built by pairing the entries of ``result[1]`` (keys) with
        those of ``result[0]`` (values).
    """
    species = CDMS.get_species_table()

    # NOTE(review): the original inline notes call index 1 the NAME column
    # and index 0 the TAG column -- confirm against get_species_table().
    names = list(species[1][:])
    tags = list(species[0][:])

    return Lookuptable(dict(zip(names, tags)))
0 commit comments