Skip to content

Commit 0f2c8b9

Browse files
authored
Merge pull request #2143 from keflavich/cdms_classic
CDMS interface
2 parents ba0f0ed + 3368a15 commit 0f2c8b9

File tree

15 files changed

+1891
-3
lines changed

15 files changed

+1891
-3
lines changed

.rtd-environment.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,6 @@ dependencies:
1111
- matplotlib
1212
- numpy
1313
- requests
14+
- scipy
1415
- pip:
1516
- sphinx-astropy

CHANGES.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
New Tools and Services
55
----------------------
66

7+
- linelists.cdms molecular line catalog query tool provides an interface to the
8+
Cologne Database for Molecular Spectroscopy [#2143]
79

810
Service fixes and enhancements
911
------------------------------

astroquery/linelists/__init__.py

Whitespace-only changes.

astroquery/linelists/cdms/__init__.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Licensed under a 3-clause BSD style license - see LICENSE.rst
2+
"""
3+
CDMS catalog
4+
------------
5+
Cologne Database for Molecular Spectroscopy query tool
6+
7+
8+
"""
9+
from astropy import config as _config
10+
11+
12+
class Conf(_config.ConfigNamespace):
    """
    Configuration parameters for `astroquery.linelists.cdms`.
    """
    # Endpoint that receives the search form submissions.
    server = _config.ConfigItem(
        defaultvalue='https://cdms.astro.uni-koeln.de/cgi-bin/cdmssearch',
        description='CDMS Search and Conversion Form URL.')

    # Default timeout handed to the HTTP requests made against the server.
    timeout = _config.ConfigItem(
        defaultvalue=60,
        description='Time limit for connecting to the CDMS server.')
23+
24+
25+
conf = Conf()
26+
27+
from .core import CDMS, CDMSClass
28+
29+
__all__ = ['CDMS', 'CDMSClass',
30+
'Conf', 'conf',
31+
]

astroquery/linelists/cdms/core.py

Lines changed: 314 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,314 @@
1+
# Licensed under a 3-clause BSD style license - see LICENSE.rst
2+
import numpy as np
3+
import os
4+
import warnings
5+
6+
from bs4 import BeautifulSoup
7+
import astropy.units as u
8+
from astropy.io import ascii
9+
from astroquery.query import BaseQuery
10+
from astroquery.utils import async_to_sync
11+
# import configurable items declared in __init__.py
12+
from astroquery.linelists.cdms import conf
13+
from astroquery.jplspec import lookup_table
14+
from astroquery.exceptions import InvalidQueryError, EmptyResponseError
15+
16+
17+
__all__ = ['CDMS', 'CDMSClass']
18+
19+
20+
def data_path(filename):
    """
    Return the path of ``filename`` inside the ``data`` directory that sits
    next to this module.

    Parameters
    ----------
    filename : str
        Name of the data file.

    Returns
    -------
    path : str
        ``<directory of this module>/data/<filename>``.
    """
    module_dir = os.path.dirname(__file__)
    return os.path.join(module_dir, 'data', filename)
23+
24+
25+
@async_to_sync
class CDMSClass(BaseQuery):
    """
    Query tool for the Cologne Database for Molecular Spectroscopy (CDMS).
    """
    # use the Configuration Items imported from __init__.py
    URL = conf.server
    TIMEOUT = conf.timeout

    def query_lines_async(self, min_frequency, max_frequency, *,
                          min_strength=-500, molecule='All',
                          temperature_for_intensity=300, flags=0,
                          parse_name_locally=False, get_query_payload=False,
                          cache=True):
        """
        Creates an HTTP POST request based on the desired parameters and
        returns a response.

        Parameters
        ----------
        min_frequency : `astropy.units.Quantity` or None
            Minimum frequency (or any spectral() equivalent).
            ``None`` can be interpreted as zero.
        max_frequency : `astropy.units.Quantity` or None
            Maximum frequency (or any spectral() equivalent).
            ``None`` can be interpreted as infinite.
            The frequency limits are only added to the request when both
            ``min_frequency`` and ``max_frequency`` are given.

        min_strength : int, optional
            Minimum strength in catalog units, the default is -500

        molecule : list, string of regex if parse_name_locally=True, optional
            Identifiers of the molecules to search for. If this parameter
            is not provided the search will match any species. Default is 'All'.

        temperature_for_intensity : float
            The temperature to use when computing the intensity Smu^2.  Set
            to 300 by default for compatibility with JPL and the native
            catalog format, which defaults to 300.
            ** If temperature is set to zero, the return value in this column
            will be the Einstein A value **

        flags : int, optional
            Regular expression flags. Default is set to 0

        parse_name_locally : bool, optional
            When set to True it allows the method to parse through catdir.cat
            (see `get_species_table`) in order to match the regex inputted in
            the molecule parameter and request the corresponding tags of the
            matches instead. Only the first match is sent to the server.
            Default is set to False

        get_query_payload : bool, optional
            When set to `True` the method returns the HTTP request
            parameters as a list of ``(key, value)`` pairs instead of
            sending the request. Default value is set to False

        cache : bool
            Cache the request and, for repeat identical requests, reuse the
            cache?

        Returns
        -------
        response : `requests.Response`
            The HTTP response returned from the service.  This is the
            response of the second (GET) request, which fetches the result
            table referenced by the page returned from the POST request.

        Raises
        ------
        InvalidQueryError
            If ``min_frequency`` is larger than ``max_frequency``, or if
            ``parse_name_locally`` is set and no species matches ``molecule``.
        EmptyResponseError
            If the page returned by the server does not reference a result
            table.

        Examples
        --------
        >>> table = CDMS.query_lines(min_frequency=100*u.GHz,
        ...                          max_frequency=110*u.GHz,
        ...                          min_strength=-500,
        ...                          molecule="018505 H2O+") # doctest: +REMOTE_DATA
        >>> print(table) # doctest: +SKIP
        FREQ ERR LGINT DR ELO GUP TAG QNFMT Ju Ku vu Jl Kl vl F name
        MHz MHz MHz nm2 1 / cm
        ----------- ----- ------- --- -------- --- ----- ----- --- --- --- --- --- --- ----------- ----
        103614.4941 2.237 -4.1826 3 202.8941 8 18505 2356 4 1 4 4 0 4 3 2 1 3 0 3 H2O+
        107814.8763 148.6 -5.4438 3 878.1191 12 18505 2356 6 5 1 7 1 6 7 4 4 8 1 7 H2O+
        107822.3481 148.6 -5.3846 3 878.1178 14 18505 2356 6 5 1 7 1 7 7 4 4 8 1 8 H2O+
        107830.1216 148.6 -5.3256 3 878.1164 16 18505 2356 6 5 1 7 1 8 7 4 4 8 1 9 H2O+
        """
        # first initialize the dictionary of HTTP request parameters
        payload = dict()

        if min_frequency is not None and max_frequency is not None:
            # allow setting payload without having *ANY* valid frequencies set
            min_frequency = min_frequency.to(u.GHz, u.spectral())
            max_frequency = max_frequency.to(u.GHz, u.spectral())
            if min_frequency > max_frequency:
                raise InvalidQueryError("min_frequency must be less than max_frequency")

            payload['MinNu'] = min_frequency.value
            payload['MaxNu'] = max_frequency.value

        payload['UnitNu'] = 'GHz'
        payload['StrLim'] = min_strength
        payload['temp'] = temperature_for_intensity
        payload['logscale'] = 'yes'
        payload['mol_sort_query'] = 'tag'
        payload['sort'] = 'frequency'
        payload['output'] = 'text'
        payload['but_action'] = 'Submit'

        # changes interpretation of query: _parse_result reads this to decide
        # how the intensity column has to be labelled
        self._last_query_temperature = temperature_for_intensity

        if molecule is not None:
            if parse_name_locally:
                self.lookup_ids = build_lookup()
                luts = self.lookup_ids.find(molecule, flags)
                # Check the lookup *result* (not the search string), and do
                # it before picking the first match, so that an unsuccessful
                # search is reported as an invalid query rather than as an
                # IndexError.
                if len(luts) == 0:
                    raise InvalidQueryError('No matching species found. Please '
                                            'refine your search or read the Docs '
                                            'for pointers on how to search.')
                # only the first matching species is requested
                payload['Molecules'] = tuple(f"{val:06d} {key}"
                                             for key, val in luts.items())[0]
            else:
                payload['Molecules'] = molecule

        payload = list(payload.items())

        if get_query_payload:
            return payload
        # BaseQuery classes come with a _request method that includes a
        # built-in caching system
        response = self._request(method='POST', url=self.URL, data=payload,
                                 timeout=self.TIMEOUT, cache=cache)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # The returned page is a frameset; look for the frame whose source
        # contains 'tab' but not 'head' and treat it as the result table.
        urls = [x.attrs['src'] for x in soup.find_all('frame')]
        for url in urls:
            if 'tab' in url and 'head' not in url:
                break
        else:
            raise EmptyResponseError("Did not find table in response")

        baseurl = self.URL.split('cgi-bin')[0]
        fullurl = f'{baseurl}/{url}'

        response2 = self._request(method='GET', url=fullurl,
                                  timeout=self.TIMEOUT, cache=cache)

        return response2

    def _parse_result(self, response, verbose=False):
        """
        Parse a response into an `~astropy.table.Table`

        The catalog data files are composed of fixed-width card images, with
        one card image per spectral line.  The format of each card image is
        similar to the JPL version:
        FREQ, ERR, LGINT, DR,  ELO, GUP, TAG, QNFMT,  QN',  QN"
        (F13.4,F8.4, F8.4,  I2,F10.4,  I3,  I7,    I4,  6I2,  6I2)
        but the formats are somewhat different and are encoded below.
        The first several entries are the same, but more detail is appended at
        the end of the line

        FREQ:  Frequency of the line in MHz.
        ERR:   Estimated or experimental error of FREQ in MHz.
        LGINT: Base 10 logarithm of the integrated intensity in units of nm^2 MHz at
            300 K.  If the last query was made with a temperature of zero,
            this column is renamed to LGAIJ and given units of 1/s.

        DR:    Degrees of freedom in the rotational partition function (0 for atoms,
            2 for linear molecules, and 3 for nonlinear molecules).

        ELO:   Lower state energy in cm^{-1} relative to the ground state.
        GUP:   Upper state degeneracy.
        TAG:   Species tag or molecular identifier.
            A negative value flags that the line frequency has
            been measured in the laboratory.  The absolute value of TAG is then the
            species tag and ERR is the reported experimental error.  The three most
            significant digits of the species tag are coded as the mass number of
            the species.

        QNFMT: Identifies the format of the quantum numbers
        Ju/Ku/vu and Jl/Kl/vl are the upper/lower QNs
        F: the hyperfine lines
        name: molecule name

        The full detailed description is here:
        https://cdms.astro.uni-koeln.de/classic/predictions/description.html#description

        Parameters
        ----------
        response : `requests.Response`
            The response holding the result table, as returned by
            `query_lines_async`.
        verbose : bool, optional
            Currently unused.

        Returns
        -------
        result : `~astropy.table.Table`
            The parsed line list.

        Raises
        ------
        EmptyResponseError
            If the response reports that zero lines were found.
        """

        if 'Zero lines were found' in response.text:
            raise EmptyResponseError(f"Response was empty; message was '{response.text}'.")

        soup = BeautifulSoup(response.text, 'html.parser')
        text = soup.find('pre').text

        # start position of each fixed-width column in a card image
        starts = {'FREQ': 0,
                  'ERR': 14,
                  'LGINT': 25,
                  'DR': 36,
                  'ELO': 38,
                  'GUP': 48,
                  'TAG': 51,
                  'QNFMT': 57,
                  'Ju': 61,
                  'Ku': 63,
                  'vu': 65,
                  'Jl': 67,
                  'Kl': 69,
                  'vl': 71,
                  'F': 73,
                  'name': 89}

        result = ascii.read(text, header_start=None, data_start=0,
                            comment=r'THIS|^\s{12,14}\d{4,6}.*',
                            names=list(starts.keys()),
                            col_starts=list(starts.values()),
                            format='fixed_width', fast_reader=False)

        result['FREQ'].unit = u.MHz
        result['ERR'].unit = u.MHz

        # if there is a crash at this step, something went wrong with the query
        # and the _last_query_temperature was not set.  This shouldn't ever
        # happen, but, well, I anticipate it will.
        if self._last_query_temperature == 0:
            result.rename_column('LGINT', 'LGAIJ')
            result['LGAIJ'].unit = u.s**-1
        else:
            result['LGINT'].unit = u.nm**2 * u.MHz
        result['ELO'].unit = u.cm**(-1)

        return result

    def get_species_table(self, catfile='catdir.cat'):
        """
        A directory of the catalog is found in a file called 'catdir.cat.'

        The table is derived from https://cdms.astro.uni-koeln.de/classic/entries/partition_function.html

        Parameters
        -----------
        catfile : str, name of file, default 'catdir.cat'
            The catalog file, installed locally along with the package

        Returns
        --------
        Table: `~astropy.table.Table`
            | tag : The species tag or molecular identifier.
            | molecule : An ASCII name for the species.
            | #line : The number of lines in the catalog.
            | lg(Q(T)) : One column per temperature T holding the base 10
              logarithm of the partition function; entries that cannot be
              converted to a float are stored as NaN.  The temperatures are
              recorded in the table and column metadata under the key
              'Temperature (K)'.

        """

        # honour the requested catalog file instead of a hard-coded name
        result = ascii.read(data_path(catfile), format='csv',
                            delimiter='|')

        # partition-function column name -> temperature in K
        temperatures = {'lg(Q(1000))': 1000.0,
                        'lg(Q(500))': 500.0,
                        'lg(Q(300))': 300.0,
                        'lg(Q(225))': 225.0,
                        'lg(Q(150))': 150.0,
                        'lg(Q(75))': 75.0,
                        'lg(Q(37.5))': 37.5,
                        'lg(Q(18.75))': 18.75,
                        'lg(Q(9.375))': 9.375,
                        'lg(Q(5.000))': 5.0,
                        'lg(Q(2.725))': 2.725}

        def tryfloat(x):
            try:
                return float(x)
            except ValueError:
                return np.nan

        for colname, temperature in temperatures.items():
            # Convert first, annotate second: assigning the converted array
            # can replace the column object, which would discard metadata
            # attached to the old column.
            result[colname] = np.array([tryfloat(val) for val in result[colname]])
            result[colname].meta = {'Temperature (K)': temperature}

        result.meta = {'Temperature (K)': list(temperatures.values())}

        return result
301+
302+
303+
CDMS = CDMSClass()
304+
305+
306+
def build_lookup():
    """
    Build a species lookup table from the locally stored catalog directory.

    Returns
    -------
    lookuptable : `astroquery.jplspec.lookup_table.Lookuptable`
        Lookup table mapping the entries of the second catalog column
        (species name) to the entries of the first column (species tag).
    """
    species = CDMS.get_species_table()

    # Select the columns by position through ``Table.columns``.  Integer
    # indexing on the table itself (``table[1]``) selects a *row*, so the
    # former ``result[1][:]`` depended on how a row handles slicing and
    # needed at least two rows to be present.
    names = list(species.columns[1])  # NAME column
    tags = list(species.columns[0])  # TAG column

    # name -> tag dictionary wrapped in the searchable lookup class
    return lookup_table.Lookuptable(dict(zip(names, tags)))

0 commit comments

Comments
 (0)