|
| 1 | +# Licensed under a 3-clause BSD style license - see LICENSE.rst |
| 2 | +import numpy as np |
| 3 | +import os |
| 4 | +import warnings |
| 5 | + |
| 6 | +from bs4 import BeautifulSoup |
| 7 | +import astropy.units as u |
| 8 | +from astropy.io import ascii |
| 9 | +from astroquery.query import BaseQuery |
| 10 | +from astroquery.utils import async_to_sync |
| 11 | +# import configurable items declared in __init__.py |
| 12 | +from astroquery.linelists.cdms import conf |
| 13 | +from astroquery.jplspec import lookup_table |
| 14 | +from astroquery.exceptions import InvalidQueryError, EmptyResponseError |
| 15 | + |
| 16 | + |
| 17 | +__all__ = ['CDMS', 'CDMSClass'] |
| 18 | + |
| 19 | + |
def data_path(filename):
    """
    Return the path to ``filename`` inside the ``data`` directory that sits
    next to this module.

    Parameters
    ----------
    filename : str
        Name of the file inside the ``data`` directory.

    Returns
    -------
    path : str
        ``<directory of this module>/data/<filename>``.
    """
    module_dir = os.path.dirname(__file__)
    return os.path.join(module_dir, 'data', filename)
| 23 | + |
| 24 | + |
@async_to_sync
class CDMSClass(BaseQuery):
    """
    Query class for the CDMS line catalog search form
    (see https://cdms.astro.uni-koeln.de/classic/predictions/description.html).

    ``query_lines_async`` submits the search form and fetches the result
    page, ``_parse_result`` converts that page into a table, and
    ``get_species_table`` reads the species directory shipped with the
    package.
    """
    # use the Configuration Items imported from __init__.py
    URL = conf.server
    TIMEOUT = conf.timeout

    def query_lines_async(self, min_frequency, max_frequency, *,
                          min_strength=-500, molecule='All',
                          temperature_for_intensity=300, flags=0,
                          parse_name_locally=False, get_query_payload=False,
                          cache=True):
        """
        Creates an HTTP POST request based on the desired parameters and
        returns a response.

        Parameters
        ----------
        min_frequency : `astropy.units.Quantity` or None
            Minimum frequency (or any spectral() equivalent).
            ``None`` can be interpreted as zero.
        max_frequency : `astropy.units.Quantity` or None
            Maximum frequency (or any spectral() equivalent).
            ``None`` can be interpreted as infinite.

        min_strength : int, optional
            Minimum strength in catalog units, the default is -500

        molecule : list, string of regex if parse_name_locally=True, optional
            Identifiers of the molecules to search for. If this parameter
            is not provided the search will match any species. Default is 'All'.

        temperature_for_intensity : float
            The temperature to use when computing the intensity Smu^2.  Set
            to 300 by default for compatibility with JPL and the native
            catalog format, which defaults to 300.
            ** If temperature is set to zero, the return value in this column
            will be the Einstein A value **

        flags : int, optional
            Regular expression flags. Default is set to 0

        parse_name_locally : bool, optional
            When set to True it allows the method to parse through catdir.cat
            (see `get_species_table`) in order to match the regex inputted in
            the molecule parameter and request the corresponding tags of the
            matches instead. Only the first match is sent to the server.
            Default is set to False

        get_query_payload : bool, optional
            When set to `True` the method returns the HTTP request
            parameters, as a list of ``(key, value)`` tuples, instead of
            sending the request. Default value is set to False

        cache : bool
            Cache the request and, for repeat identical requests, reuse the
            cache?

        Returns
        -------
        response : `requests.Response`
            The HTTP response returned from the service.

        Raises
        ------
        InvalidQueryError
            If ``min_frequency`` is larger than ``max_frequency``, or if
            ``parse_name_locally`` is set and no species matches ``molecule``.
        EmptyResponseError
            If the page returned by the server contains no frame pointing to
            a result table.

        Examples
        --------
        >>> table = CDMS.query_lines(min_frequency=100*u.GHz,
        ...                          max_frequency=110*u.GHz,
        ...                          min_strength=-500,
        ...                          molecule="018505 H2O+") # doctest: +REMOTE_DATA
        >>> print(table) # doctest: +SKIP
            FREQ     ERR   LGINT   DR   ELO    GUP  TAG  QNFMT  Ju  Ku  vu  Jl  Kl  vl      F      name
            MHz      MHz  MHz nm2      1 / cm
        ----------- ----- ------- --- -------- --- ----- ----- --- --- --- --- --- --- ----------- ----
        103614.4941 2.237 -4.1826   3 202.8941   8 18505  2356   4   1   4   4   0   4 3 2 1 3 0 3 H2O+
        107814.8763 148.6 -5.4438   3 878.1191  12 18505  2356   6   5   1   7   1   6 7 4 4 8 1 7 H2O+
        107822.3481 148.6 -5.3846   3 878.1178  14 18505  2356   6   5   1   7   1   7 7 4 4 8 1 8 H2O+
        107830.1216 148.6 -5.3256   3 878.1164  16 18505  2356   6   5   1   7   1   8 7 4 4 8 1 9 H2O+
        """
        # first initialize the dictionary of HTTP request parameters
        payload = dict()

        if min_frequency is not None and max_frequency is not None:
            # allow setting payload without having *ANY* valid frequencies set
            min_frequency = min_frequency.to(u.GHz, u.spectral())
            max_frequency = max_frequency.to(u.GHz, u.spectral())
            if min_frequency > max_frequency:
                raise InvalidQueryError("min_frequency must be less than max_frequency")

            payload['MinNu'] = min_frequency.value
            payload['MaxNu'] = max_frequency.value

        payload['UnitNu'] = 'GHz'
        payload['StrLim'] = min_strength
        payload['temp'] = temperature_for_intensity
        payload['logscale'] = 'yes'
        payload['mol_sort_query'] = 'tag'
        payload['sort'] = 'frequency'
        payload['output'] = 'text'
        payload['but_action'] = 'Submit'

        # changes interpretation of query: _parse_result reads this to decide
        # how to label the intensity column
        self._last_query_temperature = temperature_for_intensity

        if molecule is not None:
            if parse_name_locally:
                self.lookup_ids = build_lookup()
                luts = self.lookup_ids.find(molecule, flags)
                # Check for "no match" *before* indexing the first match;
                # otherwise an empty lookup fails with an IndexError and the
                # explanatory error below is never raised.
                if len(luts) == 0:
                    raise InvalidQueryError('No matching species found. Please '
                                            'refine your search or read the Docs '
                                            'for pointers on how to search.')
                # only the first match is requested
                payload['Molecules'] = tuple(f"{val:06d} {key}"
                                             for key, val in luts.items())[0]
            else:
                payload['Molecules'] = molecule

        payload = list(payload.items())

        if get_query_payload:
            return payload
        # BaseQuery classes come with a _request method that includes a
        # built-in caching system
        response = self._request(method='POST', url=self.URL, data=payload,
                                 timeout=self.TIMEOUT, cache=cache)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # The answer is a frameset; pick the first frame whose source
        # contains 'tab' but not 'head'.
        frame_sources = (frame.attrs['src'] for frame in soup.find_all('frame'))
        url = next((src for src in frame_sources
                    if 'tab' in src and 'head' not in src), None)
        if url is None:
            raise EmptyResponseError("Did not find table in response")

        baseurl = self.URL.split('cgi-bin')[0]
        fullurl = f'{baseurl}/{url}'

        response2 = self._request(method='GET', url=fullurl,
                                  timeout=self.TIMEOUT, cache=cache)

        return response2

    def _parse_result(self, response, verbose=False):
        """
        Parse a response into an `~astropy.table.Table`

        The catalog data files are composed of fixed-width card images, with
        one card image per spectral line.  The format of each card image is
        similar to the JPL version:
        FREQ, ERR, LGINT, DR,  ELO, GUP, TAG, QNFMT,  QN',  QN"
        (F13.4,F8.4, F8.4,  I2,F10.4,  I3,  I7,    I4,  6I2,  6I2)
        but the formats are somewhat different and are encoded below.
        The first several entries are the same, but more detail is appended at
        the end of the line

        FREQ:  Frequency of the line in MHz.
        ERR:   Estimated or experimental error of FREQ in MHz.
        LGINT: Base 10 logarithm of the integrated intensity in units of nm^2 MHz at
            300 K.  If the last query used a temperature of zero, this column
            is renamed to LGAIJ and given units of 1/s.

        DR:    Degrees of freedom in the rotational partition function (0 for atoms,
            2 for linear molecules, and 3 for nonlinear molecules).

        ELO:   Lower state energy in cm^{-1} relative to the ground state.
        GUP:   Upper state degeneracy.
        TAG:   Species tag or molecular identifier.
            A negative value flags that the line frequency has
            been measured in the laboratory.  The absolute value of TAG is then the
            species tag and ERR is the reported experimental error.  The three most
            significant digits of the species tag are coded as the mass number of
            the species.

        QNFMT: Identifies the format of the quantum numbers
        Ju/Ku/vu and Jl/Kl/vl are the upper/lower QNs
        F: the hyperfine lines
        name: molecule name

        The full detailed description is here:
        https://cdms.astro.uni-koeln.de/classic/predictions/description.html#description

        Parameters
        ----------
        response : `requests.Response`
            The response returned by ``query_lines_async``.
        verbose : bool, optional
            Not used by this method.

        Returns
        -------
        result : `~astropy.table.Table`
            One row per spectral line.

        Raises
        ------
        EmptyResponseError
            If the response states that zero lines were found.
        """

        if 'Zero lines were found' in response.text:
            raise EmptyResponseError(f"Response was empty; message was '{response.text}'.")

        soup = BeautifulSoup(response.text, 'html.parser')
        text = soup.find('pre').text

        # start position of each fixed-width column within a card image
        starts = {'FREQ': 0,
                  'ERR': 14,
                  'LGINT': 25,
                  'DR': 36,
                  'ELO': 38,
                  'GUP': 48,
                  'TAG': 51,
                  'QNFMT': 57,
                  'Ju': 61,
                  'Ku': 63,
                  'vu': 65,
                  'Jl': 67,
                  'Kl': 69,
                  'vl': 71,
                  'F': 73,
                  'name': 89}

        result = ascii.read(text, header_start=None, data_start=0,
                            comment=r'THIS|^\s{12,14}\d{4,6}.*',
                            names=list(starts.keys()),
                            col_starts=list(starts.values()),
                            format='fixed_width', fast_reader=False)

        result['FREQ'].unit = u.MHz
        result['ERR'].unit = u.MHz

        # if there is a crash at this step, something went wrong with the query
        # and the _last_query_temperature was not set.  This shouldn't ever
        # happen, but, well, I anticipate it will.
        if self._last_query_temperature == 0:
            result.rename_column('LGINT', 'LGAIJ')
            result['LGAIJ'].unit = u.s**-1
        else:
            result['LGINT'].unit = u.nm**2 * u.MHz
        result['ELO'].unit = u.cm**(-1)

        return result

    def get_species_table(self, catfile='catdir.cat'):
        """
        A directory of the catalog is found in a file called 'catdir.cat.'

        The table is derived from https://cdms.astro.uni-koeln.de/classic/entries/partition_function.html

        Parameters
        -----------
        catfile : str, name of file, default 'catdir.cat'
            The catalog file, installed locally along with the package

        Returns
        --------
        Table: `~astropy.table.Table`
            | tag : The species tag or molecular identifier.
            | molecule : An ASCII name for the species.
            | #line : The number of lines in the catalog.
            | lg(Q(n)) : The base 10 logarithm of the partition function at
              temperature ``n`` (in K); one column per temperature.  Entries
              that cannot be converted to a float are set to NaN.

        """

        # honour the ``catfile`` argument rather than a hard-coded file name
        result = ascii.read(data_path(catfile), format='csv',
                            delimiter='|')

        # partition-function column name -> temperature in K
        meta = {'lg(Q(1000))': 1000.0,
                'lg(Q(500))': 500.0,
                'lg(Q(300))': 300.0,
                'lg(Q(225))': 225.0,
                'lg(Q(150))': 150.0,
                'lg(Q(75))': 75.0,
                'lg(Q(37.5))': 37.5,
                'lg(Q(18.75))': 18.75,
                'lg(Q(9.375))': 9.375,
                'lg(Q(5.000))': 5.0,
                'lg(Q(2.725))': 2.725}

        def tryfloat(x):
            try:
                return float(x)
            except ValueError:
                return np.nan

        for key, temperature in meta.items():
            # Convert the column first and attach the metadata afterwards:
            # assigning a new array may replace the column object, which
            # would discard metadata set beforehand.
            result[key] = np.array([tryfloat(val) for val in result[key]])
            result[key].meta = {'Temperature (K)': temperature}

        result.meta = {'Temperature (K)': list(meta.values())}

        return result
| 301 | + |
| 302 | + |
# Module-level instance exported in ``__all__``; ``build_lookup`` below reads
# the species table through it.
CDMS = CDMSClass()
| 304 | + |
| 305 | + |
def build_lookup():
    """
    Build a lookup table that maps molecule names to species tags.

    The names and tags are read from the species table returned by
    ``CDMS.get_species_table()``.

    Returns
    -------
    lookuptable : `astroquery.jplspec.lookup_table.Lookuptable`
        Lookup table built from a ``{molecule name: tag}`` dictionary.
    """
    result = CDMS.get_species_table()
    # Select the columns by name.  Indexing a Table with an integer
    # (``result[1]``) returns a *row*, not a column, so the previous code
    # paired the values of the second row with those of the first row.
    names = list(result['molecule'])
    tags = list(result['tag'])
    dictionary = dict(zip(names, tags))

    return lookup_table.Lookuptable(dictionary)
0 commit comments