diff --git a/CHANGES.rst b/CHANGES.rst index 9540003431..70e6a88a26 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -30,6 +30,16 @@ mast - Switch to use HTTP continuation for partial downloads. [#3448] +jplspec +^^^^^^^ + +- Moved to linelists/ [#3455] +- Refactored to use linelists.core [#3456] + +linelists +^^^^^^^^^ + +- General tools for both CDMS/JPL moved to linelists.core [#3456] Infrastructure, Utility and Other Changes and Additions ------------------------------------------------------- @@ -106,6 +116,8 @@ gaia EPOCH_SPECTRUM_XP_CROWDING, MEAN_SPECTRUM_XP, EPOCH_SPECTRUM_XP and MEAN_SPECTRUM_XP_GRAVLENS. [#3382] +- Add more complete support for CDMS quantum number and other value parsing. [#3302] + heasarc ^^^^^^^ diff --git a/astroquery/jplspec/__init__.py b/astroquery/jplspec/__init__.py index 8f87702481..cf0783e2a8 100644 --- a/astroquery/jplspec/__init__.py +++ b/astroquery/jplspec/__init__.py @@ -1,33 +1,33 @@ # Licensed under a 3-clause BSD style license - see LICENSE.rst """ -JPL Spectral Catalog --------------------- +JPL Spectral Catalog (Deprecated Location) +------------------------------------------ +.. deprecated:: 0.4.8 + The `astroquery.jplspec` module has been moved to `astroquery.linelists.jplspec`. + Please update your imports to use `from astroquery.linelists.jplspec import JPLSpec` instead. + This backward compatibility layer will be removed in a future version. -:author: Giannina Guzman (gguzman2@villanova.edu) -:author: Miguel de Val-Borro (miguel.deval@gmail.com) - -""" -from astropy import config as _config +This module provides backward compatibility for the old import location. +The JPLSpec module has been reorganized under the linelists subpackage. +For new code, please use:: -class Conf(_config.ConfigNamespace): - """ - Configuration parameters for `astroquery.jplspec`. 
- """ - server = _config.ConfigItem( - 'https://spec.jpl.nasa.gov/cgi-bin/catform', - 'JPL Spectral Catalog URL.') - - timeout = _config.ConfigItem( - 60, - 'Time limit for connecting to JPL server.') + from astroquery.linelists.jplspec import JPLSpec +""" +import warnings -conf = Conf() +# Issue deprecation warning +warnings.warn( + "Importing from 'astroquery.jplspec' is deprecated. " + "Please use 'from astroquery.linelists.jplspec import JPLSpec' instead. " + "The old import path will be removed in a future version.", + DeprecationWarning, + stacklevel=2 +) -from .core import JPLSpec, JPLSpecClass +# Import from the new location +from ..linelists.jplspec import JPLSpec, JPLSpecClass, Conf, conf -__all__ = ['JPLSpec', 'JPLSpecClass', - 'Conf', 'conf', - ] +__all__ = ['JPLSpec', 'JPLSpecClass', 'Conf', 'conf'] diff --git a/astroquery/jplspec/core.py b/astroquery/jplspec/core.py deleted file mode 100644 index 1f72ca580b..0000000000 --- a/astroquery/jplspec/core.py +++ /dev/null @@ -1,249 +0,0 @@ -# Licensed under a 3-clause BSD style license - see LICENSE.rst -import os -import warnings - -import astropy.units as u -from astropy.io import ascii -from ..query import BaseQuery -from ..utils import async_to_sync -# import configurable items declared in __init__.py -from . import conf -from . 
import lookup_table -from astroquery.exceptions import EmptyResponseError, InvalidQueryError - - -__all__ = ['JPLSpec', 'JPLSpecClass'] - - -def data_path(filename): - data_dir = os.path.join(os.path.dirname(__file__), 'data') - return os.path.join(data_dir, filename) - - -@async_to_sync -class JPLSpecClass(BaseQuery): - - # use the Configuration Items imported from __init__.py - URL = conf.server - TIMEOUT = conf.timeout - - def query_lines_async(self, min_frequency, max_frequency, *, - min_strength=-500, - max_lines=2000, molecule='All', flags=0, - parse_name_locally=False, - get_query_payload=False, cache=True): - """ - Creates an HTTP POST request based on the desired parameters and - returns a response. - - Parameters - ---------- - min_frequency : `astropy.units` - Minimum frequency (or any spectral() equivalent) - max_frequency : `astropy.units` - Maximum frequency (or any spectral() equivalent) - min_strength : int, optional - Minimum strength in catalog units, the default is -500 - max_lines : int, optional - Maximum number of lines to query, the default is 2000. - The most the query allows is 100000 - - molecule : list, string of regex if parse_name_locally=True, optional - Identifiers of the molecules to search for. If this parameter - is not provided the search will match any species. Default is 'All'. - - flags : int, optional - Regular expression flags. Default is set to 0 - - parse_name_locally : bool, optional - When set to True it allows the method to parse through catdir.cat - in order to match the regex inputted in the molecule parameter - and request the corresponding tags of the matches instead. Default - is set to False - - get_query_payload : bool, optional - When set to `True` the method should return the HTTP request - parameters as a dict. Default value is set to False - cache : bool - Defaults to True. If set overrides global caching behavior. - See :ref:`caching documentation <astroquery_cache>`. 
- - Returns - ------- - response : `requests.Response` - The HTTP response returned from the service. - - Examples - -------- - >>> table = JPLSpec.query_lines(min_frequency=100*u.GHz, - ... max_frequency=200*u.GHz, - ... min_strength=-500, molecule=18003) # doctest: +REMOTE_DATA - >>> print(table) # doctest: +SKIP - FREQ ERR LGINT DR ELO GUP TAG QNFMT QN' QN" - ----------- ------ -------- --- --------- --- ------ ----- -------- -------- - 115542.5692 0.6588 -13.2595 3 4606.1683 35 18003 1404 17 810 0 18 513 0 - 139614.293 0.15 -9.3636 3 3080.1788 87 -18003 1404 14 6 9 0 15 312 0 - 177317.068 0.15 -10.3413 3 3437.2774 31 -18003 1404 15 610 0 16 313 0 - 183310.087 0.001 -3.6463 3 136.1639 7 -18003 1404 3 1 3 0 2 2 0 0 - """ - # first initialize the dictionary of HTTP request parameters - payload = dict() - - if min_frequency is not None and max_frequency is not None: - # allow setting payload without having *ANY* valid frequencies set - min_frequency = min_frequency.to(u.GHz, u.spectral()) - max_frequency = max_frequency.to(u.GHz, u.spectral()) - if min_frequency > max_frequency: - min_frequency, max_frequency = max_frequency, min_frequency - - payload['MinNu'] = min_frequency.value - payload['MaxNu'] = max_frequency.value - - if max_lines is not None: - payload['MaxLines'] = max_lines - - payload['UnitNu'] = 'GHz' - payload['StrLim'] = min_strength - - if molecule is not None: - if parse_name_locally: - self.lookup_ids = build_lookup() - payload['Mol'] = tuple(self.lookup_ids.find(molecule, flags).values()) - if len(molecule) == 0: - raise InvalidQueryError('No matching species found. 
Please ' - 'refine your search or read the Docs ' - 'for pointers on how to search.') - else: - payload['Mol'] = molecule - - self.maxlines = max_lines - - payload = list(payload.items()) - - if get_query_payload: - return payload - # BaseQuery classes come with a _request method that includes a - # built-in caching system - response = self._request(method='POST', url=self.URL, data=payload, - timeout=self.TIMEOUT, cache=cache) - - return response - - def _parse_result(self, response, *, verbose=False): - """ - Parse a response into an `~astropy.table.Table` - - The catalog data files are composed of 80-character card images, with - one card image per spectral line. The format of each card image is: - FREQ, ERR, LGINT, DR, ELO, GUP, TAG, QNFMT, QN', QN" - (F13.4,F8.4, F8.4, I2,F10.4, I3, I7, I4, 6I2, 6I2) - - FREQ: Frequency of the line in MHz. - ERR: Estimated or experimental error of FREQ in MHz. - LGINT: Base 10 logarithm of the integrated intensity in units of nm^2 MHz at - 300 K. - - DR: Degrees of freedom in the rotational partition function (0 for atoms, - 2 for linear molecules, and 3 for nonlinear molecules). - - ELO: Lower state energy in cm^{-1} relative to the ground state. - GUP: Upper state degeneracy. - TAG: Species tag or molecular identifier. - A negative value flags that the line frequency has - been measured in the laboratory. The absolute value of TAG is then the - species tag and ERR is the reported experimental error. The three most - significant digits of the species tag are coded as the mass number of - the species. - - QNFMT: Identifies the format of the quantum numbers - QN': Quantum numbers for the upper state. - QN": Quantum numbers for the lower state. - """ - - if 'Zero lines were found' in response.text: - raise EmptyResponseError(f"Response was empty; message was '{response.text}'.") - - # data starts at 0 since regex was applied - # Warning for a result with more than 1000 lines: - # THIS form is currently limited to 1000 lines. 
- result = ascii.read(response.text, header_start=None, data_start=0, - comment=r'THIS|^\s{12,14}\d{4,6}.*|CADDIR CATDIR', - names=('FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', - 'TAG', 'QNFMT', 'QN\'', 'QN"'), - col_starts=(0, 13, 21, 29, 31, 41, 44, 51, 55, 67), - format='fixed_width', fast_reader=False) - - if len(result) > self.maxlines: - warnings.warn("This form is currently limited to {0} lines." - "Please limit your search.".format(self.maxlines)) - - result['FREQ'].unit = u.MHz - result['ERR'].unit = u.MHz - result['LGINT'].unit = u.nm**2 * u.MHz - result['ELO'].unit = u.cm**(-1) - - return result - - def get_species_table(self, *, catfile='catdir.cat'): - """ - A directory of the catalog is found in a file called 'catdir.cat.' - Each element of this directory is an 80-character record with the - following format: - - | TAG, NAME, NLINE, QLOG, VER - | (I6,X, A13, I6, 7F7.4, I2) - - Parameters - ---------- - catfile : str, name of file, default 'catdir.cat' - The catalog file, installed locally along with the package - - Returns - ------- - Table: `~astropy.table.Table` - | TAG : The species tag or molecular identifier. - | NAME : An ASCII name for the species. - | NLINE : The number of lines in the catalog. - | QLOG : A seven-element vector containing the base 10 logarithm of - the partition function for temperatures of 300 K, 225 K, 150 K, - 75 K, 37.5 K, 18.75 K, and 9.375 K, respectively. - | VER : The version of the calculation for this species in the catalog. - The version number is followed by * if the entry is newer than the - last edition of the catalog. 
- - """ - - result = ascii.read(data_path(catfile), header_start=None, data_start=0, - names=('TAG', 'NAME', 'NLINE', 'QLOG1', 'QLOG2', - 'QLOG3', 'QLOG4', 'QLOG5', 'QLOG6', - 'QLOG7', 'VER'), - col_starts=(0, 6, 20, 26, 33, 40, 47, 54, 61, - 68, 75), - format='fixed_width', fast_reader=False) - - # store the corresponding temperatures as metadata - result['QLOG1'].meta = {'Temperature (K)': 300} - result['QLOG2'].meta = {'Temperature (K)': 225} - result['QLOG3'].meta = {'Temperature (K)': 150} - result['QLOG4'].meta = {'Temperature (K)': 75} - result['QLOG5'].meta = {'Temperature (K)': 37.5} - result['QLOG6'].meta = {'Temperature (K)': 18.75} - result['QLOG7'].meta = {'Temperature (K)': 9.375} - result.meta = {'Temperature (K)': [300, 225, 150, 75, 37.5, 18.5, - 9.375]} - - return result - - -JPLSpec = JPLSpecClass() - - -def build_lookup(): - - result = JPLSpec.get_species_table() - keys = list(result['NAME']) - values = list(result['TAG']) - dictionary = dict(zip(keys, values)) - lookuptable = lookup_table.Lookuptable(dictionary) # apply the class above - - return lookuptable diff --git a/astroquery/jplspec/tests/test_jplspec.py b/astroquery/jplspec/tests/test_jplspec.py deleted file mode 100644 index b11c15b8a5..0000000000 --- a/astroquery/jplspec/tests/test_jplspec.py +++ /dev/null @@ -1,120 +0,0 @@ -import numpy as np - -import os - -from astropy import units as u -from astropy.table import Table -from ...jplspec import JPLSpec - -file1 = 'CO.data' -file2 = 'CO_6.data' -file3 = 'multi.data' - - -def data_path(filename): - - data_dir = os.path.join(os.path.dirname(__file__), 'data') - return os.path.join(data_dir, filename) - - -class MockResponseSpec: - - def __init__(self, filename): - self.filename = data_path(filename) - - @property - def text(self): - with open(self.filename) as f: - return f.read() - - -def test_input_async(): - - response = JPLSpec.query_lines_async(min_frequency=100 * u.GHz, - max_frequency=1000 * u.GHz, - min_strength=-500, - 
molecule="28001 CO", - get_query_payload=True) - response = dict(response) - assert response['Mol'] == "28001 CO" - np.testing.assert_almost_equal(response['MinNu'], 100.) - np.testing.assert_almost_equal(response['MaxNu'], 1000.) - - -def test_input_maxlines_async(): - - response = JPLSpec.query_lines_async(min_frequency=100 * u.GHz, - max_frequency=1000 * u.GHz, - min_strength=-500, - molecule="28001 CO", - max_lines=6, - get_query_payload=True) - response = dict(response) - assert response['Mol'] == "28001 CO" - assert response['MaxLines'] == 6. - np.testing.assert_almost_equal(response['MinNu'], 100.) - np.testing.assert_almost_equal(response['MaxNu'], 1000.) - - -def test_input_multi(): - - response = JPLSpec.query_lines_async(min_frequency=500 * u.GHz, - max_frequency=1000 * u.GHz, - min_strength=-500, - molecule=r"^H[2D]O(-\d\d|)$", - parse_name_locally=True, - get_query_payload=True) - response = dict(response) - assert set(response['Mol']) == set((18003, 19002, 19003, 20003, 21001)) - np.testing.assert_almost_equal(response['MinNu'], 500.) - np.testing.assert_almost_equal(response['MaxNu'], 1000.) 
- - -def test_query(): - - response = MockResponseSpec(file1) - tbl = JPLSpec._parse_result(response) - assert isinstance(tbl, Table) - assert len(tbl) == 8 - assert set(tbl.keys()) == set(['FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', - 'TAG', 'QNFMT', 'QN\'', 'QN"']) - - assert tbl['FREQ'][0] == 115271.2018 - assert tbl['ERR'][0] == .0005 - assert tbl['LGINT'][0] == -5.0105 - assert tbl['ERR'][7] == .0050 - assert tbl['FREQ'][7] == 921799.7000 - assert tbl['QN"'][7] == 7 - assert tbl['ELO'][1] == 3.8450 - - -def test_query_truncated(): - - response = MockResponseSpec(file2) - tbl = JPLSpec._parse_result(response) - assert isinstance(tbl, Table) - assert len(tbl) == 6 - assert set(tbl.keys()) == set(['FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', - 'TAG', 'QNFMT', 'QN\'', 'QN"']) - - assert tbl['FREQ'][0] == 115271.2018 - assert tbl['ERR'][0] == .0005 - assert tbl['LGINT'][0] == -5.0105 - assert tbl['ELO'][1] == 3.8450 - - -def test_query_multi(): - - response = MockResponseSpec(file3) - tbl = JPLSpec._parse_result(response) - assert isinstance(tbl, Table) - assert len(tbl) == 208 - assert set(tbl.keys()) == set(['FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', - 'TAG', 'QNFMT', 'QN\'', 'QN"']) - - assert tbl['FREQ'][0] == 503568.5200 - assert tbl['ERR'][0] == 0.0200 - assert tbl['LGINT'][0] == -4.9916 - assert tbl['TAG'][0] == -18003 - assert tbl['TAG'][38] == -19002 - assert tbl['TAG'][207] == 21001 diff --git a/astroquery/jplspec/tests/test_jplspec_remote.py b/astroquery/jplspec/tests/test_jplspec_remote.py deleted file mode 100644 index 0f60e2b4fa..0000000000 --- a/astroquery/jplspec/tests/test_jplspec_remote.py +++ /dev/null @@ -1,41 +0,0 @@ -import pytest -from astropy import units as u -from astropy.table import Table - -from ...jplspec import JPLSpec - - -@pytest.mark.remote_data -def test_remote(): - tbl = JPLSpec.query_lines(min_frequency=500 * u.GHz, - max_frequency=1000 * u.GHz, - min_strength=-500, - molecule="18003 H2O") - assert isinstance(tbl, Table) - 
assert len(tbl) == 36 - assert set(tbl.keys()) == set(['FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', - 'TAG', 'QNFMT', 'QN\'', 'QN"']) - - assert tbl['FREQ'][0] == 503568.5200 - assert tbl['ERR'][0] == 0.0200 - assert tbl['LGINT'][0] == -4.9916 - assert tbl['ERR'][7] == 12.4193 - assert tbl['FREQ'][35] == 987926.7590 - - -@pytest.mark.remote_data -def test_remote_regex(): - tbl = JPLSpec.query_lines(min_frequency=500 * u.GHz, - max_frequency=1000 * u.GHz, - min_strength=-500, - molecule=("28001", "28002", "28003")) - assert isinstance(tbl, Table) - assert len(tbl) == 16 - assert set(tbl.keys()) == set(['FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', - 'TAG', 'QNFMT', 'QN\'', 'QN"']) - - assert tbl['FREQ'][0] == 576267.9305 - assert tbl['ERR'][0] == .0005 - assert tbl['LGINT'][0] == -3.0118 - assert tbl['ERR'][7] == 8.3063 - assert tbl['FREQ'][15] == 946175.3151 diff --git a/astroquery/linelists/__init__.py b/astroquery/linelists/__init__.py index e69de29bb2..50ac8b0a1d 100644 --- a/astroquery/linelists/__init__.py +++ b/astroquery/linelists/__init__.py @@ -0,0 +1,11 @@ +# Licensed under a 3-clause BSD style license - see LICENSE.rst +""" +Linelists module +---------------- +This module contains sub-modules for various molecular and atomic line list databases, +as well as common utilities for parsing catalog files. 
+""" + +from .core import parse_letternumber + +__all__ = ['parse_letternumber'] diff --git a/astroquery/linelists/cdms/core.py b/astroquery/linelists/cdms/core.py index 95c739b304..6474d7a5be 100644 --- a/astroquery/linelists/cdms/core.py +++ b/astroquery/linelists/cdms/core.py @@ -8,13 +8,14 @@ from astropy import table from astropy.io import ascii from astroquery.query import BaseQuery -from astroquery.utils import async_to_sync # import configurable items declared in __init__.py from astroquery.linelists.cdms import conf from astroquery.exceptions import InvalidQueryError, EmptyResponseError +from ..core import parse_letternumber +from astroquery.utils import process_asyncs +from astroquery import log import re -import string __all__ = ['CDMS', 'CDMSClass'] @@ -24,14 +25,81 @@ def data_path(filename): return os.path.join(data_dir, filename) -@async_to_sync class CDMSClass(BaseQuery): # use the Configuration Items imported from __init__.py URL = conf.search SERVER = conf.server CLASSIC_URL = conf.classic_server TIMEOUT = conf.timeout - MALFORMATTED_MOLECULE_LIST = ['017506 NH3-wHFS', '028582 H2NC', '058501 H2C2S', '064527 HC3HCN'] + MALFORMATTED_MOLECULE_LIST = ['017506 NH3-wHFS', '028528 H2NC', '058501 H2C2S', '064527 HC3HCN'] + + def __init__(self, fallback_to_getmolecule=False): + super().__init__() + + def _mol_to_payload(self, molecule, parse_name_locally, flags): + if parse_name_locally: + self.lookup_ids = build_lookup() + luts = self.lookup_ids.find(molecule, flags) + if len(luts) == 0: + raise InvalidQueryError('No matching species found. 
Please ' + 'refine your search or read the Docs ' + 'for pointers on how to search.') + return tuple(f"{val:06d} {key}" + for key, val in luts.items())[0] + else: + return molecule + + def query_lines(self, min_frequency, max_frequency, *, + min_strength=-500, molecule='All', + temperature_for_intensity=300, flags=0, + parse_name_locally=False, get_query_payload=False, + fallback_to_getmolecule=False, + verbose=False, + cache=True): + + # Check if a malformatted molecule was requested and use fallback if enabled + # accounts for three formats, e.g.: '058501' or 'H2C2S' or '058501 H2C2S' + badlist = (self.MALFORMATTED_MOLECULE_LIST + + [y for x in self.MALFORMATTED_MOLECULE_LIST for y in x.split()]) + + # extract molecule from the response or request + requested_molecule = self._mol_to_payload(molecule, parse_name_locally, flags) if molecule != 'All' else None + + if requested_molecule and requested_molecule in badlist and not get_query_payload: + if fallback_to_getmolecule: + try: + return self.get_molecule(requested_molecule[:6]) + except ValueError as ex: + # try to give the users good guidance on which parameters will work + if "molecule_id should be a length-6 string of numbers" in str(ex): + if parse_name_locally: + raise ValueError(f"Molecule {molecule} could not be parsed or identified." + " Check that the name was correctly specified.") + else: + raise ValueError(f"Molecule {molecule} needs to be formatted as" + " a 6-digit string ID for the get_molecule fallback to work." + " Try setting parse_name_locally=True " + "to turn your molecule name into a CDMS number ID.") + else: + raise ex + else: + raise ValueError(f"Molecule {requested_molecule} is known not to comply with standard CDMS format. 
" + f"Try get_molecule({requested_molecule}) instead or set " + f"CDMS.fallback_to_getmolecule = True.") + else: + response = self.query_lines_async(min_frequency=min_frequency, + max_frequency=max_frequency, + min_strength=min_strength, + molecule=molecule, + temperature_for_intensity=temperature_for_intensity, + flags=flags, + parse_name_locally=parse_name_locally, + get_query_payload=get_query_payload, + cache=cache) + if get_query_payload: + return response + else: + return self._parse_result(response, molname=molecule, verbose=verbose) def query_lines_async(self, min_frequency, max_frequency, *, min_strength=-500, molecule='All', @@ -54,7 +122,8 @@ def query_lines_async(self, min_frequency, max_frequency, *, min_strength : int, optional Minimum strength in catalog units, the default is -500 - molecule : list, string of regex if parse_name_locally=True, optional + molecule : list or string if parse_name_locally=False, + string of regex if parse_name_locally=True, optional Identifiers of the molecules to search for. If this parameter is not provided the search will match any species. Default is 'All'. As a first pass, the molecule will be searched for with a direct @@ -134,18 +203,11 @@ def query_lines_async(self, min_frequency, max_frequency, *, # changes interpretation of query self._last_query_temperature = temperature_for_intensity - if molecule is not None: - if parse_name_locally: - self.lookup_ids = build_lookup() - luts = self.lookup_ids.find(molecule, flags) - if len(luts) == 0: - raise InvalidQueryError('No matching species found. 
Please ' - 'refine your search or read the Docs ' - 'for pointers on how to search.') - payload['Molecules'] = tuple(f"{val:06d} {key}" - for key, val in luts.items())[0] - else: - payload['Molecules'] = molecule + if molecule == 'All': + payload['Moleculesgrp'] = 'all species' + else: + if molecule is not None: + payload['Molecules'] = self._mol_to_payload(molecule, parse_name_locally, flags) if get_query_payload: return payload @@ -177,16 +239,11 @@ def query_lines_async(self, min_frequency, max_frequency, *, response2 = self._request(method='GET', url=fullurl, timeout=self.TIMEOUT, cache=cache) - # accounts for three formats, e.g.: '058501' or 'H2C2S' or '058501 H2C2S' - badlist = (self.MALFORMATTED_MOLECULE_LIST + # noqa - [y for x in self.MALFORMATTED_MOLECULE_LIST for y in x.split()]) - if payload['Molecules'] in badlist: - raise ValueError(f"Molecule {payload['Molecules']} is known not to comply with standard CDMS format. " - f"Try get_molecule({payload['Molecules']}) instead.") - return response2 - def _parse_result(self, response, *, verbose=False): + query_lines.__doc__ = process_asyncs.async_to_sync_docstr(query_lines_async.__doc__) + + def _parse_result(self, response, *, verbose=False, molname=None): """ Parse a response into an `~astropy.table.Table` @@ -233,15 +290,34 @@ def _parse_result(self, response, *, verbose=False): soup = BeautifulSoup(response.text, 'html.parser') text = soup.find('pre').text + # this is a different workaround to try to make _some_ of the bad molecules parseable + # (it doesn't solve all of them, which is why the above fallback exists) + need_to_filter_bad_molecules = False + for bad_molecule in self.MALFORMATTED_MOLECULE_LIST: + if text.find(bad_molecule.split()[1]) > -1: + need_to_filter_bad_molecules = True + break + if need_to_filter_bad_molecules: + text_new = '' + text = text.split('\n') + for line in text: + need_to_include_line = True + for bad_molecule in self.MALFORMATTED_MOLECULE_LIST: + if 
line.find(bad_molecule.split()[1]) > -1: + need_to_include_line = False + break + if need_to_include_line: + text_new = text_new + '\n' + line + text = text_new + starts = {'FREQ': 0, 'ERR': 14, 'LGINT': 25, 'DR': 36, 'ELO': 38, 'GUP': 47, - 'MOLWT': 51, - 'TAG': 54, - 'QNFMT': 58, + 'TAG': 50, + 'QNFMT': 57, 'Ju': 61, 'Ku': 63, 'vu': 65, @@ -256,39 +332,47 @@ def _parse_result(self, response, *, verbose=False): 'F3l': 83, 'name': 89} - result = ascii.read(text, header_start=None, data_start=0, - comment=r'THIS|^\s{12,14}\d{4,6}.*', - names=list(starts.keys()), - col_starts=list(starts.values()), - format='fixed_width', fast_reader=False) - - result['FREQ'].unit = u.MHz - result['ERR'].unit = u.MHz - - result['Lab'] = result['MOLWT'] < 0 - result['MOLWT'] = np.abs(result['MOLWT']) - result['MOLWT'].unit = u.Da - - fix_keys = ['GUP'] - for suf in 'ul': - for qn in ('J', 'v', 'K', 'F1', 'F2', 'F3'): - qnind = qn+suf - fix_keys.append(qnind) - for key in fix_keys: - if not np.issubdtype(result[key].dtype, np.integer): - intcol = np.array(list(map(parse_letternumber, result[key])), - dtype=int) - result[key] = intcol - - # if there is a crash at this step, something went wrong with the query - # and the _last_query_temperature was not set. This shouldn't ever - # happen, but, well, I anticipate it will. 
- if self._last_query_temperature == 0: - result.rename_column('LGINT', 'LGAIJ') - result['LGAIJ'].unit = u.s**-1 - else: - result['LGINT'].unit = u.nm**2 * u.MHz - result['ELO'].unit = u.cm**(-1) + try: + result = ascii.read(text, header_start=None, data_start=0, + comment=r'THIS|^\s{12,14}\d{4,6}.*', + names=list(starts.keys()), + col_starts=list(starts.values()), + format='fixed_width', fast_reader=False) + + result['FREQ'].unit = u.MHz + result['ERR'].unit = u.MHz + + result['MOLWT'] = [int(x/1e3) for x in result['TAG']] + result['Lab'] = result['MOLWT'] < 0 + result['MOLWT'] = np.abs(result['MOLWT']) + result['MOLWT'].unit = u.Da + + fix_keys = ['GUP'] + for suf in 'ul': + for qn in ('J', 'v', 'K', 'F1', 'F2', 'F3'): + qnind = qn+suf + fix_keys.append(qnind) + for key in fix_keys: + if not np.issubdtype(result[key].dtype, np.integer): + intcol = np.array(list(map(parse_letternumber, result[key])), + dtype=int) + result[key] = intcol + + # if there is a crash at this step, something went wrong with the query + # and the _last_query_temperature was not set. This shouldn't ever + # happen, but, well, I anticipate it will. + if self._last_query_temperature == 0: + result.rename_column('LGINT', 'LGAIJ') + result['LGAIJ'].unit = u.s**-1 + else: + result['LGINT'].unit = u.nm**2 * u.MHz + result['ELO'].unit = u.cm**(-1) + except ValueError as ex: + # Give users a more helpful exception when parsing fails + new_message = ("Failed to parse CDMS response. This may be caused by a malformed search return. 
f"You can check this by running `CDMS.get_molecule('{molname}')` instead; if it works, the " + "problem is caused by the CDMS search interface and cannot be worked around.") + raise ValueError(new_message) from ex return result @@ -387,38 +471,64 @@ def tryfloat(x): return result - def get_molecule(self, molecule_id, *, cache=True): + def get_molecule(self, molecule_id, *, cache=True, return_response=False): """ Retrieve the whole molecule table for a given molecule id + + Parameters + ---------- + molecule_id : str + The 6-digit molecule identifier as a string + cache : bool + Defaults to True. If set overrides global caching behavior. + See :ref:`caching documentation <astroquery_cache>`. + return_response : bool, optional + If True, return the raw `requests.Response` object instead of parsing + the response. If this is set, the response will be returned whether + or not it was successful. Default is False. """ if not isinstance(molecule_id, str) or len(molecule_id) != 6: raise ValueError("molecule_id should be a length-6 string of numbers") url = f'{self.CLASSIC_URL}/entries/c{molecule_id}.cat' response = self._request(method='GET', url=url, timeout=self.TIMEOUT, cache=cache) - result = self._parse_cat(response) + + if return_response: + return response + + response.raise_for_status() + + if 'Zero lines were found' in response.text: + raise EmptyResponseError(f"Response was empty; message was '{response.text}'.") + + result = self._parse_cat(response.text) species_table = self.get_species_table() result.meta = dict(species_table.loc[int(molecule_id)]) return result - def _parse_cat(self, response, *, verbose=False): - """ - Parse a catalog response into an `~astropy.table.Table` - - See details in _parse_response; this is a very similar function, - but the catalog responses have a slightly different format. + def _parse_cat(self, text, *, verbose=False): """ + Parse a CDMS-format catalog file into an `~astropy.table.Table`. 
- if 'Zero lines were found' in response.text: - raise EmptyResponseError(f"Response was empty; message was '{response.text}'.") + The catalog data files are composed of 80-character card images. + Format: [F13.4, 2F8.4, I2, F10.4, I3, I7, I4, 12I2]: + FREQ, ERR, LGINT, DR, ELO, GUP, TAG, QNFMT, QN - text = response.text + Parameters + ---------- + text : str + The catalog file text content. + verbose : bool, optional + Not used currently. - # notes about the format - # [F13.4, 2F8.4, I2, F10.4, I3, I7, I4, 12I2]: FREQ, ERR, LGINT, DR, ELO, GUP, TAG, QNFMT, QN noqa - # 13 21 29 31 41 44 51 55 57 59 61 63 65 67 69 71 73 75 77 79 noqa + Returns + ------- + Table : `~astropy.table.Table` + Parsed catalog data. + """ + # Column start positions starts = {'FREQ': 0, 'ERR': 14, 'LGINT': 22, @@ -426,21 +536,21 @@ def _parse_cat(self, response, *, verbose=False): 'ELO': 32, 'GUP': 42, 'TAG': 44, - 'QNFMT': 52, - 'Q1': 56, - 'Q2': 58, - 'Q3': 60, - 'Q4': 62, - 'Q5': 64, - 'Q6': 66, - 'Q7': 68, - 'Q8': 70, - 'Q9': 72, - 'Q10': 74, - 'Q11': 76, - 'Q12': 78, - 'Q13': 80, - 'Q14': 82, + 'QNFMT': 51, + 'Q1': 55, + 'Q2': 57, + 'Q3': 59, + 'Q4': 61, + 'Q5': 63, + 'Q6': 65, + 'Q7': 67, + 'Q8': 69, + 'Q9': 71, + 'Q10': 73, + 'Q11': 75, + 'Q12': 77, + 'Q13': 79, + 'Q14': 81, } result = ascii.read(text, header_start=None, data_start=0, @@ -449,8 +559,10 @@ def _parse_cat(self, response, *, verbose=False): col_starts=list(starts.values()), format='fixed_width', fast_reader=False) + # Ensure TAG is integer type for computation # int truncates - which is what we want - result['MOLWT'] = [int(x/1e4) for x in result['TAG']] + result['TAG'] = result['TAG'].astype(int) + result['MOLWT'] = [int(x/1e3) for x in result['TAG']] result['FREQ'].unit = u.MHz result['ERR'].unit = u.MHz @@ -460,15 +572,18 @@ def _parse_cat(self, response, *, verbose=False): result['MOLWT'].unit = u.Da fix_keys = ['GUP'] - for suf in '': - for qn in (f'Q{ii}' for ii in range(1, 15)): - qnind = qn+suf - 
fix_keys.append(qnind) + for qn in (f'Q{ii}' for ii in range(1, 15)): + fix_keys.append(qn) + log.debug(f"fix_keys: {fix_keys} should include Q1, Q2, ..., Q14 and GUP") for key in fix_keys: if not np.issubdtype(result[key].dtype, np.integer): intcol = np.array(list(map(parse_letternumber, result[key])), dtype=int) + if any(intcol == -999999): + intcol = np.ma.masked_where(intcol == -999999, intcol) result[key] = intcol + if not np.issubdtype(result[key].dtype, np.integer): + raise ValueError(f"Failed to parse {key} as integer") result['LGINT'].unit = u.nm**2 * u.MHz result['ELO'].unit = u.cm**(-1) @@ -479,24 +594,6 @@ def _parse_cat(self, response, *, verbose=False): CDMS = CDMSClass() -def parse_letternumber(st): - """ - Parse CDMS's two-letter QNs - - From the CDMS docs: - "Exactly two characters are available for each quantum number. Therefore, half - integer quanta are rounded up ! In addition, capital letters are used to - indicate quantum numbers larger than 99. E. g. A0 is 100, Z9 is 359. Small - types are used to signal corresponding negative quantum numbers." 
- """ - asc = string.ascii_lowercase - ASC = string.ascii_uppercase - newst = ''.join(['-' + str(asc.index(x)+10) if x in asc else - str(ASC.index(x)+10) if x in ASC else - x for x in st]) - return int(newst) - - class Lookuptable(dict): def find(self, st, flags): diff --git a/astroquery/linelists/cdms/setup_package.py b/astroquery/linelists/cdms/setup_package.py index 9aa4bd311e..64d5cbb99f 100644 --- a/astroquery/linelists/cdms/setup_package.py +++ b/astroquery/linelists/cdms/setup_package.py @@ -9,6 +9,7 @@ def get_package_data(): paths_test = [os.path.join('data', '028503 CO, v=0.data'), os.path.join('data', '117501 HC7S.data'), os.path.join('data', '099501 HC7N, v=0.data'), + os.path.join('data', 'c058501.cat'), os.path.join('data', 'post_response.html'), ] diff --git a/astroquery/linelists/cdms/tests/data/c058501.cat b/astroquery/linelists/cdms/tests/data/c058501.cat new file mode 100644 index 0000000000..3c7acdb7f6 --- /dev/null +++ b/astroquery/linelists/cdms/tests/data/c058501.cat @@ -0,0 +1,3 @@ + 114.9627 0.0001-10.6817 3 9.7413 9 58501 303 1 1 0 1 1 1 + 344.8868 0.0002 -9.9842 3 10.4849 15 58501 303 2 1 1 2 1 2 + 689.7699 0.0004 -9.5394 3 11.6003 21 58501 303 3 1 2 3 1 3 \ No newline at end of file diff --git a/astroquery/linelists/cdms/tests/test_cdms.py b/astroquery/linelists/cdms/tests/test_cdms.py index 597311d715..bfa654ff79 100644 --- a/astroquery/linelists/cdms/tests/test_cdms.py +++ b/astroquery/linelists/cdms/tests/test_cdms.py @@ -7,6 +7,7 @@ from astropy.table import Table from astroquery.linelists.cdms.core import CDMS, parse_letternumber, build_lookup from astroquery.utils.mocks import MockResponse +from astroquery.exceptions import InvalidQueryError colname_set = set(['FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', 'TAG', 'QNFMT', 'Ju', 'Jl', "vu", "F1u", "F2u", "F3u", "vl", "Ku", "Kl", @@ -21,10 +22,18 @@ def data_path(filename): def mockreturn(*args, method='GET', data={}, url='', **kwargs): if method == 'GET': - molecule = 
url.split('cdmstab')[1].split('.')[0] - with open(data_path(molecule+".data"), 'rb') as fh: - content = fh.read() - return MockResponse(content=content) + # Handle get_molecule requests (classic URL format) + if '/entries/c' in url: + molecule = url.split('/entries/c')[1].split('.')[0] + with open(data_path(f"c{molecule}.cat"), 'rb') as fh: + content = fh.read() + return MockResponse(content=content) + # Handle regular query_lines requests + else: + molecule = url.split('cdmstab')[1].split('.')[0] + with open(data_path(molecule+".data"), 'rb') as fh: + content = fh.read() + return MockResponse(content=content) elif method == 'POST': molecule = dict(data)['Molecules'] with open(data_path("post_response.html"), 'r') as fh: @@ -83,6 +92,7 @@ def test_query(patch_post): assert tbl['LGINT'][0] == -7.1425 assert tbl['GUP'][0] == 3 assert tbl['GUP'][7] == 17 + assert tbl['MOLWT'][0] == 28 def test_parseletternumber(): @@ -99,9 +109,12 @@ def test_parseletternumber(): assert parse_letternumber("Z9") == 359 # inferred? 
- assert parse_letternumber("z9") == -359 + assert parse_letternumber("a0") == -10 + assert parse_letternumber("b0") == -20 assert parse_letternumber("ZZ") == 3535 + assert parse_letternumber(np.ma.masked) == -999999 + def test_hc7s(patch_post): """ @@ -201,3 +214,120 @@ def test_lut_literal(): assert thirteenco['13CO'] == 29501 thirteencostar = lut.find('13CO*', 0) assert len(thirteencostar) >= 252 + + +def test_malformatted_molecule_raises_error(patch_post): + """ + Test that querying a malformatted molecule raises an error when + fallback_to_getmolecule is False (default behavior) + """ + # H2C2S is in the MALFORMATTED_MOLECULE_LIST + with pytest.raises(ValueError, match="is known not to comply with standard CDMS format"): + CDMS.query_lines(min_frequency=100 * u.GHz, + max_frequency=300 * u.GHz, + molecule='058501 H2C2S', + fallback_to_getmolecule=False) + + +def test_malformatted_molecule_with_fallback(patch_post): + """ + Test that querying a malformatted molecule with fallback_to_getmolecule=True + successfully falls back to get_molecule + """ + # H2C2S is in the MALFORMATTED_MOLECULE_LIST + tbl = CDMS.query_lines(min_frequency=100 * u.GHz, + max_frequency=300 * u.GHz, + molecule='058501 H2C2S', + fallback_to_getmolecule=True) + + assert isinstance(tbl, Table) + assert len(tbl) == 3 + assert tbl['FREQ'][0] == 114.9627 + assert tbl['FREQ'][1] == 344.8868 + assert tbl['FREQ'][2] == 689.7699 + assert tbl['TAG'][0] == 58501 + assert tbl['GUP'][0] == 9 + + +def test_malformatted_molecule_id_only_with_fallback(patch_post): + """ + Test that querying with just the molecule ID (058501) also works with fallback + """ + # Just the ID is also in the badlist + tbl = CDMS.query_lines(min_frequency=100 * u.GHz, + max_frequency=300 * u.GHz, + molecule='058501', + fallback_to_getmolecule=True) + + assert isinstance(tbl, Table) + assert len(tbl) == 3 + assert tbl['FREQ'][0] == 114.9627 + + +def test_malformatted_molecule_name_only_with_fallback_error(patch_post): + """ + 
Test that querying with just the molecule name (H2C2S) without parse_name_locally + raises an error because H2C2S (5 chars) is not a valid 6-digit molecule ID. + + When parse_name_locally=False, "H2C2S" is passed as-is to _mol_to_payload, + which returns "H2C2S". This is in the badlist, so fallback is triggered, + but get_molecule("H2C2S") fails because it's not a 6-digit ID. + """ + # Just the name is also in the badlist, but it's not a 6-digit ID + with pytest.raises(ValueError, match="needs to be formatted as.*6-digit string ID"): + CDMS.query_lines(min_frequency=100 * u.GHz, + max_frequency=300 * u.GHz, + molecule='H2C2S', + parse_name_locally=False, + fallback_to_getmolecule=True) + + +def test_malformatted_molecule_name_with_parse_locally_success(patch_post): + """ + Test that querying with just the molecule name (H2C2S) WITH parse_name_locally=True + successfully resolves to "058501 H2C2S" and then falls back to get_molecule. + + When parse_name_locally=True, "H2C2S" is looked up and converted to "058501 H2C2S", + which is in the badlist, so fallback is triggered and succeeds. 
+ """ + tbl = CDMS.query_lines(min_frequency=100 * u.GHz, + max_frequency=300 * u.GHz, + molecule='H2C2S', + parse_name_locally=True, + fallback_to_getmolecule=True) + + assert isinstance(tbl, Table) + assert len(tbl) == 3 + assert tbl['TAG'][0] == 58501 + + +def test_get_query_payload_skips_fallback(patch_post): + """ + Test that when get_query_payload=True, the fallback is not triggered + even for malformatted molecules + """ + # This should return the payload without triggering fallback or error + payload = CDMS.query_lines(min_frequency=100 * u.GHz, + max_frequency=300 * u.GHz, + molecule='058501 H2C2S', + get_query_payload=True) + + assert isinstance(payload, dict) + assert 'Molecules' in payload + assert payload['Molecules'] == '058501 H2C2S' + + +def test_malformatted_with_parse_name_locally_and_fallback_error(): + """ + Test that when parse_name_locally=True with a malformatted molecule + and fallback is enabled, but molecule can't be resolved, we get + proper error message about parsing failure + """ + # First, the lookup will fail to find 'NOTREALMOLECULE' and raise InvalidQueryError + # before we even get to the fallback logic + with pytest.raises(InvalidQueryError, match="No matching species found"): + CDMS.query_lines(min_frequency=100 * u.GHz, + max_frequency=300 * u.GHz, + molecule='NOTREALMOLECULE', + parse_name_locally=True, + fallback_to_getmolecule=True) diff --git a/astroquery/linelists/cdms/tests/test_cdms_remote.py b/astroquery/linelists/cdms/tests/test_cdms_remote.py index 5c2a2059fb..bb22ffcac6 100644 --- a/astroquery/linelists/cdms/tests/test_cdms_remote.py +++ b/astroquery/linelists/cdms/tests/test_cdms_remote.py @@ -38,6 +38,101 @@ def test_remote_300K(): assert tbl['FREQ'][0] == 505366.7875 assert tbl['ERR'][0] == 49.13 assert tbl['LGINT'][0] == -4.2182 + assert tbl['MOLWT'][0] == 18 + assert tbl['TAG'][0] == 18505 + + +@pytest.mark.remote_data +def test_co_basics(): + tbl = CDMS.get_molecule('028503') + assert tbl['Q1'][0] == 1 + assert 
tbl['Q7'][0] == 0 + assert tbl['Q1'][10] == 11 + assert tbl['Q7'][10] == 10 + assert tbl['MOLWT'][0] == 28 + assert tbl['TAG'][0] == -28503 + + +@pytest.mark.remote_data +def test_ch3cn_negqn(): + # 041505 = CH3CN on 2025-05-21 + tbl = CDMS.get_molecule('041505') + assert tbl.meta['molecule'] == 'CH3CN, v=0' + fourtominusthree = tbl[(tbl['Q1'] == 4) & (tbl['Q2'] == -3)] + assert len(fourtominusthree) >= 1 + + # check specifically for -21, which is encoded as `b1` + twentytwominustwentyone = tbl[(tbl['Q1'] == 22) & (tbl['Q2'] == -21)] + assert len(twentytwominustwentyone) >= 1 + + assert tbl['TAG'][0] == 41505 + + twentythreeminustwentyone = tbl[(tbl['Q1'] == 23) & (tbl['Q2'] == -21)] + assert len(twentythreeminustwentyone) >= 1 + assert twentythreeminustwentyone['TAG'][0] == -41505 + + +@pytest.mark.remote_data +def test_propanediol(): + tbl1 = CDMS.get_molecule('076513') + assert 'int' in tbl1['Q2'].dtype.name + + tbl = CDMS.query_lines(min_frequency=100.3 * u.GHz, + max_frequency=100.5 * u.GHz, + molecule='076513') + assert isinstance(tbl, Table) + assert len(tbl) >= 1 + assert 'aG\'g-1,2-Propanediol' in tbl['name'] + # check that the parser worked - this will be string or obj otherwise + assert 'int' in tbl['Ku'].dtype.name + assert tbl['MOLWT'][0] == 76 + assert tbl['TAG'][0] == 76513 + + +@pytest.mark.remote_data +@pytest.mark.xfail(reason="CDMS entry for H2NC is malformed") +def test_h2nc(): + tbl1 = CDMS.get_molecule('028528') + assert 'int' in tbl1['Q2'].dtype.name + + tbl = CDMS.query_lines(min_frequency=139.3 * u.GHz, + max_frequency=141.5 * u.GHz, + molecule='028528 H2NC') + + # these are the results that SHOULD be return if it actually worked + assert isinstance(tbl, Table) + assert len(tbl) >= 1 + assert 'H2NC' in tbl['name'] + # check that the parser worked - this will be string or obj otherwise + assert 'int' in tbl['Ku'].dtype.name + assert tbl['MOLWT'][0] == 28 + assert tbl['TAG'][0] == 28528 + + +@pytest.mark.remote_data +def 
test_fallback_to_getmolecule_parameter(): + """ + Test that fallback_to_getmolecule attribute controls query behavior. + + When fallback_to_getmolecule is True, query_lines should use get_molecule + internally for malformed molecules. + """ + + # Test with a malformed molecule and fallback enabled + tbl_fallback = CDMS.query_lines( + min_frequency=100 * u.GHz, + max_frequency=200 * u.GHz, + min_strength=-500, + molecule="028528 H2NC", + fallback_to_getmolecule=True + ) + + assert isinstance(tbl_fallback, Table) + assert len(tbl_fallback) > 0 + + # I don't think the state set within this module affects the rest of the + # tests but just in case + CDMS.fallback_to_getmolecule = False @pytest.mark.remote_data @@ -46,13 +141,16 @@ def test_remote_regex(): tbl = CDMS.query_lines(min_frequency=500 * u.GHz, max_frequency=600 * u.GHz, min_strength=-500, - molecule=('028501 HC-13-N, v=0', '028502 H2CN' '028503 CO, v=0')) + molecule=('028501 HC-13-N, v=0', + '028502 H2CN', + '028503 CO, v=0')) assert isinstance(tbl, Table) - assert len(tbl) == 557 + # regression test fix: there's 1 CO line that got missed because of a missing comma + assert len(tbl) == 558 assert set(tbl.keys()) == colname_set - assert set(tbl['name']) == {'H2CN', 'HC-13-N, v=0'} + assert set(tbl['name']) == {'H2CN', 'HC-13-N, v=0', 'CO, v=0'} @pytest.mark.remote_data @@ -66,16 +164,16 @@ def test_molecule_with_parens(): MC = np.ma.core.MaskedConstant() - for col, val in zip(tbl[0].colnames, (232588.7246, 0.2828, -4.1005, 3, 293.8540, 445, 66, - 506, 303, 44, 14, 30, MC, MC, MC, 45, 13, 33, MC, MC, MC, 'H2C(CN)2', False)): + for col, val in zip(tbl[0].colnames, (232588.7246, 0.2828, -4.1005, 3, 293.8540, 445, 66506, + 303, 44, 14, 30, MC, MC, MC, 45, 13, 33, MC, MC, MC, 'H2C(CN)2', 66, False)): if val is MC: assert tbl[0][col].mask else: assert tbl[0][col] == val # this test row includes degeneracy = 1225, which covers one of the weird letter-is-number parser cases - for col, val in zip(tbl[16].colnames, 
(233373.369, 10.26, -4.8704, 3, 1229.0674, 1125, 66, - 506, 303, 112, 10, 102, MC, MC, MC, 112, 9, 103, MC, MC, MC, 'H2C(CN)2', False),): + for col, val in zip(tbl[16].colnames, (233373.369, 10.26, -4.8704, 3, 1229.0674, 1125, 66506, + 303, 112, 10, 102, MC, MC, MC, 112, 9, 103, MC, MC, MC, 'H2C(CN)2', 66, False),): if val is MC: assert tbl[16][col].mask else: @@ -121,6 +219,20 @@ def test_retrieve_species_table(): assert 'float' in species_table['lg(Q(1000))'].dtype.name +@pytest.mark.remote_data +def test_remote_all_species(): + tbl = CDMS.query_lines(min_frequency=100.3 * u.GHz, + max_frequency=100.5 * u.GHz, + min_strength=-5) + assert isinstance(tbl, Table) + + AlS_is_in_table = (tbl['name'] == 'AlS').sum() > 0 + Propanediol_is_in_table = (tbl['name'] == "aG'g-1,2-Propanediol").sum() > 0 + + assert AlS_is_in_table + assert Propanediol_is_in_table + + @pytest.mark.bigdata @pytest.mark.remote_data class TestRegressionAllCats: diff --git a/astroquery/linelists/core.py b/astroquery/linelists/core.py new file mode 100644 index 0000000000..ba5bc5b6f9 --- /dev/null +++ b/astroquery/linelists/core.py @@ -0,0 +1,33 @@ +# Licensed under a 3-clause BSD style license - see LICENSE.rst +""" +Base classes and common utilities for linelist queries (JPLSpec, CDMS, etc.) +""" +import numpy as np +import string + +__all__ = ['parse_letternumber'] + + +def parse_letternumber(st): + """ + Parse CDMS's two-letter QNs into integers. + + Masked values are converted to -999999. + + From the CDMS docs: + "Exactly two characters are available for each quantum number. Therefore, half + integer quanta are rounded up ! In addition, capital letters are used to + indicate quantum numbers larger than 99. E. g. A0 is 100, Z9 is 359. Lower case characters + are used similarly to signal negative quantum numbers smaller than –9. e. g., a0 is –10, b0 is –20, etc." 
+ """ + if isinstance(st, (np.int32, np.int64, int)): + return st + if np.ma.is_masked(st): + return -999999 + + asc = string.ascii_lowercase + ASC = string.ascii_uppercase + newst = ''.join(['-' + str((asc.index(x)+1)) if x in asc else + str((ASC.index(x)+10)) if x in ASC else + x for x in st]) + return int(newst) diff --git a/astroquery/linelists/jplspec/__init__.py b/astroquery/linelists/jplspec/__init__.py new file mode 100644 index 0000000000..cfd9439cfe --- /dev/null +++ b/astroquery/linelists/jplspec/__init__.py @@ -0,0 +1,33 @@ +# Licensed under a 3-clause BSD style license - see LICENSE.rst +""" +JPL Spectral Catalog +-------------------- + + +:author: Giannina Guzman (gguzman2@villanova.edu) +:author: Miguel de Val-Borro (miguel.deval@gmail.com) + +""" +from astropy import config as _config + + +class Conf(_config.ConfigNamespace): + """ + Configuration parameters for `astroquery.linelists.jplspec`. + """ + server = _config.ConfigItem( + 'https://spec.jpl.nasa.gov/cgi-bin/catform', + 'JPL Spectral Catalog URL.') + + timeout = _config.ConfigItem( + 60, + 'Time limit for connecting to JPL server.') + + +conf = Conf() + +from .core import JPLSpec, JPLSpecClass + +__all__ = ['JPLSpec', 'JPLSpecClass', + 'Conf', 'conf', + ] diff --git a/astroquery/linelists/jplspec/core.py b/astroquery/linelists/jplspec/core.py new file mode 100644 index 0000000000..31ffef997c --- /dev/null +++ b/astroquery/linelists/jplspec/core.py @@ -0,0 +1,486 @@ +# Licensed under a 3-clause BSD style license - see LICENSE.rst +import os +import warnings + +import astropy.units as u +import numpy as np +from astropy.io import ascii +from astropy import table +from ...query import BaseQuery +from ..core import parse_letternumber +# import configurable items declared in __init__.py +from . import conf +from . 
import lookup_table +from astroquery.exceptions import EmptyResponseError, InvalidQueryError +from astroquery.utils import process_asyncs +from urllib.parse import parse_qs + + +__all__ = ['JPLSpec', 'JPLSpecClass'] + + +def data_path(filename): + data_dir = os.path.join(os.path.dirname(__file__), 'data') + return os.path.join(data_dir, filename) + + +class JPLSpecClass(BaseQuery): + + # use the Configuration Items imported from __init__.py + URL = conf.server + TIMEOUT = conf.timeout + + def __init__(self): + super().__init__() + + def query_lines_async(self, min_frequency, max_frequency, *, + min_strength=-500, + max_lines=2000, molecule='All', flags=0, + parse_name_locally=False, + get_query_payload=False, cache=True + ): + """ + Creates an HTTP POST request based on the desired parameters and + returns a response. + + Parameters + ---------- + min_frequency : `astropy.units` + Minimum frequency (or any spectral() equivalent) + max_frequency : `astropy.units` + Maximum frequency (or any spectral() equivalent) + min_strength : int, optional + Minimum strength in catalog units, the default is -500 + max_lines : int, optional + Maximum number of lines to query, the default is 2000. + The most the query allows is 100000 + + molecule : list, string of regex if parse_name_locally=True, optional + Identifiers of the molecules to search for. If this parameter + is not provided the search will match any species. Default is 'All'. + + flags : int, optional + Regular expression flags. Default is set to 0 + + parse_name_locally : bool, optional + When set to True it allows the method to parse through catdir.cat + in order to match the regex inputted in the molecule parameter + and request the corresponding tags of the matches instead. Default + is set to False + + get_query_payload : bool, optional + When set to `True` the method should return the HTTP request + parameters as a dict. Default value is set to False + cache : bool + Defaults to True. 
If set overrides global caching behavior. + See :ref:`caching documentation `. + + Returns + ------- + response : `requests.Response` + The HTTP response returned from the service. + + Examples + -------- + >>> table = JPLSpec.query_lines(min_frequency=100*u.GHz, + ... max_frequency=200*u.GHz, + ... min_strength=-500, molecule=18003) # doctest: +REMOTE_DATA + >>> print(table) # doctest: +SKIP + FREQ ERR LGINT DR ELO GUP TAG QNFMT QN' QN" + ----------- ------ -------- --- --------- --- ------ ----- -------- -------- + 115542.5692 0.6588 -13.2595 3 4606.1683 35 18003 1404 17 810 0 18 513 0 + 139614.293 0.15 -9.3636 3 3080.1788 87 -18003 1404 14 6 9 0 15 312 0 + 177317.068 0.15 -10.3413 3 3437.2774 31 -18003 1404 15 610 0 16 313 0 + 183310.087 0.001 -3.6463 3 136.1639 7 -18003 1404 3 1 3 0 2 2 0 0 + """ + # first initialize the dictionary of HTTP request parameters + payload = dict() + + if min_frequency is not None and max_frequency is not None: + # allow setting payload without having *ANY* valid frequencies set + min_frequency = min_frequency.to(u.GHz, u.spectral()) + max_frequency = max_frequency.to(u.GHz, u.spectral()) + if min_frequency > max_frequency: + min_frequency, max_frequency = max_frequency, min_frequency + + payload['MinNu'] = min_frequency.value + payload['MaxNu'] = max_frequency.value + + if max_lines is not None: + payload['MaxLines'] = max_lines + + payload['UnitNu'] = 'GHz' + payload['StrLim'] = min_strength + + if molecule is not None: + if parse_name_locally: + self.lookup_ids = build_lookup() + payload['Mol'] = tuple(self.lookup_ids.find(molecule, flags).values()) + if len(molecule) == 0: + raise InvalidQueryError('No matching species found. 
Please ' + 'refine your search or read the Docs ' + 'for pointers on how to search.') + else: + payload['Mol'] = molecule + + self.maxlines = max_lines + + payload = list(payload.items()) + + if get_query_payload: + return payload + # BaseQuery classes come with a _request method that includes a + # built-in caching system + response = self._request(method='POST', url=self.URL, data=payload, + timeout=self.TIMEOUT, cache=cache) + response.raise_for_status() + + return response + + def query_lines(self, min_frequency, max_frequency, *, + min_strength=-500, + max_lines=2000, molecule='All', flags=0, + parse_name_locally=False, + get_query_payload=False, + fallback_to_getmolecule=False, + cache=True): + """ + Query the JPLSpec service for spectral lines. + + This is a synchronous version of `query_lines_async`. + See `query_lines_async` for full parameter documentation. + """ + response = self.query_lines_async(min_frequency=min_frequency, + max_frequency=max_frequency, + min_strength=min_strength, + max_lines=max_lines, + molecule=molecule, + flags=flags, + parse_name_locally=parse_name_locally, + get_query_payload=get_query_payload, + cache=cache) + if get_query_payload: + return response + else: + return self._parse_result(response, fallback_to_getmolecule=fallback_to_getmolecule) + + query_lines.__doc__ = process_asyncs.async_to_sync_docstr(query_lines_async.__doc__) + + def _parse_result(self, response, *, verbose=False, fallback_to_getmolecule=False): + """ + Parse a response into an `~astropy.table.Table` + + The catalog data files are composed of 80-character card images, with + one card image per spectral line. The format of each card image is: + FREQ, ERR, LGINT, DR, ELO, GUP, TAG, QNFMT, QN', QN" + (F13.4,F8.4, F8.4, I2,F10.4, I3, I7, I4, 6I2, 6I2) + + FREQ: Frequency of the line in MHz. + ERR: Estimated or experimental error of FREQ in MHz. + LGINT: Base 10 logarithm of the integrated intensity in units of nm^2 MHz at + 300 K. 
+ + DR: Degrees of freedom in the rotational partition function (0 for atoms, + 2 for linear molecules, and 3 for nonlinear molecules). + + ELO: Lower state energy in cm^{-1} relative to the ground state. + GUP: Upper state degeneracy. + TAG: Species tag or molecular identifier. + A negative value flags that the line frequency has + been measured in the laboratory. The absolute value of TAG is then the + species tag and ERR is the reported experimental error. The three most + significant digits of the species tag are coded as the mass number of + the species. + + QNFMT: Identifies the format of the quantum numbers + QN': Quantum numbers for the upper state. + QN": Quantum numbers for the lower state. + """ + + if 'Zero lines were found' in response.text: + if fallback_to_getmolecule: + self.lookup_ids = build_lookup() + payload = parse_qs(response.request.body) + tbs = [self.get_molecule(mol) for mol in payload['Mol']] + if len(tbs) > 1: + mols = [] + for tb, mol in zip(tbs, payload['Mol']): + tb['Name'] = self.lookup_ids.find(mol, flags=0) + for key in list(tb.meta.keys()): + tb.meta[f'{mol}_{key}'] = tb.meta.pop(key) + mols.append(mol) + tb = table.vstack(tbs) + tb.meta['molecule_list'] = mols + else: + tb = tbs[0] + tb.meta['molecule_id'] = payload['Mol'][0] + tb.meta['molecule_name'] = self.lookup_ids.find(payload['Mol'][0], flags=0) + + return tb + else: + raise EmptyResponseError(f"Response was empty; message was '{response.text}'.") + + # data starts at 0 since regex was applied + # Warning for a result with more than 1000 lines: + # THIS form is currently limited to 1000 lines. 
+ result = ascii.read(response.text, header_start=None, data_start=0, + comment=r'THIS|^\s{12,14}\d{4,6}.*|CADDIR CATDIR', + names=('FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', + 'TAG', 'QNFMT', 'QN\'', 'QN"'), + col_starts=(0, 13, 21, 29, 31, 41, 44, 51, 55, 67), + format='fixed_width', fast_reader=False) + + if len(result) > self.maxlines: + warnings.warn("This form is currently limited to {0} lines." + "Please limit your search.".format(self.maxlines)) + + result['FREQ'].unit = u.MHz + result['ERR'].unit = u.MHz + result['LGINT'].unit = u.nm**2 * u.MHz + result['ELO'].unit = u.cm**(-1) + + return result + + def get_species_table(self, *, catfile='catdir.cat'): + """ + A directory of the catalog is found in a file called 'catdir.cat.' + Each element of this directory is an 80-character record with the + following format: + + | TAG, NAME, NLINE, QLOG, VER + | (I6,X, A13, I6, 7F7.4, I2) + + Parameters + ---------- + catfile : str, name of file, default 'catdir.cat' + The catalog file, installed locally along with the package + + Returns + ------- + Table: `~astropy.table.Table` + | TAG : The species tag or molecular identifier. + | NAME : An ASCII name for the species. + | NLINE : The number of lines in the catalog. + | QLOG : A seven-element vector containing the base 10 logarithm of + the partition function for temperatures of 300 K, 225 K, 150 K, + 75 K, 37.5 K, 18.75 K, and 9.375 K, respectively. + | VER : The version of the calculation for this species in the catalog. + The version number is followed by * if the entry is newer than the + last edition of the catalog. 
+ + """ + + result = ascii.read(data_path(catfile), header_start=None, data_start=0, + names=('TAG', 'NAME', 'NLINE', 'QLOG1', 'QLOG2', + 'QLOG3', 'QLOG4', 'QLOG5', 'QLOG6', + 'QLOG7', 'VER'), + col_starts=(0, 6, 20, 26, 33, 40, 47, 54, 61, + 68, 75), + format='fixed_width', fast_reader=False) + + # store the corresponding temperatures as metadata + result['QLOG1'].meta = {'Temperature (K)': 300} + result['QLOG2'].meta = {'Temperature (K)': 225} + result['QLOG3'].meta = {'Temperature (K)': 150} + result['QLOG4'].meta = {'Temperature (K)': 75} + result['QLOG5'].meta = {'Temperature (K)': 37.5} + result['QLOG6'].meta = {'Temperature (K)': 18.75} + result['QLOG7'].meta = {'Temperature (K)': 9.375} + result.meta = {'Temperature (K)': [300, 225, 150, 75, 37.5, 18.5, + 9.375]} + + return result + + def get_molecule(self, molecule_id, *, cache=True): + """ + Retrieve the whole molecule table for a given molecule id from the JPL catalog. + + Parameters + ---------- + molecule_id : int or str + The molecule tag/identifier. Can be an integer (e.g., 18003 for H2O) + or a zero-padded 6-character string (e.g., '018003'). + cache : bool + Defaults to True. If set overrides global caching behavior. + + Returns + ------- + Table : `~astropy.table.Table` + Table containing all spectral lines for the requested molecule. + + Examples + -------- + >>> table = JPLSpec.get_molecule(18003) # doctest: +SKIP + >>> print(table) # doctest: +SKIP + """ + # Convert to string and zero-pad to 6 digits + if isinstance(molecule_id, (int, np.int32, np.int64)): + molecule_str = f'{molecule_id:06d}' + if len(molecule_str) > 6: + raise ValueError("molecule_id should be an integer with" + " fewer than 6 digits or a length-6 " + "string of numbers") + elif isinstance(molecule_id, str): + # this is for the common case where the molecule is specified e.g. 
as 028001 CO + try: + molecule_id = f"{int(molecule_id[:6]):06d}" + except ValueError: + raise ValueError("molecule_id should be an integer or a length-6 string of numbers") + molecule_str = molecule_id + else: + raise ValueError("molecule_id should be an integer or a length-6 string of numbers") + + # Construct the URL to the catalog file + url = f'https://spec.jpl.nasa.gov/ftp/pub/catalog/c{molecule_str}.cat' + + # Request the catalog file + response = self._request(method='GET', url=url, + timeout=self.TIMEOUT, cache=cache) + response.raise_for_status() + + if 'The requested URL was not found on this server.' in response.text: + raise EmptyResponseError(f"No data found for molecule ID {molecule_id}.") + + # Parse the catalog file + result = self._parse_cat(response) + + # Add metadata from species table + species_table = self.get_species_table() + # Find the row matching this molecule_id + int_molecule_id = int(molecule_str) + matching_rows = species_table[species_table['TAG'] == int_molecule_id] + if len(matching_rows) > 0: + # Add metadata as a dictionary + result.meta = dict(zip(matching_rows.colnames, matching_rows[0])) + + return result + + def _parse_cat(self, response, *, verbose=False): + """ + Parse a JPL-format catalog file into an `~astropy.table.Table`. + + The catalog data files are composed of 80-character card images, with + one card image per spectral line. The format of each card image is: + FREQ, ERR, LGINT, DR, ELO, GUP, TAG, QNFMT, QN', QN" + (F13.4,F8.4, F8.4, I2,F10.4, I3, I7, I4, 6I2, 6I2) + + https://spec.jpl.nasa.gov/ftp/pub/catalog/doc/catintro.pdf + + Parameters + ---------- + text : str + The catalog file text content. + verbose : bool, optional + Not used currently. + + Returns + ------- + Table : `~astropy.table.Table` + Parsed catalog data. 
+ """ + text = response.text + if 'Zero lines were found' in text or len(text.strip()) == 0: + raise EmptyResponseError(f"Response was empty; message was '{text}'.") + + # Parse the catalog file with fixed-width format + # Format: FREQ(13.4), ERR(8.4), LGINT(8.4), DR(2), ELO(10.4), GUP(3), TAG(7), QNFMT(4), QN'(12), QN"(12) + result = ascii.read(text, header_start=None, data_start=0, + comment=r'THIS|^\s{12,14}\d{4,6}.*', + names=('FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', + 'TAG', 'QNFMT', 'QN\'', 'QN"'), + col_starts=(0, 13, 21, 29, 31, 41, 44, 51, 55, 67), + format='fixed_width', fast_reader=False) + + # Ensure TAG is integer type + result['TAG'] = result['TAG'].astype(int) + + # Add units + result['FREQ'].unit = u.MHz + result['ERR'].unit = u.MHz + result['LGINT'].unit = u.nm**2 * u.MHz + result['ELO'].unit = u.cm**(-1) + + # split table by qnfmt; each chunk must be separately parsed. + qnfmts = np.unique(result['QNFMT']) + tables = [result[result['QNFMT'] == qq] for qq in qnfmts] + + # some tables have +/-/blank entries in QNs + # pm_is_ok should be True when the QN columns contain '+' or '-'. 
+ # (can't do a str check on np.integer dtype so have to filter that out first) + pm_is_ok = ((not np.issubdtype(result["QN'"].dtype, np.integer)) + and any(('+' in str(line) or '-' in str(line)) for line in result["QN'"])) + + def int_or_pm(st): + try: + return int(st) + except ValueError: + try: + return parse_letternumber(st) + except ValueError: + if pm_is_ok and (st.strip() == '' or st.strip() == '+' or st.strip() == '-'): + return st.strip() + else: + raise ValueError(f'"{st}" is not a valid +/-/blank entry') + + # At least this molecule, NH, claims 5 QNs but has only 4 + bad_qnfmt_dict = { + 15001: 1234, + } + mol_tag = result['TAG'][0] + + if mol_tag in (32001,): + raise NotImplementedError("Molecule O2 (32001) does not follow the format standard.") + + for tbl in tables: + if mol_tag in bad_qnfmt_dict: + n_qns = bad_qnfmt_dict[mol_tag] % 10 + else: + n_qns = tbl['QNFMT'][0] % 10 + if n_qns > 1: + qnlen = 2 * n_qns + for ii in range(n_qns): + if tbl["QN'"].dtype in (int, np.int32, np.int64): + # for the case where it was already parsed as int + # (53005 is an example) + tbl[f"QN'{ii+1}"] = tbl["QN'"] + tbl[f'QN"{ii+1}'] = tbl['QN"'] + else: + # string parsing can truncate to length=2n or 2n-1 depending + # on whether there are any two-digit QNs in the column + ind1 = ii * 2 + ind2 = ii * 2 + 2 + # rjust(qnlen) is needed to enforce that all strings retain their exact original shape + qnp = [int_or_pm(line.rjust(qnlen)[ind1: ind2].strip()) for line in tbl['QN\'']] + qnpp = [int_or_pm(line.rjust(qnlen)[ind1: ind2].strip()) for line in tbl['QN"']] + dtype = str if any('+' in str(x) for x in qnp) else int + tbl[f"QN'{ii+1}"] = np.array(qnp, dtype=dtype) + tbl[f'QN"{ii+1}'] = np.array(qnpp, dtype=dtype) + del tbl['QN\''] + del tbl['QN"'] + else: + tbl['QN\''] = np.array(list(map(parse_letternumber, tbl['QN\''])), dtype=int) + tbl['QN"'] = np.array(list(map(parse_letternumber, tbl['QN"'])), dtype=int) + + result = table.vstack(tables) + + # Add laboratory 
measurement flag + # A negative TAG value indicates laboratory-measured frequency + result['Lab'] = result['TAG'] < 0 + # Convert TAG to absolute value + result['TAG'] = abs(result['TAG']) + + return result + + +JPLSpec = JPLSpecClass() + + +def build_lookup(): + + result = JPLSpec.get_species_table() + keys = list(result['NAME']) + values = list(result['TAG']) + dictionary = dict(zip(keys, values)) + lookuptable = lookup_table.Lookuptable(dictionary) # apply the class above + + return lookuptable diff --git a/astroquery/jplspec/data/catdir.cat b/astroquery/linelists/jplspec/data/catdir.cat similarity index 100% rename from astroquery/jplspec/data/catdir.cat rename to astroquery/linelists/jplspec/data/catdir.cat diff --git a/astroquery/jplspec/lookup_table.py b/astroquery/linelists/jplspec/lookup_table.py similarity index 100% rename from astroquery/jplspec/lookup_table.py rename to astroquery/linelists/jplspec/lookup_table.py diff --git a/astroquery/jplspec/setup_package.py b/astroquery/linelists/jplspec/setup_package.py similarity index 63% rename from astroquery/jplspec/setup_package.py rename to astroquery/linelists/jplspec/setup_package.py index 761f2f6829..7439815548 100644 --- a/astroquery/jplspec/setup_package.py +++ b/astroquery/linelists/jplspec/setup_package.py @@ -8,8 +8,9 @@ def get_package_data(): paths_test = [os.path.join('data', 'CO.data'), os.path.join('data', 'CO_6.data'), + os.path.join('data', 'H2O_sample.cat'), os.path.join('data', 'multi.data')] paths_data = [os.path.join('data', 'catdir.cat')] - return {'astroquery.jplspec.tests': paths_test, - 'astroquery.jplspec': paths_data, } + return {'astroquery.linelists.jplspec.tests': paths_test, + 'astroquery.linelists.jplspec': paths_data, } diff --git a/astroquery/jplspec/tests/__init__.py b/astroquery/linelists/jplspec/tests/__init__.py similarity index 100% rename from astroquery/jplspec/tests/__init__.py rename to astroquery/linelists/jplspec/tests/__init__.py diff --git 
a/astroquery/jplspec/tests/data/CO.data b/astroquery/linelists/jplspec/tests/data/CO.data similarity index 100% rename from astroquery/jplspec/tests/data/CO.data rename to astroquery/linelists/jplspec/tests/data/CO.data diff --git a/astroquery/jplspec/tests/data/CO_6.data b/astroquery/linelists/jplspec/tests/data/CO_6.data similarity index 100% rename from astroquery/jplspec/tests/data/CO_6.data rename to astroquery/linelists/jplspec/tests/data/CO_6.data diff --git a/astroquery/linelists/jplspec/tests/data/H2O_sample.cat b/astroquery/linelists/jplspec/tests/data/H2O_sample.cat new file mode 100644 index 0000000000..bd13a08689 --- /dev/null +++ b/astroquery/linelists/jplspec/tests/data/H2O_sample.cat @@ -0,0 +1,52 @@ + 8006.5805 2.8510-18.6204 3 6219.6192 45 18003140422 418 0 21 715 0 + 12478.2535 0.2051-13.1006 3 3623.7652 31 18003140415 7 9 0 16 412 0 + 22235.0798 0.0001 -5.8825 3 446.5107 39 -180031404 6 1 6 0 5 2 3 0 + 27206.4582 6.3643-19.1265 3 7210.5493141 18003140423 617 0 24 520 0 + 71592.4316 0.5310-13.4989 3 4606.1683 39 18003140419 416 0 18 513 0 + 115542.5692 0.6588-13.2595 3 4606.1683 35 18003140417 810 0 18 513 0 + 139614.2930 0.1500 -9.3636 3 3080.1788 87 -18003140414 6 9 0 15 312 0 + 177317.0680 0.1500-10.3413 3 3437.2774 31 -18003140415 610 0 16 313 0 + 183310.0870 0.0010 -3.6463 3 136.1639 7 -180031404 3 1 3 0 2 2 0 0 + 247440.0960 0.1500 -9.0097 3 2872.5806 29 -18003140414 410 0 15 313 0 + 259952.1820 0.2000 -8.6690 3 2739.4286 27 -18003140413 6 8 0 14 311 0 + 266574.0983 1.8473-14.1089 3 5739.2279129 18003140421 417 0 20 714 0 + 289008.0871 2.7396-15.1447 3 6167.7109129 18003140421 615 0 20 912 0 + 294805.1937 4.1586-16.0382 3 6707.3362135 18003140422 716 0 23 419 0 + 321225.6770 0.0006 -5.0909 3 1282.9191 63 -18003140410 2 9 0 9 3 6 0 + 325152.8990 0.0010 -3.5711 3 315.7795 11 -180031404 5 1 5 0 4 2 2 0 + 339043.9960 0.1500-10.0708 3 3810.9369 99 -18003140416 611 0 17 314 0 + 354808.5800 0.2000-10.4028 3 4006.0734105 -18003140417 413 0 16 710 0 
+ 373514.7088 6.1926-17.5865 3 7386.7750135 180031404221013 0 23 716 0 + 380197.3598 0.0001 -2.6152 3 212.1564 27 -180031404 4 1 4 0 3 2 1 0 + 390134.5100 0.0500 -6.0290 3 1525.1360 21 -18003140410 3 7 0 11 210 0 + 437346.6640 0.0020 -4.8220 3 1045.0584 15 -180031404 7 5 3 0 6 6 0 0 + 439150.7948 0.0003 -3.6615 3 742.0763 39 -180031404 6 4 3 0 5 5 0 0 + 443018.3546 0.0008 -4.3337 3 1045.0580 45 -180031404 7 5 2 0 6 6 1 0 + 448001.0775 0.0005 -2.5935 3 285.4186 27 -180031404 4 2 3 0 3 3 0 0 + 458682.8454 1.1313-13.1673 3 5276.8018 41 18003140420 416 0 19 713 0 + 470888.9030 0.0020 -4.0778 3 742.0730 13 -180031404 6 4 2 0 5 5 1 0 + 474689.1080 0.0010 -3.4856 3 488.1342 11 -180031404 5 3 3 0 4 4 0 0 + 488491.1280 0.0030 -4.1739 3 586.4792 13 -180031404 6 2 4 0 7 1 7 0 + 503568.5200 0.0200 -4.9916 3 1394.8142 51 -180031404 8 6 3 0 7 7 0 0 + 504482.6900 0.0500 -5.4671 3 1394.8142 17 -180031404 8 6 2 0 7 7 1 0 + 525890.1638 0.8432-12.2048 3 5035.1266117 18003140419 514 0 18 811 0 + 530342.8600 0.2000 -7.1006 3 2533.7932 87 -18003140414 312 0 13 4 9 0 + 534240.4544 0.3469-11.2954 3 4409.3446 37 18003140418 414 0 17 711 0 + 556935.9877 0.0003 -0.8189 3 23.7944 9 -180031404 1 1 0 0 1 0 1 0 + 557985.4794 0.6432-11.6213 3 4833.2084117 18003140419 415 0 18 712 0 + 558017.0036 12.4193-18.1025 3 7729.4622 49 18003140424 618 0 25 521 0 + 571913.6860 0.1000 -6.9705 3 2414.7235 75 -18003140412 6 7 0 13 310 0 + 591693.4339 0.2120 -8.6820 3 3244.6008 87 18003140414 7 8 0 15 411 0 + 593113.7249 7.4502-18.5975 3 7924.4438 49 18003140424 717 0 231014 0 + 593227.8163 0.4197-10.8822 3 4201.2514 35 18003140417 612 0 18 315 0 + 596308.5878 4.5348-15.8345 3 6687.8251 47 18003140423 519 0 22 616 0 + 614309.5658 2.1666-14.1672 3 5680.7868 39 18003140419 911 0 20 614 0 + 620293.9651 1.1653-12.0811 3 5031.9777117 18003140419 514 0 20 417 0 + 620700.9549 0.0006 -2.7692 3 488.1077 33 -180031404 5 3 2 0 4 4 1 0 + 624732.7750 5.8384-16.9250 3 7210.3271 47 18003140423 717 0 24 420 0 + 645766.1230 
0.0300 -6.1081 3 1789.0429 19 -180031404 9 7 3 0 8 8 0 0 + 645905.7060 0.0500 -5.6308 3 1789.0429 57 -180031404 9 7 2 0 8 8 1 0 + 723142.3610 9.8873-19.4330 3 8554.6415 53 18003140426 521 0 25 818 0 + 752033.1430 0.1000 -0.9985 3 70.0908 5 -180031404 2 1 1 0 2 0 2 0 + 766793.5950 0.1000 -6.2559 3 1960.2074 23 -18003140411 5 7 0 12 210 0 + 826549.8880 0.2000 -9.9788 3 4174.2875111 -18003140418 415 0 17 512 0 \ No newline at end of file diff --git a/astroquery/jplspec/tests/data/multi.data b/astroquery/linelists/jplspec/tests/data/multi.data similarity index 100% rename from astroquery/jplspec/tests/data/multi.data rename to astroquery/linelists/jplspec/tests/data/multi.data diff --git a/astroquery/linelists/jplspec/tests/test_jplspec.py b/astroquery/linelists/jplspec/tests/test_jplspec.py new file mode 100644 index 0000000000..7c67812787 --- /dev/null +++ b/astroquery/linelists/jplspec/tests/test_jplspec.py @@ -0,0 +1,328 @@ +import numpy as np +import pytest + +from unittest.mock import Mock, MagicMock, patch +from astroquery.exceptions import EmptyResponseError + +import os + +from astropy import units as u +from astropy.table import Table +from ..core import JPLSpec + +file1 = 'CO.data' +file2 = 'CO_6.data' +file3 = 'multi.data' + + +def data_path(filename): + + data_dir = os.path.join(os.path.dirname(__file__), 'data') + return os.path.join(data_dir, filename) + + +class MockResponseSpec: + + def __init__(self, filename): + self.filename = data_path(filename) + + @property + def text(self): + with open(self.filename) as f: + return f.read() + + +def test_input_async(): + + response = JPLSpec.query_lines_async(min_frequency=100 * u.GHz, + max_frequency=1000 * u.GHz, + min_strength=-500, + molecule="28001 CO", + get_query_payload=True) + response = dict(response) + assert response['Mol'] == "28001 CO" + np.testing.assert_almost_equal(response['MinNu'], 100.) + np.testing.assert_almost_equal(response['MaxNu'], 1000.) 
+ + +def test_input_maxlines_async(): + + response = JPLSpec.query_lines_async(min_frequency=100 * u.GHz, + max_frequency=1000 * u.GHz, + min_strength=-500, + molecule="28001 CO", + max_lines=6, + get_query_payload=True) + response = dict(response) + assert response['Mol'] == "28001 CO" + assert response['MaxLines'] == 6. + np.testing.assert_almost_equal(response['MinNu'], 100.) + np.testing.assert_almost_equal(response['MaxNu'], 1000.) + + +def test_input_multi(): + + response = JPLSpec.query_lines_async(min_frequency=500 * u.GHz, + max_frequency=1000 * u.GHz, + min_strength=-500, + molecule=r"^H[2D]O(-\d\d|)$", + parse_name_locally=True, + get_query_payload=True) + response = dict(response) + assert set(response['Mol']) == set((18003, 19002, 19003, 20003, 21001)) + np.testing.assert_almost_equal(response['MinNu'], 500.) + np.testing.assert_almost_equal(response['MaxNu'], 1000.) + + +def test_query(): + + response = MockResponseSpec(file1) + tbl = JPLSpec._parse_result(response) + assert isinstance(tbl, Table) + assert len(tbl) == 8 + assert set(tbl.keys()) == set(['FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', + 'TAG', 'QNFMT', 'QN\'', 'QN"']) + + assert tbl['FREQ'][0] == 115271.2018 + assert tbl['ERR'][0] == .0005 + assert tbl['LGINT'][0] == -5.0105 + assert tbl['ERR'][7] == .0050 + assert tbl['FREQ'][7] == 921799.7000 + assert tbl['QN"'][7] == 7 + assert tbl['ELO'][1] == 3.8450 + + +def test_query_truncated(): + + response = MockResponseSpec(file2) + tbl = JPLSpec._parse_result(response) + assert isinstance(tbl, Table) + assert len(tbl) == 6 + assert set(tbl.keys()) == set(['FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', + 'TAG', 'QNFMT', 'QN\'', 'QN"']) + + assert tbl['FREQ'][0] == 115271.2018 + assert tbl['ERR'][0] == .0005 + assert tbl['LGINT'][0] == -5.0105 + assert tbl['ELO'][1] == 3.8450 + + +def test_query_multi(): + + response = MockResponseSpec(file3) + tbl = JPLSpec._parse_result(response) + assert isinstance(tbl, Table) + assert len(tbl) == 208 + assert 
set(tbl.keys()) == set(['FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', + 'TAG', 'QNFMT', 'QN\'', 'QN"']) + + assert tbl['FREQ'][0] == 503568.5200 + assert tbl['ERR'][0] == 0.0200 + assert tbl['LGINT'][0] == -4.9916 + assert tbl['TAG'][0] == -18003 + assert tbl['TAG'][38] == -19002 + assert tbl['TAG'][207] == 21001 + + +def test_parse_cat(): + """Test parsing of catalog files with _parse_cat method.""" + + response = MockResponseSpec('H2O_sample.cat') + tbl = JPLSpec._parse_cat(response) + + # Check table structure + assert isinstance(tbl, Table) + assert len(tbl) > 0 + assert set(tbl.keys()) == set(['FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', + 'TAG', 'QNFMT', 'Lab', + 'QN"1', 'QN"2', 'QN"3', 'QN"4', + "QN'1", "QN'2", "QN'3", "QN'4" + ]) + + # Check units + assert tbl['FREQ'].unit == u.MHz + assert tbl['ERR'].unit == u.MHz + assert tbl['LGINT'].unit == u.nm**2 * u.MHz + assert tbl['ELO'].unit == u.cm**(-1) + + # Check Lab flag exists and is boolean + assert 'Lab' in tbl.colnames + assert tbl['Lab'].dtype == bool + + # Check TAG values are positive (absolute values) + assert all(tbl['TAG'] > 0) + + +def test_get_molecule_input_validation(): + """Test input validation for get_molecule method.""" + + # Test invalid string format + with pytest.raises(ValueError): + JPLSpec.get_molecule('invalid') + + # Test invalid type + with pytest.raises(ValueError): + JPLSpec.get_molecule(12.34) + + # Test wrong length string + with pytest.raises(ValueError): + JPLSpec.get_molecule(1234567) + + +# Helper functions for fallback tests +def _create_empty_response(molecules): + """Create a mock response with 'Zero lines were found'.""" + mock_response = Mock() + mock_response.text = "Zero lines were found" + mock_request = Mock() + if isinstance(molecules, str): + mock_request.body = f"Mol={molecules}" + else: + mock_request.body = "&".join(f"Mol={mol}" for mol in molecules) + mock_response.request = mock_request + return mock_response + + +def _setup_fallback_mocks(molecules_dict): + """ 
+ Set up mocks for fallback testing. + + Parameters + ---------- + molecules_dict : dict + Dictionary mapping molecule IDs to (name, table_data) tuples. + table_data should be a dict with 'FREQ' and optionally other columns. + + Returns + ------- + mock_get_molecule, mock_build_lookup + The mock objects that can be used in assertions. + """ + # Mock build_lookup + mock_lookup = MagicMock() + if len(molecules_dict) == 1: + mol_id = list(molecules_dict.keys())[0] + mock_lookup.find.return_value = molecules_dict[mol_id][0] + else: + mock_lookup.find.side_effect = lambda mol_id, **kwargs: molecules_dict.get(mol_id, (None, None))[0] + + # Mock get_molecule + def get_molecule_side_effect(mol_id): + if mol_id not in molecules_dict: + raise ValueError(f"Unexpected molecule ID: {mol_id}") + name, table_data = molecules_dict[mol_id] + mock_table = Table() + mock_table['FREQ'] = table_data.get('FREQ', [100.0, 200.0]) + mock_table['TAG'] = [int(mol_id)] * len(mock_table['FREQ']) + # Add any additional columns from table_data + for key, value in table_data.items(): + if key != 'FREQ' and key not in mock_table.colnames: + mock_table[key] = value + mock_table.meta = table_data.get('meta', {}) + return mock_table + + return get_molecule_side_effect, mock_lookup + + +def test_fallback_to_getmolecule_with_empty_response(): + """Test that fallback_to_getmolecule works when query returns zero lines.""" + mock_response = _create_empty_response('18003') + + # Test with fallback disabled - should raise EmptyResponseError + with pytest.raises(EmptyResponseError, match="Response was empty"): + JPLSpec._parse_result(mock_response, fallback_to_getmolecule=False) + + # Test with fallback enabled - should call get_molecule + molecules = {'18003': ('H2O', {'FREQ': [100.0, 200.0]})} + + with patch.object(JPLSpec, 'get_molecule') as mock_get_molecule, \ + patch('astroquery.linelists.jplspec.core.build_lookup') as mock_build_lookup: + + get_mol_func, mock_lookup = _setup_fallback_mocks(molecules) 
+ mock_get_molecule.side_effect = get_mol_func + mock_build_lookup.return_value = mock_lookup + + result = JPLSpec._parse_result(mock_response, fallback_to_getmolecule=True) + + mock_get_molecule.assert_called_once_with('18003') + assert isinstance(result, Table) + assert len(result) == 2 + assert result.meta['molecule_id'] == '18003' + assert result.meta['molecule_name'] == 'H2O' + + +def test_fallback_to_getmolecule_with_multiple_molecules(): + """Test fallback with multiple molecules in the request.""" + mock_response = _create_empty_response(['18003', '28001']) + + molecules = { + '18003': ('H2O', {'FREQ': [100.0, 200.0]}), + '28001': ('CO', {'FREQ': [300.0, 400.0]}) + } + + with patch.object(JPLSpec, 'get_molecule') as mock_get_molecule, \ + patch('astroquery.linelists.jplspec.core.build_lookup') as mock_build_lookup: + + get_mol_func, mock_lookup = _setup_fallback_mocks(molecules) + mock_get_molecule.side_effect = get_mol_func + mock_build_lookup.return_value = mock_lookup + + result = JPLSpec._parse_result(mock_response, fallback_to_getmolecule=True) + + assert mock_get_molecule.call_count == 2 + assert isinstance(result, Table) + assert len(result) == 4 # 2 rows from each molecule + assert 'molecule_list' in result.meta + assert 'Name' in result.colnames + + +def test_query_lines_with_fallback(): + """Test that query_lines uses fallback when server returns empty result.""" + + # Test with fallback disabled - should raise EmptyResponseError + with patch.object(JPLSpec, '_request') as mock_request: + mock_response = _create_empty_response('28001') + mock_response.raise_for_status = Mock() + mock_request.return_value = mock_response + + with pytest.raises(EmptyResponseError, match="Response was empty"): + JPLSpec.query_lines(min_frequency=100 * u.GHz, + max_frequency=200 * u.GHz, + min_strength=-500, + molecule="28001 CO", + fallback_to_getmolecule=False) + + # Test with fallback enabled - should call get_molecule + molecules = {'28001': ('CO', { + 'FREQ': 
[115271.2018, 230538.0000], + 'ERR': [0.0005, 0.0010], + 'LGINT': [-5.0105, -4.5], + 'DR': [2, 2], + 'ELO': [0.0, 3.845], + 'GUP': [3, 5], + 'QNFMT': [1, 1] + })} + + with patch.object(JPLSpec, '_request') as mock_request, \ + patch.object(JPLSpec, 'get_molecule') as mock_get_molecule, \ + patch('astroquery.linelists.jplspec.core.build_lookup') as mock_build_lookup: + + mock_response = _create_empty_response('28001') + mock_response.raise_for_status = Mock() + mock_request.return_value = mock_response + + get_mol_func, mock_lookup = _setup_fallback_mocks(molecules) + mock_get_molecule.side_effect = get_mol_func + mock_build_lookup.return_value = mock_lookup + + result = JPLSpec.query_lines( + min_frequency=100 * u.GHz, + max_frequency=200 * u.GHz, + min_strength=-500, + molecule="28001 CO", + fallback_to_getmolecule=True) + + mock_get_molecule.assert_called_once_with('28001') + assert isinstance(result, Table) + assert len(result) > 0 + assert 'molecule_id' in result.meta diff --git a/astroquery/linelists/jplspec/tests/test_jplspec_remote.py b/astroquery/linelists/jplspec/tests/test_jplspec_remote.py new file mode 100644 index 0000000000..a9968421bf --- /dev/null +++ b/astroquery/linelists/jplspec/tests/test_jplspec_remote.py @@ -0,0 +1,254 @@ +import pytest +from astropy import units as u +from astropy.table import Table + +from ..core import JPLSpec +from astroquery.exceptions import EmptyResponseError + + +@pytest.mark.xfail(reason="2025 server problems", raises=EmptyResponseError) +@pytest.mark.remote_data +def test_remote(): + tbl = JPLSpec.query_lines(min_frequency=500 * u.GHz, + max_frequency=1000 * u.GHz, + min_strength=-500, + molecule="18003 H2O", + fallback_to_getmolecule=False) + assert isinstance(tbl, Table) + assert len(tbl) == 36 + assert set(tbl.keys()) == set(['FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', + 'TAG', 'QNFMT', 'QN\'', 'QN"']) + + assert tbl['FREQ'][0] == 503568.5200 + assert tbl['ERR'][0] == 0.0200 + assert tbl['LGINT'][0] == -4.9916 + 
assert tbl['ERR'][7] == 12.4193 + assert tbl['FREQ'][35] == 987926.7590 + + +@pytest.mark.remote_data +def test_remote_regex_fallback(): + """ + CO, H13CN, HC15N + Some of these have different combinations of QNs + """ + tbl = JPLSpec.query_lines(min_frequency=500 * u.GHz, + max_frequency=1000 * u.GHz, + min_strength=-500, + molecule=("28001", "28002", "28003"), + fallback_to_getmolecule=True) + assert isinstance(tbl, Table) + tbl = tbl[((tbl['FREQ'].quantity > 500*u.GHz) & (tbl['FREQ'].quantity < 1*u.THz))] + assert len(tbl) == 16 + # there are more QN formats than the original query had + assert set(tbl.keys()) == set(['FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', + 'TAG', 'QNFMT', 'QN\'', 'QN"', 'Lab', + 'QN"1', 'QN"2', "QN'", "QN'1", "QN'2", + 'Name' + ]) + + assert tbl['FREQ'][0] == 576267.9305 + assert tbl['ERR'][0] == .0005 + assert tbl['LGINT'][0] == -3.0118 + assert tbl['ERR'][7] == 8.3063 + assert tbl['FREQ'][15] == 946175.3151 + + +@pytest.mark.xfail(reason="2025 server problems", raises=EmptyResponseError) +@pytest.mark.remote_data +def test_remote_regex(): + tbl = JPLSpec.query_lines(min_frequency=500 * u.GHz, + max_frequency=1000 * u.GHz, + min_strength=-500, + molecule=("28001", "28002", "28003"), + fallback_to_getmolecule=False) + assert isinstance(tbl, Table) + assert len(tbl) == 16 + assert set(tbl.keys()) == set(['FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', + 'TAG', 'QNFMT', 'QN\'', 'QN"', + ]) + + assert tbl['FREQ'][0] == 576267.9305 + assert tbl['ERR'][0] == .0005 + assert tbl['LGINT'][0] == -3.0118 + assert tbl['ERR'][7] == 8.3063 + assert tbl['FREQ'][15] == 946175.3151 + + +@pytest.mark.remote_data +def test_get_molecule_remote(): + """Test get_molecule with remote data retrieval.""" + # Test with H2O + tbl = JPLSpec.get_molecule(18003) + + assert isinstance(tbl, Table) + assert len(tbl) > 0 + + # Check expected columns including Lab flag + expected_cols = {'FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', + 'TAG', 'QNFMT', 'Lab', + 'QN"1', 'QN"2', 
'QN"3', 'QN"4', + "QN'1", "QN'2", "QN'3", "QN'4"} + assert set(tbl.keys()) == expected_cols + + # Check units + assert tbl['FREQ'].unit == u.MHz + assert tbl['ERR'].unit == u.MHz + assert tbl['LGINT'].unit == u.nm**2 * u.MHz + assert tbl['ELO'].unit == u.cm**(-1) + + # Check metadata was attached + assert 'NAME' in tbl.meta + assert tbl.meta['NAME'].strip() == 'H2O' + assert 'TAG' in tbl.meta + assert tbl.meta['TAG'] == 18003 + + # Check Lab flag + assert 'Lab' in tbl.colnames + assert tbl['Lab'].dtype == bool + + # H2O should have some lab measurements + assert sum(tbl['Lab']) > 0 + + +@pytest.mark.remote_data +def test_get_molecule_string_id(): + """Test get_molecule with string ID format.""" + # Test with CO using string ID + tbl = JPLSpec.get_molecule('028001') + + assert isinstance(tbl, Table) + assert len(tbl) > 0 + assert 'NAME' in tbl.meta + assert 'CO' in tbl.meta['NAME'] + + +@pytest.mark.remote_data +def test_remote_fallback(): + tbl = JPLSpec.query_lines(min_frequency=500 * u.GHz, + max_frequency=1000 * u.GHz, + min_strength=-500, + molecule="18003 H2O", + fallback_to_getmolecule=True) + assert isinstance(tbl, Table) + tbl = tbl[((tbl['FREQ'].quantity > 500*u.GHz) & (tbl['FREQ'].quantity < 1*u.THz))] + assert len(tbl) == 36 + assert set(tbl.keys()) == set(['FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', + 'TAG', 'QNFMT', 'Lab', + 'QN"1', 'QN"2', 'QN"3', 'QN"4', + "QN'1", "QN'2", "QN'3", "QN'4" + ]) + + assert tbl['FREQ'][0] == 503568.5200 + assert tbl['ERR'][0] == 0.0200 + assert tbl['LGINT'][0] == -4.9916 + assert tbl['ERR'][7] == 12.4193 + assert tbl['FREQ'][35] == 987926.7590 + + +@pytest.mark.remote_data +@pytest.mark.parametrize('mol_id,expected_name', [ + (28001, 'CO'), # Simple diatomic + (32003, 'CH3OH'), # Complex organic + (13002, 'CH'), # another simple molecule w/5 QNs + (14004, 'CD'), # no 2-digit QNs in first col + (15001, 'NH'), # incorrect QNFMT, says there are 5 QNs, only 4 + (18004, 'NH2D'), # highlighted a mismatch between qnlen & n_qns + 
# (32001, 'O2'), # masked second QN set? +]) +def test_get_molecule_various(mol_id, expected_name): + """ + Test get_molecule with various molecules. + + CH & CD are both regression tests for difficult molecules with >4 QNs and + missing 2-digit QNs (i.e., columns with _only_ 1-digit QNs at the start of + the columns with QNs). + """ + tbl = JPLSpec.get_molecule(mol_id) + assert isinstance(tbl, Table) + assert len(tbl) > 0 + assert 'NAME' in tbl.meta + assert expected_name in tbl.meta['NAME'] + + # Verify TAG values are positive + assert all(tbl['TAG'] > 0) + + +@pytest.mark.remote_data +def test_get_molecule_qn1(): + tbl = JPLSpec.get_molecule(28001) + assert isinstance(tbl, Table) + assert len(tbl) > 0 + assert 'QN"' in tbl.colnames + assert 'QN1"' not in tbl.colnames + assert "QN'" in tbl.colnames + assert "QN1'" not in tbl.colnames + + +@pytest.mark.remote_data +def test_get_molecule_qn4(): + """ CN has 4 QNs """ + tbl = JPLSpec.get_molecule(26001) + assert isinstance(tbl, Table) + assert len(tbl) > 0 + for ii in range(1, 5): + assert f'QN"{ii}' in tbl.colnames + assert f"QN'{ii}" in tbl.colnames + + +@pytest.mark.remote_data +def test_get_molecule_parser_details(): + """ + Verifying a known hard-to-parse row + 982.301 0.174 -17.8172 3 464.3000 9 320031304 4-2 2 5-5 2 + 991.369 0.003 -9.8234 3 310.3570 37 32003130418 3 - 0 18 3 + 0 + """ + tbl = JPLSpec.get_molecule(32003) + testrow = tbl[5] + assert testrow['FREQ'] == 982.301 + assert testrow["QN'1"] == 4 + assert testrow["QN'2"] == -2 + assert testrow["QN'3"] == '' + assert testrow["QN'4"] == 2 + + assert testrow['QN"1'] == 5 + assert testrow['QN"2'] == -5 + assert testrow['QN"3'] == '' + assert testrow['QN"4'] == 2 + + testrow = tbl[6] + assert testrow['FREQ'] == 991.369 + assert testrow["QN'1"] == 18 + assert testrow["QN'2"] == 3 + assert testrow["QN'3"] == '-' + assert testrow["QN'4"] == 0 + + assert testrow['QN"1'] == 18 + assert testrow['QN"2'] == 3 + assert testrow['QN"3'] == '+' + assert 
testrow['QN"4'] == 0 + + +@pytest.mark.bigdata +@pytest.mark.remote_data +class TestRegressionAllMolecules: + """Test that we can get each molecule in JPL database""" + species_table = JPLSpec.get_species_table() + + @pytest.mark.parametrize('row', species_table) + def test_regression_all_molecules(self, row): + """ + Expensive test - try all the molecules + """ + mol_id = row['TAG'] + # O2 has masked QNs making it hard to test automatically (32...) + # 34001, 39003, 44004, 44009, 44012 are missing or corrupt molecules + # 81001 may be fine? not entirely sure what's wrong + if mol_id in (32001, 32002, 32005, + 34001, 39003, 44004, 44009, 44012, + 81001): + # N2O = 44009 is just not there + pytest.skip("Skipping O2 due to masked QNs") + tbl = JPLSpec.get_molecule(mol_id) + assert isinstance(tbl, Table) + assert len(tbl) > 0 diff --git a/docs/index.rst b/docs/index.rst index baef40b2a3..64c3d35f66 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -276,7 +276,7 @@ The following modules have been completed using a common API: ipac/irsa/irsa_dust/irsa_dust.rst ipac/irsa/ibe/ibe.rst ipac/irsa/irsa.rst - jplspec/jplspec.rst + linelists/jplspec/jplspec.rst magpis/magpis.rst mast/mast.rst mocserver/mocserver.rst @@ -412,7 +412,7 @@ well as cross section and collision rates. Those services are: atomic/atomic.rst linelists/cdms/cdms.rst hitran/hitran.rst - jplspec/jplspec.rst + linelists/jplspec/jplspec.rst lamda/lamda.rst nist/nist.rst splatalogue/splatalogue.rst diff --git a/docs/linelists/cdms/cdms.rst b/docs/linelists/cdms/cdms.rst index 52b5613b56..dc08bcfb8d 100644 --- a/docs/linelists/cdms/cdms.rst +++ b/docs/linelists/cdms/cdms.rst @@ -34,17 +34,17 @@ each setting yields: ... molecule="028503 CO", ... 
get_query_payload=False) >>> response.pprint(max_width=120) - FREQ ERR LGINT DR ELO GUP MOLWT TAG QNFMT Ju Ku vu F1u F2u F3u Jl Kl vl F1l F2l F3l name Lab - MHz MHz nm2 MHz 1 / cm u - ----------- ------ ------- --- -------- --- ----- --- ----- --- --- --- --- --- --- --- --- --- --- --- --- ------- ---- - 115271.2018 0.0005 -5.0105 2 0.0 3 28 503 101 1 -- -- -- -- -- 0 -- -- -- -- -- CO, v=0 True - 230538.0 0.0005 -4.1197 2 3.845 5 28 503 101 2 -- -- -- -- -- 1 -- -- -- -- -- CO, v=0 True - 345795.9899 0.0005 -3.6118 2 11.535 7 28 503 101 3 -- -- -- -- -- 2 -- -- -- -- -- CO, v=0 True - 461040.7682 0.0005 -3.2657 2 23.0695 9 28 503 101 4 -- -- -- -- -- 3 -- -- -- -- -- CO, v=0 True - 576267.9305 0.0005 -3.0118 2 38.4481 11 28 503 101 5 -- -- -- -- -- 4 -- -- -- -- -- CO, v=0 True - 691473.0763 0.0005 -2.8193 2 57.6704 13 28 503 101 6 -- -- -- -- -- 5 -- -- -- -- -- CO, v=0 True - 806651.806 0.005 -2.6716 2 80.7354 15 28 503 101 7 -- -- -- -- -- 6 -- -- -- -- -- CO, v=0 True - 921799.7 0.005 -2.559 2 107.6424 17 28 503 101 8 -- -- -- -- -- 7 -- -- -- -- -- CO, v=0 True + FREQ ERR LGINT DR ELO GUP TAG QNFMT Ju Ku vu ... F3u Jl Kl vl F1l F2l F3l name MOLWT Lab + MHz MHz nm2 MHz 1 / cm ... u + ----------- ------ ------- --- -------- --- ------ ----- --- --- --- ... --- --- --- --- --- --- --- ------- ----- ---- + 115271.2018 0.0005 -5.0105 2 0.0 3 -28503 101 1 -- -- ... -- 0 -- -- -- -- -- CO, v=0 28 True + 230538.0 0.0005 -4.1197 2 3.845 5 -28503 101 2 -- -- ... -- 1 -- -- -- -- -- CO, v=0 28 True + 345795.9899 0.0005 -3.6118 2 11.535 7 -28503 101 3 -- -- ... -- 2 -- -- -- -- -- CO, v=0 28 True + 461040.7682 0.0005 -3.2657 2 23.0695 9 -28503 101 4 -- -- ... -- 3 -- -- -- -- -- CO, v=0 28 True + 576267.9305 0.0005 -3.0118 2 38.4481 11 -28503 101 5 -- -- ... -- 4 -- -- -- -- -- CO, v=0 28 True + 691473.0763 0.0005 -2.8193 2 57.6704 13 -28503 101 6 -- -- ... -- 5 -- -- -- -- -- CO, v=0 28 True + 806651.806 0.005 -2.6716 2 80.7354 15 -28503 101 7 -- -- ... 
-- 6 -- -- -- -- -- CO, v=0 28 True + 921799.7 0.005 -2.559 2 107.6424 17 -28503 101 8 -- -- ... -- 7 -- -- -- -- -- CO, v=0 28 True @@ -80,7 +80,6 @@ The units of the columns of the query can be displayed by calling DR int64 Column 0 ELO float64 1 / cm Column 0 GUP int64 Column 0 - MOLWT int64 u Column 0 TAG int64 Column 0 QNFMT int64 Column 0 Ju int64 Column 0 @@ -96,7 +95,9 @@ The units of the columns of the query can be displayed by calling F2l int64 MaskedColumn 8 F3l int64 MaskedColumn 8 name str7 Column 0 + MOLWT int64 u Column 0 Lab bool Column 0 + These come in handy for converting to other units easily, an example using a simplified version of the data above is shown below: @@ -141,7 +142,7 @@ laboratory but not in space >>> result = CDMS.get_species_table() >>> mol = result[result['tag'] == 28503] >>> mol.pprint(max_width=160) - tag molecule Name #lines lg(Q(1000)) lg(Q(500)) lg(Q(300)) ... lg(Q(9.375)) lg(Q(5.000)) lg(Q(2.725)) Ver. Documentation Date of entry Entry + tag molecule Name #lines lg(Q(1000)) lg(Q(500)) lg(Q(300)) ... lg(Q(9.375)) lg(Q(5.000)) lg(Q(2.725)) Ver. Documentation Date of entry Entry ----- -------- --------- ------ ----------- ---------- ---------- ... ------------ ------------ ------------ ---- ------------- ------------- ----------- 28503 CO, v=0 CO, v = 0 95 2.5595 2.2584 2.0369 ... 0.5733 0.3389 0.1478 1 e028503.cat Oct. 2000 w028503.cat @@ -303,7 +304,15 @@ It can be valuable to check this for any given molecule. Querying the Catalog with Regexes and Relative names ---------------------------------------------------- -The regular expression parsing is analogous to that in the JPLSpec module. +The regular expression parsing is analogous to that in +:mod:`astroquery.linelists.jplspec`. See :ref:`regex_querying_linelists`. + +Handling Malformatted Molecules +------------------------------- + +There are some entries in the CDMS catalog that get mangled by the query tool, +but the underlying data are still good. 
This seems to affect primarily those +molecules with excessive numbers of quantum numbers such as H2NC. Troubleshooting @@ -316,7 +325,7 @@ If you are repeatedly getting failed queries, or bad/out-of-date results, try cl >>> from astroquery.linelists.cdms import CDMS >>> CDMS.clear_cache() -If this function is unavailable, upgrade your version of astroquery. +If this function is unavailable, upgrade your version of astroquery. The ``clear_cache`` function was introduced in version 0.4.7.dev8479. diff --git a/docs/jplspec/images/docplot_curvefit.png b/docs/linelists/jplspec/images/docplot_curvefit.png similarity index 100% rename from docs/jplspec/images/docplot_curvefit.png rename to docs/linelists/jplspec/images/docplot_curvefit.png diff --git a/docs/jplspec/images/docplot_jplspec.png b/docs/linelists/jplspec/images/docplot_jplspec.png similarity index 100% rename from docs/jplspec/images/docplot_jplspec.png rename to docs/linelists/jplspec/images/docplot_jplspec.png diff --git a/docs/jplspec/jplspec.rst b/docs/linelists/jplspec/jplspec.rst similarity index 64% rename from docs/jplspec/jplspec.rst rename to docs/linelists/jplspec/jplspec.rst index e60ea06e82..2ef9f3ad12 100644 --- a/docs/jplspec/jplspec.rst +++ b/docs/linelists/jplspec/jplspec.rst @@ -1,8 +1,8 @@ -.. _astroquery.jplspec: +.. _astroquery.linelists.jplspec: -********************************************* -JPL Spectroscopy Queries (astroquery.jplspec) -********************************************* +******************************************************* +JPL Spectroscopy Queries (astroquery.linelists.jplspec) +******************************************************* Getting Started =============== @@ -14,6 +14,18 @@ module outputs the results that would arise from the `browser form using similar search criteria as the ones found in the form, and presents the output as a `~astropy.table.Table`. + +.. warning:: + Starting in mid-2025, the JPL web interface query tool went down for a + prolonged period. 
As of November 2025, it is still not up, but JPL staff are + aware of and seeking solutions to the problem. Until that web interface is + restored, the astroquery.linelists.jplspec module relies on workarounds that involve + downloading the full catalog files, which results in slightly larger data + transfers and un-filtered full-table results. Some metadata may also be + different. The examples and documents have been updated to show what to + expect in the current, partially-functional state. + + Examples ======== @@ -26,25 +38,26 @@ what each setting yields: .. doctest-remote-data:: - >>> from astroquery.jplspec import JPLSpec + >>> from astroquery.linelists.jplspec import JPLSpec >>> import astropy.units as u >>> response = JPLSpec.query_lines(min_frequency=100 * u.GHz, ... max_frequency=1000 * u.GHz, ... min_strength=-500, ... molecule="28001 CO", ... get_query_payload=False) - >>> print(response) - FREQ ERR LGINT DR ELO GUP TAG QNFMT QN' QN" - MHz MHz nm2 MHz 1 / cm - ----------- ------ ------- --- -------- --- ------ ----- --- --- - 115271.2018 0.0005 -5.0105 2 0.0 3 -28001 101 1 0 - 230538.0 0.0005 -4.1197 2 3.845 5 -28001 101 2 1 - 345795.9899 0.0005 -3.6118 2 11.535 7 -28001 101 3 2 - 461040.7682 0.0005 -3.2657 2 23.0695 9 -28001 101 4 3 - 576267.9305 0.0005 -3.0118 2 38.4481 11 -28001 101 5 4 - 691473.0763 0.0005 -2.8193 2 57.6704 13 -28001 101 6 5 - 806651.806 0.005 -2.6716 2 80.7354 15 -28001 101 7 6 - 921799.7 0.005 -2.559 2 107.6424 17 -28001 101 8 7 + >>> response.pprint(max_lines=10) + FREQ ERR LGINT DR ELO GUP TAG QNFMT QN' QN" Lab + MHz MHz nm2 MHz 1 / cm + ------------ ------ -------- --- ---------- --- ----- ----- --- --- ----- + 115271.2018 0.0005 -5.0105 2 0.0 3 28001 101 1 0 True + 230538.0 0.0005 -4.1197 2 3.845 5 28001 101 2 1 True + ... ... ... ... ... ... ... ... ... ... ...
+ 9747448.9491 3.0112 -31.6588 2 14684.516 179 28001 101 89 88 False + 9845408.2504 3.1938 -32.4351 2 15009.6559 181 28001 101 90 89 False + 9942985.9145 3.3849 -33.2361 2 15338.0634 183 28001 101 91 90 False + Length = 91 rows + >>> response.meta + {'TAG': 28001, 'NAME': 'CO', 'NLINE': 91, 'QLOG1': 2.0369, 'QLOG2': 1.9123, 'QLOG3': 1.737, 'QLOG4': 1.4386, 'QLOG5': 1.1429, 'QLOG6': 0.8526, 'QLOG7': 0.5733, 'VER': '4*', 'molecule_id': '28001 CO', 'molecule_name': {}} The following example, with ``get_query_payload = True``, returns the payload: @@ -68,59 +81,63 @@ The units of the columns of the query can be displayed by calling ... min_strength=-500, ... molecule="28001 CO") >>> print(response.info) - - name dtype unit - ----- ------- ------- - FREQ float64 MHz - ERR float64 MHz - LGINT float64 nm2 MHz - DR int64 - ELO float64 1 / cm - GUP int64 - TAG int64 - QNFMT int64 - QN' int64 - QN" int64 +
+ name dtype unit + ----- ------- ------- + FREQ float64 MHz + ERR float64 MHz + LGINT float64 nm2 MHz + DR int64 + ELO float64 1 / cm + GUP int64 + TAG int64 + QNFMT int64 + QN' int64 + QN" int64 + Lab bool + These come in handy for converting to other units easily, an example using a simplified version of the data above is shown below: .. doctest-remote-data:: - >>> print (response['FREQ', 'ERR', 'ELO']) - FREQ ERR ELO - MHz MHz 1 / cm - ----------- ------ -------- - 115271.2018 0.0005 0.0 - 230538.0 0.0005 3.845 - 345795.9899 0.0005 11.535 - 461040.7682 0.0005 23.0695 - 576267.9305 0.0005 38.4481 - 691473.0763 0.0005 57.6704 - 806651.806 0.005 80.7354 - 921799.7 0.005 107.6424 - >>> response['FREQ'].quantity - - >>> response['FREQ'].to('GHz') - + >>> response['FREQ', 'ERR', 'ELO'].pprint(max_lines=10) + FREQ ERR ELO + MHz MHz 1 / cm + ------------ ------ ---------- + 115271.2018 0.0005 0.0 + 230538.0 0.0005 3.845 + ... ... ... + 9747448.9491 3.0112 14684.516 + 9845408.2504 3.1938 15009.6559 + 9942985.9145 3.3849 15338.0634 + Length = 91 rows + >>> response['FREQ'][:10].quantity + + >>> response['FREQ'][:10].to('GHz') + The parameters and response keys are described in detail under the Reference/API section. Looking Up More Information from the catdir.cat file ------------------------------------------------------- +---------------------------------------------------- -If you have found a molecule you are interested in, the TAG field -in the results provides enough information to access specific -molecule information such as the partition functions at different -temperatures. Keep in mind that a negative TAG value signifies that -the line frequency has been measured in the laboratory +If you have found a molecule you are interested in, the TAG field in the results +provides enough information to access specific molecule information such as the +partition functions at different temperatures. 
A negative TAG value signifies +that the line frequency has been measured in the laboratory. .. doctest-remote-data:: >>> import matplotlib.pyplot as plt - >>> from astroquery.jplspec import JPLSpec + >>> from astroquery.linelists.jplspec import JPLSpec >>> result = JPLSpec.get_species_table() >>> mol = result[result['TAG'] == 28001] #do not include signs of TAG for this >>> print(mol) @@ -139,11 +156,9 @@ through metadata: {'Temperature (K)': [300, 225, 150, 75, 37.5, 18.5, 9.375]} -One of the advantages of using JPLSpec is the availability in the catalog -of the partition function at different temperatures for the molecules. As a -continuation of the example above, an example that accesses and plots the -partition function against the temperatures found in the metadata is shown -below: +JPLSpec catalogs the partition function at several temperatures for each +molecule. This example accesses and plots the partition function against the +temperatures found in the metadata: .. doctest-skip:: @@ -153,7 +168,7 @@ below: >>> plt.scatter(temp,part) >>> plt.xlabel('Temperature (K)') >>> plt.ylabel('Partition Function Value') - >>> plt.title('Parititon Fn vs Temp') + >>> plt.title('Partition Fn vs Temp') >>> plt.show() @@ -194,6 +209,8 @@ other temperatures using curve fitting models: The resulting plot from the example above +.. _regex_querying_linelists: + Querying the Catalog with Regexes and Relative names ---------------------------------------------------- @@ -210,30 +227,34 @@ to query these directly. .. doctest-remote-data:: - >>> from astroquery.jplspec import JPLSpec + >>> from astroquery.linelists.jplspec import JPLSpec >>> import astropy.units as u >>> result = JPLSpec.query_lines(min_frequency=100 * u.GHz, ... max_frequency=1000 * u.GHz, ... min_strength=-500, ... molecule="H2O", ... 
parse_name_locally=True) - >>> print(result) - FREQ ERR LGINT DR ELO GUP TAG QNFMT QN' QN" - MHz MHz nm2 MHz 1 / cm - ----------- -------- -------- --- --------- --- ------ ----- -------- -------- - 115542.5692 0.6588 -13.2595 3 4606.1683 35 18003 1404 17 810 0 18 513 0 - 139614.293 0.15 -9.3636 3 3080.1788 87 -18003 1404 14 6 9 0 15 312 0 - 177317.068 0.15 -10.3413 3 3437.2774 31 -18003 1404 15 610 0 16 313 0 - 183310.087 0.001 -3.6463 3 136.1639 7 -18003 1404 3 1 3 0 2 2 0 0 - ... - Length = 2000 rows + >>> result.pprint(max_lines=10) + FREQ ERR LGINT DR ELO GUP TAG QNFMT QN'1 QN"1 QN'2 QN"2 QN'3 QN"3 QN'4 QN"4 Lab + MHz MHz nm2 MHz 1 / cm + ------------ ------ -------- --- --------- --- ----- ----- ---- ---- ---- ---- ---- ---- ---- ---- ----- + 8006.5805 2.851 -18.6204 3 6219.6192 45 18003 1404 22 21 4 7 18 15 0 0 False + 12478.2535 0.2051 -13.1006 3 3623.7652 31 18003 1404 15 16 7 4 9 12 0 0 False + ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... + 9981215.769 6.1776 -12.0101 3 5271.3682 45 18003 1404 22 23 2 1 20 23 0 0 False + 9981323.7676 6.1773 -11.5329 3 5271.3682 135 18003 1404 22 23 3 0 20 23 0 0 False + 9992065.9213 0.0482 -5.528 3 882.8904 15 18003 1404 7 8 6 1 2 7 0 0 False + Length = 1376 rows + Searches like these can lead to very broad queries, and may be limited in response length: .. doctest-remote-data:: - >>> print(result.meta['comments']) + >>> # The 'comments' metadata field is populated only when the query tool is run; + >>> # the get-whole-table workaround (November 2025) does not populate it. + >>> print(result.meta['comments']) # doctest: +SKIP ['', '', '', '', '', 'form is currently limilted to 2000 lines. Please limit your search.'] Inspecting the returned molecules shows that the 'H2O' string was processed as a @@ -247,7 +268,7 @@ combination of characters 'H2O': ... for (species, tag) in JPLSpec.lookup_ids.items() ...
if tag in tags} >>> print(species) - {'H2O': 18003, 'H2O v2,2v2,v': 18005, 'H2O-17': 19003, 'H2O-18': 20003, 'H2O2': 34004} + {'H2O': 18003} A few examples that show the power of the regex option are the following: @@ -314,7 +335,7 @@ If you are repeatedly getting failed queries, or bad/out-of-date results, try cl .. code-block:: python - >>> from astroquery.jplspec import JPLSpec + >>> from astroquery.linelists.jplspec import JPLSpec >>> JPLSpec.clear_cache() If this function is unavailable, upgrade your version of astroquery. @@ -324,5 +345,5 @@ The ``clear_cache`` function was introduced in version 0.4.7.dev8479. Reference/API ============= -.. automodapi:: astroquery.jplspec +.. automodapi:: astroquery.linelists.jplspec :no-inheritance-diagram: diff --git a/setup.cfg b/setup.cfg index 1b93d76267..b00172e24e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -52,6 +52,8 @@ filterwarnings = # Remove along with astropy-helpers, once we switch to a new versioning scheme ignore:Use setlocale:DeprecationWarning ignore: 'locale.getdefaultlocale' is deprecated and slated for removal:DeprecationWarning +# Ignore astroquery's own module reorganization deprecation warnings during testing + ignore:Importing from 'astroquery.jplspec' is deprecated:DeprecationWarning # These are temporary measures, all of these should be fixed: # ----------------------------------------------------------- ignore:distutils Version classes are deprecated:DeprecationWarning