From 8424b0476bebb3ac88e7d1fd9da2c116cf82ac9e Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Fri, 7 Nov 2025 15:18:01 -0500 Subject: [PATCH 01/44] move jplspec to linelists fix bad file add fix ****s --- astroquery/jplspec/__init__.py | 46 +++++++++--------- astroquery/linelists/jplspec/__init__.py | 33 +++++++++++++ astroquery/{ => linelists}/jplspec/core.py | 4 +- .../{ => linelists}/jplspec/data/catdir.cat | 0 .../{ => linelists}/jplspec/lookup_table.py | 0 .../{ => linelists}/jplspec/setup_package.py | 5 +- .../{ => linelists}/jplspec/tests/__init__.py | 0 .../jplspec/tests/data/CO.data | 0 .../jplspec/tests/data/CO_6.data | 0 .../jplspec/tests/data/multi.data | 0 .../jplspec/tests/test_jplspec.py | 2 +- .../jplspec/tests/test_jplspec_remote.py | 2 +- docs/index.rst | 4 +- .../jplspec/images/docplot_curvefit.png | Bin .../jplspec/images/docplot_jplspec.png | Bin docs/{ => linelists}/jplspec/jplspec.rst | 18 +++---- 16 files changed, 74 insertions(+), 40 deletions(-) create mode 100644 astroquery/linelists/jplspec/__init__.py rename astroquery/{ => linelists}/jplspec/core.py (99%) rename astroquery/{ => linelists}/jplspec/data/catdir.cat (100%) rename astroquery/{ => linelists}/jplspec/lookup_table.py (100%) rename astroquery/{ => linelists}/jplspec/setup_package.py (72%) rename astroquery/{ => linelists}/jplspec/tests/__init__.py (100%) rename astroquery/{ => linelists}/jplspec/tests/data/CO.data (100%) rename astroquery/{ => linelists}/jplspec/tests/data/CO_6.data (100%) rename astroquery/{ => linelists}/jplspec/tests/data/multi.data (100%) rename astroquery/{ => linelists}/jplspec/tests/test_jplspec.py (99%) rename astroquery/{ => linelists}/jplspec/tests/test_jplspec_remote.py (97%) rename docs/{ => linelists}/jplspec/images/docplot_curvefit.png (100%) rename docs/{ => linelists}/jplspec/images/docplot_jplspec.png (100%) rename docs/{ => linelists}/jplspec/jplspec.rst (96%) diff --git a/astroquery/jplspec/__init__.py 
b/astroquery/jplspec/__init__.py index 8f87702481..e930c5ab2c 100644 --- a/astroquery/jplspec/__init__.py +++ b/astroquery/jplspec/__init__.py @@ -1,33 +1,33 @@ # Licensed under a 3-clause BSD style license - see LICENSE.rst """ -JPL Spectral Catalog --------------------- +JPL Spectral Catalog (Deprecated Location) +------------------------------------------- +.. deprecated:: 0.4.8 + The `astroquery.jplspec` module has been moved to `astroquery.linelists.jplspec`. + Please update your imports to use `from astroquery.linelists.jplspec import JPLSpec` instead. + This backward compatibility layer will be removed in a future version. -:author: Giannina Guzman (gguzman2@villanova.edu) -:author: Miguel de Val-Borro (miguel.deval@gmail.com) - -""" -from astropy import config as _config +This module provides backward compatibility for the old import location. +The JPLSpec module has been reorganized under the linelists subpackage. +For new code, please use:: -class Conf(_config.ConfigNamespace): - """ - Configuration parameters for `astroquery.jplspec`. - """ - server = _config.ConfigItem( - 'https://spec.jpl.nasa.gov/cgi-bin/catform', - 'JPL Spectral Catalog URL.') - - timeout = _config.ConfigItem( - 60, - 'Time limit for connecting to JPL server.') + from astroquery.linelists.jplspec import JPLSpec +""" +import warnings -conf = Conf() +# Issue deprecation warning +warnings.warn( + "Importing from 'astroquery.jplspec' is deprecated. " + "Please use 'from astroquery.linelists.jplspec import JPLSpec' instead. 
" + "The old import path will be removed in a future version.", + DeprecationWarning, + stacklevel=2 +) -from .core import JPLSpec, JPLSpecClass +# Import from the new location +from ..linelists.jplspec import JPLSpec, JPLSpecClass, Conf, conf -__all__ = ['JPLSpec', 'JPLSpecClass', - 'Conf', 'conf', - ] +__all__ = ['JPLSpec', 'JPLSpecClass', 'Conf', 'conf'] diff --git a/astroquery/linelists/jplspec/__init__.py b/astroquery/linelists/jplspec/__init__.py new file mode 100644 index 0000000000..cfd9439cfe --- /dev/null +++ b/astroquery/linelists/jplspec/__init__.py @@ -0,0 +1,33 @@ +# Licensed under a 3-clause BSD style license - see LICENSE.rst +""" +JPL Spectral Catalog +-------------------- + + +:author: Giannina Guzman (gguzman2@villanova.edu) +:author: Miguel de Val-Borro (miguel.deval@gmail.com) + +""" +from astropy import config as _config + + +class Conf(_config.ConfigNamespace): + """ + Configuration parameters for `astroquery.linelists.jplspec`. + """ + server = _config.ConfigItem( + 'https://spec.jpl.nasa.gov/cgi-bin/catform', + 'JPL Spectral Catalog URL.') + + timeout = _config.ConfigItem( + 60, + 'Time limit for connecting to JPL server.') + + +conf = Conf() + +from .core import JPLSpec, JPLSpecClass + +__all__ = ['JPLSpec', 'JPLSpecClass', + 'Conf', 'conf', + ] diff --git a/astroquery/jplspec/core.py b/astroquery/linelists/jplspec/core.py similarity index 99% rename from astroquery/jplspec/core.py rename to astroquery/linelists/jplspec/core.py index 1f72ca580b..3d0165e3af 100644 --- a/astroquery/jplspec/core.py +++ b/astroquery/linelists/jplspec/core.py @@ -4,8 +4,8 @@ import astropy.units as u from astropy.io import ascii -from ..query import BaseQuery -from ..utils import async_to_sync +from ...query import BaseQuery +from ...utils import async_to_sync # import configurable items declared in __init__.py from . import conf from . 
import lookup_table diff --git a/astroquery/jplspec/data/catdir.cat b/astroquery/linelists/jplspec/data/catdir.cat similarity index 100% rename from astroquery/jplspec/data/catdir.cat rename to astroquery/linelists/jplspec/data/catdir.cat diff --git a/astroquery/jplspec/lookup_table.py b/astroquery/linelists/jplspec/lookup_table.py similarity index 100% rename from astroquery/jplspec/lookup_table.py rename to astroquery/linelists/jplspec/lookup_table.py diff --git a/astroquery/jplspec/setup_package.py b/astroquery/linelists/jplspec/setup_package.py similarity index 72% rename from astroquery/jplspec/setup_package.py rename to astroquery/linelists/jplspec/setup_package.py index 761f2f6829..d9e08324b9 100644 --- a/astroquery/jplspec/setup_package.py +++ b/astroquery/linelists/jplspec/setup_package.py @@ -11,5 +11,6 @@ def get_package_data(): os.path.join('data', 'multi.data')] paths_data = [os.path.join('data', 'catdir.cat')] - return {'astroquery.jplspec.tests': paths_test, - 'astroquery.jplspec': paths_data, } + return {'astroquery.linelists.jplspec.tests': paths_test, + 'astroquery.linelists.jplspec': paths_data, } + diff --git a/astroquery/jplspec/tests/__init__.py b/astroquery/linelists/jplspec/tests/__init__.py similarity index 100% rename from astroquery/jplspec/tests/__init__.py rename to astroquery/linelists/jplspec/tests/__init__.py diff --git a/astroquery/jplspec/tests/data/CO.data b/astroquery/linelists/jplspec/tests/data/CO.data similarity index 100% rename from astroquery/jplspec/tests/data/CO.data rename to astroquery/linelists/jplspec/tests/data/CO.data diff --git a/astroquery/jplspec/tests/data/CO_6.data b/astroquery/linelists/jplspec/tests/data/CO_6.data similarity index 100% rename from astroquery/jplspec/tests/data/CO_6.data rename to astroquery/linelists/jplspec/tests/data/CO_6.data diff --git a/astroquery/jplspec/tests/data/multi.data b/astroquery/linelists/jplspec/tests/data/multi.data similarity index 100% rename from 
astroquery/jplspec/tests/data/multi.data rename to astroquery/linelists/jplspec/tests/data/multi.data diff --git a/astroquery/jplspec/tests/test_jplspec.py b/astroquery/linelists/jplspec/tests/test_jplspec.py similarity index 99% rename from astroquery/jplspec/tests/test_jplspec.py rename to astroquery/linelists/jplspec/tests/test_jplspec.py index b11c15b8a5..7a84670dd4 100644 --- a/astroquery/jplspec/tests/test_jplspec.py +++ b/astroquery/linelists/jplspec/tests/test_jplspec.py @@ -4,7 +4,7 @@ from astropy import units as u from astropy.table import Table -from ...jplspec import JPLSpec +from ....jplspec import JPLSpec file1 = 'CO.data' file2 = 'CO_6.data' diff --git a/astroquery/jplspec/tests/test_jplspec_remote.py b/astroquery/linelists/jplspec/tests/test_jplspec_remote.py similarity index 97% rename from astroquery/jplspec/tests/test_jplspec_remote.py rename to astroquery/linelists/jplspec/tests/test_jplspec_remote.py index 0f60e2b4fa..5c46f7e3af 100644 --- a/astroquery/jplspec/tests/test_jplspec_remote.py +++ b/astroquery/linelists/jplspec/tests/test_jplspec_remote.py @@ -2,7 +2,7 @@ from astropy import units as u from astropy.table import Table -from ...jplspec import JPLSpec +from ....jplspec import JPLSpec @pytest.mark.remote_data diff --git a/docs/index.rst b/docs/index.rst index baef40b2a3..64c3d35f66 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -276,7 +276,7 @@ The following modules have been completed using a common API: ipac/irsa/irsa_dust/irsa_dust.rst ipac/irsa/ibe/ibe.rst ipac/irsa/irsa.rst - jplspec/jplspec.rst + linelists/jplspec/jplspec.rst magpis/magpis.rst mast/mast.rst mocserver/mocserver.rst @@ -412,7 +412,7 @@ well as cross section and collision rates. 
Those services are: atomic/atomic.rst linelists/cdms/cdms.rst hitran/hitran.rst - jplspec/jplspec.rst + linelists/jplspec/jplspec.rst lamda/lamda.rst nist/nist.rst splatalogue/splatalogue.rst diff --git a/docs/jplspec/images/docplot_curvefit.png b/docs/linelists/jplspec/images/docplot_curvefit.png similarity index 100% rename from docs/jplspec/images/docplot_curvefit.png rename to docs/linelists/jplspec/images/docplot_curvefit.png diff --git a/docs/jplspec/images/docplot_jplspec.png b/docs/linelists/jplspec/images/docplot_jplspec.png similarity index 100% rename from docs/jplspec/images/docplot_jplspec.png rename to docs/linelists/jplspec/images/docplot_jplspec.png diff --git a/docs/jplspec/jplspec.rst b/docs/linelists/jplspec/jplspec.rst similarity index 96% rename from docs/jplspec/jplspec.rst rename to docs/linelists/jplspec/jplspec.rst index e60ea06e82..9e8a233a1b 100644 --- a/docs/jplspec/jplspec.rst +++ b/docs/linelists/jplspec/jplspec.rst @@ -1,8 +1,8 @@ -.. _astroquery.jplspec: +.. _astroquery.linelists.jplspec: -********************************************* -JPL Spectroscopy Queries (astroquery.jplspec) -********************************************* +******************************************************* +JPL Spectroscopy Queries (astroquery.linelists.jplspec) +******************************************************* Getting Started =============== @@ -26,7 +26,7 @@ what each setting yields: .. doctest-remote-data:: - >>> from astroquery.jplspec import JPLSpec + >>> from astroquery.linelists.jplspec import JPLSpec >>> import astropy.units as u >>> response = JPLSpec.query_lines(min_frequency=100 * u.GHz, ... max_frequency=1000 * u.GHz, @@ -120,7 +120,7 @@ the line frequency has been measured in the laboratory .. 
doctest-remote-data:: >>> import matplotlib.pyplot as plt - >>> from astroquery.jplspec import JPLSpec + >>> from astroquery.linelists.jplspec import JPLSpec >>> result = JPLSpec.get_species_table() >>> mol = result[result['TAG'] == 28001] #do not include signs of TAG for this >>> print(mol) @@ -210,7 +210,7 @@ to query these directly. .. doctest-remote-data:: - >>> from astroquery.jplspec import JPLSpec + >>> from astroquery.linelists.jplspec import JPLSpec >>> import astropy.units as u >>> result = JPLSpec.query_lines(min_frequency=100 * u.GHz, ... max_frequency=1000 * u.GHz, @@ -314,7 +314,7 @@ If you are repeatedly getting failed queries, or bad/out-of-date results, try cl .. code-block:: python - >>> from astroquery.jplspec import JPLSpec + >>> from astroquery.linelists.jplspec import JPLSpec >>> JPLSpec.clear_cache() If this function is unavailable, upgrade your version of astroquery. @@ -324,5 +324,5 @@ The ``clear_cache`` function was introduced in version 0.4.7.dev8479. Reference/API ============= -.. automodapi:: astroquery.jplspec +.. automodapi:: astroquery.linelists.jplspec :no-inheritance-diagram: From 8b0a21684396c4c2d7eab47b8684a1d483801c85 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Fri, 7 Nov 2025 15:25:10 -0500 Subject: [PATCH 02/44] add changelog entry --- CHANGES.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index 9540003431..01a130d283 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -30,6 +30,10 @@ mast - Switch to use HTTP continuation for partial downloads. 
[#3448] +jplspec +^^^^^^^ + +- Moved to linelists/ [#3455] Infrastructure, Utility and Other Changes and Additions ------------------------------------------------------- From 137e56bb4373b39fd36a221a8217af1901712285 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Wed, 5 Nov 2025 14:50:22 -0500 Subject: [PATCH 03/44] fix typo in test --- astroquery/linelists/cdms/tests/test_cdms_remote.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/astroquery/linelists/cdms/tests/test_cdms_remote.py b/astroquery/linelists/cdms/tests/test_cdms_remote.py index 5c2a2059fb..e0c7bf5a9d 100644 --- a/astroquery/linelists/cdms/tests/test_cdms_remote.py +++ b/astroquery/linelists/cdms/tests/test_cdms_remote.py @@ -46,7 +46,9 @@ def test_remote_regex(): tbl = CDMS.query_lines(min_frequency=500 * u.GHz, max_frequency=600 * u.GHz, min_strength=-500, - molecule=('028501 HC-13-N, v=0', '028502 H2CN' '028503 CO, v=0')) + molecule=('028501 HC-13-N, v=0', + '028502 H2CN', + '028503 CO, v=0')) assert isinstance(tbl, Table) assert len(tbl) == 557 From 67acdbc7505ec8c7beff0f3b8c56acc62be9feb6 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Fri, 7 Nov 2025 14:38:37 -0500 Subject: [PATCH 04/44] add generated get_molecule & associated tests --- astroquery/linelists/jplspec/core.py | 102 ++++++++++++++++++ .../linelists/jplspec/tests/test_jplspec.py | 43 ++++++++ .../jplspec/tests/test_jplspec_remote.py | 65 +++++++++++ 3 files changed, 210 insertions(+) diff --git a/astroquery/linelists/jplspec/core.py b/astroquery/linelists/jplspec/core.py index 3d0165e3af..8b61c5b08b 100644 --- a/astroquery/linelists/jplspec/core.py +++ b/astroquery/linelists/jplspec/core.py @@ -234,6 +234,108 @@ def get_species_table(self, *, catfile='catdir.cat'): return result + def get_molecule(self, molecule_id, *, cache=True): + """ + Retrieve the whole molecule table for a given molecule id from the JPL catalog. 
+ + Parameters + ---------- + molecule_id : int or str + The molecule tag/identifier. Can be an integer (e.g., 18003 for H2O) + or a zero-padded 6-character string (e.g., '018003'). + cache : bool + Defaults to True. If set overrides global caching behavior. + + Returns + ------- + Table : `~astropy.table.Table` + Table containing all spectral lines for the requested molecule. + + Examples + -------- + >>> table = JPLSpec.get_molecule(18003) # doctest: +SKIP + >>> print(table) # doctest: +SKIP + """ + # Convert to string and zero-pad to 6 digits + if isinstance(molecule_id, int): + molecule_str = f'{molecule_id:06d}' + elif isinstance(molecule_id, str): + if len(molecule_id) != 6 or not molecule_id.isdigit(): + raise ValueError("molecule_id should be an integer or a length-6 string of numbers") + molecule_str = molecule_id + else: + raise ValueError("molecule_id should be an integer or a length-6 string of numbers") + + # Construct the URL to the catalog file + url = f'https://spec.jpl.nasa.gov/ftp/pub/catalog/c{molecule_str}.cat' + + # Request the catalog file + response = self._request(method='GET', url=url, + timeout=self.TIMEOUT, cache=cache) + + # Parse the catalog file + result = self._parse_cat(response) + + # Add metadata from species table + species_table = self.get_species_table() + # Find the row matching this molecule_id + int_molecule_id = int(molecule_str) + matching_rows = species_table[species_table['TAG'] == int_molecule_id] + if len(matching_rows) > 0: + # Add metadata as a dictionary + result.meta = dict(zip(matching_rows.colnames, matching_rows[0])) + + return result + + def _parse_cat(self, response, *, verbose=False): + """ + Parse a catalog file response into an `~astropy.table.Table`. + + The catalog data files are composed of 80-character card images, with + one card image per spectral line. 
The format of each card image is: + FREQ, ERR, LGINT, DR, ELO, GUP, TAG, QNFMT, QN', QN" + (F13.4,F8.4, F8.4, I2,F10.4, I3, I7, I4, 6I2, 6I2) + + Parameters + ---------- + response : `requests.Response` + The HTTP response from the catalog file request. + verbose : bool, optional + Not used currently. + + Returns + ------- + Table : `~astropy.table.Table` + Parsed catalog data. + """ + if 'Zero lines were found' in response.text or len(response.text.strip()) == 0: + raise EmptyResponseError(f"Response was empty; message was '{response.text}'.") + + text = response.text + + # Parse the catalog file with fixed-width format + # Format: FREQ(13.4), ERR(8.4), LGINT(8.4), DR(2), ELO(10.4), GUP(3), TAG(7), QNFMT(4), QN'(12), QN"(12) + result = ascii.read(text, header_start=None, data_start=0, + comment=r'THIS|^\s{12,14}\d{4,6}.*', + names=('FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', + 'TAG', 'QNFMT', 'QN\'', 'QN"'), + col_starts=(0, 13, 21, 29, 31, 41, 44, 51, 55, 67), + format='fixed_width', fast_reader=False) + + # Add units + result['FREQ'].unit = u.MHz + result['ERR'].unit = u.MHz + result['LGINT'].unit = u.nm**2 * u.MHz + result['ELO'].unit = u.cm**(-1) + + # Add laboratory measurement flag + # A negative TAG value indicates laboratory-measured frequency + result['Lab'] = result['TAG'] < 0 + # Convert TAG to absolute value + result['TAG'] = abs(result['TAG']) + + return result + JPLSpec = JPLSpecClass() diff --git a/astroquery/linelists/jplspec/tests/test_jplspec.py b/astroquery/linelists/jplspec/tests/test_jplspec.py index 7a84670dd4..9d586bb8a7 100644 --- a/astroquery/linelists/jplspec/tests/test_jplspec.py +++ b/astroquery/linelists/jplspec/tests/test_jplspec.py @@ -118,3 +118,46 @@ def test_query_multi(): assert tbl['TAG'][0] == -18003 assert tbl['TAG'][38] == -19002 assert tbl['TAG'][207] == 21001 + + +def test_parse_cat(): + """Test parsing of catalog files with _parse_cat method.""" + + response = MockResponseSpec('H2O_sample.cat') + tbl = 
JPLSpec._parse_cat(response) + + # Check table structure + assert isinstance(tbl, Table) + assert len(tbl) > 0 + assert set(tbl.keys()) == set(['FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', + 'TAG', 'QNFMT', 'QN\'', 'QN"', 'Lab']) + + # Check units + assert tbl['FREQ'].unit == u.MHz + assert tbl['ERR'].unit == u.MHz + assert tbl['LGINT'].unit == u.nm**2 * u.MHz + assert tbl['ELO'].unit == u.cm**(-1) + + # Check Lab flag exists and is boolean + assert 'Lab' in tbl.colnames + assert tbl['Lab'].dtype == bool + + # Check TAG values are positive (absolute values) + assert all(tbl['TAG'] > 0) + + +def test_get_molecule_input_validation(): + """Test input validation for get_molecule method.""" + import pytest + + # Test invalid string format + with pytest.raises(ValueError): + JPLSpec.get_molecule('invalid') + + # Test invalid type + with pytest.raises(ValueError): + JPLSpec.get_molecule(12.34) + + # Test wrong length string + with pytest.raises(ValueError): + JPLSpec.get_molecule('123') diff --git a/astroquery/linelists/jplspec/tests/test_jplspec_remote.py b/astroquery/linelists/jplspec/tests/test_jplspec_remote.py index 5c46f7e3af..49cc7579e8 100644 --- a/astroquery/linelists/jplspec/tests/test_jplspec_remote.py +++ b/astroquery/linelists/jplspec/tests/test_jplspec_remote.py @@ -39,3 +39,68 @@ def test_remote_regex(): assert tbl['LGINT'][0] == -3.0118 assert tbl['ERR'][7] == 8.3063 assert tbl['FREQ'][15] == 946175.3151 + + +@pytest.mark.remote_data +def test_get_molecule_remote(): + """Test get_molecule with remote data retrieval.""" + # Test with H2O + tbl = JPLSpec.get_molecule(18003) + + assert isinstance(tbl, Table) + assert len(tbl) > 0 + + # Check expected columns including Lab flag + expected_cols = {'FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', + 'TAG', 'QNFMT', 'QN\'', 'QN"', 'Lab'} + assert set(tbl.keys()) == expected_cols + + # Check units + assert tbl['FREQ'].unit == u.MHz + assert tbl['ERR'].unit == u.MHz + assert tbl['LGINT'].unit == u.nm**2 * u.MHz + assert 
tbl['ELO'].unit == u.cm**(-1) + + # Check metadata was attached + assert 'NAME' in tbl.meta + assert tbl.meta['NAME'].strip() == 'H2O' + assert 'TAG' in tbl.meta + assert tbl.meta['TAG'] == 18003 + + # Check Lab flag + assert 'Lab' in tbl.colnames + assert tbl['Lab'].dtype == bool + + # H2O should have some lab measurements + assert sum(tbl['Lab']) > 0 + + +@pytest.mark.remote_data +def test_get_molecule_string_id(): + """Test get_molecule with string ID format.""" + # Test with CO using string ID + tbl = JPLSpec.get_molecule('028001') + + assert isinstance(tbl, Table) + assert len(tbl) > 0 + assert 'NAME' in tbl.meta + assert 'CO' in tbl.meta['NAME'] + + +@pytest.mark.remote_data +def test_get_molecule_various(): + """Test get_molecule with various molecules.""" + test_molecules = [ + (28001, 'CO'), # Simple diatomic + (32003, 'CH3OH'), # Complex organic + ] + + for mol_id, expected_name in test_molecules: + tbl = JPLSpec.get_molecule(mol_id) + assert isinstance(tbl, Table) + assert len(tbl) > 0 + assert 'NAME' in tbl.meta + assert expected_name in tbl.meta['NAME'] + + # Verify TAG values are positive + assert all(tbl['TAG'] > 0) From d46593fe51dfc0d7560645cef2ba457a59d6d64a Mon Sep 17 00:00:00 2001 From: Parfenov Sergey Date: Sun, 27 Apr 2025 17:51:19 +0500 Subject: [PATCH 05/44] Support CDMS all species option; fix format for CDMS linelist reading; fix CDMS quantum numbers parsing 1) Adding support for CDMS queries with lines of all species 2) Fixing the CMDS lines list parsing Support CDMS all species option; fix format for CDMS linelist reading; fix CDMS quantum numbers parsing Adding test for a new functionality when all species are requested from CDMS fix parse_letternumber test and rearrange and refactor new test expand test coverage and resolve some problems discovered as a result propagate column change down trivial formatting fix my refactor; it was incorrect oops, fix to last one (yes, this needs to be squashed; pushing fast to skip tests... 
and spam my inbox...) add ch3cn test and shift QNFMT by one cleanup molwt/tag parsing shift tag back one spot. Fix tests to accommodate more complete "tag" name add the b1 = -21 test whitespace fix ch3cn test; it had decayed into ch3ccd which has different QNs add changelog improve error message for bad molecule parsing fix the next part of the test address review comments --- CHANGES.rst | 45 +--- astroquery/alma/core.py | 6 +- astroquery/linelists/cdms/core.py | 223 +++++++++++------- astroquery/linelists/cdms/tests/test_cdms.py | 6 +- .../linelists/cdms/tests/test_cdms_remote.py | 96 +++++++- 5 files changed, 243 insertions(+), 133 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 01a130d283..93042c8d73 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -110,6 +110,8 @@ gaia EPOCH_SPECTRUM_XP_CROWDING, MEAN_SPECTRUM_XP, EPOCH_SPECTRUM_XP and MEAN_SPECTRUM_XP_GRAVLENS. [#3382] +- Add more complete support for CDMS quantum number and other value parsing. [#3302] + heasarc ^^^^^^^ @@ -170,46 +172,11 @@ mast - Fix bug in ``utils.remove_duplicate_products`` that does not retain the order of the products in an input table. [#3314] -- Add ``return_uri_map`` parameter to ``Observations.get_cloud_uris`` to return - a mapping of the input data product URIs to the returned cloud URIs. [#3314] - -- Add ``verbose`` parameter to ``Observations.get_cloud_uris`` to control - whether warnings are logged when a product cannot be found in the cloud. - [#3314] - -- Improved ``MastMissions`` queries to accept lists for query critieria - values, in addition to comma-delimited strings. [#3319] - -- Enhanced ``filter_products`` methods in ``MastMissions`` and ``Observations`` - to support advanced filtering expressions for numeric columns and with - negative values. [#3365, #3393] - -- Fix bug where duplicate columns from server responses cause an error when - converting to an ``~astropy.table.Table``. 
[#3400] - -- Support for resolving multiple object names at once with ``resolve_object``, - including automatic batching into groups of up to 30 names per request to - the name translation service. [#3398] - -simbad -^^^^^^ - -- Add ``async_job`` option in all query methods. It provides slower to start, - but more robust queries for which the timeout can be increased. [#3305] - -skyview -^^^^^^^ - -- Add ``get_query_payload`` kwarg to ``Skyview.get_images()`` and - ``Skyview.get_images_list()`` to return the query payload. [#3318] - -- Changed SkyView URL to https. [#3346] - -utils.tap -^^^^^^^^^ +- Added ``return_uri_map`` parameter to ``Observations.get_cloud_uris`` to return a mapping of the input data product URIs + to the returned cloud URIs. [#3314] -- The method ``upload_table`` accepts file formats accepted by astropy's - ``Table.read()``. [#3295] +- Added ``verbose`` parameter to ``Observations.get_cloud_uris`` to control whether warnings are logged when a product cannot + be found in the cloud. 
[#3314] Infrastructure, Utility and Other Changes and Additions diff --git a/astroquery/alma/core.py b/astroquery/alma/core.py index 659347ca95..cd216b61e3 100644 --- a/astroquery/alma/core.py +++ b/astroquery/alma/core.py @@ -770,6 +770,7 @@ def _get_dataarchive_url(self): return self.dataarchive_url def get_data_info(self, uids, *, expand_tarfiles=False, + cutouts=True, with_auxiliary=True, with_rawdata=True): """ Return information about the data associated with ALMA uid(s) @@ -848,8 +849,9 @@ def get_data_info(self, uids, *, expand_tarfiles=False, recursive_access_url = self.get_adhoc_service_access_url(adhoc_service) file_id = recursive_access_url.split('ID=')[1] expanded_tar = self.get_data_info(file_id) - expanded_tar = expanded_tar[ - expanded_tar['semantics'] != '#cutout'] + if not cutouts: + expanded_tar = expanded_tar[ + expanded_tar['semantics'] != '#cutout'] if not expanded_result: expanded_result = expanded_tar else: diff --git a/astroquery/linelists/cdms/core.py b/astroquery/linelists/cdms/core.py index 95c739b304..9c489524e5 100644 --- a/astroquery/linelists/cdms/core.py +++ b/astroquery/linelists/cdms/core.py @@ -12,6 +12,7 @@ # import configurable items declared in __init__.py from astroquery.linelists.cdms import conf from astroquery.exceptions import InvalidQueryError, EmptyResponseError +from astroquery import log import re import string @@ -31,7 +32,7 @@ class CDMSClass(BaseQuery): SERVER = conf.server CLASSIC_URL = conf.classic_server TIMEOUT = conf.timeout - MALFORMATTED_MOLECULE_LIST = ['017506 NH3-wHFS', '028582 H2NC', '058501 H2C2S', '064527 HC3HCN'] + MALFORMATTED_MOLECULE_LIST = ['017506 NH3-wHFS', '028528 H2NC', '058501 H2C2S', '064527 HC3HCN'] def query_lines_async(self, min_frequency, max_frequency, *, min_strength=-500, molecule='All', @@ -54,7 +55,8 @@ def query_lines_async(self, min_frequency, max_frequency, *, min_strength : int, optional Minimum strength in catalog units, the default is -500 - molecule : list, string of regex if 
parse_name_locally=True, optional + molecule : list or string if parse_name_locally=False, + string of regex if parse_name_locally=True, optional Identifiers of the molecules to search for. If this parameter is not provided the search will match any species. Default is 'All'. As a first pass, the molecule will be searched for with a direct @@ -134,18 +136,21 @@ def query_lines_async(self, min_frequency, max_frequency, *, # changes interpretation of query self._last_query_temperature = temperature_for_intensity - if molecule is not None: - if parse_name_locally: - self.lookup_ids = build_lookup() - luts = self.lookup_ids.find(molecule, flags) - if len(luts) == 0: - raise InvalidQueryError('No matching species found. Please ' - 'refine your search or read the Docs ' - 'for pointers on how to search.') - payload['Molecules'] = tuple(f"{val:06d} {key}" - for key, val in luts.items())[0] - else: - payload['Molecules'] = molecule + if molecule == 'All': + payload['Moleculesgrp'] = 'all species' + else: + if molecule is not None: + if parse_name_locally: + self.lookup_ids = build_lookup() + luts = self.lookup_ids.find(molecule, flags) + if len(luts) == 0: + raise InvalidQueryError('No matching species found. Please ' + 'refine your search or read the Docs ' + 'for pointers on how to search.') + payload['Molecules'] = tuple(f"{val:06d} {key}" + for key, val in luts.items())[0] + else: + payload['Molecules'] = molecule if get_query_payload: return payload @@ -180,7 +185,7 @@ def query_lines_async(self, min_frequency, max_frequency, *, # accounts for three formats, e.g.: '058501' or 'H2C2S' or '058501 H2C2S' badlist = (self.MALFORMATTED_MOLECULE_LIST + # noqa [y for x in self.MALFORMATTED_MOLECULE_LIST for y in x.split()]) - if payload['Molecules'] in badlist: + if 'Moleculesgrp' not in payload.keys() and payload['Molecules'] in badlist: raise ValueError(f"Molecule {payload['Molecules']} is known not to comply with standard CDMS format. 
" f"Try get_molecule({payload['Molecules']}) instead.") @@ -233,15 +238,32 @@ def _parse_result(self, response, *, verbose=False): soup = BeautifulSoup(response.text, 'html.parser') text = soup.find('pre').text + need_to_filter_bad_molecules = False + for bad_molecule in self.MALFORMATTED_MOLECULE_LIST: + if text.find(bad_molecule.split()[1]) > -1: + need_to_filter_bad_molecules = True + break + if need_to_filter_bad_molecules: + text_new = '' + text = text.split('\n') + for line in text: + need_to_include_line = True + for bad_molecule in self.MALFORMATTED_MOLECULE_LIST: + if line.find(bad_molecule.split()[1]) > -1: + need_to_include_line = False + break + if need_to_include_line: + text_new = text_new + '\n' + line + text = text_new + starts = {'FREQ': 0, 'ERR': 14, 'LGINT': 25, 'DR': 36, 'ELO': 38, 'GUP': 47, - 'MOLWT': 51, - 'TAG': 54, - 'QNFMT': 58, + 'TAG': 50, + 'QNFMT': 57, 'Ju': 61, 'Ku': 63, 'vu': 65, @@ -256,39 +278,47 @@ def _parse_result(self, response, *, verbose=False): 'F3l': 83, 'name': 89} - result = ascii.read(text, header_start=None, data_start=0, - comment=r'THIS|^\s{12,14}\d{4,6}.*', - names=list(starts.keys()), - col_starts=list(starts.values()), - format='fixed_width', fast_reader=False) - - result['FREQ'].unit = u.MHz - result['ERR'].unit = u.MHz - - result['Lab'] = result['MOLWT'] < 0 - result['MOLWT'] = np.abs(result['MOLWT']) - result['MOLWT'].unit = u.Da - - fix_keys = ['GUP'] - for suf in 'ul': - for qn in ('J', 'v', 'K', 'F1', 'F2', 'F3'): - qnind = qn+suf - fix_keys.append(qnind) - for key in fix_keys: - if not np.issubdtype(result[key].dtype, np.integer): - intcol = np.array(list(map(parse_letternumber, result[key])), - dtype=int) - result[key] = intcol - - # if there is a crash at this step, something went wrong with the query - # and the _last_query_temperature was not set. This shouldn't ever - # happen, but, well, I anticipate it will. 
- if self._last_query_temperature == 0: - result.rename_column('LGINT', 'LGAIJ') - result['LGAIJ'].unit = u.s**-1 - else: - result['LGINT'].unit = u.nm**2 * u.MHz - result['ELO'].unit = u.cm**(-1) + try: + result = ascii.read(text, header_start=None, data_start=0, + comment=r'THIS|^\s{12,14}\d{4,6}.*', + names=list(starts.keys()), + col_starts=list(starts.values()), + format='fixed_width', fast_reader=False) + + result['FREQ'].unit = u.MHz + result['ERR'].unit = u.MHz + + result['MOLWT'] = [int(x/1e3) for x in result['TAG']] + result['Lab'] = result['MOLWT'] < 0 + result['MOLWT'] = np.abs(result['MOLWT']) + result['MOLWT'].unit = u.Da + + fix_keys = ['GUP'] + for suf in 'ul': + for qn in ('J', 'v', 'K', 'F1', 'F2', 'F3'): + qnind = qn+suf + fix_keys.append(qnind) + for key in fix_keys: + if not np.issubdtype(result[key].dtype, np.integer): + intcol = np.array(list(map(parse_letternumber, result[key])), + dtype=int) + result[key] = intcol + + # if there is a crash at this step, something went wrong with the query + # and the _last_query_temperature was not set. This shouldn't ever + # happen, but, well, I anticipate it will. + if self._last_query_temperature == 0: + result.rename_column('LGINT', 'LGAIJ') + result['LGAIJ'].unit = u.s**-1 + else: + result['LGINT'].unit = u.nm**2 * u.MHz + result['ELO'].unit = u.cm**(-1) + except ValueError as ex: + # Give users a more helpful exception when parsing fails + new_message = ("Failed to parse CDMS response. This may be caused by a malformed search return. 
" + "You can check this by running `CDMS.get_molecule('')` instead; if it works, the " + "problem is caused by the CDMS search interface and cannot be worked around.") + raise ValueError(new_message) from ex return result @@ -387,35 +417,50 @@ def tryfloat(x): return result - def get_molecule(self, molecule_id, *, cache=True): + def get_molecule(self, molecule_id, *, cache=True, return_response=False): """ Retrieve the whole molecule table for a given molecule id + + Parameters + ---------- + molecule_id : str + The 6-digit molecule identifier as a string + cache : bool + Defaults to True. If set overrides global caching behavior. + See :ref:`caching documentation `. + return_response : bool, optional + If True, return the raw `requests.Response` object instead of parsing + the response. If this is set, the response will be returned whether + or not it was successful. Default is False. """ if not isinstance(molecule_id, str) or len(molecule_id) != 6: raise ValueError("molecule_id should be a length-6 string of numbers") url = f'{self.CLASSIC_URL}/entries/c{molecule_id}.cat' response = self._request(method='GET', url=url, timeout=self.TIMEOUT, cache=cache) - result = self._parse_cat(response) + + if return_response: + return response + + response.raise_for_status() + + if 'Zero lines were found' in response.text: + raise EmptyResponseError(f"Response was empty; message was '{text}'.") + + result = self._parse_cat(response.text) species_table = self.get_species_table() result.meta = dict(species_table.loc[int(molecule_id)]) return result - def _parse_cat(self, response, *, verbose=False): + def _parse_cat(self, text, *, verbose=False): """ Parse a catalog response into an `~astropy.table.Table` See details in _parse_response; this is a very similar function, but the catalog responses have a slightly different format. 
""" - - if 'Zero lines were found' in response.text: - raise EmptyResponseError(f"Response was empty; message was '{response.text}'.") - - text = response.text - # notes about the format # [F13.4, 2F8.4, I2, F10.4, I3, I7, I4, 12I2]: FREQ, ERR, LGINT, DR, ELO, GUP, TAG, QNFMT, QN noqa # 13 21 29 31 41 44 51 55 57 59 61 63 65 67 69 71 73 75 77 79 noqa @@ -426,21 +471,21 @@ def _parse_cat(self, response, *, verbose=False): 'ELO': 32, 'GUP': 42, 'TAG': 44, - 'QNFMT': 52, - 'Q1': 56, - 'Q2': 58, - 'Q3': 60, - 'Q4': 62, - 'Q5': 64, - 'Q6': 66, - 'Q7': 68, - 'Q8': 70, - 'Q9': 72, - 'Q10': 74, - 'Q11': 76, - 'Q12': 78, - 'Q13': 80, - 'Q14': 82, + 'QNFMT': 51, + 'Q1': 55, + 'Q2': 57, + 'Q3': 59, + 'Q4': 61, + 'Q5': 63, + 'Q6': 65, + 'Q7': 67, + 'Q8': 69, + 'Q9': 71, + 'Q10': 73, + 'Q11': 75, + 'Q12': 77, + 'Q13': 79, + 'Q14': 81, } result = ascii.read(text, header_start=None, data_start=0, @@ -450,7 +495,7 @@ def _parse_cat(self, response, *, verbose=False): format='fixed_width', fast_reader=False) # int truncates - which is what we want - result['MOLWT'] = [int(x/1e4) for x in result['TAG']] + result['MOLWT'] = [int(x/1e3) for x in result['TAG']] result['FREQ'].unit = u.MHz result['ERR'].unit = u.MHz @@ -460,15 +505,18 @@ def _parse_cat(self, response, *, verbose=False): result['MOLWT'].unit = u.Da fix_keys = ['GUP'] - for suf in '': - for qn in (f'Q{ii}' for ii in range(1, 15)): - qnind = qn+suf - fix_keys.append(qnind) + for qn in (f'Q{ii}' for ii in range(1, 15)): + fix_keys.append(qn) + log.debug(f"fix_keys: {fix_keys} should include Q1, Q2, ..., Q14 and GUP") for key in fix_keys: if not np.issubdtype(result[key].dtype, np.integer): intcol = np.array(list(map(parse_letternumber, result[key])), dtype=int) + if any(intcol == -999999): + intcol = np.ma.masked_where(intcol == -999999, intcol) result[key] = intcol + if not np.issubdtype(result[key].dtype, np.integer): + raise ValueError(f"Failed to parse {key} as integer") result['LGINT'].unit = u.nm**2 * u.MHz 
result['ELO'].unit = u.cm**(-1) @@ -481,18 +529,23 @@ def _parse_cat(self, response, *, verbose=False): def parse_letternumber(st): """ - Parse CDMS's two-letter QNs + Parse CDMS's two-letter QNs into integers. + + Masked values are converted to -999999. From the CDMS docs: "Exactly two characters are available for each quantum number. Therefore, half integer quanta are rounded up ! In addition, capital letters are used to - indicate quantum numbers larger than 99. E. g. A0 is 100, Z9 is 359. Small - types are used to signal corresponding negative quantum numbers." + indicate quantum numbers larger than 99. E. g. A0 is 100, Z9 is 359. Lower case characters + are used similarly to signal negative quantum numbers smaller than –9. e. g., a0 is –10, b0 is –20, etc." """ + if np.ma.is_masked(st): + return -999999 + asc = string.ascii_lowercase ASC = string.ascii_uppercase - newst = ''.join(['-' + str(asc.index(x)+10) if x in asc else - str(ASC.index(x)+10) if x in ASC else + newst = ''.join(['-' + str((asc.index(x)+1)) if x in asc else + str((ASC.index(x)+10)) if x in ASC else x for x in st]) return int(newst) diff --git a/astroquery/linelists/cdms/tests/test_cdms.py b/astroquery/linelists/cdms/tests/test_cdms.py index 597311d715..0b8059105f 100644 --- a/astroquery/linelists/cdms/tests/test_cdms.py +++ b/astroquery/linelists/cdms/tests/test_cdms.py @@ -83,6 +83,7 @@ def test_query(patch_post): assert tbl['LGINT'][0] == -7.1425 assert tbl['GUP'][0] == 3 assert tbl['GUP'][7] == 17 + assert tbl['MOLWT'][0] == 28 def test_parseletternumber(): @@ -99,9 +100,12 @@ def test_parseletternumber(): assert parse_letternumber("Z9") == 359 # inferred? 
- assert parse_letternumber("z9") == -359 + assert parse_letternumber("a0") == -10 + assert parse_letternumber("b0") == -20 assert parse_letternumber("ZZ") == 3535 + assert parse_letternumber(np.ma.masked) == -999999 + def test_hc7s(patch_post): """ diff --git a/astroquery/linelists/cdms/tests/test_cdms_remote.py b/astroquery/linelists/cdms/tests/test_cdms_remote.py index e0c7bf5a9d..73c20f86a3 100644 --- a/astroquery/linelists/cdms/tests/test_cdms_remote.py +++ b/astroquery/linelists/cdms/tests/test_cdms_remote.py @@ -38,6 +38,75 @@ def test_remote_300K(): assert tbl['FREQ'][0] == 505366.7875 assert tbl['ERR'][0] == 49.13 assert tbl['LGINT'][0] == -4.2182 + assert tbl['MOLWT'][0] == 18 + assert tbl['TAG'][0] == 18505 + + +@pytest.mark.remote_data +def test_co_basics(): + tbl = CDMS.get_molecule('028503') + assert tbl['Q1'][0] == 1 + assert tbl['Q7'][0] == 0 + assert tbl['Q1'][10] == 11 + assert tbl['Q7'][10] == 10 + assert tbl['MOLWT'][0] == 28 + assert tbl['TAG'][0] == -28503 + + +@pytest.mark.remote_data +def test_ch3cn_negqn(): + # 041505 = CH3CN on 2025-05-21 + tbl = CDMS.get_molecule('041505') + assert tbl.meta['molecule'] == 'CH3CN, v=0' + fourtominusthree = tbl[(tbl['Q1'] == 4) & (tbl['Q2'] == -3)] + assert len(fourtominusthree) >= 1 + + # check specifically for -21, which is encoded as `b1` + twentytwominustwentyone = tbl[(tbl['Q1'] == 22) & (tbl['Q2'] == -21)] + assert len(twentytwominustwentyone) >= 1 + + assert tbl['TAG'][0] == 41505 + + twentythreeminustwentyone = tbl[(tbl['Q1'] == 23) & (tbl['Q2'] == -21)] + assert len(twentythreeminustwentyone) >= 1 + assert twentythreeminustwentyone['TAG'][0] == -41505 + + +@pytest.mark.remote_data +def test_propanediol(): + tbl1 = CDMS.get_molecule('076513') + assert 'int' in tbl1['Q2'].dtype.name + + tbl = CDMS.query_lines(min_frequency=100.3 * u.GHz, + max_frequency=100.5 * u.GHz, + molecule='076513') + assert isinstance(tbl, Table) + assert len(tbl) >= 1 + assert 'aG\'g-1,2-Propanediol' in tbl['name'] + # check 
that the parser worked - this will be string or obj otherwise + assert 'int' in tbl['Ku'].dtype.name + assert tbl['MOLWT'][0] == 76 + assert tbl['TAG'][0] == 76513 + + +@pytest.mark.remote_data +@pytest.mark.xfail(reason="CDMS entry for H2NC is malformed") +def test_h2nc(): + tbl1 = CDMS.get_molecule('028528') + assert 'int' in tbl1['Q2'].dtype.name + + tbl = CDMS.query_lines(min_frequency=139.3 * u.GHz, + max_frequency=141.5 * u.GHz, + molecule='028528 H2NC') + + # these are the results that SHOULD be return if it actually worked + assert isinstance(tbl, Table) + assert len(tbl) >= 1 + assert 'H2NC' in tbl['name'] + # check that the parser worked - this will be string or obj otherwise + assert 'int' in tbl['Ku'].dtype.name + assert tbl['MOLWT'][0] == 28 + assert tbl['TAG'][0] == 28528 @pytest.mark.remote_data @@ -51,10 +120,11 @@ def test_remote_regex(): '028503 CO, v=0')) assert isinstance(tbl, Table) - assert len(tbl) == 557 + # regression test fix: there's 1 CO line that got missed because of a missing comma + assert len(tbl) == 558 assert set(tbl.keys()) == colname_set - assert set(tbl['name']) == {'H2CN', 'HC-13-N, v=0'} + assert set(tbl['name']) == {'H2CN', 'HC-13-N, v=0', 'CO, v=0'} @pytest.mark.remote_data @@ -68,16 +138,16 @@ def test_molecule_with_parens(): MC = np.ma.core.MaskedConstant() - for col, val in zip(tbl[0].colnames, (232588.7246, 0.2828, -4.1005, 3, 293.8540, 445, 66, - 506, 303, 44, 14, 30, MC, MC, MC, 45, 13, 33, MC, MC, MC, 'H2C(CN)2', False)): + for col, val in zip(tbl[0].colnames, (232588.7246, 0.2828, -4.1005, 3, 293.8540, 445, 66506, + 303, 44, 14, 30, MC, MC, MC, 45, 13, 33, MC, MC, MC, 'H2C(CN)2', 66, False)): if val is MC: assert tbl[0][col].mask else: assert tbl[0][col] == val # this test row includes degeneracy = 1225, which covers one of the weird letter-is-number parser cases - for col, val in zip(tbl[16].colnames, (233373.369, 10.26, -4.8704, 3, 1229.0674, 1125, 66, - 506, 303, 112, 10, 102, MC, MC, MC, 112, 9, 103, MC, MC, MC, 
'H2C(CN)2', False),): + for col, val in zip(tbl[16].colnames, (233373.369, 10.26, -4.8704, 3, 1229.0674, 1125, 66506, + 303, 112, 10, 102, MC, MC, MC, 112, 9, 103, MC, MC, MC, 'H2C(CN)2', 66, False),): if val is MC: assert tbl[16][col].mask else: @@ -123,6 +193,20 @@ def test_retrieve_species_table(): assert 'float' in species_table['lg(Q(1000))'].dtype.name +@pytest.mark.remote_data +def test_remote_all_species(): + tbl = CDMS.query_lines(min_frequency=100.3 * u.GHz, + max_frequency=100.5 * u.GHz, + min_strength=-5) + assert isinstance(tbl, Table) + + AlS_is_in_table = (tbl['name'] == 'AlS').sum() > 0 + Propanediol_is_in_table = (tbl['name'] == "aG'g-1,2-Propanediol").sum() > 0 + + assert AlS_is_in_table + assert Propanediol_is_in_table + + @pytest.mark.bigdata @pytest.mark.remote_data class TestRegressionAllCats: From 1f1c8c05fdb38054c50e144ffc41cc0446958def Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Fri, 7 Nov 2025 16:57:06 -0500 Subject: [PATCH 06/44] factor pseudo-common code into linelists from cdms/jplspec --- astroquery/linelists/__init__.py | 11 ++ astroquery/linelists/cdms/core.py | 66 +------- astroquery/linelists/core.py | 222 +++++++++++++++++++++++++++ astroquery/linelists/jplspec/core.py | 32 +--- 4 files changed, 241 insertions(+), 90 deletions(-) create mode 100644 astroquery/linelists/core.py diff --git a/astroquery/linelists/__init__.py b/astroquery/linelists/__init__.py index e69de29bb2..9b680e72f7 100644 --- a/astroquery/linelists/__init__.py +++ b/astroquery/linelists/__init__.py @@ -0,0 +1,11 @@ +# Licensed under a 3-clause BSD style license - see LICENSE.rst +""" +Linelists module +---------------- +This module contains sub-modules for various molecular and atomic line list databases, +as well as common utilities for parsing catalog files. 
+""" + +from .core import LineListClass, parse_letternumber + +__all__ = ['LineListClass', 'parse_letternumber'] diff --git a/astroquery/linelists/cdms/core.py b/astroquery/linelists/cdms/core.py index 9c489524e5..0e202c6177 100644 --- a/astroquery/linelists/cdms/core.py +++ b/astroquery/linelists/cdms/core.py @@ -13,6 +13,7 @@ from astroquery.linelists.cdms import conf from astroquery.exceptions import InvalidQueryError, EmptyResponseError from astroquery import log +from ..core import LineListClass import re import string @@ -26,7 +27,7 @@ def data_path(filename): @async_to_sync -class CDMSClass(BaseQuery): +class CDMSClass(BaseQuery, LineListClass): # use the Configuration Items imported from __init__.py URL = conf.search SERVER = conf.server @@ -461,67 +462,8 @@ def _parse_cat(self, text, *, verbose=False): See details in _parse_response; this is a very similar function, but the catalog responses have a slightly different format. """ - # notes about the format - # [F13.4, 2F8.4, I2, F10.4, I3, I7, I4, 12I2]: FREQ, ERR, LGINT, DR, ELO, GUP, TAG, QNFMT, QN noqa - # 13 21 29 31 41 44 51 55 57 59 61 63 65 67 69 71 73 75 77 79 noqa - starts = {'FREQ': 0, - 'ERR': 14, - 'LGINT': 22, - 'DR': 30, - 'ELO': 32, - 'GUP': 42, - 'TAG': 44, - 'QNFMT': 51, - 'Q1': 55, - 'Q2': 57, - 'Q3': 59, - 'Q4': 61, - 'Q5': 63, - 'Q6': 65, - 'Q7': 67, - 'Q8': 69, - 'Q9': 71, - 'Q10': 73, - 'Q11': 75, - 'Q12': 77, - 'Q13': 79, - 'Q14': 81, - } - - result = ascii.read(text, header_start=None, data_start=0, - comment=r'THIS|^\s{12,14}\d{4,6}.*', - names=list(starts.keys()), - col_starts=list(starts.values()), - format='fixed_width', fast_reader=False) - - # int truncates - which is what we want - result['MOLWT'] = [int(x/1e3) for x in result['TAG']] - - result['FREQ'].unit = u.MHz - result['ERR'].unit = u.MHz - - result['Lab'] = result['MOLWT'] < 0 - result['MOLWT'] = np.abs(result['MOLWT']) - result['MOLWT'].unit = u.Da - - fix_keys = ['GUP'] - for qn in (f'Q{ii}' for ii in range(1, 15)): - 
fix_keys.append(qn) - log.debug(f"fix_keys: {fix_keys} should include Q1, Q2, ..., Q14 and GUP") - for key in fix_keys: - if not np.issubdtype(result[key].dtype, np.integer): - intcol = np.array(list(map(parse_letternumber, result[key])), - dtype=int) - if any(intcol == -999999): - intcol = np.ma.masked_where(intcol == -999999, intcol) - result[key] = intcol - if not np.issubdtype(result[key].dtype, np.integer): - raise ValueError(f"Failed to parse {key} as integer") - - result['LGINT'].unit = u.nm**2 * u.MHz - result['ELO'].unit = u.cm**(-1) - - return result + # Use the base class method for CDMS format parsing + return self._parse_cat_cdms_format(text, verbose=verbose) CDMS = CDMSClass() diff --git a/astroquery/linelists/core.py b/astroquery/linelists/core.py new file mode 100644 index 0000000000..932162cd6f --- /dev/null +++ b/astroquery/linelists/core.py @@ -0,0 +1,222 @@ +# Licensed under a 3-clause BSD style license - see LICENSE.rst +""" +Base classes and common utilities for linelist queries (JPLSpec, CDMS, etc.) +""" +import numpy as np +import string +import astropy.units as u +from astropy.io import ascii +from astroquery.exceptions import EmptyResponseError +from astroquery import log + + +__all__ = ['LineListClass', 'parse_letternumber'] + + +def parse_letternumber(st): + """ + Parse CDMS's two-letter QNs into integers. + + Masked values are converted to -999999. + + From the CDMS docs: + "Exactly two characters are available for each quantum number. Therefore, half + integer quanta are rounded up ! In addition, capital letters are used to + indicate quantum numbers larger than 99. E. g. A0 is 100, Z9 is 359. Lower case characters + are used similarly to signal negative quantum numbers smaller than –9. e. g., a0 is –10, b0 is –20, etc." 
+ """ + if np.ma.is_masked(st): + return -999999 + + asc = string.ascii_lowercase + ASC = string.ascii_uppercase + newst = ''.join(['-' + str((asc.index(x)+1)) if x in asc else + str((ASC.index(x)+10)) if x in ASC else + x for x in st]) + return int(newst) + + +class LineListClass: + """ + Base class for line list catalog queries (JPL, CDMS, etc.) + + This class provides common functionality for parsing catalog files + and retrieving molecule data from spectroscopic databases. + """ + + def get_molecule(self, molecule_id, *, cache=True, **kwargs): + """ + Retrieve the whole molecule table for a given molecule id from the catalog. + + This method should be overridden by subclasses to implement + catalog-specific behavior, but provides common structure. + + Parameters + ---------- + molecule_id : int or str + The molecule tag/identifier. Can be an integer or a string. + cache : bool + Defaults to True. If set overrides global caching behavior. + **kwargs : dict + Additional keyword arguments specific to the subclass implementation. + + Returns + ------- + Table : `~astropy.table.Table` + Table containing all spectral lines for the requested molecule. + """ + raise NotImplementedError("Subclasses must implement get_molecule()") + + def _parse_cat(self, response_or_text, *, verbose=False): + """ + Parse a catalog file response into an `~astropy.table.Table`. + + The catalog data files are typically composed of 80-character card images, + with one card image per spectral line. This method provides the common + parsing logic, but can be overridden by subclasses for catalog-specific formats. + + Parameters + ---------- + response_or_text : `requests.Response` or str + The HTTP response from the catalog file request or the text content. + verbose : bool, optional + If True, print additional debugging information. + + Returns + ------- + Table : `~astropy.table.Table` + Parsed catalog data. 
+ """ + raise NotImplementedError("Subclasses must implement _parse_cat()") + + def _parse_cat_jpl_format(self, text, *, verbose=False): + """ + Parse a JPL-format catalog file into an `~astropy.table.Table`. + + The catalog data files are composed of 80-character card images, with + one card image per spectral line. The format of each card image is: + FREQ, ERR, LGINT, DR, ELO, GUP, TAG, QNFMT, QN', QN" + (F13.4,F8.4, F8.4, I2,F10.4, I3, I7, I4, 6I2, 6I2) + + Parameters + ---------- + text : str + The catalog file text content. + verbose : bool, optional + Not used currently. + + Returns + ------- + Table : `~astropy.table.Table` + Parsed catalog data. + """ + if 'Zero lines were found' in text or len(text.strip()) == 0: + raise EmptyResponseError(f"Response was empty; message was '{text}'.") + + # Parse the catalog file with fixed-width format + # Format: FREQ(13.4), ERR(8.4), LGINT(8.4), DR(2), ELO(10.4), GUP(3), TAG(7), QNFMT(4), QN'(12), QN"(12) + result = ascii.read(text, header_start=None, data_start=0, + comment=r'THIS|^\s{12,14}\d{4,6}.*', + names=('FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', + 'TAG', 'QNFMT', 'QN\'', 'QN"'), + col_starts=(0, 13, 21, 29, 31, 41, 44, 51, 55, 67), + format='fixed_width', fast_reader=False) + + # Ensure TAG is integer type + result['TAG'] = result['TAG'].astype(int) + + # Add units + result['FREQ'].unit = u.MHz + result['ERR'].unit = u.MHz + result['LGINT'].unit = u.nm**2 * u.MHz + result['ELO'].unit = u.cm**(-1) + + # Add laboratory measurement flag + # A negative TAG value indicates laboratory-measured frequency + result['Lab'] = result['TAG'] < 0 + # Convert TAG to absolute value + result['TAG'] = abs(result['TAG']) + + return result + + def _parse_cat_cdms_format(self, text, *, verbose=False): + """ + Parse a CDMS-format catalog file into an `~astropy.table.Table`. + + The catalog data files are composed of 80-character card images. 
+ Format: [F13.4, 2F8.4, I2, F10.4, I3, I7, I4, 12I2]: + FREQ, ERR, LGINT, DR, ELO, GUP, TAG, QNFMT, QN + + Parameters + ---------- + text : str + The catalog file text content. + verbose : bool, optional + Not used currently. + + Returns + ------- + Table : `~astropy.table.Table` + Parsed catalog data. + """ + # Column start positions + starts = {'FREQ': 0, + 'ERR': 14, + 'LGINT': 22, + 'DR': 30, + 'ELO': 32, + 'GUP': 42, + 'TAG': 44, + 'QNFMT': 51, + 'Q1': 55, + 'Q2': 57, + 'Q3': 59, + 'Q4': 61, + 'Q5': 63, + 'Q6': 65, + 'Q7': 67, + 'Q8': 69, + 'Q9': 71, + 'Q10': 73, + 'Q11': 75, + 'Q12': 77, + 'Q13': 79, + 'Q14': 81, + } + + result = ascii.read(text, header_start=None, data_start=0, + comment=r'THIS|^\s{12,14}\d{4,6}.*', + names=list(starts.keys()), + col_starts=list(starts.values()), + format='fixed_width', fast_reader=False) + + # Ensure TAG is integer type for computation + # int truncates - which is what we want + result['TAG'] = result['TAG'].astype(int) + result['MOLWT'] = [int(x/1e3) for x in result['TAG']] + + result['FREQ'].unit = u.MHz + result['ERR'].unit = u.MHz + + result['Lab'] = result['MOLWT'] < 0 + result['MOLWT'] = np.abs(result['MOLWT']) + result['MOLWT'].unit = u.Da + + fix_keys = ['GUP'] + for qn in (f'Q{ii}' for ii in range(1, 15)): + fix_keys.append(qn) + log.debug(f"fix_keys: {fix_keys} should include Q1, Q2, ..., Q14 and GUP") + for key in fix_keys: + if not np.issubdtype(result[key].dtype, np.integer): + intcol = np.array(list(map(parse_letternumber, result[key])), + dtype=int) + if any(intcol == -999999): + intcol = np.ma.masked_where(intcol == -999999, intcol) + result[key] = intcol + if not np.issubdtype(result[key].dtype, np.integer): + raise ValueError(f"Failed to parse {key} as integer") + + result['LGINT'].unit = u.nm**2 * u.MHz + result['ELO'].unit = u.cm**(-1) + + return result diff --git a/astroquery/linelists/jplspec/core.py b/astroquery/linelists/jplspec/core.py index 8b61c5b08b..e375119453 100644 --- 
a/astroquery/linelists/jplspec/core.py +++ b/astroquery/linelists/jplspec/core.py @@ -10,6 +10,7 @@ from . import conf from . import lookup_table from astroquery.exceptions import EmptyResponseError, InvalidQueryError +from ..core import LineListClass __all__ = ['JPLSpec', 'JPLSpecClass'] @@ -21,7 +22,7 @@ def data_path(filename): @async_to_sync -class JPLSpecClass(BaseQuery): +class JPLSpecClass(BaseQuery, LineListClass): # use the Configuration Items imported from __init__.py URL = conf.server @@ -308,33 +309,8 @@ def _parse_cat(self, response, *, verbose=False): Table : `~astropy.table.Table` Parsed catalog data. """ - if 'Zero lines were found' in response.text or len(response.text.strip()) == 0: - raise EmptyResponseError(f"Response was empty; message was '{response.text}'.") - - text = response.text - - # Parse the catalog file with fixed-width format - # Format: FREQ(13.4), ERR(8.4), LGINT(8.4), DR(2), ELO(10.4), GUP(3), TAG(7), QNFMT(4), QN'(12), QN"(12) - result = ascii.read(text, header_start=None, data_start=0, - comment=r'THIS|^\s{12,14}\d{4,6}.*', - names=('FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', - 'TAG', 'QNFMT', 'QN\'', 'QN"'), - col_starts=(0, 13, 21, 29, 31, 41, 44, 51, 55, 67), - format='fixed_width', fast_reader=False) - - # Add units - result['FREQ'].unit = u.MHz - result['ERR'].unit = u.MHz - result['LGINT'].unit = u.nm**2 * u.MHz - result['ELO'].unit = u.cm**(-1) - - # Add laboratory measurement flag - # A negative TAG value indicates laboratory-measured frequency - result['Lab'] = result['TAG'] < 0 - # Convert TAG to absolute value - result['TAG'] = abs(result['TAG']) - - return result + # Use the base class method for JPL format parsing + return self._parse_cat_jpl_format(response.text, verbose=verbose) JPLSpec = JPLSpecClass() From fe162e21e3c67d1a0f1b9f76427dd6ce257aeb08 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Fri, 7 Nov 2025 17:04:12 -0500 Subject: [PATCH 07/44] ignore own deprecation warnings --- setup.cfg | 2 
++ 1 file changed, 2 insertions(+) diff --git a/setup.cfg b/setup.cfg index 1b93d76267..b00172e24e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -52,6 +52,8 @@ filterwarnings = # Remove along with astropy-helpers, once we switch to a new versioning scheme ignore:Use setlocale:DeprecationWarning ignore: 'locale.getdefaultlocale' is deprecated and slated for removal:DeprecationWarning +# Ignore astroquery's own module reorganization deprecation warnings during testing + ignore:Importing from 'astroquery.jplspec' is deprecated:DeprecationWarning # These are temporary measures, all of these should be fixed: # ----------------------------------------------------------- ignore:distutils Version classes are deprecated:DeprecationWarning From 4a235a1ca0444f26a830a07034920e1d6f44ec05 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Fri, 7 Nov 2025 19:07:06 -0500 Subject: [PATCH 08/44] add fallback by default --- astroquery/linelists/cdms/core.py | 13 ++++++++++--- astroquery/linelists/jplspec/core.py | 11 ++++++++++- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/astroquery/linelists/cdms/core.py b/astroquery/linelists/cdms/core.py index 0e202c6177..b2b70e6197 100644 --- a/astroquery/linelists/cdms/core.py +++ b/astroquery/linelists/cdms/core.py @@ -39,7 +39,7 @@ def query_lines_async(self, min_frequency, max_frequency, *, min_strength=-500, molecule='All', temperature_for_intensity=300, flags=0, parse_name_locally=False, get_query_payload=False, - cache=True): + cache=True, fallback_to_getmolecule=False): """ Creates an HTTP POST request based on the desired parameters and returns a response. @@ -92,6 +92,10 @@ def query_lines_async(self, min_frequency, max_frequency, *, Defaults to True. If set overrides global caching behavior. See :ref:`caching documentation `. + fallback_to_getmolecule : bool, optional + If specified, and if the molecule specified is in the list of + known malformatted molecules, return the get_molecule results instead. 
+ Returns ------- response : `requests.Response` @@ -187,8 +191,11 @@ def query_lines_async(self, min_frequency, max_frequency, *, badlist = (self.MALFORMATTED_MOLECULE_LIST + # noqa [y for x in self.MALFORMATTED_MOLECULE_LIST for y in x.split()]) if 'Moleculesgrp' not in payload.keys() and payload['Molecules'] in badlist: - raise ValueError(f"Molecule {payload['Molecules']} is known not to comply with standard CDMS format. " - f"Try get_molecule({payload['Molecules']}) instead.") + if fallback_to_getmolecule: + return self.get_molecule(payload['Molecules'], cache=cache) + else: + raise ValueError(f"Molecule {payload['Molecules']} is known not to comply with standard CDMS format. " + f"Try get_molecule({payload['Molecules']}) instead.") return response2 diff --git a/astroquery/linelists/jplspec/core.py b/astroquery/linelists/jplspec/core.py index e375119453..e621173a7c 100644 --- a/astroquery/linelists/jplspec/core.py +++ b/astroquery/linelists/jplspec/core.py @@ -32,7 +32,9 @@ def query_lines_async(self, min_frequency, max_frequency, *, min_strength=-500, max_lines=2000, molecule='All', flags=0, parse_name_locally=False, - get_query_payload=False, cache=True): + get_query_payload=False, cache=True, + fallback_to_getmolecule=True + ): """ Creates an HTTP POST request based on the desired parameters and returns a response. 
@@ -127,6 +129,13 @@ def query_lines_async(self, min_frequency, max_frequency, *, # built-in caching system response = self._request(method='POST', url=self.URL, data=payload, timeout=self.TIMEOUT, cache=cache) + response.raise_for_status() + + if 'Zero lines were found for your search criteria' in response.text: + if fallback_to_getmolecule: + return self.get_molecule(payload['Mol'], cache=cache) + else: + raise ValueError(response.text) return response From 72f04a1828572fd74f8773d3e08c9279a7894469 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Fri, 7 Nov 2025 20:03:07 -0500 Subject: [PATCH 09/44] move the jpl fallback around --- astroquery/linelists/jplspec/core.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/astroquery/linelists/jplspec/core.py b/astroquery/linelists/jplspec/core.py index e621173a7c..46441beeb8 100644 --- a/astroquery/linelists/jplspec/core.py +++ b/astroquery/linelists/jplspec/core.py @@ -11,6 +11,7 @@ from . import lookup_table from astroquery.exceptions import EmptyResponseError, InvalidQueryError from ..core import LineListClass +from urllib.parse import parse_qs __all__ = ['JPLSpec', 'JPLSpecClass'] @@ -21,6 +22,9 @@ def data_path(filename): return os.path.join(data_dir, filename) +dead_server_message = "The requested URL was not found on this server." 
+ + @async_to_sync class JPLSpecClass(BaseQuery, LineListClass): @@ -28,12 +32,15 @@ class JPLSpecClass(BaseQuery, LineListClass): URL = conf.server TIMEOUT = conf.timeout + def __init__(self, fallback_to_getmolecule=True): + super().__init__() + self.fallback_to_getmolecule = fallback_to_getmolecule + def query_lines_async(self, min_frequency, max_frequency, *, min_strength=-500, max_lines=2000, molecule='All', flags=0, parse_name_locally=False, - get_query_payload=False, cache=True, - fallback_to_getmolecule=True + get_query_payload=False, cache=True ): """ Creates an HTTP POST request based on the desired parameters and @@ -131,12 +138,6 @@ def query_lines_async(self, min_frequency, max_frequency, *, timeout=self.TIMEOUT, cache=cache) response.raise_for_status() - if 'Zero lines were found for your search criteria' in response.text: - if fallback_to_getmolecule: - return self.get_molecule(payload['Mol'], cache=cache) - else: - raise ValueError(response.text) - return response def _parse_result(self, response, *, verbose=False): @@ -171,7 +172,12 @@ def _parse_result(self, response, *, verbose=False): """ if 'Zero lines were found' in response.text: - raise EmptyResponseError(f"Response was empty; message was '{response.text}'.") + if self.fallback_to_getmolecule: + payload = parse_qs(response.request.body) + mol = payload['Mol'][0] + return self.get_molecule(mol) + else: + raise EmptyResponseError(f"Response was empty; message was '{response.text}'.") # data starts at 0 since regex was applied # Warning for a result with more than 1000 lines: @@ -270,7 +276,9 @@ def get_molecule(self, molecule_id, *, cache=True): if isinstance(molecule_id, int): molecule_str = f'{molecule_id:06d}' elif isinstance(molecule_id, str): - if len(molecule_id) != 6 or not molecule_id.isdigit(): + try: + molecule_id = f"{int(molecule_id[:6]):06d}" + except ValueError: raise ValueError("molecule_id should be an integer or a length-6 string of numbers") molecule_str = molecule_id else: 
From cd0dd1d57ffd460ad14c079c0cb919ceb333f802 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Fri, 7 Nov 2025 21:50:25 -0500 Subject: [PATCH 10/44] add new test file --- .../jplspec/tests/data/H2O_sample.cat | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 astroquery/linelists/jplspec/tests/data/H2O_sample.cat diff --git a/astroquery/linelists/jplspec/tests/data/H2O_sample.cat b/astroquery/linelists/jplspec/tests/data/H2O_sample.cat new file mode 100644 index 0000000000..bd13a08689 --- /dev/null +++ b/astroquery/linelists/jplspec/tests/data/H2O_sample.cat @@ -0,0 +1,52 @@ + 8006.5805 2.8510-18.6204 3 6219.6192 45 18003140422 418 0 21 715 0 + 12478.2535 0.2051-13.1006 3 3623.7652 31 18003140415 7 9 0 16 412 0 + 22235.0798 0.0001 -5.8825 3 446.5107 39 -180031404 6 1 6 0 5 2 3 0 + 27206.4582 6.3643-19.1265 3 7210.5493141 18003140423 617 0 24 520 0 + 71592.4316 0.5310-13.4989 3 4606.1683 39 18003140419 416 0 18 513 0 + 115542.5692 0.6588-13.2595 3 4606.1683 35 18003140417 810 0 18 513 0 + 139614.2930 0.1500 -9.3636 3 3080.1788 87 -18003140414 6 9 0 15 312 0 + 177317.0680 0.1500-10.3413 3 3437.2774 31 -18003140415 610 0 16 313 0 + 183310.0870 0.0010 -3.6463 3 136.1639 7 -180031404 3 1 3 0 2 2 0 0 + 247440.0960 0.1500 -9.0097 3 2872.5806 29 -18003140414 410 0 15 313 0 + 259952.1820 0.2000 -8.6690 3 2739.4286 27 -18003140413 6 8 0 14 311 0 + 266574.0983 1.8473-14.1089 3 5739.2279129 18003140421 417 0 20 714 0 + 289008.0871 2.7396-15.1447 3 6167.7109129 18003140421 615 0 20 912 0 + 294805.1937 4.1586-16.0382 3 6707.3362135 18003140422 716 0 23 419 0 + 321225.6770 0.0006 -5.0909 3 1282.9191 63 -18003140410 2 9 0 9 3 6 0 + 325152.8990 0.0010 -3.5711 3 315.7795 11 -180031404 5 1 5 0 4 2 2 0 + 339043.9960 0.1500-10.0708 3 3810.9369 99 -18003140416 611 0 17 314 0 + 354808.5800 0.2000-10.4028 3 4006.0734105 -18003140417 413 0 16 710 0 + 373514.7088 6.1926-17.5865 3 7386.7750135 180031404221013 0 23 716 0 + 380197.3598 0.0001 -2.6152 3 
212.1564 27 -180031404 4 1 4 0 3 2 1 0 + 390134.5100 0.0500 -6.0290 3 1525.1360 21 -18003140410 3 7 0 11 210 0 + 437346.6640 0.0020 -4.8220 3 1045.0584 15 -180031404 7 5 3 0 6 6 0 0 + 439150.7948 0.0003 -3.6615 3 742.0763 39 -180031404 6 4 3 0 5 5 0 0 + 443018.3546 0.0008 -4.3337 3 1045.0580 45 -180031404 7 5 2 0 6 6 1 0 + 448001.0775 0.0005 -2.5935 3 285.4186 27 -180031404 4 2 3 0 3 3 0 0 + 458682.8454 1.1313-13.1673 3 5276.8018 41 18003140420 416 0 19 713 0 + 470888.9030 0.0020 -4.0778 3 742.0730 13 -180031404 6 4 2 0 5 5 1 0 + 474689.1080 0.0010 -3.4856 3 488.1342 11 -180031404 5 3 3 0 4 4 0 0 + 488491.1280 0.0030 -4.1739 3 586.4792 13 -180031404 6 2 4 0 7 1 7 0 + 503568.5200 0.0200 -4.9916 3 1394.8142 51 -180031404 8 6 3 0 7 7 0 0 + 504482.6900 0.0500 -5.4671 3 1394.8142 17 -180031404 8 6 2 0 7 7 1 0 + 525890.1638 0.8432-12.2048 3 5035.1266117 18003140419 514 0 18 811 0 + 530342.8600 0.2000 -7.1006 3 2533.7932 87 -18003140414 312 0 13 4 9 0 + 534240.4544 0.3469-11.2954 3 4409.3446 37 18003140418 414 0 17 711 0 + 556935.9877 0.0003 -0.8189 3 23.7944 9 -180031404 1 1 0 0 1 0 1 0 + 557985.4794 0.6432-11.6213 3 4833.2084117 18003140419 415 0 18 712 0 + 558017.0036 12.4193-18.1025 3 7729.4622 49 18003140424 618 0 25 521 0 + 571913.6860 0.1000 -6.9705 3 2414.7235 75 -18003140412 6 7 0 13 310 0 + 591693.4339 0.2120 -8.6820 3 3244.6008 87 18003140414 7 8 0 15 411 0 + 593113.7249 7.4502-18.5975 3 7924.4438 49 18003140424 717 0 231014 0 + 593227.8163 0.4197-10.8822 3 4201.2514 35 18003140417 612 0 18 315 0 + 596308.5878 4.5348-15.8345 3 6687.8251 47 18003140423 519 0 22 616 0 + 614309.5658 2.1666-14.1672 3 5680.7868 39 18003140419 911 0 20 614 0 + 620293.9651 1.1653-12.0811 3 5031.9777117 18003140419 514 0 20 417 0 + 620700.9549 0.0006 -2.7692 3 488.1077 33 -180031404 5 3 2 0 4 4 1 0 + 624732.7750 5.8384-16.9250 3 7210.3271 47 18003140423 717 0 24 420 0 + 645766.1230 0.0300 -6.1081 3 1789.0429 19 -180031404 9 7 3 0 8 8 0 0 + 645905.7060 0.0500 -5.6308 3 1789.0429 57 
-180031404 9 7 2 0 8 8 1 0 + 723142.3610 9.8873-19.4330 3 8554.6415 53 18003140426 521 0 25 818 0 + 752033.1430 0.1000 -0.9985 3 70.0908 5 -180031404 2 1 1 0 2 0 2 0 + 766793.5950 0.1000 -6.2559 3 1960.2074 23 -18003140411 5 7 0 12 210 0 + 826549.8880 0.2000 -9.9788 3 4174.2875111 -18003140418 415 0 17 512 0 \ No newline at end of file From 264a908aba4ff0e4f54410f55efda1eec4b36496 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Fri, 7 Nov 2025 22:01:46 -0500 Subject: [PATCH 11/44] change a test and an import --- astroquery/linelists/jplspec/core.py | 3 +++ astroquery/linelists/jplspec/tests/test_jplspec.py | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/astroquery/linelists/jplspec/core.py b/astroquery/linelists/jplspec/core.py index 46441beeb8..e2a69b440f 100644 --- a/astroquery/linelists/jplspec/core.py +++ b/astroquery/linelists/jplspec/core.py @@ -275,7 +275,10 @@ def get_molecule(self, molecule_id, *, cache=True): # Convert to string and zero-pad to 6 digits if isinstance(molecule_id, int): molecule_str = f'{molecule_id:06d}' + if len(molecule_str) > 6: + raise ValueError("molecule_id should be an integer with fewer than 6 digits or a length-6 string of numbers") elif isinstance(molecule_id, str): + # this is for the common case where the molecule is specified e.g. 
as 028001 CO try: molecule_id = f"{int(molecule_id[:6]):06d}" except ValueError: diff --git a/astroquery/linelists/jplspec/tests/test_jplspec.py b/astroquery/linelists/jplspec/tests/test_jplspec.py index 9d586bb8a7..2b6de42bf5 100644 --- a/astroquery/linelists/jplspec/tests/test_jplspec.py +++ b/astroquery/linelists/jplspec/tests/test_jplspec.py @@ -4,7 +4,7 @@ from astropy import units as u from astropy.table import Table -from ....jplspec import JPLSpec +from ..core import JPLSpec file1 = 'CO.data' file2 = 'CO_6.data' @@ -160,4 +160,4 @@ def test_get_molecule_input_validation(): # Test wrong length string with pytest.raises(ValueError): - JPLSpec.get_molecule('123') + JPLSpec.get_molecule(1234567) From d1b4c95a2e56ffe1f17d7e8cb92240e48bfa1569 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sat, 8 Nov 2025 06:46:58 -0500 Subject: [PATCH 12/44] make QN reading better, add tests --- astroquery/linelists/core.py | 12 ++++ astroquery/linelists/jplspec/core.py | 6 +- .../jplspec/tests/test_jplspec_remote.py | 66 ++++++++++++++++++- 3 files changed, 81 insertions(+), 3 deletions(-) diff --git a/astroquery/linelists/core.py b/astroquery/linelists/core.py index 932162cd6f..57de7b5289 100644 --- a/astroquery/linelists/core.py +++ b/astroquery/linelists/core.py @@ -98,6 +98,8 @@ def _parse_cat_jpl_format(self, text, *, verbose=False): FREQ, ERR, LGINT, DR, ELO, GUP, TAG, QNFMT, QN', QN" (F13.4,F8.4, F8.4, I2,F10.4, I3, I7, I4, 6I2, 6I2) + https://spec.jpl.nasa.gov/ftp/pub/catalog/doc/catintro.pdf + Parameters ---------- text : str @@ -131,6 +133,16 @@ def _parse_cat_jpl_format(self, text, *, verbose=False): result['LGINT'].unit = u.nm**2 * u.MHz result['ELO'].unit = u.cm**(-1) + # parse QNs + n_qns = result['QNFMT'] % 10 + assert len(set(n_qns)) == 1, "All QNFMT values should have the same number of QNs" + n_qns = n_qns[0] + for ii in range(n_qns): + qn_col = f'QN{ii+1}' + result[qn_col] = np.array( + [int(line[8 - (ii + 1) * 2: 8 - ii * 2].strip()) for 
line in result['QN\'']], + dtype=int) + # Add laboratory measurement flag # A negative TAG value indicates laboratory-measured frequency result['Lab'] = result['TAG'] < 0 diff --git a/astroquery/linelists/jplspec/core.py b/astroquery/linelists/jplspec/core.py index e2a69b440f..41adab7397 100644 --- a/astroquery/linelists/jplspec/core.py +++ b/astroquery/linelists/jplspec/core.py @@ -4,6 +4,7 @@ import astropy.units as u from astropy.io import ascii +from astropy import table from ...query import BaseQuery from ...utils import async_to_sync # import configurable items declared in __init__.py @@ -174,8 +175,9 @@ def _parse_result(self, response, *, verbose=False): if 'Zero lines were found' in response.text: if self.fallback_to_getmolecule: payload = parse_qs(response.request.body) - mol = payload['Mol'][0] - return self.get_molecule(mol) + tbs = [self.get_molecule(mol) for mol in payload['Mol']] + tb = table.vstack(tbs) + return tb else: raise EmptyResponseError(f"Response was empty; message was '{response.text}'.") diff --git a/astroquery/linelists/jplspec/tests/test_jplspec_remote.py b/astroquery/linelists/jplspec/tests/test_jplspec_remote.py index 49cc7579e8..3af3385274 100644 --- a/astroquery/linelists/jplspec/tests/test_jplspec_remote.py +++ b/astroquery/linelists/jplspec/tests/test_jplspec_remote.py @@ -2,11 +2,14 @@ from astropy import units as u from astropy.table import Table -from ....jplspec import JPLSpec +from ..core import JPLSpec +from astroquery.exceptions import EmptyResponseError +@pytest.mark.xfail(reason="2025 server problems", raises=EmptyResponseError) @pytest.mark.remote_data def test_remote(): + JPLSpec.fallback_to_getmolecule = False tbl = JPLSpec.query_lines(min_frequency=500 * u.GHz, max_frequency=1000 * u.GHz, min_strength=-500, @@ -23,8 +26,50 @@ def test_remote(): assert tbl['FREQ'][35] == 987926.7590 +@pytest.mark.remote_data +def test_remote_fallback(): + JPLSpec.fallback_to_getmolecule = True + tbl = 
JPLSpec.query_lines(min_frequency=500 * u.GHz, + max_frequency=1000 * u.GHz, + min_strength=-500, + molecule="18003 H2O") + assert isinstance(tbl, Table) + tbl = tbl[((tbl['FREQ'].quantity > 500*u.GHz) & (tbl['FREQ'].quantity < 1*u.THz))] + assert len(tbl) == 36 + assert set(tbl.keys()) == set(['FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', + 'TAG', 'QNFMT', 'QN\'', 'QN"', 'Lab']) + + assert tbl['FREQ'][0] == 503568.5200 + assert tbl['ERR'][0] == 0.0200 + assert tbl['LGINT'][0] == -4.9916 + assert tbl['ERR'][7] == 12.4193 + assert tbl['FREQ'][35] == 987926.7590 + + +@pytest.mark.remote_data +def test_remote_regex_fallback(): + JPLSpec.fallback_to_getmolecule = True + tbl = JPLSpec.query_lines(min_frequency=500 * u.GHz, + max_frequency=1000 * u.GHz, + min_strength=-500, + molecule=("28001", "28002", "28003")) + assert isinstance(tbl, Table) + tbl = tbl[((tbl['FREQ'].quantity > 500*u.GHz) & (tbl['FREQ'].quantity < 1*u.THz))] + assert len(tbl) == 16 + assert set(tbl.keys()) == set(['FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', + 'TAG', 'QNFMT', 'QN\'', 'QN"', 'Lab']) + + assert tbl['FREQ'][0] == 576267.9305 + assert tbl['ERR'][0] == .0005 + assert tbl['LGINT'][0] == -3.0118 + assert tbl['ERR'][7] == 8.3063 + assert tbl['FREQ'][15] == 946175.3151 + + +@pytest.mark.xfail(reason="2025 server problems", raises=EmptyResponseError) @pytest.mark.remote_data def test_remote_regex(): + JPLSpec.fallback_to_getmolecule = False tbl = JPLSpec.query_lines(min_frequency=500 * u.GHz, max_frequency=1000 * u.GHz, min_strength=-500, @@ -104,3 +149,22 @@ def test_get_molecule_various(): # Verify TAG values are positive assert all(tbl['TAG'] > 0) + + +def test_get_molecule_qn1(): + tbl = JPLSpec.get_molecule(28001) + assert isinstance(tbl, Table) + assert len(tbl) > 0 + assert 'QN1' in tbl.colnames + assert all(tbl['QN1'] > 0) + assert 'QN2' not in tbl.colnames + + +def test_get_molecule_qn4(): + """ CN has 4 QNs """ + tbl = JPLSpec.get_molecule(26001) + assert isinstance(tbl, Table) + assert 
len(tbl) > 0 + for ii in range(1, 5): + assert f'QN{ii}' in tbl.colnames + assert all(tbl[f'QN{ii}'] > 0) \ No newline at end of file From 60da9d0ecad77299a775ab99f869be2b23fde815 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sat, 8 Nov 2025 09:03:01 -0500 Subject: [PATCH 13/44] add deeper QN parsing logic --- astroquery/linelists/core.py | 35 +++++++++++++++---- astroquery/linelists/jplspec/core.py | 6 +++- .../jplspec/tests/test_jplspec_remote.py | 18 ++++++++-- 3 files changed, 48 insertions(+), 11 deletions(-) diff --git a/astroquery/linelists/core.py b/astroquery/linelists/core.py index 57de7b5289..b79e940c97 100644 --- a/astroquery/linelists/core.py +++ b/astroquery/linelists/core.py @@ -8,6 +8,7 @@ from astropy.io import ascii from astroquery.exceptions import EmptyResponseError from astroquery import log +from astropy import table __all__ = ['LineListClass', 'parse_letternumber'] @@ -135,13 +136,33 @@ def _parse_cat_jpl_format(self, text, *, verbose=False): # parse QNs n_qns = result['QNFMT'] % 10 - assert len(set(n_qns)) == 1, "All QNFMT values should have the same number of QNs" - n_qns = n_qns[0] - for ii in range(n_qns): - qn_col = f'QN{ii+1}' - result[qn_col] = np.array( - [int(line[8 - (ii + 1) * 2: 8 - ii * 2].strip()) for line in result['QN\'']], - dtype=int) + tables = [result[result['QNFMT'] % 10 == qq] for qq in set(n_qns)] + + for tbl in tables: + n_qns = tbl['QNFMT'][0] % 10 + if n_qns > 1: + qnlen = int(str(tbl['QN\''].dtype)[-1]) + for ii in range(n_qns): + qn_col = f'QN\'{ii+1}' + # string parsing can truncate to length=2n or 2n-1 depending + # on whether there are any two-digit QNs in the column + ind1 = max(0, qnlen - (ii + 1) * 2) + ind2 = qnlen - ii * 2 + tbl[qn_col] = np.array( + [int(line[ind1: ind2].strip()) for line in tbl['QN\'']], + dtype=int) + qn_col = f'QN"{ii+1}' + tbl[qn_col] = np.array( + [int(line[ind1: ind2].strip()) for line in tbl['QN"']], + dtype=int) + del tbl['QN\''] + del tbl['QN"'] + else: + 
tbl['QN\''] = np.array(tbl['QN\''], dtype=int) + tbl['QN"'] = np.array(tbl['QN"'], dtype=int) + + result = table.vstack(tables) + # Add laboratory measurement flag # A negative TAG value indicates laboratory-measured frequency diff --git a/astroquery/linelists/jplspec/core.py b/astroquery/linelists/jplspec/core.py index 41adab7397..9f37510645 100644 --- a/astroquery/linelists/jplspec/core.py +++ b/astroquery/linelists/jplspec/core.py @@ -118,7 +118,6 @@ def query_lines_async(self, min_frequency, max_frequency, *, if molecule is not None: if parse_name_locally: - self.lookup_ids = build_lookup() payload['Mol'] = tuple(self.lookup_ids.find(molecule, flags).values()) if len(molecule) == 0: raise InvalidQueryError('No matching species found. Please ' @@ -174,8 +173,13 @@ def _parse_result(self, response, *, verbose=False): if 'Zero lines were found' in response.text: if self.fallback_to_getmolecule: + self.lookup_ids = build_lookup() payload = parse_qs(response.request.body) tbs = [self.get_molecule(mol) for mol in payload['Mol']] + for tb, mol in zip(tbs, payload['Mol']): + tb['Name'] = self.lookup_ids.find(mol, flags=0) + for key in tb.meta: + tb.meta[f'{mol}_{key}'] = tb.meta.pop(key) tb = table.vstack(tbs) return tb else: diff --git a/astroquery/linelists/jplspec/tests/test_jplspec_remote.py b/astroquery/linelists/jplspec/tests/test_jplspec_remote.py index 3af3385274..70516edd77 100644 --- a/astroquery/linelists/jplspec/tests/test_jplspec_remote.py +++ b/astroquery/linelists/jplspec/tests/test_jplspec_remote.py @@ -37,7 +37,10 @@ def test_remote_fallback(): tbl = tbl[((tbl['FREQ'].quantity > 500*u.GHz) & (tbl['FREQ'].quantity < 1*u.THz))] assert len(tbl) == 36 assert set(tbl.keys()) == set(['FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', - 'TAG', 'QNFMT', 'QN\'', 'QN"', 'Lab']) + 'TAG', 'QNFMT', 'Lab', 'Name', + 'QN"1', 'QN"2', 'QN"3', 'QN"4', + "QN'1", "QN'2", "QN'3", "QN'4" + ]) assert tbl['FREQ'][0] == 503568.5200 assert tbl['ERR'][0] == 0.0200 @@ -48,6 +51,10 @@ 
def test_remote_fallback(): @pytest.mark.remote_data def test_remote_regex_fallback(): + """ + CO, H13CN, HC15N + Some of these have different combinations of QNs + """ JPLSpec.fallback_to_getmolecule = True tbl = JPLSpec.query_lines(min_frequency=500 * u.GHz, max_frequency=1000 * u.GHz, @@ -56,8 +63,12 @@ def test_remote_regex_fallback(): assert isinstance(tbl, Table) tbl = tbl[((tbl['FREQ'].quantity > 500*u.GHz) & (tbl['FREQ'].quantity < 1*u.THz))] assert len(tbl) == 16 + # there are more QN formats than the original query had assert set(tbl.keys()) == set(['FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', - 'TAG', 'QNFMT', 'QN\'', 'QN"', 'Lab']) + 'TAG', 'QNFMT', 'QN\'', 'QN"', 'Lab', + 'QN"1', 'QN"2', "QN'", "QN'1", "QN'2", + 'Name' + ]) assert tbl['FREQ'][0] == 576267.9305 assert tbl['ERR'][0] == .0005 @@ -77,7 +88,8 @@ def test_remote_regex(): assert isinstance(tbl, Table) assert len(tbl) == 16 assert set(tbl.keys()) == set(['FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', - 'TAG', 'QNFMT', 'QN\'', 'QN"']) + 'TAG', 'QNFMT', 'QN\'', 'QN"', + ]) assert tbl['FREQ'][0] == 576267.9305 assert tbl['ERR'][0] == .0005 From 6065e5ec7b0200697f4c434273b37dd514056994 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sat, 8 Nov 2025 09:03:14 -0500 Subject: [PATCH 14/44] whitespace must die --- astroquery/linelists/core.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/astroquery/linelists/core.py b/astroquery/linelists/core.py index b79e940c97..37875d8f2e 100644 --- a/astroquery/linelists/core.py +++ b/astroquery/linelists/core.py @@ -40,7 +40,7 @@ def parse_letternumber(st): class LineListClass: """ Base class for line list catalog queries (JPL, CDMS, etc.) - + This class provides common functionality for parsing catalog files and retrieving molecule data from spectroscopic databases. 
""" @@ -127,7 +127,7 @@ def _parse_cat_jpl_format(self, text, *, verbose=False): # Ensure TAG is integer type result['TAG'] = result['TAG'].astype(int) - + # Add units result['FREQ'].unit = u.MHz result['ERR'].unit = u.MHz @@ -160,7 +160,7 @@ def _parse_cat_jpl_format(self, text, *, verbose=False): else: tbl['QN\''] = np.array(tbl['QN\''], dtype=int) tbl['QN"'] = np.array(tbl['QN"'], dtype=int) - + result = table.vstack(tables) @@ -177,7 +177,7 @@ def _parse_cat_cdms_format(self, text, *, verbose=False): Parse a CDMS-format catalog file into an `~astropy.table.Table`. The catalog data files are composed of 80-character card images. - Format: [F13.4, 2F8.4, I2, F10.4, I3, I7, I4, 12I2]: + Format: [F13.4, 2F8.4, I2, F10.4, I3, I7, I4, 12I2]: FREQ, ERR, LGINT, DR, ELO, GUP, TAG, QNFMT, QN Parameters From 9370e869846f8b9c40d6b2cb3318cd5f314c05ba Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sat, 8 Nov 2025 09:05:35 -0500 Subject: [PATCH 15/44] restore missing lookup_id table builder --- astroquery/linelists/jplspec/core.py | 1 + astroquery/linelists/jplspec/tests/test_jplspec_remote.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/astroquery/linelists/jplspec/core.py b/astroquery/linelists/jplspec/core.py index 9f37510645..85d2c9b4c6 100644 --- a/astroquery/linelists/jplspec/core.py +++ b/astroquery/linelists/jplspec/core.py @@ -118,6 +118,7 @@ def query_lines_async(self, min_frequency, max_frequency, *, if molecule is not None: if parse_name_locally: + self.lookup_ids = build_lookup() payload['Mol'] = tuple(self.lookup_ids.find(molecule, flags).values()) if len(molecule) == 0: raise InvalidQueryError('No matching species found. 
Please ' diff --git a/astroquery/linelists/jplspec/tests/test_jplspec_remote.py b/astroquery/linelists/jplspec/tests/test_jplspec_remote.py index 70516edd77..4aa445b095 100644 --- a/astroquery/linelists/jplspec/tests/test_jplspec_remote.py +++ b/astroquery/linelists/jplspec/tests/test_jplspec_remote.py @@ -163,6 +163,7 @@ def test_get_molecule_various(): assert all(tbl['TAG'] > 0) +@pytest.mark.remote_data def test_get_molecule_qn1(): tbl = JPLSpec.get_molecule(28001) assert isinstance(tbl, Table) @@ -172,6 +173,7 @@ def test_get_molecule_qn1(): assert 'QN2' not in tbl.colnames +@pytest.mark.remote_data def test_get_molecule_qn4(): """ CN has 4 QNs """ tbl = JPLSpec.get_molecule(26001) From 86c98803c5b4049a4f1cf9168169ae94099638c3 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sat, 8 Nov 2025 09:06:17 -0500 Subject: [PATCH 16/44] fix a test --- astroquery/linelists/jplspec/tests/test_jplspec.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/astroquery/linelists/jplspec/tests/test_jplspec.py b/astroquery/linelists/jplspec/tests/test_jplspec.py index 2b6de42bf5..f2b14e13c2 100644 --- a/astroquery/linelists/jplspec/tests/test_jplspec.py +++ b/astroquery/linelists/jplspec/tests/test_jplspec.py @@ -130,7 +130,10 @@ def test_parse_cat(): assert isinstance(tbl, Table) assert len(tbl) > 0 assert set(tbl.keys()) == set(['FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', - 'TAG', 'QNFMT', 'QN\'', 'QN"', 'Lab']) + 'TAG', 'QNFMT', 'Lab', + 'QN"1', 'QN"2', 'QN"3', 'QN"4', + "QN'1", "QN'2", "QN'3", "QN'4" + ]) # Check units assert tbl['FREQ'].unit == u.MHz From 66b8eff23881b9e4e3272301ccad8def187b4094 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sat, 8 Nov 2025 09:30:04 -0500 Subject: [PATCH 17/44] yet more sophisticated parsing. 
methanol stymies me once again --- astroquery/linelists/core.py | 29 ++++++++++++++----- .../jplspec/tests/test_jplspec_remote.py | 15 ++++++---- 2 files changed, 31 insertions(+), 13 deletions(-) diff --git a/astroquery/linelists/core.py b/astroquery/linelists/core.py index 37875d8f2e..3f7b77238f 100644 --- a/astroquery/linelists/core.py +++ b/astroquery/linelists/core.py @@ -138,6 +138,23 @@ def _parse_cat_jpl_format(self, text, *, verbose=False): n_qns = result['QNFMT'] % 10 tables = [result[result['QNFMT'] % 10 == qq] for qq in set(n_qns)] + # some tables have +/-/blank entries in QNs + # pm_is_ok should be True when the QN columns contain '+' or '-'. + # (can't do a str check on np.integer dtype so have to filter that out first) + pm_is_ok = ((not np.issubdtype(result["QN'"].dtype, np.integer)) + and any(('+' in str(line) or '-' in str(line)) for line in result["QN'"])) + def int_or_pm(st): + try: + return int(st) + except ValueError: + try: + return parse_letternumber(st) + except ValueError: + if pm_is_ok and (st.strip() == '' or st.strip() == '+' or st.strip() == '-'): + return st.strip() + else: + raise ValueError(f'"{st}" is not a valid +/-/blank entry') + for tbl in tables: n_qns = tbl['QNFMT'][0] % 10 if n_qns > 1: @@ -148,13 +165,11 @@ def _parse_cat_jpl_format(self, text, *, verbose=False): # on whether there are any two-digit QNs in the column ind1 = max(0, qnlen - (ii + 1) * 2) ind2 = qnlen - ii * 2 - tbl[qn_col] = np.array( - [int(line[ind1: ind2].strip()) for line in tbl['QN\'']], - dtype=int) - qn_col = f'QN"{ii+1}' - tbl[qn_col] = np.array( - [int(line[ind1: ind2].strip()) for line in tbl['QN"']], - dtype=int) + qnp = [int_or_pm(line[ind1: ind2].strip()) for line in tbl['QN\'']] + qnpp = [int_or_pm(line[ind1: ind2].strip()) for line in tbl['QN"']] + dtype = str if any('+' in str(x) for x in qnp) else int + tbl[f"QN'{ii+1}"] = np.array(qnp, dtype=dtype) + tbl[f'QN"{ii+1}'] = np.array(qnpp, dtype=dtype) del tbl['QN\''] del tbl['QN"'] else: diff 
--git a/astroquery/linelists/jplspec/tests/test_jplspec_remote.py b/astroquery/linelists/jplspec/tests/test_jplspec_remote.py index 4aa445b095..a2170dbfac 100644 --- a/astroquery/linelists/jplspec/tests/test_jplspec_remote.py +++ b/astroquery/linelists/jplspec/tests/test_jplspec_remote.py @@ -109,7 +109,9 @@ def test_get_molecule_remote(): # Check expected columns including Lab flag expected_cols = {'FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', - 'TAG', 'QNFMT', 'QN\'', 'QN"', 'Lab'} + 'TAG', 'QNFMT', 'Lab', + 'QN"1', 'QN"2', 'QN"3', 'QN"4', + "QN'1", "QN'2", "QN'3", "QN'4"} assert set(tbl.keys()) == expected_cols # Check units @@ -168,9 +170,10 @@ def test_get_molecule_qn1(): tbl = JPLSpec.get_molecule(28001) assert isinstance(tbl, Table) assert len(tbl) > 0 - assert 'QN1' in tbl.colnames - assert all(tbl['QN1'] > 0) - assert 'QN2' not in tbl.colnames + assert 'QN1"' in tbl.colnames + assert 'QN2"' not in tbl.colnames + assert "QN1'" in tbl.colnames + assert "QN2'" not in tbl.colnames @pytest.mark.remote_data @@ -180,5 +183,5 @@ def test_get_molecule_qn4(): assert isinstance(tbl, Table) assert len(tbl) > 0 for ii in range(1, 5): - assert f'QN{ii}' in tbl.colnames - assert all(tbl[f'QN{ii}'] > 0) \ No newline at end of file + assert f'QN"{ii}' in tbl.colnames + assert f"QN'{ii}" in tbl.colnames \ No newline at end of file From 0c09510c122ee8b4b5a08ec73473262efeb01a45 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sat, 8 Nov 2025 12:34:19 -0500 Subject: [PATCH 18/44] add a general test and fix the parsing to avoid truncation-driven errors --- astroquery/linelists/core.py | 9 +++++---- .../jplspec/tests/test_jplspec_remote.py | 18 +++++++++++++++++- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/astroquery/linelists/core.py b/astroquery/linelists/core.py index 3f7b77238f..6574e40b7d 100644 --- a/astroquery/linelists/core.py +++ b/astroquery/linelists/core.py @@ -163,10 +163,11 @@ def int_or_pm(st): qn_col = f'QN\'{ii+1}' # string 
parsing can truncate to length=2n or 2n-1 depending # on whether there are any two-digit QNs in the column - ind1 = max(0, qnlen - (ii + 1) * 2) - ind2 = qnlen - ii * 2 - qnp = [int_or_pm(line[ind1: ind2].strip()) for line in tbl['QN\'']] - qnpp = [int_or_pm(line[ind1: ind2].strip()) for line in tbl['QN"']] + ind1 = ii * 2 + ind2 = ii * 2 + 2 + # rjust(qnlen) is needed to enforce that all strings retain their exact original shape + qnp = [int_or_pm(line.rjust(qnlen)[ind1: ind2].strip()) for line in tbl['QN\'']] + qnpp = [int_or_pm(line.rjust(qnlen)[ind1: ind2].strip()) for line in tbl['QN"']] dtype = str if any('+' in str(x) for x in qnp) else int tbl[f"QN'{ii+1}"] = np.array(qnp, dtype=dtype) tbl[f'QN"{ii+1}'] = np.array(qnpp, dtype=dtype) diff --git a/astroquery/linelists/jplspec/tests/test_jplspec_remote.py b/astroquery/linelists/jplspec/tests/test_jplspec_remote.py index a2170dbfac..39b9c6978e 100644 --- a/astroquery/linelists/jplspec/tests/test_jplspec_remote.py +++ b/astroquery/linelists/jplspec/tests/test_jplspec_remote.py @@ -184,4 +184,20 @@ def test_get_molecule_qn4(): assert len(tbl) > 0 for ii in range(1, 5): assert f'QN"{ii}' in tbl.colnames - assert f"QN'{ii}" in tbl.colnames \ No newline at end of file + assert f"QN'{ii}" in tbl.colnames + +@pytest.mark.bigdata +@pytest.mark.remote_data +class TestRegressionAllMolecules: + """Test that we can get each molecule in JPL database""" + species_table = JPLSpec.get_species_table() + + @pytest.mark.parametrize('row', species_table) + def test_regression_all_molecules(self, row): + """ + Expensive test - try all the molecules + """ + mol_id = row['TAG'] + tbl = JPLSpec.get_molecule(mol_id) + assert isinstance(tbl, Table) + assert len(tbl) > 0 \ No newline at end of file From 05ad7ccdfaad5aae58537a34737ca049c7607d93 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sat, 8 Nov 2025 12:53:05 -0500 Subject: [PATCH 19/44] fixed more tests --- .../jplspec/tests/test_jplspec_remote.py | 45 
++++++++++++++++--- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/astroquery/linelists/jplspec/tests/test_jplspec_remote.py b/astroquery/linelists/jplspec/tests/test_jplspec_remote.py index 39b9c6978e..b29c73db87 100644 --- a/astroquery/linelists/jplspec/tests/test_jplspec_remote.py +++ b/astroquery/linelists/jplspec/tests/test_jplspec_remote.py @@ -170,10 +170,10 @@ def test_get_molecule_qn1(): tbl = JPLSpec.get_molecule(28001) assert isinstance(tbl, Table) assert len(tbl) > 0 - assert 'QN1"' in tbl.colnames - assert 'QN2"' not in tbl.colnames - assert "QN1'" in tbl.colnames - assert "QN2'" not in tbl.colnames + assert 'QN"' in tbl.colnames + assert 'QN1"' not in tbl.colnames + assert "QN'" in tbl.colnames + assert "QN1'" not in tbl.colnames @pytest.mark.remote_data @@ -186,6 +186,40 @@ def test_get_molecule_qn4(): assert f'QN"{ii}' in tbl.colnames assert f"QN'{ii}" in tbl.colnames + +@pytest.mark.remote_data +def test_get_molecule_parser_details(): + """ + Verifying a known hard-to-parse row + 982.301 0.174 -17.8172 3 464.3000 9 320031304 4-2 2 5-5 2 + 991.369 0.003 -9.8234 3 310.3570 37 32003130418 3 - 0 18 3 + 0 + """ + tbl = JPLSpec.get_molecule(32003) + testrow = tbl[5] + assert testrow['FREQ'] == 982.301 + assert testrow["QN'1"] == 4 + assert testrow["QN'2"] == -2 + assert testrow["QN'3"] == '' + assert testrow["QN'4"] == 2 + + assert testrow['QN"1'] == 5 + assert testrow['QN"2'] == -5 + assert testrow['QN"3'] == '' + assert testrow['QN"4'] == 2 + + testrow = tbl[6] + assert testrow['FREQ'] == 991.369 + assert testrow["QN'1"] == 18 + assert testrow["QN'2"] == 3 + assert testrow["QN'3"] == '-' + assert testrow["QN'4"] == 0 + + assert testrow['QN"1'] == 18 + assert testrow['QN"2'] == 3 + assert testrow['QN"3'] == '+' + assert testrow['QN"4'] == 0 + + @pytest.mark.bigdata @pytest.mark.remote_data class TestRegressionAllMolecules: @@ -200,4 +234,5 @@ def test_regression_all_molecules(self, row): mol_id = row['TAG'] tbl = 
JPLSpec.get_molecule(mol_id) assert isinstance(tbl, Table) - assert len(tbl) > 0 \ No newline at end of file + assert len(tbl) > 0 + From 3e799a7f468239c5cfa014e47202ca5e2afe1375 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sat, 8 Nov 2025 13:25:44 -0500 Subject: [PATCH 20/44] more fixes --- astroquery/linelists/core.py | 10 +++++++++- astroquery/linelists/jplspec/core.py | 3 ++- .../linelists/jplspec/tests/test_jplspec_remote.py | 10 +++++++++- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/astroquery/linelists/core.py b/astroquery/linelists/core.py index 6574e40b7d..8ed3255872 100644 --- a/astroquery/linelists/core.py +++ b/astroquery/linelists/core.py @@ -158,7 +158,15 @@ def int_or_pm(st): for tbl in tables: n_qns = tbl['QNFMT'][0] % 10 if n_qns > 1: - qnlen = int(str(tbl['QN\''].dtype)[-1]) + if tbl['QN\''].dtype.kind == 'U': # Unicode + qnlen = tbl['QN\''].dtype.itemsize // 4 + elif tbl['QN\''].dtype.kind == 'S': # Byte string + qnlen = tbl['QN\''].dtype.itemsize + else: + raise TypeError("Unexpected dtype for QN' column") + if qnlen % 2 == 1: + # entries are always even, but the leftmost entry can get truncated by the reader + qnlen += 1 for ii in range(n_qns): qn_col = f'QN\'{ii+1}' # string parsing can truncate to length=2n or 2n-1 depending diff --git a/astroquery/linelists/jplspec/core.py b/astroquery/linelists/jplspec/core.py index 85d2c9b4c6..0cd7f90593 100644 --- a/astroquery/linelists/jplspec/core.py +++ b/astroquery/linelists/jplspec/core.py @@ -3,6 +3,7 @@ import warnings import astropy.units as u +import numpy as np from astropy.io import ascii from astropy import table from ...query import BaseQuery @@ -280,7 +281,7 @@ def get_molecule(self, molecule_id, *, cache=True): >>> print(table) # doctest: +SKIP """ # Convert to string and zero-pad to 6 digits - if isinstance(molecule_id, int): + if isinstance(molecule_id, (int, np.int32, np.int64)): molecule_str = f'{molecule_id:06d}' if len(molecule_str) > 6: raise 
ValueError("molecule_id should be an integer with fewer than 6 digits or a length-6 string of numbers") diff --git a/astroquery/linelists/jplspec/tests/test_jplspec_remote.py b/astroquery/linelists/jplspec/tests/test_jplspec_remote.py index b29c73db87..f75c204d9d 100644 --- a/astroquery/linelists/jplspec/tests/test_jplspec_remote.py +++ b/astroquery/linelists/jplspec/tests/test_jplspec_remote.py @@ -148,10 +148,18 @@ def test_get_molecule_string_id(): @pytest.mark.remote_data def test_get_molecule_various(): - """Test get_molecule with various molecules.""" + """ + Test get_molecule with various molecules. + + CH & CD are both regression tests for difficult molecules with >4 QNs and + missing 2-digit QNs (i.e., columns with _only_ 1-digit QNs at the start of + the columns with QNs). + """ test_molecules = [ (28001, 'CO'), # Simple diatomic (32003, 'CH3OH'), # Complex organic + (13002, 'CH'), # another simple molecule w/5 QNs + (14004, 'CD'), ] for mol_id, expected_name in test_molecules: From aa00a66ac346b5810ea922faa2028487f0dc7130 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sat, 8 Nov 2025 13:42:51 -0500 Subject: [PATCH 21/44] a few more specific cases, plus one weird case --- astroquery/linelists/core.py | 21 ++++++++++--------- .../jplspec/tests/test_jplspec_remote.py | 4 +++- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/astroquery/linelists/core.py b/astroquery/linelists/core.py index 8ed3255872..a9021c09a4 100644 --- a/astroquery/linelists/core.py +++ b/astroquery/linelists/core.py @@ -155,18 +155,19 @@ def int_or_pm(st): else: raise ValueError(f'"{st}" is not a valid +/-/blank entry') + # At least this molecule, NH, claims 5 QNs but has only 4 + bad_qnfmt_dict = { + 15001: 1234, + } + mol_tag = result['TAG'][0] + for tbl in tables: - n_qns = tbl['QNFMT'][0] % 10 + if mol_tag in bad_qnfmt_dict: + n_qns = bad_qnfmt_dict[mol_tag] % 10 + else: + n_qns = tbl['QNFMT'][0] % 10 if n_qns > 1: - if tbl['QN\''].dtype.kind == 'U': 
# Unicode - qnlen = tbl['QN\''].dtype.itemsize // 4 - elif tbl['QN\''].dtype.kind == 'S': # Byte string - qnlen = tbl['QN\''].dtype.itemsize - else: - raise TypeError("Unexpected dtype for QN' column") - if qnlen % 2 == 1: - # entries are always even, but the leftmost entry can get truncated by the reader - qnlen += 1 + qnlen = 2 * n_qns for ii in range(n_qns): qn_col = f'QN\'{ii+1}' # string parsing can truncate to length=2n or 2n-1 depending diff --git a/astroquery/linelists/jplspec/tests/test_jplspec_remote.py b/astroquery/linelists/jplspec/tests/test_jplspec_remote.py index f75c204d9d..8642d57202 100644 --- a/astroquery/linelists/jplspec/tests/test_jplspec_remote.py +++ b/astroquery/linelists/jplspec/tests/test_jplspec_remote.py @@ -159,7 +159,9 @@ def test_get_molecule_various(): (28001, 'CO'), # Simple diatomic (32003, 'CH3OH'), # Complex organic (13002, 'CH'), # another simple molecule w/5 QNs - (14004, 'CD'), + (14004, 'CD'), # no 2-digit QNs in first col + (15001, 'NH'), # incorrect QNFMT, says there are 5 QNs, only 4 + (18004, 'NH2D'), # ? ] for mol_id, expected_name in test_molecules: From ffef21db8f560e1e90f624970044c2100a356dbe Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sat, 8 Nov 2025 22:01:08 -0500 Subject: [PATCH 22/44] flag out a bunch of failing tests, only one of which _maybe_ should be passing --- astroquery/linelists/core.py | 43 ++++++++++++------- astroquery/linelists/jplspec/core.py | 7 +-- .../jplspec/tests/test_jplspec_remote.py | 40 +++++++++-------- 3 files changed, 53 insertions(+), 37 deletions(-) diff --git a/astroquery/linelists/core.py b/astroquery/linelists/core.py index a9021c09a4..74e8c9cafc 100644 --- a/astroquery/linelists/core.py +++ b/astroquery/linelists/core.py @@ -26,6 +26,8 @@ def parse_letternumber(st): indicate quantum numbers larger than 99. E. g. A0 is 100, Z9 is 359. Lower case characters are used similarly to signal negative quantum numbers smaller than –9. e. g., a0 is –10, b0 is –20, etc." 
""" + if isinstance(st, (np.int32, np.int64, int)): + return st if np.ma.is_masked(st): return -999999 @@ -134,9 +136,9 @@ def _parse_cat_jpl_format(self, text, *, verbose=False): result['LGINT'].unit = u.nm**2 * u.MHz result['ELO'].unit = u.cm**(-1) - # parse QNs - n_qns = result['QNFMT'] % 10 - tables = [result[result['QNFMT'] % 10 == qq] for qq in set(n_qns)] + # split table by qnfmt; each chunk must be separately parsed. + qnfmts = np.unique(result['QNFMT']) + tables = [result[result['QNFMT'] == qq] for qq in qnfmts] # some tables have +/-/blank entries in QNs # pm_is_ok should be True when the QN columns contain '+' or '-'. @@ -161,6 +163,9 @@ def int_or_pm(st): } mol_tag = result['TAG'][0] + if mol_tag in (32001,): + raise NotImplementedError("Molecule O2 (32001) does not follow the format standard.") + for tbl in tables: if mol_tag in bad_qnfmt_dict: n_qns = bad_qnfmt_dict[mol_tag] % 10 @@ -169,22 +174,27 @@ def int_or_pm(st): if n_qns > 1: qnlen = 2 * n_qns for ii in range(n_qns): - qn_col = f'QN\'{ii+1}' - # string parsing can truncate to length=2n or 2n-1 depending - # on whether there are any two-digit QNs in the column - ind1 = ii * 2 - ind2 = ii * 2 + 2 - # rjust(qnlen) is needed to enforce that all strings retain their exact original shape - qnp = [int_or_pm(line.rjust(qnlen)[ind1: ind2].strip()) for line in tbl['QN\'']] - qnpp = [int_or_pm(line.rjust(qnlen)[ind1: ind2].strip()) for line in tbl['QN"']] - dtype = str if any('+' in str(x) for x in qnp) else int - tbl[f"QN'{ii+1}"] = np.array(qnp, dtype=dtype) - tbl[f'QN"{ii+1}'] = np.array(qnpp, dtype=dtype) + if tbl["QN'"].dtype in (int, np.int32, np.int64): + # for the case where it was already parsed as int + # (53005 is an example) + tbl[f"QN'{ii+1}"] = tbl["QN'"] + tbl[f'QN"{ii+1}'] = tbl['QN"'] + else: + # string parsing can truncate to length=2n or 2n-1 depending + # on whether there are any two-digit QNs in the column + ind1 = ii * 2 + ind2 = ii * 2 + 2 + # rjust(qnlen) is needed to enforce that 
all strings retain their exact original shape + qnp = [int_or_pm(line.rjust(qnlen)[ind1: ind2].strip()) for line in tbl['QN\'']] + qnpp = [int_or_pm(line.rjust(qnlen)[ind1: ind2].strip()) for line in tbl['QN"']] + dtype = str if any('+' in str(x) for x in qnp) else int + tbl[f"QN'{ii+1}"] = np.array(qnp, dtype=dtype) + tbl[f'QN"{ii+1}'] = np.array(qnpp, dtype=dtype) del tbl['QN\''] del tbl['QN"'] else: - tbl['QN\''] = np.array(tbl['QN\''], dtype=int) - tbl['QN"'] = np.array(tbl['QN"'], dtype=int) + tbl['QN\''] = np.array(list(map(parse_letternumber, tbl['QN\''])), dtype=int) + tbl['QN"'] = np.array(list(map(parse_letternumber, tbl['QN"'])), dtype=int) result = table.vstack(tables) @@ -197,6 +207,7 @@ def int_or_pm(st): return result + def _parse_cat_cdms_format(self, text, *, verbose=False): """ Parse a CDMS-format catalog file into an `~astropy.table.Table`. diff --git a/astroquery/linelists/jplspec/core.py b/astroquery/linelists/jplspec/core.py index 0cd7f90593..0577b42843 100644 --- a/astroquery/linelists/jplspec/core.py +++ b/astroquery/linelists/jplspec/core.py @@ -24,9 +24,6 @@ def data_path(filename): return os.path.join(data_dir, filename) -dead_server_message = "The requested URL was not found on this server." - - @async_to_sync class JPLSpecClass(BaseQuery, LineListClass): @@ -301,6 +298,10 @@ def get_molecule(self, molecule_id, *, cache=True): # Request the catalog file response = self._request(method='GET', url=url, timeout=self.TIMEOUT, cache=cache) + response.raise_for_status() + + if 'The requested URL was not found on this server.' 
in response.text: + raise EmptyResponseError(f"No data found for molecule ID {molecule_id}.") # Parse the catalog file result = self._parse_cat(response) diff --git a/astroquery/linelists/jplspec/tests/test_jplspec_remote.py b/astroquery/linelists/jplspec/tests/test_jplspec_remote.py index 8642d57202..c124ae04f1 100644 --- a/astroquery/linelists/jplspec/tests/test_jplspec_remote.py +++ b/astroquery/linelists/jplspec/tests/test_jplspec_remote.py @@ -147,7 +147,16 @@ def test_get_molecule_string_id(): @pytest.mark.remote_data -def test_get_molecule_various(): +@pytest.mark.parametrize('mol_id,expected_name', [ + (28001, 'CO'), # Simple diatomic + (32003, 'CH3OH'), # Complex organic + (13002, 'CH'), # another simple molecule w/5 QNs + (14004, 'CD'), # no 2-digit QNs in first col + (15001, 'NH'), # incorrect QNFMT, says there are 5 QNs, only 4 + (18004, 'NH2D'), # highlighted a mismatch between qnlen & n_qns + # (32001, 'O2'), # masked second QN set? +]) +def test_get_molecule_various(mol_id, expected_name): """ Test get_molecule with various molecules. @@ -155,24 +164,14 @@ def test_get_molecule_various(): missing 2-digit QNs (i.e., columns with _only_ 1-digit QNs at the start of the columns with QNs). """ - test_molecules = [ - (28001, 'CO'), # Simple diatomic - (32003, 'CH3OH'), # Complex organic - (13002, 'CH'), # another simple molecule w/5 QNs - (14004, 'CD'), # no 2-digit QNs in first col - (15001, 'NH'), # incorrect QNFMT, says there are 5 QNs, only 4 - (18004, 'NH2D'), # ? 
- ] + tbl = JPLSpec.get_molecule(mol_id) + assert isinstance(tbl, Table) + assert len(tbl) > 0 + assert 'NAME' in tbl.meta + assert expected_name in tbl.meta['NAME'] - for mol_id, expected_name in test_molecules: - tbl = JPLSpec.get_molecule(mol_id) - assert isinstance(tbl, Table) - assert len(tbl) > 0 - assert 'NAME' in tbl.meta - assert expected_name in tbl.meta['NAME'] - - # Verify TAG values are positive - assert all(tbl['TAG'] > 0) + # Verify TAG values are positive + assert all(tbl['TAG'] > 0) @pytest.mark.remote_data @@ -242,6 +241,11 @@ def test_regression_all_molecules(self, row): Expensive test - try all the molecules """ mol_id = row['TAG'] + if mol_id in (32001, 32002, 32005, 34001, 39003, 44004, 44009, 44012, + 81001 # may be fine? not entirely sure what's wrong + ): # O2 has masked QNs making it hard to test automatically + # N2O = 44009 is just not there + pytest.skip("Skipping O2 due to masked QNs") tbl = JPLSpec.get_molecule(mol_id) assert isinstance(tbl, Table) assert len(tbl) > 0 From ce02b3c664797a5465087c3223a5dd49a6af3d82 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sun, 9 Nov 2025 07:57:14 -0500 Subject: [PATCH 23/44] whitespace & changelog --- CHANGES.rst | 6 ++ astroquery/linelists/core.py | 5 +- .../jplspec/tests/test_jplspec_remote.py | 62 ++++++++++--------- 3 files changed, 40 insertions(+), 33 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 93042c8d73..27951e9a72 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -34,6 +34,12 @@ jplspec ^^^^^^^ - Moved to linelists/ [#3455] +- Refactored to use linelists.core [#3456] + +linelists +^^^^^^^^^ + +- General tools for both CDMS/JPL moved to linelists.core [#3456] Infrastructure, Utility and Other Changes and Additions ------------------------------------------------------- diff --git a/astroquery/linelists/core.py b/astroquery/linelists/core.py index 74e8c9cafc..576994a061 100644 --- a/astroquery/linelists/core.py +++ b/astroquery/linelists/core.py @@ -143,8 +143,9
@@ def _parse_cat_jpl_format(self, text, *, verbose=False): # some tables have +/-/blank entries in QNs # pm_is_ok should be True when the QN columns contain '+' or '-'. # (can't do a str check on np.integer dtype so have to filter that out first) - pm_is_ok = ((not np.issubdtype(result["QN'"].dtype, np.integer)) + pm_is_ok = ((not np.issubdtype(result["QN'"].dtype, np.integer)) and any(('+' in str(line) or '-' in str(line)) for line in result["QN'"])) + def int_or_pm(st): try: return int(st) @@ -198,7 +199,6 @@ def int_or_pm(st): result = table.vstack(tables) - # Add laboratory measurement flag # A negative TAG value indicates laboratory-measured frequency result['Lab'] = result['TAG'] < 0 @@ -207,7 +207,6 @@ def int_or_pm(st): return result - def _parse_cat_cdms_format(self, text, *, verbose=False): """ Parse a CDMS-format catalog file into an `~astropy.table.Table`. diff --git a/astroquery/linelists/jplspec/tests/test_jplspec_remote.py b/astroquery/linelists/jplspec/tests/test_jplspec_remote.py index c124ae04f1..f2bdb1b456 100644 --- a/astroquery/linelists/jplspec/tests/test_jplspec_remote.py +++ b/astroquery/linelists/jplspec/tests/test_jplspec_remote.py @@ -103,33 +103,33 @@ def test_get_molecule_remote(): """Test get_molecule with remote data retrieval.""" # Test with H2O tbl = JPLSpec.get_molecule(18003) - + assert isinstance(tbl, Table) assert len(tbl) > 0 - + # Check expected columns including Lab flag expected_cols = {'FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', 'TAG', 'QNFMT', 'Lab', 'QN"1', 'QN"2', 'QN"3', 'QN"4', "QN'1", "QN'2", "QN'3", "QN'4"} assert set(tbl.keys()) == expected_cols - + # Check units assert tbl['FREQ'].unit == u.MHz assert tbl['ERR'].unit == u.MHz assert tbl['LGINT'].unit == u.nm**2 * u.MHz assert tbl['ELO'].unit == u.cm**(-1) - + # Check metadata was attached assert 'NAME' in tbl.meta assert tbl.meta['NAME'].strip() == 'H2O' assert 'TAG' in tbl.meta assert tbl.meta['TAG'] == 18003 - + # Check Lab flag assert 'Lab' in tbl.colnames 
assert tbl['Lab'].dtype == bool - + # H2O should have some lab measurements assert sum(tbl['Lab']) > 0 @@ -139,7 +139,7 @@ def test_get_molecule_string_id(): """Test get_molecule with string ID format.""" # Test with CO using string ID tbl = JPLSpec.get_molecule('028001') - + assert isinstance(tbl, Table) assert len(tbl) > 0 assert 'NAME' in tbl.meta @@ -169,7 +169,7 @@ def test_get_molecule_various(mol_id, expected_name): assert len(tbl) > 0 assert 'NAME' in tbl.meta assert expected_name in tbl.meta['NAME'] - + # Verify TAG values are positive assert all(tbl['TAG'] > 0) @@ -200,33 +200,33 @@ def test_get_molecule_qn4(): def test_get_molecule_parser_details(): """ Verifying a known hard-to-parse row - 982.301 0.174 -17.8172 3 464.3000 9 320031304 4-2 2 5-5 2 - 991.369 0.003 -9.8234 3 310.3570 37 32003130418 3 - 0 18 3 + 0 + 982.301 0.174 -17.8172 3 464.3000 9 320031304 4-2 2 5-5 2 + 991.369 0.003 -9.8234 3 310.3570 37 32003130418 3 - 0 18 3 + 0 """ tbl = JPLSpec.get_molecule(32003) testrow = tbl[5] assert testrow['FREQ'] == 982.301 - assert testrow["QN'1"] == 4 - assert testrow["QN'2"] == -2 - assert testrow["QN'3"] == '' - assert testrow["QN'4"] == 2 + assert testrow["QN'1"] == 4 + assert testrow["QN'2"] == -2 + assert testrow["QN'3"] == '' + assert testrow["QN'4"] == 2 - assert testrow['QN"1'] == 5 - assert testrow['QN"2'] == -5 - assert testrow['QN"3'] == '' - assert testrow['QN"4'] == 2 + assert testrow['QN"1'] == 5 + assert testrow['QN"2'] == -5 + assert testrow['QN"3'] == '' + assert testrow['QN"4'] == 2 testrow = tbl[6] assert testrow['FREQ'] == 991.369 - assert testrow["QN'1"] == 18 - assert testrow["QN'2"] == 3 - assert testrow["QN'3"] == '-' - assert testrow["QN'4"] == 0 + assert testrow["QN'1"] == 18 + assert testrow["QN'2"] == 3 + assert testrow["QN'3"] == '-' + assert testrow["QN'4"] == 0 - assert testrow['QN"1'] == 18 - assert testrow['QN"2'] == 3 - assert testrow['QN"3'] == '+' - assert testrow['QN"4'] == 0 + assert testrow['QN"1'] == 18 + assert 
testrow['QN"2'] == 3 + assert testrow['QN"3'] == '+' + assert testrow['QN"4'] == 0 @pytest.mark.bigdata @@ -241,12 +241,14 @@ def test_regression_all_molecules(self, row): Expensive test - try all the molecules """ mol_id = row['TAG'] - if mol_id in (32001, 32002, 32005, 34001, 39003, 44004, 44009, 44012, - 81001 # may be fine? not entirely sure what's wrong - ): # O2 has masked QNs making it hard to test automatically + # O2 has masked QNs making it hard to test automatically (32...) + # 34001, 39003, 44004, 44009, 44012 are missing or corrupt molecules + # 81001 may be fine? not entirely sure what's wrong + if mol_id in (32001, 32002, 32005, + 34001, 39003, 44004, 44009, 44012, + 81001): # N2O = 44009 is just not there pytest.skip("Skipping O2 due to masked QNs") tbl = JPLSpec.get_molecule(mol_id) assert isinstance(tbl, Table) assert len(tbl) > 0 - From ea7cfe18e49cbd0ac53b71f94d0e0f8e7ef723dd Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sun, 9 Nov 2025 08:02:22 -0500 Subject: [PATCH 24/44] whitespace --- astroquery/linelists/cdms/core.py | 9 ++++----- .../linelists/cdms/tests/test_cdms_remote.py | 2 +- astroquery/linelists/jplspec/core.py | 12 +++++++----- astroquery/linelists/jplspec/setup_package.py | 1 - .../linelists/jplspec/tests/test_jplspec.py | 16 ++++++++-------- 5 files changed, 20 insertions(+), 20 deletions(-) diff --git a/astroquery/linelists/cdms/core.py b/astroquery/linelists/cdms/core.py index b2b70e6197..3a2888d184 100644 --- a/astroquery/linelists/cdms/core.py +++ b/astroquery/linelists/cdms/core.py @@ -12,7 +12,6 @@ # import configurable items declared in __init__.py from astroquery.linelists.cdms import conf from astroquery.exceptions import InvalidQueryError, EmptyResponseError -from astroquery import log from ..core import LineListClass import re @@ -309,7 +308,7 @@ def _parse_result(self, response, *, verbose=False): for key in fix_keys: if not np.issubdtype(result[key].dtype, np.integer): intcol = 
np.array(list(map(parse_letternumber, result[key])), - dtype=int) + dtype=int) result[key] = intcol # if there is a crash at this step, something went wrong with the query @@ -324,8 +323,8 @@ def _parse_result(self, response, *, verbose=False): except ValueError as ex: # Give users a more helpful exception when parsing fails new_message = ("Failed to parse CDMS response. This may be caused by a malformed search return. " - "You can check this by running `CDMS.get_molecule('')` instead; if it works, the " - "problem is caused by the CDMS search interface and cannot be worked around.") + "You can check this by running `CDMS.get_molecule('')` instead; if it works, the " + "problem is caused by the CDMS search interface and cannot be worked around.") raise ValueError(new_message) from ex return result @@ -453,7 +452,7 @@ def get_molecule(self, molecule_id, *, cache=True, return_response=False): response.raise_for_status() if 'Zero lines were found' in response.text: - raise EmptyResponseError(f"Response was empty; message was '{text}'.") + raise EmptyResponseError(f"Response was empty; message was '{response.text}'.") result = self._parse_cat(response.text) diff --git a/astroquery/linelists/cdms/tests/test_cdms_remote.py b/astroquery/linelists/cdms/tests/test_cdms_remote.py index 73c20f86a3..92d237d74e 100644 --- a/astroquery/linelists/cdms/tests/test_cdms_remote.py +++ b/astroquery/linelists/cdms/tests/test_cdms_remote.py @@ -98,7 +98,7 @@ def test_h2nc(): tbl = CDMS.query_lines(min_frequency=139.3 * u.GHz, max_frequency=141.5 * u.GHz, molecule='028528 H2NC') - + # these are the results that SHOULD be return if it actually worked assert isinstance(tbl, Table) assert len(tbl) >= 1 diff --git a/astroquery/linelists/jplspec/core.py b/astroquery/linelists/jplspec/core.py index 0577b42843..25f333ca41 100644 --- a/astroquery/linelists/jplspec/core.py +++ b/astroquery/linelists/jplspec/core.py @@ -281,7 +281,9 @@ def get_molecule(self, molecule_id, *, cache=True): if 
isinstance(molecule_id, (int, np.int32, np.int64)): molecule_str = f'{molecule_id:06d}' if len(molecule_str) > 6: - raise ValueError("molecule_id should be an integer with fewer than 6 digits or a length-6 string of numbers") + raise ValueError("molecule_id should be an integer with" + " fewer than 6 digits or a length-6 " + "string of numbers") elif isinstance(molecule_id, str): # this is for the common case where the molecule is specified e.g. as 028001 CO try: @@ -294,7 +296,7 @@ def get_molecule(self, molecule_id, *, cache=True): # Construct the URL to the catalog file url = f'https://spec.jpl.nasa.gov/ftp/pub/catalog/c{molecule_str}.cat' - + # Request the catalog file response = self._request(method='GET', url=url, timeout=self.TIMEOUT, cache=cache) @@ -302,10 +304,10 @@ def get_molecule(self, molecule_id, *, cache=True): if 'The requested URL was not found on this server.' in response.text: raise EmptyResponseError(f"No data found for molecule ID {molecule_id}.") - + # Parse the catalog file result = self._parse_cat(response) - + # Add metadata from species table species_table = self.get_species_table() # Find the row matching this molecule_id @@ -314,7 +316,7 @@ def get_molecule(self, molecule_id, *, cache=True): if len(matching_rows) > 0: # Add metadata as a dictionary result.meta = dict(zip(matching_rows.colnames, matching_rows[0])) - + return result def _parse_cat(self, response, *, verbose=False): diff --git a/astroquery/linelists/jplspec/setup_package.py b/astroquery/linelists/jplspec/setup_package.py index d9e08324b9..585e27fa4b 100644 --- a/astroquery/linelists/jplspec/setup_package.py +++ b/astroquery/linelists/jplspec/setup_package.py @@ -13,4 +13,3 @@ def get_package_data(): return {'astroquery.linelists.jplspec.tests': paths_test, 'astroquery.linelists.jplspec': paths_data, } - diff --git a/astroquery/linelists/jplspec/tests/test_jplspec.py b/astroquery/linelists/jplspec/tests/test_jplspec.py index f2b14e13c2..fcdfd76f37 100644 --- 
a/astroquery/linelists/jplspec/tests/test_jplspec.py +++ b/astroquery/linelists/jplspec/tests/test_jplspec.py @@ -122,10 +122,10 @@ def test_query_multi(): def test_parse_cat(): """Test parsing of catalog files with _parse_cat method.""" - + response = MockResponseSpec('H2O_sample.cat') tbl = JPLSpec._parse_cat(response) - + # Check table structure assert isinstance(tbl, Table) assert len(tbl) > 0 @@ -134,17 +134,17 @@ def test_parse_cat(): 'QN"1', 'QN"2', 'QN"3', 'QN"4', "QN'1", "QN'2", "QN'3", "QN'4" ]) - + # Check units assert tbl['FREQ'].unit == u.MHz assert tbl['ERR'].unit == u.MHz assert tbl['LGINT'].unit == u.nm**2 * u.MHz assert tbl['ELO'].unit == u.cm**(-1) - + # Check Lab flag exists and is boolean assert 'Lab' in tbl.colnames assert tbl['Lab'].dtype == bool - + # Check TAG values are positive (absolute values) assert all(tbl['TAG'] > 0) @@ -152,15 +152,15 @@ def test_parse_cat(): def test_get_molecule_input_validation(): """Test input validation for get_molecule method.""" import pytest - + # Test invalid string format with pytest.raises(ValueError): JPLSpec.get_molecule('invalid') - + # Test invalid type with pytest.raises(ValueError): JPLSpec.get_molecule(12.34) - + # Test wrong length string with pytest.raises(ValueError): JPLSpec.get_molecule(1234567) From 031c9499d319b310eac39fc571d080c626b6b76c Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sun, 9 Nov 2025 08:20:14 -0500 Subject: [PATCH 25/44] add docs about failing jpl, and update docs using "--doctest-plus-generate-diff=overwrite" --- astroquery/linelists/jplspec/core.py | 18 +++- docs/linelists/cdms/cdms.rst | 25 ++--- docs/linelists/jplspec/jplspec.rst | 149 +++++++++++++++------------ 3 files changed, 110 insertions(+), 82 deletions(-) diff --git a/astroquery/linelists/jplspec/core.py b/astroquery/linelists/jplspec/core.py index 25f333ca41..965ece40aa 100644 --- a/astroquery/linelists/jplspec/core.py +++ b/astroquery/linelists/jplspec/core.py @@ -175,11 +175,19 @@ def 
_parse_result(self, response, *, verbose=False): self.lookup_ids = build_lookup() payload = parse_qs(response.request.body) tbs = [self.get_molecule(mol) for mol in payload['Mol']] - for tb, mol in zip(tbs, payload['Mol']): - tb['Name'] = self.lookup_ids.find(mol, flags=0) - for key in tb.meta: - tb.meta[f'{mol}_{key}'] = tb.meta.pop(key) - tb = table.vstack(tbs) + if len(tbs) > 1: + mols = [] + for tb, mol in zip(tbs, payload['Mol']): + tb['Name'] = self.lookup_ids.find(mol, flags=0) + for key in tb.meta: + tb.meta[f'{mol}_{key}'] = tb.meta.pop(key) + mols.append(mol) + tb = table.vstack(tbs) + tb.meta['molecule_list'] = mols + else: + tb = tbs[0] + tb.meta['molecule_id'] = payload['Mol'][0] + return tb else: raise EmptyResponseError(f"Response was empty; message was '{response.text}'.") diff --git a/docs/linelists/cdms/cdms.rst b/docs/linelists/cdms/cdms.rst index 52b5613b56..816f427d98 100644 --- a/docs/linelists/cdms/cdms.rst +++ b/docs/linelists/cdms/cdms.rst @@ -34,17 +34,17 @@ each setting yields: ... molecule="028503 CO", ... 
get_query_payload=False) >>> response.pprint(max_width=120) - FREQ ERR LGINT DR ELO GUP MOLWT TAG QNFMT Ju Ku vu F1u F2u F3u Jl Kl vl F1l F2l F3l name Lab - MHz MHz nm2 MHz 1 / cm u - ----------- ------ ------- --- -------- --- ----- --- ----- --- --- --- --- --- --- --- --- --- --- --- --- ------- ---- - 115271.2018 0.0005 -5.0105 2 0.0 3 28 503 101 1 -- -- -- -- -- 0 -- -- -- -- -- CO, v=0 True - 230538.0 0.0005 -4.1197 2 3.845 5 28 503 101 2 -- -- -- -- -- 1 -- -- -- -- -- CO, v=0 True - 345795.9899 0.0005 -3.6118 2 11.535 7 28 503 101 3 -- -- -- -- -- 2 -- -- -- -- -- CO, v=0 True - 461040.7682 0.0005 -3.2657 2 23.0695 9 28 503 101 4 -- -- -- -- -- 3 -- -- -- -- -- CO, v=0 True - 576267.9305 0.0005 -3.0118 2 38.4481 11 28 503 101 5 -- -- -- -- -- 4 -- -- -- -- -- CO, v=0 True - 691473.0763 0.0005 -2.8193 2 57.6704 13 28 503 101 6 -- -- -- -- -- 5 -- -- -- -- -- CO, v=0 True - 806651.806 0.005 -2.6716 2 80.7354 15 28 503 101 7 -- -- -- -- -- 6 -- -- -- -- -- CO, v=0 True - 921799.7 0.005 -2.559 2 107.6424 17 28 503 101 8 -- -- -- -- -- 7 -- -- -- -- -- CO, v=0 True + FREQ ERR LGINT DR ELO GUP TAG QNFMT Ju Ku vu ... F3u Jl Kl vl F1l F2l F3l name MOLWT Lab + MHz MHz nm2 MHz 1 / cm ... u + ----------- ------ ------- --- -------- --- ------ ----- --- --- --- ... --- --- --- --- --- --- --- ------- ----- ---- + 115271.2018 0.0005 -5.0105 2 0.0 3 -28503 101 1 -- -- ... -- 0 -- -- -- -- -- CO, v=0 28 True + 230538.0 0.0005 -4.1197 2 3.845 5 -28503 101 2 -- -- ... -- 1 -- -- -- -- -- CO, v=0 28 True + 345795.9899 0.0005 -3.6118 2 11.535 7 -28503 101 3 -- -- ... -- 2 -- -- -- -- -- CO, v=0 28 True + 461040.7682 0.0005 -3.2657 2 23.0695 9 -28503 101 4 -- -- ... -- 3 -- -- -- -- -- CO, v=0 28 True + 576267.9305 0.0005 -3.0118 2 38.4481 11 -28503 101 5 -- -- ... -- 4 -- -- -- -- -- CO, v=0 28 True + 691473.0763 0.0005 -2.8193 2 57.6704 13 -28503 101 6 -- -- ... -- 5 -- -- -- -- -- CO, v=0 28 True + 806651.806 0.005 -2.6716 2 80.7354 15 -28503 101 7 -- -- ... 
-- 6 -- -- -- -- -- CO, v=0 28 True + 921799.7 0.005 -2.559 2 107.6424 17 -28503 101 8 -- -- ... -- 7 -- -- -- -- -- CO, v=0 28 True @@ -80,7 +80,6 @@ The units of the columns of the query can be displayed by calling DR int64 Column 0 ELO float64 1 / cm Column 0 GUP int64 Column 0 - MOLWT int64 u Column 0 TAG int64 Column 0 QNFMT int64 Column 0 Ju int64 Column 0 @@ -96,7 +95,9 @@ The units of the columns of the query can be displayed by calling F2l int64 MaskedColumn 8 F3l int64 MaskedColumn 8 name str7 Column 0 + MOLWT int64 u Column 0 Lab bool Column 0 + These come in handy for converting to other units easily, an example using a simplified version of the data above is shown below: diff --git a/docs/linelists/jplspec/jplspec.rst b/docs/linelists/jplspec/jplspec.rst index 9e8a233a1b..91bf3c51cb 100644 --- a/docs/linelists/jplspec/jplspec.rst +++ b/docs/linelists/jplspec/jplspec.rst @@ -14,6 +14,18 @@ module outputs the results that would arise from the `browser form using similar search criteria as the ones found in the form, and presents the output as a `~astropy.table.Table`. + +.. warning:: + Starting in mid-2025, the JPL web interface query tool went down for a + prolonged period. As of November 2025, it is still not up, but JPL staff are + aware of and seeking solutions to the problem. Until that web interface is + restored, the astroquery.jplspec module relies on workarounds that involve + downloading the full catalog files, which results in slightly larger data + transfers and un-filtered full-table results. Some metadata may also be + different. The examples and documents have been updated to show what to + expect in the current, partially-functional state. + + Examples ======== @@ -33,18 +45,19 @@ what each setting yields: ... min_strength=-500, ... molecule="28001 CO", ... 
get_query_payload=False) - >>> print(response) - FREQ ERR LGINT DR ELO GUP TAG QNFMT QN' QN" - MHz MHz nm2 MHz 1 / cm - ----------- ------ ------- --- -------- --- ------ ----- --- --- - 115271.2018 0.0005 -5.0105 2 0.0 3 -28001 101 1 0 - 230538.0 0.0005 -4.1197 2 3.845 5 -28001 101 2 1 - 345795.9899 0.0005 -3.6118 2 11.535 7 -28001 101 3 2 - 461040.7682 0.0005 -3.2657 2 23.0695 9 -28001 101 4 3 - 576267.9305 0.0005 -3.0118 2 38.4481 11 -28001 101 5 4 - 691473.0763 0.0005 -2.8193 2 57.6704 13 -28001 101 6 5 - 806651.806 0.005 -2.6716 2 80.7354 15 -28001 101 7 6 - 921799.7 0.005 -2.559 2 107.6424 17 -28001 101 8 7 + >>> response.pprint(max_lines=10) + FREQ ERR LGINT DR ELO GUP TAG QNFMT QN' QN" Lab + MHz MHz nm2 MHz 1 / cm + ------------ ------ -------- --- ---------- --- ----- ----- --- --- ----- + 115271.2018 0.0005 -5.0105 2 0.0 3 28001 101 1 0 True + 230538.0 0.0005 -4.1197 2 3.845 5 28001 101 2 1 True + ... ... ... ... ... ... ... ... ... ... ... + 9747448.9491 3.0112 -31.6588 2 14684.516 179 28001 101 89 88 False + 9845408.2504 3.1938 -32.4351 2 15009.6559 181 28001 101 90 89 False + 9942985.9145 3.3849 -33.2361 2 15338.0634 183 28001 101 91 90 False + Length = 91 rows + >>> response.meta + {'TAG': 28001, 'NAME': 'CO', 'NLINE': 91, 'QLOG1': 2.0369, 'QLOG2': 1.9123, 'QLOG3': 1.737, 'QLOG4': 1.4386, 'QLOG5': 1.1429, 'QLOG6': 0.8526, 'QLOG7': 0.5733, 'VER': '4*', 'molecule_id': '28001 CO'} The following example, with ``get_query_payload = True``, returns the payload: @@ -68,54 +81,58 @@ The units of the columns of the query can be displayed by calling ... min_strength=-500, ... molecule="28001 CO") >>> print(response.info) - - name dtype unit - ----- ------- ------- - FREQ float64 MHz - ERR float64 MHz - LGINT float64 nm2 MHz - DR int64 - ELO float64 1 / cm - GUP int64 - TAG int64 - QNFMT int64 - QN' int64 - QN" int64 +
+ name dtype unit + ----- ------- ------- + FREQ float64 MHz + ERR float64 MHz + LGINT float64 nm2 MHz + DR int64 + ELO float64 1 / cm + GUP int64 + TAG int64 + QNFMT int64 + QN' int64 + QN" int64 + Lab bool + These come in handy for converting to other units easily, an example using a simplified version of the data above is shown below: .. doctest-remote-data:: - >>> print (response['FREQ', 'ERR', 'ELO']) - FREQ ERR ELO - MHz MHz 1 / cm - ----------- ------ -------- - 115271.2018 0.0005 0.0 - 230538.0 0.0005 3.845 - 345795.9899 0.0005 11.535 - 461040.7682 0.0005 23.0695 - 576267.9305 0.0005 38.4481 - 691473.0763 0.0005 57.6704 - 806651.806 0.005 80.7354 - 921799.7 0.005 107.6424 - >>> response['FREQ'].quantity - - >>> response['FREQ'].to('GHz') - + >>> response['FREQ', 'ERR', 'ELO'].pprint(max_lines=10) + FREQ ERR ELO + MHz MHz 1 / cm + ------------ ------ ---------- + 115271.2018 0.0005 0.0 + 230538.0 0.0005 3.845 + ... ... ... + 9747448.9491 3.0112 14684.516 + 9845408.2504 3.1938 15009.6559 + 9942985.9145 3.3849 15338.0634 + Length = 91 rows + >>> response['FREQ'][:10].quantity + + >>> response['FREQ'][:10].to('GHz') + The parameters and response keys are described in detail under the Reference/API section. Looking Up More Information from the catdir.cat file ------------------------------------------------------- +---------------------------------------------------- -If you have found a molecule you are interested in, the TAG field -in the results provides enough information to access specific -molecule information such as the partition functions at different -temperatures. Keep in mind that a negative TAG value signifies that -the line frequency has been measured in the laboratory +If you have found a molecule you are interested in, the TAG field in the results +provides enough information to access specific molecule information such as the +partition functions at different temperatures. 
A negative TAG value signifies +that the line frequency has been measured in the laboratory. .. doctest-remote-data:: @@ -139,11 +156,9 @@ through metadata: {'Temperature (K)': [300, 225, 150, 75, 37.5, 18.5, 9.375]} -One of the advantages of using JPLSpec is the availability in the catalog -of the partition function at different temperatures for the molecules. As a -continuation of the example above, an example that accesses and plots the -partition function against the temperatures found in the metadata is shown -below: +JPLSpec catalogs the partition function at several temperatures for each +molecule. This example accesses and plots the partition function against the +temperatures found in the metadata: .. doctest-skip:: @@ -153,7 +168,7 @@ below: >>> plt.scatter(temp,part) >>> plt.xlabel('Temperature (K)') >>> plt.ylabel('Partition Function Value') - >>> plt.title('Parititon Fn vs Temp') + >>> plt.title('Partition Fn vs Temp') >>> plt.show() @@ -217,23 +232,27 @@ to query these directly. ... min_strength=-500, ... molecule="H2O", ... parse_name_locally=True) - >>> print(result) - FREQ ERR LGINT DR ELO GUP TAG QNFMT QN' QN" - MHz MHz nm2 MHz 1 / cm - ----------- -------- -------- --- --------- --- ------ ----- -------- -------- - 115542.5692 0.6588 -13.2595 3 4606.1683 35 18003 1404 17 810 0 18 513 0 - 139614.293 0.15 -9.3636 3 3080.1788 87 -18003 1404 14 6 9 0 15 312 0 - 177317.068 0.15 -10.3413 3 3437.2774 31 -18003 1404 15 610 0 16 313 0 - 183310.087 0.001 -3.6463 3 136.1639 7 -18003 1404 3 1 3 0 2 2 0 0 - ... - Length = 2000 rows + >>> result.pprint(max_lines=10) + FREQ ERR LGINT DR ELO GUP TAG QNFMT QN'1 QN"1 QN'2 QN"2 QN'3 QN"3 QN'4 QN"4 Lab + MHz MHz nm2 MHz 1 / cm + ------------ ------ -------- --- --------- --- ----- ----- ---- ---- ---- ---- ---- ---- ---- ---- ----- + 8006.5805 2.851 -18.6204 3 6219.6192 45 18003 1404 22 21 4 7 18 15 0 0 False + 12478.2535 0.2051 -13.1006 3 3623.7652 31 18003 1404 15 16 7 4 9 12 0 0 False + ... ... ... ... ... ... 
... ... ... ... ... ... ... ... ... ... ... + 9981215.769 6.1776 -12.0101 3 5271.3682 45 18003 1404 22 23 2 1 20 23 0 0 False + 9981323.7676 6.1773 -11.5329 3 5271.3682 135 18003 1404 22 23 3 0 20 23 0 0 False + 9992065.9213 0.0482 -5.528 3 882.8904 15 18003 1404 7 8 6 1 2 7 0 0 False + Length = 1376 rows + Searches like these can lead to very broad queries, and may be limited in response length: .. doctest-remote-data:: - >>> print(result.meta['comments']) + >>> # the 'comments' metadata field is only populated if the query tool is run + >>> # the get-whole-table workaround (November 2025) will not populate it + >>> print(result.meta['comments']) # doctest: +SKIP ['', '', '', '', '', 'form is currently limilted to 2000 lines. Please limit your search.'] Inspecting the returned molecules shows that the 'H2O' string was processed as a @@ -247,7 +266,7 @@ combination of characters 'H2O': ... for (species, tag) in JPLSpec.lookup_ids.items() ... if tag in tags} >>> print(species) - {'H2O': 18003, 'H2O v2,2v2,v': 18005, 'H2O-17': 19003, 'H2O-18': 20003, 'H2O2': 34004} + {'H2O': 18003} A few examples that show the power of the regex option are the following: From c10a2f935d24ac7304138cc32c482c44237bf2d7 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sun, 9 Nov 2025 08:24:20 -0500 Subject: [PATCH 26/44] add data file to setup_package --- astroquery/linelists/jplspec/setup_package.py | 1 + 1 file changed, 1 insertion(+) diff --git a/astroquery/linelists/jplspec/setup_package.py b/astroquery/linelists/jplspec/setup_package.py index 585e27fa4b..7439815548 100644 --- a/astroquery/linelists/jplspec/setup_package.py +++ b/astroquery/linelists/jplspec/setup_package.py @@ -8,6 +8,7 @@ def get_package_data(): paths_test = [os.path.join('data', 'CO.data'), os.path.join('data', 'CO_6.data'), + os.path.join('data', 'H2O_sample.cat'), os.path.join('data', 'multi.data')] paths_data = [os.path.join('data', 'catdir.cat')] From 0e8a6b7b36814120523defc74cda1775ed7d2276 
Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sun, 9 Nov 2025 08:26:56 -0500 Subject: [PATCH 27/44] fix bad changelog merge --- CHANGES.rst | 43 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 27951e9a72..70e6a88a26 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -178,11 +178,46 @@ mast - Fix bug in ``utils.remove_duplicate_products`` that does not retain the order of the products in an input table. [#3314] -- Added ``return_uri_map`` parameter to ``Observations.get_cloud_uris`` to return a mapping of the input data product URIs - to the returned cloud URIs. [#3314] +- Add ``return_uri_map`` parameter to ``Observations.get_cloud_uris`` to return + a mapping of the input data product URIs to the returned cloud URIs. [#3314] -- Added ``verbose`` parameter to ``Observations.get_cloud_uris`` to control whether warnings are logged when a product cannot - be found in the cloud. [#3314] +- Add ``verbose`` parameter to ``Observations.get_cloud_uris`` to control + whether warnings are logged when a product cannot be found in the cloud. + [#3314] + +- Improved ``MastMissions`` queries to accept lists for query critieria + values, in addition to comma-delimited strings. [#3319] + +- Enhanced ``filter_products`` methods in ``MastMissions`` and ``Observations`` + to support advanced filtering expressions for numeric columns and with + negative values. [#3365, #3393] + +- Fix bug where duplicate columns from server responses cause an error when + converting to an ``~astropy.table.Table``. [#3400] + +- Support for resolving multiple object names at once with ``resolve_object``, + including automatic batching into groups of up to 30 names per request to + the name translation service. [#3398] + +simbad +^^^^^^ + +- Add ``async_job`` option in all query methods. It provides slower to start, + but more robust queries for which the timeout can be increased. 
[#3305] + +skyview +^^^^^^^ + +- Add ``get_query_payload`` kwarg to ``Skyview.get_images()`` and + ``Skyview.get_images_list()`` to return the query payload. [#3318] + +- Changed SkyView URL to https. [#3346] + +utils.tap +^^^^^^^^^ + +- The method ``upload_table`` accepts file formats accepted by astropy's + ``Table.read()``. [#3295] Infrastructure, Utility and Other Changes and Additions From 330beb0321c96625d4c15682fdd500d97c5985fe Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sun, 9 Nov 2025 08:28:50 -0500 Subject: [PATCH 28/44] revert another bad merge --- astroquery/alma/core.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/astroquery/alma/core.py b/astroquery/alma/core.py index cd216b61e3..659347ca95 100644 --- a/astroquery/alma/core.py +++ b/astroquery/alma/core.py @@ -770,7 +770,6 @@ def _get_dataarchive_url(self): return self.dataarchive_url def get_data_info(self, uids, *, expand_tarfiles=False, - cutouts=True, with_auxiliary=True, with_rawdata=True): """ Return information about the data associated with ALMA uid(s) @@ -849,9 +848,8 @@ def get_data_info(self, uids, *, expand_tarfiles=False, recursive_access_url = self.get_adhoc_service_access_url(adhoc_service) file_id = recursive_access_url.split('ID=')[1] expanded_tar = self.get_data_info(file_id) - if not cutouts: - expanded_tar = expanded_tar[ - expanded_tar['semantics'] != '#cutout'] + expanded_tar = expanded_tar[ + expanded_tar['semantics'] != '#cutout'] if not expanded_result: expanded_result = expanded_tar else: From 745282fa5a66250828af92f42f73a753b0c04cbd Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sun, 9 Nov 2025 08:29:24 -0500 Subject: [PATCH 29/44] space --- astroquery/jplspec/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/astroquery/jplspec/__init__.py b/astroquery/jplspec/__init__.py index e930c5ab2c..cf0783e2a8 100644 --- a/astroquery/jplspec/__init__.py +++ b/astroquery/jplspec/__init__.py @@ 
-1,7 +1,7 @@ # Licensed under a 3-clause BSD style license - see LICENSE.rst """ JPL Spectral Catalog (Deprecated Location) -------------------------------------------- +------------------------------------------ .. deprecated:: 0.4.8 The `astroquery.jplspec` module has been moved to `astroquery.linelists.jplspec`. From e4d13d901bcbf65b30b6dc14c9a17e9aa5de661e Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sun, 9 Nov 2025 09:00:10 -0500 Subject: [PATCH 30/44] add a test for the fallback, which forced refactoring b/c the original solution was wrong. Added inter-doc links --- astroquery/linelists/cdms/core.py | 91 ++++++++++--------- .../linelists/cdms/tests/test_cdms_remote.py | 24 +++++ .../jplspec/tests/test_jplspec_remote.py | 27 ++++++ docs/linelists/cdms/cdms.rst | 18 +++- docs/linelists/jplspec/jplspec.rst | 2 + 5 files changed, 115 insertions(+), 47 deletions(-) diff --git a/astroquery/linelists/cdms/core.py b/astroquery/linelists/cdms/core.py index 3a2888d184..ed07babbde 100644 --- a/astroquery/linelists/cdms/core.py +++ b/astroquery/linelists/cdms/core.py @@ -8,14 +8,13 @@ from astropy import table from astropy.io import ascii from astroquery.query import BaseQuery -from astroquery.utils import async_to_sync # import configurable items declared in __init__.py from astroquery.linelists.cdms import conf from astroquery.exceptions import InvalidQueryError, EmptyResponseError -from ..core import LineListClass +from ..core import LineListClass, parse_letternumber +from astroquery.utils import process_asyncs import re -import string __all__ = ['CDMS', 'CDMSClass'] @@ -25,7 +24,6 @@ def data_path(filename): return os.path.join(data_dir, filename) -@async_to_sync class CDMSClass(BaseQuery, LineListClass): # use the Configuration Items imported from __init__.py URL = conf.search @@ -34,11 +32,36 @@ class CDMSClass(BaseQuery, LineListClass): TIMEOUT = conf.timeout MALFORMATTED_MOLECULE_LIST = ['017506 NH3-wHFS', '028528 H2NC', '058501 H2C2S', 
'064527 HC3HCN'] + def __init__(self, fallback_to_getmolecule=False): + super().__init__() + + def query_lines(self, min_frequency, max_frequency, *, + min_strength=-500, molecule='All', + temperature_for_intensity=300, flags=0, + parse_name_locally=False, get_query_payload=False, + fallback_to_getmolecule=False, + cache=True): + response = self.query_lines_async(min_frequency=min_frequency, + max_frequency=max_frequency, + min_strength=min_strength, + molecule=molecule, + temperature_for_intensity=temperature_for_intensity, + flags=flags, + parse_name_locally=parse_name_locally, + get_query_payload=get_query_payload, + fallback_to_getmolecule=fallback_to_getmolecule, + cache=cache) + if fallback_to_getmolecule: + return response + else: + return self._parse_result(response) + def query_lines_async(self, min_frequency, max_frequency, *, min_strength=-500, molecule='All', temperature_for_intensity=300, flags=0, parse_name_locally=False, get_query_payload=False, - cache=True, fallback_to_getmolecule=False): + fallback_to_getmolecule=False, + cache=True): """ Creates an HTTP POST request based on the desired parameters and returns a response. @@ -91,10 +114,6 @@ def query_lines_async(self, min_frequency, max_frequency, *, Defaults to True. If set overrides global caching behavior. See :ref:`caching documentation `. - fallback_to_getmolecule : bool, optional - If specified, and if the molecule specified is in the list of - known malformatted molecules, return the get_molecule results instead. 
- Returns ------- response : `requests.Response` @@ -180,24 +199,32 @@ def query_lines_async(self, min_frequency, max_frequency, *, if not ok: raise EmptyResponseError("Did not find table in response") + # Check if a malformatted molecule was requested and use fallback if enabled + # accounts for three formats, e.g.: '058501' or 'H2C2S' or '058501 H2C2S' + badlist = (self.MALFORMATTED_MOLECULE_LIST + + [y for x in self.MALFORMATTED_MOLECULE_LIST for y in x.split()]) + + # extract molecule from the response or request + requested_molecule = payload['Molecules'][0] + + if requested_molecule and requested_molecule in badlist: + if self.fallback_to_getmolecule: + return self.get_molecule(requested_molecule) + else: + raise ValueError(f"Molecule {requested_molecule} is known not to comply with standard CDMS format. " + f"Try get_molecule({requested_molecule}) instead or set " + f"CDMS.fallback_to_getmolecule = True.") + baseurl = self.URL.split('cgi-bin')[0] fullurl = f'{baseurl}/{url}' response2 = self._request(method='GET', url=fullurl, timeout=self.TIMEOUT, cache=cache) - # accounts for three formats, e.g.: '058501' or 'H2C2S' or '058501 H2C2S' - badlist = (self.MALFORMATTED_MOLECULE_LIST + # noqa - [y for x in self.MALFORMATTED_MOLECULE_LIST for y in x.split()]) - if 'Moleculesgrp' not in payload.keys() and payload['Molecules'] in badlist: - if fallback_to_getmolecule: - return self.get_molecule(payload['Molecules'], cache=cache) - else: - raise ValueError(f"Molecule {payload['Molecules']} is known not to comply with standard CDMS format. 
" - f"Try get_molecule({payload['Molecules']}) instead.") - return response2 + query_lines.__doc__ = process_asyncs.async_to_sync_docstr(query_lines_async.__doc__) + def _parse_result(self, response, *, verbose=False): """ Parse a response into an `~astropy.table.Table` @@ -245,6 +272,9 @@ def _parse_result(self, response, *, verbose=False): soup = BeautifulSoup(response.text, 'html.parser') text = soup.find('pre').text + + # this is a different workaround to try to make _some_ of the bad molecules parseable + # (it doesn't solve all of them, which is why the above fallback exists) need_to_filter_bad_molecules = False for bad_molecule in self.MALFORMATTED_MOLECULE_LIST: if text.find(bad_molecule.split()[1]) > -1: @@ -475,29 +505,6 @@ def _parse_cat(self, text, *, verbose=False): CDMS = CDMSClass() -def parse_letternumber(st): - """ - Parse CDMS's two-letter QNs into integers. - - Masked values are converted to -999999. - - From the CDMS docs: - "Exactly two characters are available for each quantum number. Therefore, half - integer quanta are rounded up ! In addition, capital letters are used to - indicate quantum numbers larger than 99. E. g. A0 is 100, Z9 is 359. Lower case characters - are used similarly to signal negative quantum numbers smaller than –9. e. g., a0 is –10, b0 is –20, etc." 
- """ - if np.ma.is_masked(st): - return -999999 - - asc = string.ascii_lowercase - ASC = string.ascii_uppercase - newst = ''.join(['-' + str((asc.index(x)+1)) if x in asc else - str((ASC.index(x)+10)) if x in ASC else - x for x in st]) - return int(newst) - - class Lookuptable(dict): def find(self, st, flags): diff --git a/astroquery/linelists/cdms/tests/test_cdms_remote.py b/astroquery/linelists/cdms/tests/test_cdms_remote.py index 92d237d74e..8ccefc03bb 100644 --- a/astroquery/linelists/cdms/tests/test_cdms_remote.py +++ b/astroquery/linelists/cdms/tests/test_cdms_remote.py @@ -109,6 +109,30 @@ def test_h2nc(): assert tbl['TAG'][0] == 28528 +@pytest.mark.remote_data +def test_fallback_to_getmolecule_parameter(): + """ + Test that fallback_to_getmolecule attribute controls query behavior. + + When fallback_to_getmolecule is True, query_lines should use get_molecule + internally for malformed molecules. + """ + + # Test with a malformed molecule and fallback enabled + CDMS.fallback_to_getmolecule = True + tbl_fallback = CDMS.query_lines( + min_frequency=100 * u.GHz, + max_frequency=200 * u.GHz, + min_strength=-500, + molecule="028528 H2NC") + + assert isinstance(tbl_fallback, Table) + assert len(tbl_fallback) > 0 + + # I don't think the state set within this module affects the rest of the + # tests but just in case + CDMS.fallback_to_getmolecule = False + @pytest.mark.remote_data def test_remote_regex(): diff --git a/astroquery/linelists/jplspec/tests/test_jplspec_remote.py b/astroquery/linelists/jplspec/tests/test_jplspec_remote.py index f2bdb1b456..d3f3da6ad0 100644 --- a/astroquery/linelists/jplspec/tests/test_jplspec_remote.py +++ b/astroquery/linelists/jplspec/tests/test_jplspec_remote.py @@ -146,6 +146,33 @@ def test_get_molecule_string_id(): assert 'CO' in tbl.meta['NAME'] +@pytest.mark.remote_data +def test_fallback_to_getmolecule_parameter(): + """ + Test that fallback_to_getmolecule parameter controls query behavior. 
+ + When fallback_to_getmolecule is True, query_lines should use get_molecule + internally and filter the results, which adds the 'Lab' and 'Name' columns. + When False, it uses the direct query mechanism (when available). + """ + # Test with fallback enabled + JPLSpec.fallback_to_getmolecule = True + tbl_fallback = JPLSpec.query_lines(min_frequency=100 * u.GHz, + max_frequency=200 * u.GHz, + min_strength=-500, + molecule="28001 CO") + + assert isinstance(tbl_fallback, Table) + assert len(tbl_fallback) > 0 + # When using fallback, should have Lab and Name columns + assert 'Lab' in tbl_fallback.colnames + assert 'Name' in tbl_fallback.colnames + + # All returned frequencies should be within the requested range + assert all(tbl_fallback['FREQ'].quantity >= 100 * u.GHz) + assert all(tbl_fallback['FREQ'].quantity <= 200 * u.GHz) + + @pytest.mark.remote_data @pytest.mark.parametrize('mol_id,expected_name', [ (28001, 'CO'), # Simple diatomic diff --git a/docs/linelists/cdms/cdms.rst b/docs/linelists/cdms/cdms.rst index 816f427d98..dc08bcfb8d 100644 --- a/docs/linelists/cdms/cdms.rst +++ b/docs/linelists/cdms/cdms.rst @@ -34,8 +34,8 @@ each setting yields: ... molecule="028503 CO", ... get_query_payload=False) >>> response.pprint(max_width=120) - FREQ ERR LGINT DR ELO GUP TAG QNFMT Ju Ku vu ... F3u Jl Kl vl F1l F2l F3l name MOLWT Lab - MHz MHz nm2 MHz 1 / cm ... u + FREQ ERR LGINT DR ELO GUP TAG QNFMT Ju Ku vu ... F3u Jl Kl vl F1l F2l F3l name MOLWT Lab + MHz MHz nm2 MHz 1 / cm ... u ----------- ------ ------- --- -------- --- ------ ----- --- --- --- ... --- --- --- --- --- --- --- ------- ----- ---- 115271.2018 0.0005 -5.0105 2 0.0 3 -28503 101 1 -- -- ... -- 0 -- -- -- -- -- CO, v=0 28 True 230538.0 0.0005 -4.1197 2 3.845 5 -28503 101 2 -- -- ... 
-- 1 -- -- -- -- -- CO, v=0 28 True @@ -142,7 +142,7 @@ laboratory but not in space >>> result = CDMS.get_species_table() >>> mol = result[result['tag'] == 28503] >>> mol.pprint(max_width=160) - tag molecule Name #lines lg(Q(1000)) lg(Q(500)) lg(Q(300)) ... lg(Q(9.375)) lg(Q(5.000)) lg(Q(2.725)) Ver. Documentation Date of entry Entry + tag molecule Name #lines lg(Q(1000)) lg(Q(500)) lg(Q(300)) ... lg(Q(9.375)) lg(Q(5.000)) lg(Q(2.725)) Ver. Documentation Date of entry Entry ----- -------- --------- ------ ----------- ---------- ---------- ... ------------ ------------ ------------ ---- ------------- ------------- ----------- 28503 CO, v=0 CO, v = 0 95 2.5595 2.2584 2.0369 ... 0.5733 0.3389 0.1478 1 e028503.cat Oct. 2000 w028503.cat @@ -304,7 +304,15 @@ It can be valuable to check this for any given molecule. Querying the Catalog with Regexes and Relative names ---------------------------------------------------- -The regular expression parsing is analogous to that in the JPLSpec module. +The regular expression parsing is analogous to that in +:mod:`astroquery.linelists.jplspec`. See :ref:`regex_querying_linelists`. + +Handling Malformatted Molecules +------------------------------- + +There are some entries in the CDMS catalog that get mangled by the query tool, +but the underlying data are still good. This seems to affect primarily those +molecules with excessive numbers of quantum numbers such as H2NC. Troubleshooting @@ -317,7 +325,7 @@ If you are repeatedly getting failed queries, or bad/out-of-date results, try cl >>> from astroquery.linelists.cdms import CDMS >>> CDMS.clear_cache() -If this function is unavailable, upgrade your version of astroquery. +If this function is unavailable, upgrade your version of astroquery. The ``clear_cache`` function was introduced in version 0.4.7.dev8479. 
diff --git a/docs/linelists/jplspec/jplspec.rst b/docs/linelists/jplspec/jplspec.rst index 91bf3c51cb..4c8b977f4c 100644 --- a/docs/linelists/jplspec/jplspec.rst +++ b/docs/linelists/jplspec/jplspec.rst @@ -209,6 +209,8 @@ other temperatures using curve fitting models: The resulting plot from the example above +.. _regex_querying_linelists: + Querying the Catalog with Regexes and Relative names ---------------------------------------------------- From 7a1d6706b90030d473ec06a8339f1bdfdaa7da8e Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sun, 9 Nov 2025 09:14:43 -0500 Subject: [PATCH 31/44] move around the fallback machinery and give more useful user messages --- astroquery/linelists/cdms/core.py | 100 ++++++++++-------- .../linelists/cdms/tests/test_cdms_remote.py | 5 +- 2 files changed, 61 insertions(+), 44 deletions(-) diff --git a/astroquery/linelists/cdms/core.py b/astroquery/linelists/cdms/core.py index ed07babbde..b2e6ce2197 100644 --- a/astroquery/linelists/cdms/core.py +++ b/astroquery/linelists/cdms/core.py @@ -35,26 +35,68 @@ class CDMSClass(BaseQuery, LineListClass): def __init__(self, fallback_to_getmolecule=False): super().__init__() + def _mol_to_payload(self, molecule, parse_name_locally, flags): + if parse_name_locally: + self.lookup_ids = build_lookup() + luts = self.lookup_ids.find(molecule, flags) + if len(luts) == 0: + raise InvalidQueryError('No matching species found. 
Please ' + 'refine your search or read the Docs ' + 'for pointers on how to search.') + return tuple(f"{val:06d} {key}" + for key, val in luts.items())[0] + else: + return molecule + def query_lines(self, min_frequency, max_frequency, *, min_strength=-500, molecule='All', temperature_for_intensity=300, flags=0, parse_name_locally=False, get_query_payload=False, fallback_to_getmolecule=False, + verbose=False, cache=True): - response = self.query_lines_async(min_frequency=min_frequency, - max_frequency=max_frequency, - min_strength=min_strength, - molecule=molecule, - temperature_for_intensity=temperature_for_intensity, - flags=flags, - parse_name_locally=parse_name_locally, - get_query_payload=get_query_payload, - fallback_to_getmolecule=fallback_to_getmolecule, - cache=cache) - if fallback_to_getmolecule: - return response + + # Check if a malformatted molecule was requested and use fallback if enabled + # accounts for three formats, e.g.: '058501' or 'H2C2S' or '058501 H2C2S' + badlist = (self.MALFORMATTED_MOLECULE_LIST + + [y for x in self.MALFORMATTED_MOLECULE_LIST for y in x.split()]) + + # extract molecule from the response or request + requested_molecule = self._mol_to_payload(molecule, parse_name_locally, flags) if molecule != 'All' else None + + if requested_molecule and requested_molecule in badlist: + if fallback_to_getmolecule: + try: + return self.get_molecule(requested_molecule[:6]) + except ValueError as ex: + # try to give the users good guidance on which parameters will work + if "molecule_id should be a length-6 string of numbers" in str(ex): + if parse_name_locally: + raise ValueError(f"Molecule {molecule} could not be parsed or identified." + " Check that the name was correctly specified.") + else: + raise ValueError(f"Molecule {molecule} needs to be formatted as" + " a 6-digit string ID for the get_molecule fallback to work." 
+ " Try setting parse_name_locally=True " + "to turn your molecule name into a CDMS number ID.") + else: + raise ex + else: + raise ValueError(f"Molecule {requested_molecule} is known not to comply with standard CDMS format. " + f"Try get_molecule({requested_molecule}) instead or set " + f"CDMS.fallback_to_getmolecule = True.") else: - return self._parse_result(response) + response = self.query_lines_async(min_frequency=min_frequency, + max_frequency=max_frequency, + min_strength=min_strength, + molecule=molecule, + temperature_for_intensity=temperature_for_intensity, + flags=flags, + parse_name_locally=parse_name_locally, + get_query_payload=get_query_payload, + fallback_to_getmolecule=fallback_to_getmolecule, + cache=cache) + return self._parse_result(response, molname=molecule, verbose=verbose) def query_lines_async(self, min_frequency, max_frequency, *, min_strength=-500, molecule='All', @@ -163,17 +205,7 @@ def query_lines_async(self, min_frequency, max_frequency, *, payload['Moleculesgrp'] = 'all species' else: if molecule is not None: - if parse_name_locally: - self.lookup_ids = build_lookup() - luts = self.lookup_ids.find(molecule, flags) - if len(luts) == 0: - raise InvalidQueryError('No matching species found. 
Please ' - 'refine your search or read the Docs ' - 'for pointers on how to search.') - payload['Molecules'] = tuple(f"{val:06d} {key}" - for key, val in luts.items())[0] - else: - payload['Molecules'] = molecule + payload['Molecules'] = self._mol_to_payload(molecule, parse_name_locally, flags) if get_query_payload: return payload @@ -199,21 +231,6 @@ def query_lines_async(self, min_frequency, max_frequency, *, if not ok: raise EmptyResponseError("Did not find table in response") - # Check if a malformatted molecule was requested and use fallback if enabled - # accounts for three formats, e.g.: '058501' or 'H2C2S' or '058501 H2C2S' - badlist = (self.MALFORMATTED_MOLECULE_LIST - + [y for x in self.MALFORMATTED_MOLECULE_LIST for y in x.split()]) - - # extract molecule from the response or request - requested_molecule = payload['Molecules'][0] - - if requested_molecule and requested_molecule in badlist: - if self.fallback_to_getmolecule: - return self.get_molecule(requested_molecule) - else: - raise ValueError(f"Molecule {requested_molecule} is known not to comply with standard CDMS format. 
" - f"Try get_molecule({requested_molecule}) instead or set " - f"CDMS.fallback_to_getmolecule = True.") baseurl = self.URL.split('cgi-bin')[0] fullurl = f'{baseurl}/{url}' @@ -225,7 +242,7 @@ def query_lines_async(self, min_frequency, max_frequency, *, query_lines.__doc__ = process_asyncs.async_to_sync_docstr(query_lines_async.__doc__) - def _parse_result(self, response, *, verbose=False): + def _parse_result(self, response, *, verbose=False, molname=None): """ Parse a response into an `~astropy.table.Table` @@ -272,7 +289,6 @@ def _parse_result(self, response, *, verbose=False): soup = BeautifulSoup(response.text, 'html.parser') text = soup.find('pre').text - # this is a different workaround to try to make _some_ of the bad molecules parseable # (it doesn't solve all of them, which is why the above fallback exists) need_to_filter_bad_molecules = False @@ -353,7 +369,7 @@ def _parse_result(self, response, *, verbose=False): except ValueError as ex: # Give users a more helpful exception when parsing fails new_message = ("Failed to parse CDMS response. This may be caused by a malformed search return. 
" - "You can check this by running `CDMS.get_molecule('')` instead; if it works, the " + f"You can check this by running `CDMS.get_molecule('{molname}')` instead; if it works, the " "problem is caused by the CDMS search interface and cannot be worked around.") raise ValueError(new_message) from ex diff --git a/astroquery/linelists/cdms/tests/test_cdms_remote.py b/astroquery/linelists/cdms/tests/test_cdms_remote.py index 8ccefc03bb..2bf46bb7a0 100644 --- a/astroquery/linelists/cdms/tests/test_cdms_remote.py +++ b/astroquery/linelists/cdms/tests/test_cdms_remote.py @@ -119,12 +119,13 @@ def test_fallback_to_getmolecule_parameter(): """ # Test with a malformed molecule and fallback enabled - CDMS.fallback_to_getmolecule = True tbl_fallback = CDMS.query_lines( min_frequency=100 * u.GHz, max_frequency=200 * u.GHz, min_strength=-500, - molecule="028528 H2NC") + molecule="028528 H2NC", + fallback_to_getmolecule=True + ) assert isinstance(tbl_fallback, Table) assert len(tbl_fallback) > 0 From aa23d3fe6d025ceeecd47d6fc1199a5c2b936d16 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sun, 9 Nov 2025 09:15:54 -0500 Subject: [PATCH 32/44] remove unnecessary parameter --- astroquery/linelists/cdms/core.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/astroquery/linelists/cdms/core.py b/astroquery/linelists/cdms/core.py index b2e6ce2197..ca659588a5 100644 --- a/astroquery/linelists/cdms/core.py +++ b/astroquery/linelists/cdms/core.py @@ -94,7 +94,6 @@ def query_lines(self, min_frequency, max_frequency, *, flags=flags, parse_name_locally=parse_name_locally, get_query_payload=get_query_payload, - fallback_to_getmolecule=fallback_to_getmolecule, cache=cache) return self._parse_result(response, molname=molecule, verbose=verbose) @@ -102,7 +101,6 @@ def query_lines_async(self, min_frequency, max_frequency, *, min_strength=-500, molecule='All', temperature_for_intensity=300, flags=0, parse_name_locally=False, get_query_payload=False, - 
fallback_to_getmolecule=False, cache=True): """ Creates an HTTP POST request based on the desired parameters and From ed1ef614d5ef25d32967f48619ffc77bb41beef4 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sun, 9 Nov 2025 09:23:15 -0500 Subject: [PATCH 33/44] remove a redundant (and wildly incorrect) test --- astroquery/linelists/jplspec/core.py | 1 + .../jplspec/tests/test_jplspec_remote.py | 65 ++++++------------- 2 files changed, 20 insertions(+), 46 deletions(-) diff --git a/astroquery/linelists/jplspec/core.py b/astroquery/linelists/jplspec/core.py index 965ece40aa..426dde2c53 100644 --- a/astroquery/linelists/jplspec/core.py +++ b/astroquery/linelists/jplspec/core.py @@ -187,6 +187,7 @@ def _parse_result(self, response, *, verbose=False): else: tb = tbs[0] tb.meta['molecule_id'] = payload['Mol'][0] + tb.meta['molecule_name'] = self.lookup_ids.find(payload['Mol'][0], flags=0) return tb else: diff --git a/astroquery/linelists/jplspec/tests/test_jplspec_remote.py b/astroquery/linelists/jplspec/tests/test_jplspec_remote.py index d3f3da6ad0..33e64c0c8f 100644 --- a/astroquery/linelists/jplspec/tests/test_jplspec_remote.py +++ b/astroquery/linelists/jplspec/tests/test_jplspec_remote.py @@ -26,29 +26,6 @@ def test_remote(): assert tbl['FREQ'][35] == 987926.7590 -@pytest.mark.remote_data -def test_remote_fallback(): - JPLSpec.fallback_to_getmolecule = True - tbl = JPLSpec.query_lines(min_frequency=500 * u.GHz, - max_frequency=1000 * u.GHz, - min_strength=-500, - molecule="18003 H2O") - assert isinstance(tbl, Table) - tbl = tbl[((tbl['FREQ'].quantity > 500*u.GHz) & (tbl['FREQ'].quantity < 1*u.THz))] - assert len(tbl) == 36 - assert set(tbl.keys()) == set(['FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', - 'TAG', 'QNFMT', 'Lab', 'Name', - 'QN"1', 'QN"2', 'QN"3', 'QN"4', - "QN'1", "QN'2", "QN'3", "QN'4" - ]) - - assert tbl['FREQ'][0] == 503568.5200 - assert tbl['ERR'][0] == 0.0200 - assert tbl['LGINT'][0] == -4.9916 - assert tbl['ERR'][7] == 12.4193 - 
assert tbl['FREQ'][35] == 987926.7590 - - @pytest.mark.remote_data def test_remote_regex_fallback(): """ @@ -147,30 +124,26 @@ def test_get_molecule_string_id(): @pytest.mark.remote_data -def test_fallback_to_getmolecule_parameter(): - """ - Test that fallback_to_getmolecule parameter controls query behavior. - - When fallback_to_getmolecule is True, query_lines should use get_molecule - internally and filter the results, which adds the 'Lab' and 'Name' columns. - When False, it uses the direct query mechanism (when available). - """ - # Test with fallback enabled +def test_remote_fallback(): JPLSpec.fallback_to_getmolecule = True - tbl_fallback = JPLSpec.query_lines(min_frequency=100 * u.GHz, - max_frequency=200 * u.GHz, - min_strength=-500, - molecule="28001 CO") - - assert isinstance(tbl_fallback, Table) - assert len(tbl_fallback) > 0 - # When using fallback, should have Lab and Name columns - assert 'Lab' in tbl_fallback.colnames - assert 'Name' in tbl_fallback.colnames - - # All returned frequencies should be within the requested range - assert all(tbl_fallback['FREQ'].quantity >= 100 * u.GHz) - assert all(tbl_fallback['FREQ'].quantity <= 200 * u.GHz) + tbl = JPLSpec.query_lines(min_frequency=500 * u.GHz, + max_frequency=1000 * u.GHz, + min_strength=-500, + molecule="18003 H2O") + assert isinstance(tbl, Table) + tbl = tbl[((tbl['FREQ'].quantity > 500*u.GHz) & (tbl['FREQ'].quantity < 1*u.THz))] + assert len(tbl) == 36 + assert set(tbl.keys()) == set(['FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', + 'TAG', 'QNFMT', 'Lab', + 'QN"1', 'QN"2', 'QN"3', 'QN"4', + "QN'1", "QN'2", "QN'3", "QN'4" + ]) + + assert tbl['FREQ'][0] == 503568.5200 + assert tbl['ERR'][0] == 0.0200 + assert tbl['LGINT'][0] == -4.9916 + assert tbl['ERR'][7] == 12.4193 + assert tbl['FREQ'][35] == 987926.7590 @pytest.mark.remote_data From 6f289889e1295242eea89e35999d10ab7bbb3f75 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sun, 9 Nov 2025 09:35:32 -0500 Subject: [PATCH 34/44] fix 
more parsing --- astroquery/linelists/cdms/core.py | 23 +++++++++++++---------- docs/linelists/jplspec/jplspec.rst | 2 +- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/astroquery/linelists/cdms/core.py b/astroquery/linelists/cdms/core.py index ca659588a5..10dcde9fff 100644 --- a/astroquery/linelists/cdms/core.py +++ b/astroquery/linelists/cdms/core.py @@ -64,7 +64,7 @@ def query_lines(self, min_frequency, max_frequency, *, # extract molecule from the response or request requested_molecule = self._mol_to_payload(molecule, parse_name_locally, flags) if molecule != 'All' else None - if requested_molecule and requested_molecule in badlist: + if requested_molecule and requested_molecule in badlist and not get_query_payload: if fallback_to_getmolecule: try: return self.get_molecule(requested_molecule[:6]) @@ -87,15 +87,18 @@ def query_lines(self, min_frequency, max_frequency, *, f"CDMS.fallback_to_getmolecule = True.") else: response = self.query_lines_async(min_frequency=min_frequency, - max_frequency=max_frequency, - min_strength=min_strength, - molecule=molecule, - temperature_for_intensity=temperature_for_intensity, - flags=flags, - parse_name_locally=parse_name_locally, - get_query_payload=get_query_payload, - cache=cache) - return self._parse_result(response, molname=molecule, verbose=verbose) + max_frequency=max_frequency, + min_strength=min_strength, + molecule=molecule, + temperature_for_intensity=temperature_for_intensity, + flags=flags, + parse_name_locally=parse_name_locally, + get_query_payload=get_query_payload, + cache=cache) + if get_query_payload: + return response + else: + return self._parse_result(response, molname=molecule, verbose=verbose) def query_lines_async(self, min_frequency, max_frequency, *, min_strength=-500, molecule='All', diff --git a/docs/linelists/jplspec/jplspec.rst b/docs/linelists/jplspec/jplspec.rst index 4c8b977f4c..2ef9f3ad12 100644 --- a/docs/linelists/jplspec/jplspec.rst +++ 
b/docs/linelists/jplspec/jplspec.rst @@ -57,7 +57,7 @@ what each setting yields: 9942985.9145 3.3849 -33.2361 2 15338.0634 183 28001 101 91 90 False Length = 91 rows >>> response.meta - {'TAG': 28001, 'NAME': 'CO', 'NLINE': 91, 'QLOG1': 2.0369, 'QLOG2': 1.9123, 'QLOG3': 1.737, 'QLOG4': 1.4386, 'QLOG5': 1.1429, 'QLOG6': 0.8526, 'QLOG7': 0.5733, 'VER': '4*', 'molecule_id': '28001 CO'} + {'TAG': 28001, 'NAME': 'CO', 'NLINE': 91, 'QLOG1': 2.0369, 'QLOG2': 1.9123, 'QLOG3': 1.737, 'QLOG4': 1.4386, 'QLOG5': 1.1429, 'QLOG6': 0.8526, 'QLOG7': 0.5733, 'VER': '4*', 'molecule_id': '28001 CO', 'molecule_name': {}} The following example, with ``get_query_payload = True``, returns the payload: From f8151ebdee78b66a865149785b3d69351d8ceb88 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sun, 9 Nov 2025 12:41:41 -0500 Subject: [PATCH 35/44] flake whitespace --- astroquery/linelists/cdms/core.py | 11 +++++------ astroquery/linelists/cdms/tests/test_cdms_remote.py | 3 ++- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/astroquery/linelists/cdms/core.py b/astroquery/linelists/cdms/core.py index 10dcde9fff..af2f8c817f 100644 --- a/astroquery/linelists/cdms/core.py +++ b/astroquery/linelists/cdms/core.py @@ -44,7 +44,7 @@ def _mol_to_payload(self, molecule, parse_name_locally, flags): 'refine your search or read the Docs ' 'for pointers on how to search.') return tuple(f"{val:06d} {key}" - for key, val in luts.items())[0] + for key, val in luts.items())[0] else: return molecule @@ -73,12 +73,12 @@ def query_lines(self, min_frequency, max_frequency, *, if "molecule_id should be a length-6 string of numbers" in str(ex): if parse_name_locally: raise ValueError(f"Molecule {molecule} could not be parsed or identified." - " Check that the name was correctly specified.") + " Check that the name was correctly specified.") else: raise ValueError(f"Molecule {molecule} needs to be formatted as" - " a 6-digit string ID for the get_molecule fallback to work." 
- " Try setting parse_name_locally=True " - "to turn your molecule name into a CDMS number ID.") + " a 6-digit string ID for the get_molecule fallback to work." + " Try setting parse_name_locally=True " + "to turn your molecule name into a CDMS number ID.") else: raise ex else: @@ -232,7 +232,6 @@ def query_lines_async(self, min_frequency, max_frequency, *, if not ok: raise EmptyResponseError("Did not find table in response") - baseurl = self.URL.split('cgi-bin')[0] fullurl = f'{baseurl}/{url}' diff --git a/astroquery/linelists/cdms/tests/test_cdms_remote.py b/astroquery/linelists/cdms/tests/test_cdms_remote.py index 2bf46bb7a0..bb22ffcac6 100644 --- a/astroquery/linelists/cdms/tests/test_cdms_remote.py +++ b/astroquery/linelists/cdms/tests/test_cdms_remote.py @@ -125,7 +125,7 @@ def test_fallback_to_getmolecule_parameter(): min_strength=-500, molecule="028528 H2NC", fallback_to_getmolecule=True - ) + ) assert isinstance(tbl_fallback, Table) assert len(tbl_fallback) > 0 @@ -134,6 +134,7 @@ def test_fallback_to_getmolecule_parameter(): # tests but just in case CDMS.fallback_to_getmolecule = False + @pytest.mark.remote_data def test_remote_regex(): From c4e48fedd7839d4ddbf4b99c1183457db2ff8c0c Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sun, 9 Nov 2025 13:06:44 -0500 Subject: [PATCH 36/44] add some more tests to boost coverage --- astroquery/linelists/jplspec/core.py | 2 +- .../linelists/jplspec/tests/test_jplspec.py | 100 +++++++++++++++++- 2 files changed, 100 insertions(+), 2 deletions(-) diff --git a/astroquery/linelists/jplspec/core.py b/astroquery/linelists/jplspec/core.py index 426dde2c53..fc6350a8b7 100644 --- a/astroquery/linelists/jplspec/core.py +++ b/astroquery/linelists/jplspec/core.py @@ -179,7 +179,7 @@ def _parse_result(self, response, *, verbose=False): mols = [] for tb, mol in zip(tbs, payload['Mol']): tb['Name'] = self.lookup_ids.find(mol, flags=0) - for key in tb.meta: + for key in list(tb.meta.keys()): tb.meta[f'{mol}_{key}'] 
= tb.meta.pop(key) mols.append(mol) tb = table.vstack(tbs) diff --git a/astroquery/linelists/jplspec/tests/test_jplspec.py b/astroquery/linelists/jplspec/tests/test_jplspec.py index fcdfd76f37..cb19023f22 100644 --- a/astroquery/linelists/jplspec/tests/test_jplspec.py +++ b/astroquery/linelists/jplspec/tests/test_jplspec.py @@ -1,4 +1,8 @@ import numpy as np +import pytest + +from unittest.mock import Mock, patch, MagicMock +from astroquery.exceptions import EmptyResponseError import os @@ -151,7 +155,6 @@ def test_parse_cat(): def test_get_molecule_input_validation(): """Test input validation for get_molecule method.""" - import pytest # Test invalid string format with pytest.raises(ValueError): @@ -164,3 +167,98 @@ def test_get_molecule_input_validation(): # Test wrong length string with pytest.raises(ValueError): JPLSpec.get_molecule(1234567) + + +def test_fallback_to_getmolecule_with_empty_response(): + """Test that fallback_to_getmolecule works when query returns zero lines.""" + + # Create a mock response with "Zero lines were found" + mock_response = Mock() + mock_response.text = "Zero lines were found" + mock_request = Mock() + mock_request.body = "Mol=18003" + mock_response.request = mock_request + + # Test with fallback disabled - should raise EmptyResponseError + JPLSpec.fallback_to_getmolecule = False + with pytest.raises(EmptyResponseError, match="Response was empty"): + JPLSpec._parse_result(mock_response) + + # Test with fallback enabled - should call get_molecule + JPLSpec.fallback_to_getmolecule = True + with patch.object(JPLSpec, 'get_molecule') as mock_get_molecule, \ + patch('astroquery.linelists.jplspec.core.build_lookup') as mock_build_lookup: + + # Mock build_lookup to return a lookup object + from unittest.mock import MagicMock + mock_lookup = MagicMock() + mock_lookup.find.return_value = "H2O" + mock_build_lookup.return_value = mock_lookup + + # Mock get_molecule to return a simple table + mock_table = Table() + mock_table['FREQ'] = [100.0, 
200.0] + mock_table['TAG'] = [18003, 18003] + mock_table.meta = {'NAME': 'H2O', 'TAG': 18003} + mock_get_molecule.return_value = mock_table + + result = JPLSpec._parse_result(mock_response) + + # Verify get_molecule was called with the correct molecule ID + mock_get_molecule.assert_called_once_with('18003') + + # Verify we got the mocked table back + assert isinstance(result, Table) + assert len(result) == 2 + assert result.meta['molecule_id'] == '18003' + assert result.meta['molecule_name'] == 'H2O' + + # Reset to default + JPLSpec.fallback_to_getmolecule = True + + +def test_fallback_to_getmolecule_with_multiple_molecules(): + """Test fallback with multiple molecules in the request.""" + # Create a mock response with "Zero lines were found" and multiple molecules + mock_response = Mock() + mock_response.text = "Zero lines were found" + mock_request = Mock() + mock_request.body = "Mol=18003&Mol=28001" + mock_response.request = mock_request + + JPLSpec.fallback_to_getmolecule = True + with patch.object(JPLSpec, 'get_molecule') as mock_get_molecule, \ + patch('astroquery.linelists.jplspec.core.build_lookup') as mock_build_lookup: + + # Mock build_lookup to return a lookup object + mock_lookup = MagicMock() + mock_lookup.find.side_effect = lambda mol_id, **kwargs: "H2O" if mol_id == '18003' else "CO" + mock_build_lookup.return_value = mock_lookup + + # Mock get_molecule to return different tables + def get_molecule_side_effect(mol_id): + mock_table = Table() + if mol_id == '18003': + mock_table['FREQ'] = [100.0, 200.0] + mock_table.meta = {'NAME': 'H2O', 'TAG': 18003} + else: + mock_table['FREQ'] = [300.0, 400.0] + mock_table.meta = {'NAME': 'CO', 'TAG': 28001} + mock_table['TAG'] = [int(mol_id)] * len(mock_table) + return mock_table + + mock_get_molecule.side_effect = get_molecule_side_effect + + result = JPLSpec._parse_result(mock_response) + + # Verify get_molecule was called twice + assert mock_get_molecule.call_count == 2 + + # Verify we got a stacked table + 
assert isinstance(result, Table) + assert len(result) == 4 # 2 rows from each molecule + assert 'molecule_list' in result.meta + assert 'Name' in result.colnames + + # Reset to default + JPLSpec.fallback_to_getmolecule = True From 78b70a679eb207852bfae85598cdeb3e07d894e6 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sun, 9 Nov 2025 13:14:36 -0500 Subject: [PATCH 37/44] add some tests for the fallback. the tests are complicated because the fallback logic is kinda complicated. But running the tests revealed an error in the code, so it's helpful already --- .../linelists/jplspec/tests/test_jplspec.py | 169 +++++++++++++----- 1 file changed, 121 insertions(+), 48 deletions(-) diff --git a/astroquery/linelists/jplspec/tests/test_jplspec.py b/astroquery/linelists/jplspec/tests/test_jplspec.py index cb19023f22..369a0818fd 100644 --- a/astroquery/linelists/jplspec/tests/test_jplspec.py +++ b/astroquery/linelists/jplspec/tests/test_jplspec.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from unittest.mock import Mock, patch, MagicMock +from unittest.mock import Mock, MagicMock, patch from astroquery.exceptions import EmptyResponseError import os @@ -169,15 +169,64 @@ def test_get_molecule_input_validation(): JPLSpec.get_molecule(1234567) -def test_fallback_to_getmolecule_with_empty_response(): - """Test that fallback_to_getmolecule works when query returns zero lines.""" - - # Create a mock response with "Zero lines were found" +# Helper functions for fallback tests +def _create_empty_response(molecules): + """Create a mock response with 'Zero lines were found'.""" mock_response = Mock() mock_response.text = "Zero lines were found" mock_request = Mock() - mock_request.body = "Mol=18003" + if isinstance(molecules, str): + mock_request.body = f"Mol={molecules}" + else: + mock_request.body = "&".join(f"Mol={mol}" for mol in molecules) mock_response.request = mock_request + return mock_response + + +def _setup_fallback_mocks(molecules_dict): + """ + Set 
up mocks for fallback testing. + + Parameters + ---------- + molecules_dict : dict + Dictionary mapping molecule IDs to (name, table_data) tuples. + table_data should be a dict with 'FREQ' and optionally other columns. + + Returns + ------- + mock_get_molecule, mock_build_lookup + The mock objects that can be used in assertions. + """ + # Mock build_lookup + mock_lookup = MagicMock() + if len(molecules_dict) == 1: + mol_id = list(molecules_dict.keys())[0] + mock_lookup.find.return_value = molecules_dict[mol_id][0] + else: + mock_lookup.find.side_effect = lambda mol_id, **kwargs: molecules_dict.get(mol_id, (None, None))[0] + + # Mock get_molecule + def get_molecule_side_effect(mol_id): + if mol_id not in molecules_dict: + raise ValueError(f"Unexpected molecule ID: {mol_id}") + name, table_data = molecules_dict[mol_id] + mock_table = Table() + mock_table['FREQ'] = table_data.get('FREQ', [100.0, 200.0]) + mock_table['TAG'] = [int(mol_id)] * len(mock_table['FREQ']) + # Add any additional columns from table_data + for key, value in table_data.items(): + if key != 'FREQ' and key not in mock_table.colnames: + mock_table[key] = value + mock_table.meta = table_data.get('meta', {}) + return mock_table + + return get_molecule_side_effect, mock_lookup + + +def test_fallback_to_getmolecule_with_empty_response(): + """Test that fallback_to_getmolecule works when query returns zero lines.""" + mock_response = _create_empty_response('18003') # Test with fallback disabled - should raise EmptyResponseError JPLSpec.fallback_to_getmolecule = False @@ -186,79 +235,103 @@ def test_fallback_to_getmolecule_with_empty_response(): # Test with fallback enabled - should call get_molecule JPLSpec.fallback_to_getmolecule = True + molecules = {'18003': ('H2O', {'FREQ': [100.0, 200.0]})} + with patch.object(JPLSpec, 'get_molecule') as mock_get_molecule, \ patch('astroquery.linelists.jplspec.core.build_lookup') as mock_build_lookup: - # Mock build_lookup to return a lookup object - from 
unittest.mock import MagicMock - mock_lookup = MagicMock() - mock_lookup.find.return_value = "H2O" + get_mol_func, mock_lookup = _setup_fallback_mocks(molecules) + mock_get_molecule.side_effect = get_mol_func mock_build_lookup.return_value = mock_lookup - # Mock get_molecule to return a simple table - mock_table = Table() - mock_table['FREQ'] = [100.0, 200.0] - mock_table['TAG'] = [18003, 18003] - mock_table.meta = {'NAME': 'H2O', 'TAG': 18003} - mock_get_molecule.return_value = mock_table - result = JPLSpec._parse_result(mock_response) - # Verify get_molecule was called with the correct molecule ID mock_get_molecule.assert_called_once_with('18003') - - # Verify we got the mocked table back assert isinstance(result, Table) assert len(result) == 2 assert result.meta['molecule_id'] == '18003' assert result.meta['molecule_name'] == 'H2O' - # Reset to default JPLSpec.fallback_to_getmolecule = True def test_fallback_to_getmolecule_with_multiple_molecules(): """Test fallback with multiple molecules in the request.""" - # Create a mock response with "Zero lines were found" and multiple molecules - mock_response = Mock() - mock_response.text = "Zero lines were found" - mock_request = Mock() - mock_request.body = "Mol=18003&Mol=28001" - mock_response.request = mock_request + mock_response = _create_empty_response(['18003', '28001']) JPLSpec.fallback_to_getmolecule = True + molecules = { + '18003': ('H2O', {'FREQ': [100.0, 200.0]}), + '28001': ('CO', {'FREQ': [300.0, 400.0]}) + } + with patch.object(JPLSpec, 'get_molecule') as mock_get_molecule, \ patch('astroquery.linelists.jplspec.core.build_lookup') as mock_build_lookup: - # Mock build_lookup to return a lookup object - mock_lookup = MagicMock() - mock_lookup.find.side_effect = lambda mol_id, **kwargs: "H2O" if mol_id == '18003' else "CO" + get_mol_func, mock_lookup = _setup_fallback_mocks(molecules) + mock_get_molecule.side_effect = get_mol_func mock_build_lookup.return_value = mock_lookup - # Mock get_molecule to return 
different tables - def get_molecule_side_effect(mol_id): - mock_table = Table() - if mol_id == '18003': - mock_table['FREQ'] = [100.0, 200.0] - mock_table.meta = {'NAME': 'H2O', 'TAG': 18003} - else: - mock_table['FREQ'] = [300.0, 400.0] - mock_table.meta = {'NAME': 'CO', 'TAG': 28001} - mock_table['TAG'] = [int(mol_id)] * len(mock_table) - return mock_table - - mock_get_molecule.side_effect = get_molecule_side_effect - result = JPLSpec._parse_result(mock_response) - # Verify get_molecule was called twice assert mock_get_molecule.call_count == 2 - - # Verify we got a stacked table assert isinstance(result, Table) assert len(result) == 4 # 2 rows from each molecule assert 'molecule_list' in result.meta assert 'Name' in result.colnames - # Reset to default + JPLSpec.fallback_to_getmolecule = True + + +def test_query_lines_with_fallback(): + """Test that query_lines uses fallback when server returns empty result.""" + + # Test with fallback disabled - should raise EmptyResponseError + JPLSpec.fallback_to_getmolecule = False + with patch.object(JPLSpec, '_request') as mock_request: + mock_response = _create_empty_response('28001') + mock_response.raise_for_status = Mock() + mock_request.return_value = mock_response + + with pytest.raises(EmptyResponseError, match="Response was empty"): + JPLSpec.query_lines(min_frequency=100 * u.GHz, + max_frequency=200 * u.GHz, + min_strength=-500, + molecule="28001 CO") + + # Test with fallback enabled - should call get_molecule + JPLSpec.fallback_to_getmolecule = True + molecules = {'28001': ('CO', { + 'FREQ': [115271.2018, 230538.0000], + 'ERR': [0.0005, 0.0010], + 'LGINT': [-5.0105, -4.5], + 'DR': [2, 2], + 'ELO': [0.0, 3.845], + 'GUP': [3, 5], + 'QNFMT': [1, 1] + })} + + with patch.object(JPLSpec, '_request') as mock_request, \ + patch.object(JPLSpec, 'get_molecule') as mock_get_molecule, \ + patch('astroquery.linelists.jplspec.core.build_lookup') as mock_build_lookup: + + mock_response = _create_empty_response('28001') + 
mock_response.raise_for_status = Mock() + mock_request.return_value = mock_response + + get_mol_func, mock_lookup = _setup_fallback_mocks(molecules) + mock_get_molecule.side_effect = get_mol_func + mock_build_lookup.return_value = mock_lookup + + result = JPLSpec.query_lines( + min_frequency=100 * u.GHz, + max_frequency=200 * u.GHz, + min_strength=-500, + molecule="28001 CO") + + mock_get_molecule.assert_called_once_with('28001') + assert isinstance(result, Table) + assert len(result) > 0 + assert 'molecule_id' in result.meta + JPLSpec.fallback_to_getmolecule = True From db6ae14924b11f6a8ec0b973b61471db98d7f787 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sun, 9 Nov 2025 13:35:41 -0500 Subject: [PATCH 38/44] improve test coverage --- astroquery/linelists/cdms/tests/test_cdms.py | 134 ++++++++++++++++++- 1 file changed, 130 insertions(+), 4 deletions(-) diff --git a/astroquery/linelists/cdms/tests/test_cdms.py b/astroquery/linelists/cdms/tests/test_cdms.py index 0b8059105f..bfa654ff79 100644 --- a/astroquery/linelists/cdms/tests/test_cdms.py +++ b/astroquery/linelists/cdms/tests/test_cdms.py @@ -7,6 +7,7 @@ from astropy.table import Table from astroquery.linelists.cdms.core import CDMS, parse_letternumber, build_lookup from astroquery.utils.mocks import MockResponse +from astroquery.exceptions import InvalidQueryError colname_set = set(['FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', 'TAG', 'QNFMT', 'Ju', 'Jl', "vu", "F1u", "F2u", "F3u", "vl", "Ku", "Kl", @@ -21,10 +22,18 @@ def data_path(filename): def mockreturn(*args, method='GET', data={}, url='', **kwargs): if method == 'GET': - molecule = url.split('cdmstab')[1].split('.')[0] - with open(data_path(molecule+".data"), 'rb') as fh: - content = fh.read() - return MockResponse(content=content) + # Handle get_molecule requests (classic URL format) + if '/entries/c' in url: + molecule = url.split('/entries/c')[1].split('.')[0] + with open(data_path(f"c{molecule}.cat"), 'rb') as fh: + content = 
fh.read() + return MockResponse(content=content) + # Handle regular query_lines requests + else: + molecule = url.split('cdmstab')[1].split('.')[0] + with open(data_path(molecule+".data"), 'rb') as fh: + content = fh.read() + return MockResponse(content=content) elif method == 'POST': molecule = dict(data)['Molecules'] with open(data_path("post_response.html"), 'r') as fh: @@ -205,3 +214,120 @@ def test_lut_literal(): assert thirteenco['13CO'] == 29501 thirteencostar = lut.find('13CO*', 0) assert len(thirteencostar) >= 252 + + +def test_malformatted_molecule_raises_error(patch_post): + """ + Test that querying a malformatted molecule raises an error when + fallback_to_getmolecule is False (default behavior) + """ + # H2C2S is in the MALFORMATTED_MOLECULE_LIST + with pytest.raises(ValueError, match="is known not to comply with standard CDMS format"): + CDMS.query_lines(min_frequency=100 * u.GHz, + max_frequency=300 * u.GHz, + molecule='058501 H2C2S', + fallback_to_getmolecule=False) + + +def test_malformatted_molecule_with_fallback(patch_post): + """ + Test that querying a malformatted molecule with fallback_to_getmolecule=True + successfully falls back to get_molecule + """ + # H2C2S is in the MALFORMATTED_MOLECULE_LIST + tbl = CDMS.query_lines(min_frequency=100 * u.GHz, + max_frequency=300 * u.GHz, + molecule='058501 H2C2S', + fallback_to_getmolecule=True) + + assert isinstance(tbl, Table) + assert len(tbl) == 3 + assert tbl['FREQ'][0] == 114.9627 + assert tbl['FREQ'][1] == 344.8868 + assert tbl['FREQ'][2] == 689.7699 + assert tbl['TAG'][0] == 58501 + assert tbl['GUP'][0] == 9 + + +def test_malformatted_molecule_id_only_with_fallback(patch_post): + """ + Test that querying with just the molecule ID (058501) also works with fallback + """ + # Just the ID is also in the badlist + tbl = CDMS.query_lines(min_frequency=100 * u.GHz, + max_frequency=300 * u.GHz, + molecule='058501', + fallback_to_getmolecule=True) + + assert isinstance(tbl, Table) + assert len(tbl) == 3 
+ assert tbl['FREQ'][0] == 114.9627 + + +def test_malformatted_molecule_name_only_with_fallback_error(patch_post): + """ + Test that querying with just the molecule name (H2C2S) without parse_name_locally + raises an error because H2C2S (5 chars) is not a valid 6-digit molecule ID. + + When parse_name_locally=False, "H2C2S" is passed as-is to _mol_to_payload, + which returns "H2C2S". This is in the badlist, so fallback is triggered, + but get_molecule("H2C2S") fails because it's not a 6-digit ID. + """ + # Just the name is also in the badlist, but it's not a 6-digit ID + with pytest.raises(ValueError, match="needs to be formatted as.*6-digit string ID"): + CDMS.query_lines(min_frequency=100 * u.GHz, + max_frequency=300 * u.GHz, + molecule='H2C2S', + parse_name_locally=False, + fallback_to_getmolecule=True) + + +def test_malformatted_molecule_name_with_parse_locally_success(patch_post): + """ + Test that querying with just the molecule name (H2C2S) WITH parse_name_locally=True + successfully resolves to "058501 H2C2S" and then falls back to get_molecule. + + When parse_name_locally=True, "H2C2S" is looked up and converted to "058501 H2C2S", + which is in the badlist, so fallback is triggered and succeeds. 
+ """ + tbl = CDMS.query_lines(min_frequency=100 * u.GHz, + max_frequency=300 * u.GHz, + molecule='H2C2S', + parse_name_locally=True, + fallback_to_getmolecule=True) + + assert isinstance(tbl, Table) + assert len(tbl) == 3 + assert tbl['TAG'][0] == 58501 + + +def test_get_query_payload_skips_fallback(patch_post): + """ + Test that when get_query_payload=True, the fallback is not triggered + even for malformatted molecules + """ + # This should return the payload without triggering fallback or error + payload = CDMS.query_lines(min_frequency=100 * u.GHz, + max_frequency=300 * u.GHz, + molecule='058501 H2C2S', + get_query_payload=True) + + assert isinstance(payload, dict) + assert 'Molecules' in payload + assert payload['Molecules'] == '058501 H2C2S' + + +def test_malformatted_with_parse_name_locally_and_fallback_error(): + """ + Test that when parse_name_locally=True with a malformatted molecule + and fallback is enabled, but molecule can't be resolved, we get + proper error message about parsing failure + """ + # First, the lookup will fail to find 'NOTREALMOLECULE' and raise InvalidQueryError + # before we even get to the fallback logic + with pytest.raises(InvalidQueryError, match="No matching species found"): + CDMS.query_lines(min_frequency=100 * u.GHz, + max_frequency=300 * u.GHz, + molecule='NOTREALMOLECULE', + parse_name_locally=True, + fallback_to_getmolecule=True) From 3db6d02fc2dbdfd045cf978851d342e68b217927 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sun, 9 Nov 2025 13:48:38 -0500 Subject: [PATCH 39/44] move the parse cat definitions back to their homes --- astroquery/linelists/cdms/core.py | 84 ++++++++- astroquery/linelists/core.py | 253 +-------------------------- astroquery/linelists/jplspec/core.py | 103 ++++++++++- 3 files changed, 176 insertions(+), 264 deletions(-) diff --git a/astroquery/linelists/cdms/core.py b/astroquery/linelists/cdms/core.py index af2f8c817f..8e258517d5 100644 --- a/astroquery/linelists/cdms/core.py +++ 
b/astroquery/linelists/cdms/core.py @@ -11,7 +11,7 @@ # import configurable items declared in __init__.py from astroquery.linelists.cdms import conf from astroquery.exceptions import InvalidQueryError, EmptyResponseError -from ..core import LineListClass, parse_letternumber +from ..core import parse_letternumber from astroquery.utils import process_asyncs import re @@ -509,13 +509,85 @@ def get_molecule(self, molecule_id, *, cache=True, return_response=False): def _parse_cat(self, text, *, verbose=False): """ - Parse a catalog response into an `~astropy.table.Table` + Parse a CDMS-format catalog file into an `~astropy.table.Table`. - See details in _parse_response; this is a very similar function, - but the catalog responses have a slightly different format. + The catalog data files are composed of 80-character card images. + Format: [F13.4, 2F8.4, I2, F10.4, I3, I7, I4, 12I2]: + FREQ, ERR, LGINT, DR, ELO, GUP, TAG, QNFMT, QN + + Parameters + ---------- + text : str + The catalog file text content. + verbose : bool, optional + Not used currently. + + Returns + ------- + Table : `~astropy.table.Table` + Parsed catalog data. 
""" - # Use the base class method for CDMS format parsing - return self._parse_cat_cdms_format(text, verbose=verbose) + # Column start positions + starts = {'FREQ': 0, + 'ERR': 14, + 'LGINT': 22, + 'DR': 30, + 'ELO': 32, + 'GUP': 42, + 'TAG': 44, + 'QNFMT': 51, + 'Q1': 55, + 'Q2': 57, + 'Q3': 59, + 'Q4': 61, + 'Q5': 63, + 'Q6': 65, + 'Q7': 67, + 'Q8': 69, + 'Q9': 71, + 'Q10': 73, + 'Q11': 75, + 'Q12': 77, + 'Q13': 79, + 'Q14': 81, + } + + result = ascii.read(text, header_start=None, data_start=0, + comment=r'THIS|^\s{12,14}\d{4,6}.*', + names=list(starts.keys()), + col_starts=list(starts.values()), + format='fixed_width', fast_reader=False) + + # Ensure TAG is integer type for computation + # int truncates - which is what we want + result['TAG'] = result['TAG'].astype(int) + result['MOLWT'] = [int(x/1e3) for x in result['TAG']] + + result['FREQ'].unit = u.MHz + result['ERR'].unit = u.MHz + + result['Lab'] = result['MOLWT'] < 0 + result['MOLWT'] = np.abs(result['MOLWT']) + result['MOLWT'].unit = u.Da + + fix_keys = ['GUP'] + for qn in (f'Q{ii}' for ii in range(1, 15)): + fix_keys.append(qn) + log.debug(f"fix_keys: {fix_keys} should include Q1, Q2, ..., Q14 and GUP") + for key in fix_keys: + if not np.issubdtype(result[key].dtype, np.integer): + intcol = np.array(list(map(parse_letternumber, result[key])), + dtype=int) + if any(intcol == -999999): + intcol = np.ma.masked_where(intcol == -999999, intcol) + result[key] = intcol + if not np.issubdtype(result[key].dtype, np.integer): + raise ValueError(f"Failed to parse {key} as integer") + + result['LGINT'].unit = u.nm**2 * u.MHz + result['ELO'].unit = u.cm**(-1) + + return result CDMS = CDMSClass() diff --git a/astroquery/linelists/core.py b/astroquery/linelists/core.py index 576994a061..1a3adc8e44 100644 --- a/astroquery/linelists/core.py +++ b/astroquery/linelists/core.py @@ -11,7 +11,7 @@ from astropy import table -__all__ = ['LineListClass', 'parse_letternumber'] +__all__ = ['parse_letternumber'] def 
parse_letternumber(st): @@ -37,254 +37,3 @@ def parse_letternumber(st): str((ASC.index(x)+10)) if x in ASC else x for x in st]) return int(newst) - - -class LineListClass: - """ - Base class for line list catalog queries (JPL, CDMS, etc.) - - This class provides common functionality for parsing catalog files - and retrieving molecule data from spectroscopic databases. - """ - - def get_molecule(self, molecule_id, *, cache=True, **kwargs): - """ - Retrieve the whole molecule table for a given molecule id from the catalog. - - This method should be overridden by subclasses to implement - catalog-specific behavior, but provides common structure. - - Parameters - ---------- - molecule_id : int or str - The molecule tag/identifier. Can be an integer or a string. - cache : bool - Defaults to True. If set overrides global caching behavior. - **kwargs : dict - Additional keyword arguments specific to the subclass implementation. - - Returns - ------- - Table : `~astropy.table.Table` - Table containing all spectral lines for the requested molecule. - """ - raise NotImplementedError("Subclasses must implement get_molecule()") - - def _parse_cat(self, response_or_text, *, verbose=False): - """ - Parse a catalog file response into an `~astropy.table.Table`. - - The catalog data files are typically composed of 80-character card images, - with one card image per spectral line. This method provides the common - parsing logic, but can be overridden by subclasses for catalog-specific formats. - - Parameters - ---------- - response_or_text : `requests.Response` or str - The HTTP response from the catalog file request or the text content. - verbose : bool, optional - If True, print additional debugging information. - - Returns - ------- - Table : `~astropy.table.Table` - Parsed catalog data. 
- """ - raise NotImplementedError("Subclasses must implement _parse_cat()") - - def _parse_cat_jpl_format(self, text, *, verbose=False): - """ - Parse a JPL-format catalog file into an `~astropy.table.Table`. - - The catalog data files are composed of 80-character card images, with - one card image per spectral line. The format of each card image is: - FREQ, ERR, LGINT, DR, ELO, GUP, TAG, QNFMT, QN', QN" - (F13.4,F8.4, F8.4, I2,F10.4, I3, I7, I4, 6I2, 6I2) - - https://spec.jpl.nasa.gov/ftp/pub/catalog/doc/catintro.pdf - - Parameters - ---------- - text : str - The catalog file text content. - verbose : bool, optional - Not used currently. - - Returns - ------- - Table : `~astropy.table.Table` - Parsed catalog data. - """ - if 'Zero lines were found' in text or len(text.strip()) == 0: - raise EmptyResponseError(f"Response was empty; message was '{text}'.") - - # Parse the catalog file with fixed-width format - # Format: FREQ(13.4), ERR(8.4), LGINT(8.4), DR(2), ELO(10.4), GUP(3), TAG(7), QNFMT(4), QN'(12), QN"(12) - result = ascii.read(text, header_start=None, data_start=0, - comment=r'THIS|^\s{12,14}\d{4,6}.*', - names=('FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', - 'TAG', 'QNFMT', 'QN\'', 'QN"'), - col_starts=(0, 13, 21, 29, 31, 41, 44, 51, 55, 67), - format='fixed_width', fast_reader=False) - - # Ensure TAG is integer type - result['TAG'] = result['TAG'].astype(int) - - # Add units - result['FREQ'].unit = u.MHz - result['ERR'].unit = u.MHz - result['LGINT'].unit = u.nm**2 * u.MHz - result['ELO'].unit = u.cm**(-1) - - # split table by qnfmt; each chunk must be separately parsed. - qnfmts = np.unique(result['QNFMT']) - tables = [result[result['QNFMT'] == qq] for qq in qnfmts] - - # some tables have +/-/blank entries in QNs - # pm_is_ok should be True when the QN columns contain '+' or '-'. 
- # (can't do a str check on np.integer dtype so have to filter that out first) - pm_is_ok = ((not np.issubdtype(result["QN'"].dtype, np.integer)) - and any(('+' in str(line) or '-' in str(line)) for line in result["QN'"])) - - def int_or_pm(st): - try: - return int(st) - except ValueError: - try: - return parse_letternumber(st) - except ValueError: - if pm_is_ok and (st.strip() == '' or st.strip() == '+' or st.strip() == '-'): - return st.strip() - else: - raise ValueError(f'"{st}" is not a valid +/-/blank entry') - - # At least this molecule, NH, claims 5 QNs but has only 4 - bad_qnfmt_dict = { - 15001: 1234, - } - mol_tag = result['TAG'][0] - - if mol_tag in (32001,): - raise NotImplementedError("Molecule O2 (32001) does not follow the format standard.") - - for tbl in tables: - if mol_tag in bad_qnfmt_dict: - n_qns = bad_qnfmt_dict[mol_tag] % 10 - else: - n_qns = tbl['QNFMT'][0] % 10 - if n_qns > 1: - qnlen = 2 * n_qns - for ii in range(n_qns): - if tbl["QN'"].dtype in (int, np.int32, np.int64): - # for the case where it was already parsed as int - # (53005 is an example) - tbl[f"QN'{ii+1}"] = tbl["QN'"] - tbl[f'QN"{ii+1}'] = tbl['QN"'] - else: - # string parsing can truncate to length=2n or 2n-1 depending - # on whether there are any two-digit QNs in the column - ind1 = ii * 2 - ind2 = ii * 2 + 2 - # rjust(qnlen) is needed to enforce that all strings retain their exact original shape - qnp = [int_or_pm(line.rjust(qnlen)[ind1: ind2].strip()) for line in tbl['QN\'']] - qnpp = [int_or_pm(line.rjust(qnlen)[ind1: ind2].strip()) for line in tbl['QN"']] - dtype = str if any('+' in str(x) for x in qnp) else int - tbl[f"QN'{ii+1}"] = np.array(qnp, dtype=dtype) - tbl[f'QN"{ii+1}'] = np.array(qnpp, dtype=dtype) - del tbl['QN\''] - del tbl['QN"'] - else: - tbl['QN\''] = np.array(list(map(parse_letternumber, tbl['QN\''])), dtype=int) - tbl['QN"'] = np.array(list(map(parse_letternumber, tbl['QN"'])), dtype=int) - - result = table.vstack(tables) - - # Add laboratory 
measurement flag - # A negative TAG value indicates laboratory-measured frequency - result['Lab'] = result['TAG'] < 0 - # Convert TAG to absolute value - result['TAG'] = abs(result['TAG']) - - return result - - def _parse_cat_cdms_format(self, text, *, verbose=False): - """ - Parse a CDMS-format catalog file into an `~astropy.table.Table`. - - The catalog data files are composed of 80-character card images. - Format: [F13.4, 2F8.4, I2, F10.4, I3, I7, I4, 12I2]: - FREQ, ERR, LGINT, DR, ELO, GUP, TAG, QNFMT, QN - - Parameters - ---------- - text : str - The catalog file text content. - verbose : bool, optional - Not used currently. - - Returns - ------- - Table : `~astropy.table.Table` - Parsed catalog data. - """ - # Column start positions - starts = {'FREQ': 0, - 'ERR': 14, - 'LGINT': 22, - 'DR': 30, - 'ELO': 32, - 'GUP': 42, - 'TAG': 44, - 'QNFMT': 51, - 'Q1': 55, - 'Q2': 57, - 'Q3': 59, - 'Q4': 61, - 'Q5': 63, - 'Q6': 65, - 'Q7': 67, - 'Q8': 69, - 'Q9': 71, - 'Q10': 73, - 'Q11': 75, - 'Q12': 77, - 'Q13': 79, - 'Q14': 81, - } - - result = ascii.read(text, header_start=None, data_start=0, - comment=r'THIS|^\s{12,14}\d{4,6}.*', - names=list(starts.keys()), - col_starts=list(starts.values()), - format='fixed_width', fast_reader=False) - - # Ensure TAG is integer type for computation - # int truncates - which is what we want - result['TAG'] = result['TAG'].astype(int) - result['MOLWT'] = [int(x/1e3) for x in result['TAG']] - - result['FREQ'].unit = u.MHz - result['ERR'].unit = u.MHz - - result['Lab'] = result['MOLWT'] < 0 - result['MOLWT'] = np.abs(result['MOLWT']) - result['MOLWT'].unit = u.Da - - fix_keys = ['GUP'] - for qn in (f'Q{ii}' for ii in range(1, 15)): - fix_keys.append(qn) - log.debug(f"fix_keys: {fix_keys} should include Q1, Q2, ..., Q14 and GUP") - for key in fix_keys: - if not np.issubdtype(result[key].dtype, np.integer): - intcol = np.array(list(map(parse_letternumber, result[key])), - dtype=int) - if any(intcol == -999999): - intcol = 
np.ma.masked_where(intcol == -999999, intcol) - result[key] = intcol - if not np.issubdtype(result[key].dtype, np.integer): - raise ValueError(f"Failed to parse {key} as integer") - - result['LGINT'].unit = u.nm**2 * u.MHz - result['ELO'].unit = u.cm**(-1) - - return result diff --git a/astroquery/linelists/jplspec/core.py b/astroquery/linelists/jplspec/core.py index fc6350a8b7..e05f8a7452 100644 --- a/astroquery/linelists/jplspec/core.py +++ b/astroquery/linelists/jplspec/core.py @@ -8,11 +8,11 @@ from astropy import table from ...query import BaseQuery from ...utils import async_to_sync +from ..core import parse_letternumber # import configurable items declared in __init__.py from . import conf from . import lookup_table from astroquery.exceptions import EmptyResponseError, InvalidQueryError -from ..core import LineListClass from urllib.parse import parse_qs @@ -330,17 +330,19 @@ def get_molecule(self, molecule_id, *, cache=True): def _parse_cat(self, response, *, verbose=False): """ - Parse a catalog file response into an `~astropy.table.Table`. + Parse a JPL-format catalog file into an `~astropy.table.Table`. The catalog data files are composed of 80-character card images, with one card image per spectral line. The format of each card image is: FREQ, ERR, LGINT, DR, ELO, GUP, TAG, QNFMT, QN', QN" (F13.4,F8.4, F8.4, I2,F10.4, I3, I7, I4, 6I2, 6I2) + https://spec.jpl.nasa.gov/ftp/pub/catalog/doc/catintro.pdf + Parameters ---------- - response : `requests.Response` - The HTTP response from the catalog file request. + text : str + The catalog file text content. verbose : bool, optional Not used currently. @@ -349,8 +351,97 @@ def _parse_cat(self, response, *, verbose=False): Table : `~astropy.table.Table` Parsed catalog data. 
""" - # Use the base class method for JPL format parsing - return self._parse_cat_jpl_format(response.text, verbose=verbose) + if 'Zero lines were found' in text or len(text.strip()) == 0: + raise EmptyResponseError(f"Response was empty; message was '{text}'.") + + # Parse the catalog file with fixed-width format + # Format: FREQ(13.4), ERR(8.4), LGINT(8.4), DR(2), ELO(10.4), GUP(3), TAG(7), QNFMT(4), QN'(12), QN"(12) + result = ascii.read(text, header_start=None, data_start=0, + comment=r'THIS|^\s{12,14}\d{4,6}.*', + names=('FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', + 'TAG', 'QNFMT', 'QN\'', 'QN"'), + col_starts=(0, 13, 21, 29, 31, 41, 44, 51, 55, 67), + format='fixed_width', fast_reader=False) + + # Ensure TAG is integer type + result['TAG'] = result['TAG'].astype(int) + + # Add units + result['FREQ'].unit = u.MHz + result['ERR'].unit = u.MHz + result['LGINT'].unit = u.nm**2 * u.MHz + result['ELO'].unit = u.cm**(-1) + + # split table by qnfmt; each chunk must be separately parsed. + qnfmts = np.unique(result['QNFMT']) + tables = [result[result['QNFMT'] == qq] for qq in qnfmts] + + # some tables have +/-/blank entries in QNs + # pm_is_ok should be True when the QN columns contain '+' or '-'. 
+ # (can't do a str check on np.integer dtype so have to filter that out first) + pm_is_ok = ((not np.issubdtype(result["QN'"].dtype, np.integer)) + and any(('+' in str(line) or '-' in str(line)) for line in result["QN'"])) + + def int_or_pm(st): + try: + return int(st) + except ValueError: + try: + return parse_letternumber(st) + except ValueError: + if pm_is_ok and (st.strip() == '' or st.strip() == '+' or st.strip() == '-'): + return st.strip() + else: + raise ValueError(f'"{st}" is not a valid +/-/blank entry') + + # At least this molecule, NH, claims 5 QNs but has only 4 + bad_qnfmt_dict = { + 15001: 1234, + } + mol_tag = result['TAG'][0] + + if mol_tag in (32001,): + raise NotImplementedError("Molecule O2 (32001) does not follow the format standard.") + + for tbl in tables: + if mol_tag in bad_qnfmt_dict: + n_qns = bad_qnfmt_dict[mol_tag] % 10 + else: + n_qns = tbl['QNFMT'][0] % 10 + if n_qns > 1: + qnlen = 2 * n_qns + for ii in range(n_qns): + if tbl["QN'"].dtype in (int, np.int32, np.int64): + # for the case where it was already parsed as int + # (53005 is an example) + tbl[f"QN'{ii+1}"] = tbl["QN'"] + tbl[f'QN"{ii+1}'] = tbl['QN"'] + else: + # string parsing can truncate to length=2n or 2n-1 depending + # on whether there are any two-digit QNs in the column + ind1 = ii * 2 + ind2 = ii * 2 + 2 + # rjust(qnlen) is needed to enforce that all strings retain their exact original shape + qnp = [int_or_pm(line.rjust(qnlen)[ind1: ind2].strip()) for line in tbl['QN\'']] + qnpp = [int_or_pm(line.rjust(qnlen)[ind1: ind2].strip()) for line in tbl['QN"']] + dtype = str if any('+' in str(x) for x in qnp) else int + tbl[f"QN'{ii+1}"] = np.array(qnp, dtype=dtype) + tbl[f'QN"{ii+1}'] = np.array(qnpp, dtype=dtype) + del tbl['QN\''] + del tbl['QN"'] + else: + tbl['QN\''] = np.array(list(map(parse_letternumber, tbl['QN\''])), dtype=int) + tbl['QN"'] = np.array(list(map(parse_letternumber, tbl['QN"'])), dtype=int) + + result = table.vstack(tables) + + # Add laboratory 
measurement flag + # A negative TAG value indicates laboratory-measured frequency + result['Lab'] = result['TAG'] < 0 + # Convert TAG to absolute value + result['TAG'] = abs(result['TAG']) + + return result JPLSpec = JPLSpecClass() From 3d4f7f52a8dd40462d6407f35c358e44a51ea621 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sun, 9 Nov 2025 13:52:09 -0500 Subject: [PATCH 40/44] cleanup from last commit --- astroquery/linelists/__init__.py | 4 ++-- astroquery/linelists/cdms/core.py | 3 ++- astroquery/linelists/jplspec/core.py | 3 ++- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/astroquery/linelists/__init__.py b/astroquery/linelists/__init__.py index 9b680e72f7..50ac8b0a1d 100644 --- a/astroquery/linelists/__init__.py +++ b/astroquery/linelists/__init__.py @@ -6,6 +6,6 @@ as well as common utilities for parsing catalog files. """ -from .core import LineListClass, parse_letternumber +from .core import parse_letternumber -__all__ = ['LineListClass', 'parse_letternumber'] +__all__ = ['parse_letternumber'] diff --git a/astroquery/linelists/cdms/core.py b/astroquery/linelists/cdms/core.py index 8e258517d5..6474d7a5be 100644 --- a/astroquery/linelists/cdms/core.py +++ b/astroquery/linelists/cdms/core.py @@ -13,6 +13,7 @@ from astroquery.exceptions import InvalidQueryError, EmptyResponseError from ..core import parse_letternumber from astroquery.utils import process_asyncs +from astroquery import log import re @@ -24,7 +25,7 @@ def data_path(filename): return os.path.join(data_dir, filename) -class CDMSClass(BaseQuery, LineListClass): +class CDMSClass(BaseQuery): # use the Configuration Items imported from __init__.py URL = conf.search SERVER = conf.server diff --git a/astroquery/linelists/jplspec/core.py b/astroquery/linelists/jplspec/core.py index e05f8a7452..f52062c603 100644 --- a/astroquery/linelists/jplspec/core.py +++ b/astroquery/linelists/jplspec/core.py @@ -25,7 +25,7 @@ def data_path(filename): @async_to_sync -class 
JPLSpecClass(BaseQuery, LineListClass): +class JPLSpecClass(BaseQuery): # use the Configuration Items imported from __init__.py URL = conf.server @@ -351,6 +351,7 @@ def _parse_cat(self, response, *, verbose=False): Table : `~astropy.table.Table` Parsed catalog data. """ + text = response.text if 'Zero lines were found' in text or len(text.strip()) == 0: raise EmptyResponseError(f"Response was empty; message was '{text}'.") From e0c46e8fce4e1f8eedbf631cbb37f68a5bfa2c1f Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sun, 9 Nov 2025 14:17:44 -0500 Subject: [PATCH 41/44] add missing test file --- astroquery/linelists/cdms/setup_package.py | 1 + astroquery/linelists/cdms/tests/data/c058501.cat | 3 +++ 2 files changed, 4 insertions(+) create mode 100644 astroquery/linelists/cdms/tests/data/c058501.cat diff --git a/astroquery/linelists/cdms/setup_package.py b/astroquery/linelists/cdms/setup_package.py index 9aa4bd311e..64d5cbb99f 100644 --- a/astroquery/linelists/cdms/setup_package.py +++ b/astroquery/linelists/cdms/setup_package.py @@ -9,6 +9,7 @@ def get_package_data(): paths_test = [os.path.join('data', '028503 CO, v=0.data'), os.path.join('data', '117501 HC7S.data'), os.path.join('data', '099501 HC7N, v=0.data'), + os.path.join('data', 'c058501.cat'), os.path.join('data', 'post_response.html'), ] diff --git a/astroquery/linelists/cdms/tests/data/c058501.cat b/astroquery/linelists/cdms/tests/data/c058501.cat new file mode 100644 index 0000000000..3c7acdb7f6 --- /dev/null +++ b/astroquery/linelists/cdms/tests/data/c058501.cat @@ -0,0 +1,3 @@ + 114.9627 0.0001-10.6817 3 9.7413 9 58501 303 1 1 0 1 1 1 + 344.8868 0.0002 -9.9842 3 10.4849 15 58501 303 2 1 1 2 1 2 + 689.7699 0.0004 -9.5394 3 11.6003 21 58501 303 3 1 2 3 1 3 \ No newline at end of file From ca2d8fffa98c017857a8adfcad36a22a270c3ea8 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sun, 9 Nov 2025 14:21:36 -0500 Subject: [PATCH 42/44] whitespace --- astroquery/linelists/core.py 
| 6 ------ 1 file changed, 6 deletions(-) diff --git a/astroquery/linelists/core.py b/astroquery/linelists/core.py index 1a3adc8e44..ba5bc5b6f9 100644 --- a/astroquery/linelists/core.py +++ b/astroquery/linelists/core.py @@ -4,12 +4,6 @@ """ import numpy as np import string -import astropy.units as u -from astropy.io import ascii -from astroquery.exceptions import EmptyResponseError -from astroquery import log -from astropy import table - __all__ = ['parse_letternumber'] From 00c578f1c887f71b0d2e6413def7bae45264cef9 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sun, 9 Nov 2025 14:22:33 -0500 Subject: [PATCH 43/44] refactor jplspec to not use async machinery --- astroquery/linelists/jplspec/core.py | 39 ++++++++++++++++--- .../linelists/jplspec/tests/test_jplspec.py | 23 ++++------- .../jplspec/tests/test_jplspec_remote.py | 16 ++++---- 3 files changed, 48 insertions(+), 30 deletions(-) diff --git a/astroquery/linelists/jplspec/core.py b/astroquery/linelists/jplspec/core.py index f52062c603..a97636029f 100644 --- a/astroquery/linelists/jplspec/core.py +++ b/astroquery/linelists/jplspec/core.py @@ -7,12 +7,12 @@ from astropy.io import ascii from astropy import table from ...query import BaseQuery -from ...utils import async_to_sync from ..core import parse_letternumber # import configurable items declared in __init__.py from . import conf from . 
import lookup_table from astroquery.exceptions import EmptyResponseError, InvalidQueryError +from astroquery.utils import process_asyncs from urllib.parse import parse_qs @@ -24,16 +24,14 @@ def data_path(filename): return os.path.join(data_dir, filename) -@async_to_sync class JPLSpecClass(BaseQuery): # use the Configuration Items imported from __init__.py URL = conf.server TIMEOUT = conf.timeout - def __init__(self, fallback_to_getmolecule=True): + def __init__(self): super().__init__() - self.fallback_to_getmolecule = fallback_to_getmolecule def query_lines_async(self, min_frequency, max_frequency, *, min_strength=-500, @@ -139,7 +137,36 @@ def query_lines_async(self, min_frequency, max_frequency, *, return response - def _parse_result(self, response, *, verbose=False): + def query_lines(self, min_frequency, max_frequency, *, + min_strength=-500, + max_lines=2000, molecule='All', flags=0, + parse_name_locally=False, + get_query_payload=False, + fallback_to_getmolecule=False, + cache=True): + """ + Query the JPLSpec service for spectral lines. + + This is a synchronous version of `query_lines_async`. + See `query_lines_async` for full parameter documentation. 
+ """ + response = self.query_lines_async(min_frequency=min_frequency, + max_frequency=max_frequency, + min_strength=min_strength, + max_lines=max_lines, + molecule=molecule, + flags=flags, + parse_name_locally=parse_name_locally, + get_query_payload=get_query_payload, + cache=cache) + if get_query_payload: + return response + else: + return self._parse_result(response, fallback_to_getmolecule=fallback_to_getmolecule) + + query_lines.__doc__ = process_asyncs.async_to_sync_docstr(query_lines_async.__doc__) + + def _parse_result(self, response, *, verbose=False, fallback_to_getmolecule=False): """ Parse a response into an `~astropy.table.Table` @@ -171,7 +198,7 @@ def _parse_result(self, response, *, verbose=False): """ if 'Zero lines were found' in response.text: - if self.fallback_to_getmolecule: + if fallback_to_getmolecule: self.lookup_ids = build_lookup() payload = parse_qs(response.request.body) tbs = [self.get_molecule(mol) for mol in payload['Mol']] diff --git a/astroquery/linelists/jplspec/tests/test_jplspec.py b/astroquery/linelists/jplspec/tests/test_jplspec.py index 369a0818fd..7c67812787 100644 --- a/astroquery/linelists/jplspec/tests/test_jplspec.py +++ b/astroquery/linelists/jplspec/tests/test_jplspec.py @@ -229,12 +229,10 @@ def test_fallback_to_getmolecule_with_empty_response(): mock_response = _create_empty_response('18003') # Test with fallback disabled - should raise EmptyResponseError - JPLSpec.fallback_to_getmolecule = False with pytest.raises(EmptyResponseError, match="Response was empty"): - JPLSpec._parse_result(mock_response) + JPLSpec._parse_result(mock_response, fallback_to_getmolecule=False) # Test with fallback enabled - should call get_molecule - JPLSpec.fallback_to_getmolecule = True molecules = {'18003': ('H2O', {'FREQ': [100.0, 200.0]})} with patch.object(JPLSpec, 'get_molecule') as mock_get_molecule, \ @@ -244,7 +242,7 @@ def test_fallback_to_getmolecule_with_empty_response(): mock_get_molecule.side_effect = get_mol_func 
mock_build_lookup.return_value = mock_lookup - result = JPLSpec._parse_result(mock_response) + result = JPLSpec._parse_result(mock_response, fallback_to_getmolecule=True) mock_get_molecule.assert_called_once_with('18003') assert isinstance(result, Table) @@ -252,14 +250,11 @@ def test_fallback_to_getmolecule_with_empty_response(): assert result.meta['molecule_id'] == '18003' assert result.meta['molecule_name'] == 'H2O' - JPLSpec.fallback_to_getmolecule = True - def test_fallback_to_getmolecule_with_multiple_molecules(): """Test fallback with multiple molecules in the request.""" mock_response = _create_empty_response(['18003', '28001']) - JPLSpec.fallback_to_getmolecule = True molecules = { '18003': ('H2O', {'FREQ': [100.0, 200.0]}), '28001': ('CO', {'FREQ': [300.0, 400.0]}) @@ -272,7 +267,7 @@ def test_fallback_to_getmolecule_with_multiple_molecules(): mock_get_molecule.side_effect = get_mol_func mock_build_lookup.return_value = mock_lookup - result = JPLSpec._parse_result(mock_response) + result = JPLSpec._parse_result(mock_response, fallback_to_getmolecule=True) assert mock_get_molecule.call_count == 2 assert isinstance(result, Table) @@ -280,14 +275,11 @@ def test_fallback_to_getmolecule_with_multiple_molecules(): assert 'molecule_list' in result.meta assert 'Name' in result.colnames - JPLSpec.fallback_to_getmolecule = True - def test_query_lines_with_fallback(): """Test that query_lines uses fallback when server returns empty result.""" # Test with fallback disabled - should raise EmptyResponseError - JPLSpec.fallback_to_getmolecule = False with patch.object(JPLSpec, '_request') as mock_request: mock_response = _create_empty_response('28001') mock_response.raise_for_status = Mock() @@ -297,10 +289,10 @@ def test_query_lines_with_fallback(): JPLSpec.query_lines(min_frequency=100 * u.GHz, max_frequency=200 * u.GHz, min_strength=-500, - molecule="28001 CO") + molecule="28001 CO", + fallback_to_getmolecule=False) # Test with fallback enabled - should call 
get_molecule - JPLSpec.fallback_to_getmolecule = True molecules = {'28001': ('CO', { 'FREQ': [115271.2018, 230538.0000], 'ERR': [0.0005, 0.0010], @@ -327,11 +319,10 @@ def test_query_lines_with_fallback(): min_frequency=100 * u.GHz, max_frequency=200 * u.GHz, min_strength=-500, - molecule="28001 CO") + molecule="28001 CO", + fallback_to_getmolecule=True) mock_get_molecule.assert_called_once_with('28001') assert isinstance(result, Table) assert len(result) > 0 assert 'molecule_id' in result.meta - - JPLSpec.fallback_to_getmolecule = True diff --git a/astroquery/linelists/jplspec/tests/test_jplspec_remote.py b/astroquery/linelists/jplspec/tests/test_jplspec_remote.py index 33e64c0c8f..a9968421bf 100644 --- a/astroquery/linelists/jplspec/tests/test_jplspec_remote.py +++ b/astroquery/linelists/jplspec/tests/test_jplspec_remote.py @@ -9,11 +9,11 @@ @pytest.mark.xfail(reason="2025 server problems", raises=EmptyResponseError) @pytest.mark.remote_data def test_remote(): - JPLSpec.fallback_to_getmolecule = False tbl = JPLSpec.query_lines(min_frequency=500 * u.GHz, max_frequency=1000 * u.GHz, min_strength=-500, - molecule="18003 H2O") + molecule="18003 H2O", + fallback_to_getmolecule=False) assert isinstance(tbl, Table) assert len(tbl) == 36 assert set(tbl.keys()) == set(['FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', @@ -32,11 +32,11 @@ def test_remote_regex_fallback(): CO, H13CN, HC15N Some of these have different combinations of QNs """ - JPLSpec.fallback_to_getmolecule = True tbl = JPLSpec.query_lines(min_frequency=500 * u.GHz, max_frequency=1000 * u.GHz, min_strength=-500, - molecule=("28001", "28002", "28003")) + molecule=("28001", "28002", "28003"), + fallback_to_getmolecule=True) assert isinstance(tbl, Table) tbl = tbl[((tbl['FREQ'].quantity > 500*u.GHz) & (tbl['FREQ'].quantity < 1*u.THz))] assert len(tbl) == 16 @@ -57,11 +57,11 @@ def test_remote_regex_fallback(): @pytest.mark.xfail(reason="2025 server problems", raises=EmptyResponseError) @pytest.mark.remote_data def 
test_remote_regex(): - JPLSpec.fallback_to_getmolecule = False tbl = JPLSpec.query_lines(min_frequency=500 * u.GHz, max_frequency=1000 * u.GHz, min_strength=-500, - molecule=("28001", "28002", "28003")) + molecule=("28001", "28002", "28003"), + fallback_to_getmolecule=False) assert isinstance(tbl, Table) assert len(tbl) == 16 assert set(tbl.keys()) == set(['FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP', @@ -125,11 +125,11 @@ def test_get_molecule_string_id(): @pytest.mark.remote_data def test_remote_fallback(): - JPLSpec.fallback_to_getmolecule = True tbl = JPLSpec.query_lines(min_frequency=500 * u.GHz, max_frequency=1000 * u.GHz, min_strength=-500, - molecule="18003 H2O") + molecule="18003 H2O", + fallback_to_getmolecule=True) assert isinstance(tbl, Table) tbl = tbl[((tbl['FREQ'].quantity > 500*u.GHz) & (tbl['FREQ'].quantity < 1*u.THz))] assert len(tbl) == 36 From ad16c89938f797b5a26d100b2ba906fc4ba2911c Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sun, 9 Nov 2025 14:22:55 -0500 Subject: [PATCH 44/44] whitespace --- astroquery/linelists/jplspec/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/astroquery/linelists/jplspec/core.py b/astroquery/linelists/jplspec/core.py index a97636029f..31ffef997c 100644 --- a/astroquery/linelists/jplspec/core.py +++ b/astroquery/linelists/jplspec/core.py @@ -147,7 +147,7 @@ def query_lines(self, min_frequency, max_frequency, *, """ Query the JPLSpec service for spectral lines. - This is a synchronous version of `query_lines_async`. + This is a synchronous version of `query_lines_async`. See `query_lines_async` for full parameter documentation. """ response = self.query_lines_async(min_frequency=min_frequency,