Skip to content
Open
87 changes: 47 additions & 40 deletions astroquery/linelists/cdms/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class CDMSClass(BaseQuery):
SERVER = conf.server
CLASSIC_URL = conf.classic_server
TIMEOUT = conf.timeout
MALFORMATTED_MOLECULE_LIST = ['017506 NH3-wHFS', '028582 H2NC', '058501 H2C2S', '064527 HC3HCN']
MALFORMATTED_MOLECULE_LIST = ['017506 NH3-wHFS', '028528 H2NC', '058501 H2C2S', '064527 HC3HCN']

def query_lines_async(self, min_frequency, max_frequency, *,
min_strength=-500, molecule='All',
Expand Down Expand Up @@ -278,40 +278,48 @@ def _parse_result(self, response, *, verbose=False):
'F3l': 83,
'name': 89}

result = ascii.read(text, header_start=None, data_start=0,
comment=r'THIS|^\s{12,14}\d{4,6}.*',
names=list(starts.keys()),
col_starts=list(starts.values()),
format='fixed_width', fast_reader=False)

result['FREQ'].unit = u.MHz
result['ERR'].unit = u.MHz

result['MOLWT'] = [int(x/1e3) for x in result['TAG']]
result['Lab'] = result['MOLWT'] < 0
result['MOLWT'] = np.abs(result['MOLWT'])
result['MOLWT'].unit = u.Da

fix_keys = ['GUP']
for suf in 'ul':
for qn in ('J', 'v', 'K', 'F1', 'F2', 'F3'):
qnind = qn+suf
fix_keys.append(qnind)
for key in fix_keys:
if not np.issubdtype(result[key].dtype, np.integer):
intcol = np.array(list(map(parse_letternumber, result[key])),
dtype=int)
result[key] = intcol

# if there is a crash at this step, something went wrong with the query
# and the _last_query_temperature was not set. This shouldn't ever
# happen, but, well, I anticipate it will.
if self._last_query_temperature == 0:
result.rename_column('LGINT', 'LGAIJ')
result['LGAIJ'].unit = u.s**-1
else:
result['LGINT'].unit = u.nm**2 * u.MHz
result['ELO'].unit = u.cm**(-1)
try:
result = ascii.read(text, header_start=None, data_start=0,
comment=r'THIS|^\s{12,14}\d{4,6}.*',
names=list(starts.keys()),
col_starts=list(starts.values()),
format='fixed_width', fast_reader=False)

result['FREQ'].unit = u.MHz
result['ERR'].unit = u.MHz

result['MOLWT'] = [int(x/1e3) for x in result['TAG']]
result['Lab'] = result['MOLWT'] < 0
result['MOLWT'] = np.abs(result['MOLWT'])
result['MOLWT'].unit = u.Da

fix_keys = ['GUP']
for suf in 'ul':
for qn in ('J', 'v', 'K', 'F1', 'F2', 'F3'):
qnind = qn+suf
fix_keys.append(qnind)
for key in fix_keys:
if not np.issubdtype(result[key].dtype, np.integer):
intcol = np.array(list(map(parse_letternumber, result[key])),
dtype=int)
result[key] = intcol

# if there is a crash at this step, something went wrong with the query
# and the _last_query_temperature was not set. This shouldn't ever
# happen, but, well, I anticipate it will.
if self._last_query_temperature == 0:
result.rename_column('LGINT', 'LGAIJ')
result['LGAIJ'].unit = u.s**-1
else:
result['LGINT'].unit = u.nm**2 * u.MHz
result['ELO'].unit = u.cm**(-1)
except ValueError as ex:
# Give users a more helpful exception when parsing fails
original_message = str(ex)
new_message = ("Failed to parse CDMS response. This may be caused by a malformed search return. "
"You can check this by running `CDMS.get_molecule('<id>')` instead; if it works, the "
"problem is caused by the CDMS search interface and cannot be worked around.")
raise ValueError(new_message) from ex

return result

Expand Down Expand Up @@ -421,25 +429,24 @@ def get_molecule(self, molecule_id, *, cache=True, return_response=False):
timeout=self.TIMEOUT, cache=cache)
if return_response:
return response
result = self._parse_cat(response)
result = self._parse_cat(response.text)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shall we raise_for_status prior to this? Or maybe can we even upstream that raise for status and do it inside _request()? -- that case should/would be a follow-up rather than in this PR.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, just adding the raise_for_status here for now. Doing it in request is an idea, but then it changes the behavior of request from being near-equivalent to request.get/request.post to being very different - I don't like that, it will have a lot of secondary effects that I expect would make debugging harder.


species_table = self.get_species_table()
result.meta = dict(species_table.loc[int(molecule_id)])

return result

def _parse_cat(self, response, *, verbose=False):
def _parse_cat(self, text, *, verbose=False):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nitpick, but maybe call it text_response :)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this function works on any .cat file from CDMS, so I'd like to consider making it public and leaving this as-is. Maybe that can be a different PR though.

"""
Parse a catalog response into an `~astropy.table.Table`

See details in _parse_response; this is a very similar function,
but the catalog responses have a slightly different format.
"""

if 'Zero lines were found' in response.text:
raise EmptyResponseError(f"Response was empty; message was '{response.text}'.")
if 'Zero lines were found' in text:
raise EmptyResponseError(f"Response was empty; message was '{text}'.")

text = response.text

# notes about the format
# [F13.4, 2F8.4, I2, F10.4, I3, I7, I4, 12I2]: FREQ, ERR, LGINT, DR, ELO, GUP, TAG, QNFMT, QN noqa
Expand Down
25 changes: 23 additions & 2 deletions astroquery/linelists/cdms/tests/test_cdms_remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,16 +89,37 @@ def test_propanediol():
assert tbl['TAG'][0] == 76513


@pytest.mark.remote_data
@pytest.mark.xfail(reason="CDMS entry for H2NC is malformed")
def test_h2nc():
tbl1 = CDMS.get_molecule('028528')
assert 'int' in tbl1['Q2'].dtype.name

tbl = CDMS.query_lines(min_frequency=139.3 * u.GHz,
max_frequency=141.5 * u.GHz,
molecule='028528 H2NC')

# these are the results that SHOULD be return if it actually worked
assert isinstance(tbl, Table)
assert len(tbl) >= 1
assert 'H2NC' in tbl['name']
# check that the parser worked - this will be string or obj otherwise
assert 'int' in tbl['Ku'].dtype.name
assert tbl['MOLWT'][0] == 28
assert tbl['TAG'][0] == 28528


@pytest.mark.remote_data
def test_remote_regex():

tbl = CDMS.query_lines(min_frequency=500 * u.GHz,
max_frequency=600 * u.GHz,
min_strength=-500,
molecule=('028501 HC-13-N, v=0', '028502 H2CN' '028503 CO, v=0'))
molecule=('028501 HC-13-N, v=0', '028502 H2CN', '028503 CO, v=0'))

assert isinstance(tbl, Table)
assert len(tbl) == 557
# regression test fix: there's 1 CO line that got missed because of a missing comma
assert len(tbl) == 558
assert set(tbl.keys()) == colname_set

assert set(tbl['name']) == {'H2CN', 'HC-13-N, v=0'}
Expand Down