Skip to content

Commit a4e5711

Browse files
committed
improve error message for bad molecule parsing
1 parent c94ad8d commit a4e5711

File tree

2 files changed

+70
-42
lines changed

2 files changed

+70
-42
lines changed

astroquery/linelists/cdms/core.py

Lines changed: 47 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ class CDMSClass(BaseQuery):
3232
SERVER = conf.server
3333
CLASSIC_URL = conf.classic_server
3434
TIMEOUT = conf.timeout
35-
MALFORMATTED_MOLECULE_LIST = ['017506 NH3-wHFS', '028582 H2NC', '058501 H2C2S', '064527 HC3HCN']
35+
MALFORMATTED_MOLECULE_LIST = ['017506 NH3-wHFS', '028528 H2NC', '058501 H2C2S', '064527 HC3HCN']
3636

3737
def query_lines_async(self, min_frequency, max_frequency, *,
3838
min_strength=-500, molecule='All',
@@ -278,40 +278,48 @@ def _parse_result(self, response, *, verbose=False):
278278
'F3l': 83,
279279
'name': 89}
280280

281-
result = ascii.read(text, header_start=None, data_start=0,
282-
comment=r'THIS|^\s{12,14}\d{4,6}.*',
283-
names=list(starts.keys()),
284-
col_starts=list(starts.values()),
285-
format='fixed_width', fast_reader=False)
286-
287-
result['FREQ'].unit = u.MHz
288-
result['ERR'].unit = u.MHz
289-
290-
result['MOLWT'] = [int(x/1e3) for x in result['TAG']]
291-
result['Lab'] = result['MOLWT'] < 0
292-
result['MOLWT'] = np.abs(result['MOLWT'])
293-
result['MOLWT'].unit = u.Da
294-
295-
fix_keys = ['GUP']
296-
for suf in 'ul':
297-
for qn in ('J', 'v', 'K', 'F1', 'F2', 'F3'):
298-
qnind = qn+suf
299-
fix_keys.append(qnind)
300-
for key in fix_keys:
301-
if not np.issubdtype(result[key].dtype, np.integer):
302-
intcol = np.array(list(map(parse_letternumber, result[key])),
303-
dtype=int)
304-
result[key] = intcol
305-
306-
# if there is a crash at this step, something went wrong with the query
307-
# and the _last_query_temperature was not set. This shouldn't ever
308-
# happen, but, well, I anticipate it will.
309-
if self._last_query_temperature == 0:
310-
result.rename_column('LGINT', 'LGAIJ')
311-
result['LGAIJ'].unit = u.s**-1
312-
else:
313-
result['LGINT'].unit = u.nm**2 * u.MHz
314-
result['ELO'].unit = u.cm**(-1)
281+
try:
282+
result = ascii.read(text, header_start=None, data_start=0,
283+
comment=r'THIS|^\s{12,14}\d{4,6}.*',
284+
names=list(starts.keys()),
285+
col_starts=list(starts.values()),
286+
format='fixed_width', fast_reader=False)
287+
288+
result['FREQ'].unit = u.MHz
289+
result['ERR'].unit = u.MHz
290+
291+
result['MOLWT'] = [int(x/1e3) for x in result['TAG']]
292+
result['Lab'] = result['MOLWT'] < 0
293+
result['MOLWT'] = np.abs(result['MOLWT'])
294+
result['MOLWT'].unit = u.Da
295+
296+
fix_keys = ['GUP']
297+
for suf in 'ul':
298+
for qn in ('J', 'v', 'K', 'F1', 'F2', 'F3'):
299+
qnind = qn+suf
300+
fix_keys.append(qnind)
301+
for key in fix_keys:
302+
if not np.issubdtype(result[key].dtype, np.integer):
303+
intcol = np.array(list(map(parse_letternumber, result[key])),
304+
dtype=int)
305+
result[key] = intcol
306+
307+
# if there is a crash at this step, something went wrong with the query
308+
# and the _last_query_temperature was not set. This shouldn't ever
309+
# happen, but, well, I anticipate it will.
310+
if self._last_query_temperature == 0:
311+
result.rename_column('LGINT', 'LGAIJ')
312+
result['LGAIJ'].unit = u.s**-1
313+
else:
314+
result['LGINT'].unit = u.nm**2 * u.MHz
315+
result['ELO'].unit = u.cm**(-1)
316+
except ValueError as ex:
317+
# Give users a more helpful exception when parsing fails
318+
original_message = str(ex)
319+
new_message = ("Failed to parse CDMS response. This may be caused by a malformed search return. "
320+
"You can check this by running `CDMS.get_molecule('<id>')` instead; if it works, the "
321+
"problem is caused by the CDMS search interface and cannot be worked around.")
322+
raise ValueError(new_message) from ex
315323

316324
return result
317325

@@ -421,25 +429,24 @@ def get_molecule(self, molecule_id, *, cache=True, return_response=False):
421429
timeout=self.TIMEOUT, cache=cache)
422430
if return_response:
423431
return response
424-
result = self._parse_cat(response)
432+
result = self._parse_cat(response.text)
425433

426434
species_table = self.get_species_table()
427435
result.meta = dict(species_table.loc[int(molecule_id)])
428436

429437
return result
430438

431-
def _parse_cat(self, response, *, verbose=False):
439+
def _parse_cat(self, text, *, verbose=False):
432440
"""
433441
Parse a catalog response into an `~astropy.table.Table`
434442
435443
See details in _parse_result; this is a very similar function,
436444
but the catalog responses have a slightly different format.
437445
"""
438446

439-
if 'Zero lines were found' in response.text:
440-
raise EmptyResponseError(f"Response was empty; message was '{response.text}'.")
447+
if 'Zero lines were found' in text:
448+
raise EmptyResponseError(f"Response was empty; message was '{text}'.")
441449

442-
text = response.text
443450

444451
# notes about the format
445452
# [F13.4, 2F8.4, I2, F10.4, I3, I7, I4, 12I2]: FREQ, ERR, LGINT, DR, ELO, GUP, TAG, QNFMT, QN noqa

astroquery/linelists/cdms/tests/test_cdms_remote.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,16 +89,37 @@ def test_propanediol():
8989
assert tbl['TAG'][0] == 76513
9090

9191

92+
@pytest.mark.remote_data
93+
@pytest.mark.xfail(reason="CDMS entry for H2NC is malformed")
94+
def test_h2nc():
95+
tbl1 = CDMS.get_molecule('028528')
96+
assert 'int' in tbl1['Q2'].dtype.name
97+
98+
tbl = CDMS.query_lines(min_frequency=139.3 * u.GHz,
99+
max_frequency=141.5 * u.GHz,
100+
molecule='028528 H2NC')
101+
102+
# these are the results that SHOULD be returned if it actually worked
103+
assert isinstance(tbl, Table)
104+
assert len(tbl) >= 1
105+
assert 'H2NC' in tbl['name']
106+
# check that the parser worked - this will be string or obj otherwise
107+
assert 'int' in tbl['Ku'].dtype.name
108+
assert tbl['MOLWT'][0] == 28
109+
assert tbl['TAG'][0] == 28528
110+
111+
92112
@pytest.mark.remote_data
93113
def test_remote_regex():
94114

95115
tbl = CDMS.query_lines(min_frequency=500 * u.GHz,
96116
max_frequency=600 * u.GHz,
97117
min_strength=-500,
98-
molecule=('028501 HC-13-N, v=0', '028502 H2CN' '028503 CO, v=0'))
118+
molecule=('028501 HC-13-N, v=0', '028502 H2CN', '028503 CO, v=0'))
99119

100120
assert isinstance(tbl, Table)
101-
assert len(tbl) == 557
121+
# regression test fix: there's 1 CO line that got missed because of a missing comma
122+
assert len(tbl) == 558
102123
assert set(tbl.keys()) == colname_set
103124

104125
assert set(tbl['name']) == {'H2CN', 'HC-13-N, v=0'}

0 commit comments

Comments
 (0)