Skip to content

Commit e7e71ea

Browse files
ParfenovS authored and keflavich committed
Support CDMS all species option; fix format for CDMS linelist reading; fix CDMS quantum numbers parsing
1) Adding support for CDMS queries with lines of all species. 2) Fixing the CDMS lines list parsing. Support CDMS all species option; fix format for CDMS linelist reading; fix CDMS quantum numbers parsing. Adding test for a new functionality when all species are requested from CDMS.
1 parent 660eb7d commit e7e71ea

File tree

2 files changed

+59
-19
lines changed

2 files changed

+59
-19
lines changed

astroquery/linelists/cdms/core.py

Lines changed: 41 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,8 @@ def query_lines_async(self, min_frequency, max_frequency, *,
5454
min_strength : int, optional
5555
Minimum strength in catalog units, the default is -500
5656
57-
molecule : list, string of regex if parse_name_locally=True, optional
57+
molecule : list or string if parse_name_locally=False,
58+
string of regex if parse_name_locally=True, optional
5859
Identifiers of the molecules to search for. If this parameter
5960
is not provided the search will match any species. Default is 'All'.
6061
As a first pass, the molecule will be searched for with a direct
@@ -134,18 +135,21 @@ def query_lines_async(self, min_frequency, max_frequency, *,
134135
# changes interpretation of query
135136
self._last_query_temperature = temperature_for_intensity
136137

137-
if molecule is not None:
138-
if parse_name_locally:
139-
self.lookup_ids = build_lookup()
140-
luts = self.lookup_ids.find(molecule, flags)
141-
if len(luts) == 0:
142-
raise InvalidQueryError('No matching species found. Please '
143-
'refine your search or read the Docs '
144-
'for pointers on how to search.')
145-
payload['Molecules'] = tuple(f"{val:06d} {key}"
146-
for key, val in luts.items())[0]
147-
else:
148-
payload['Molecules'] = molecule
138+
if molecule == 'All':
139+
payload['Moleculesgrp'] = 'all species'
140+
else:
141+
if molecule is not None:
142+
if parse_name_locally:
143+
self.lookup_ids = build_lookup()
144+
luts = self.lookup_ids.find(molecule, flags)
145+
if len(luts) == 0:
146+
raise InvalidQueryError('No matching species found. Please '
147+
'refine your search or read the Docs '
148+
'for pointers on how to search.')
149+
payload['Molecules'] = tuple(f"{val:06d} {key}"
150+
for key, val in luts.items())[0]
151+
else:
152+
payload['Molecules'] = molecule
149153

150154
if get_query_payload:
151155
return payload
@@ -180,7 +184,7 @@ def query_lines_async(self, min_frequency, max_frequency, *,
180184
# accounts for three formats, e.g.: '058501' or 'H2C2S' or '058501 H2C2S'
181185
badlist = (self.MALFORMATTED_MOLECULE_LIST + # noqa
182186
[y for x in self.MALFORMATTED_MOLECULE_LIST for y in x.split()])
183-
if payload['Molecules'] in badlist:
187+
if 'Moleculesgrp' not in payload.keys() and payload['Molecules'] in badlist:
184188
raise ValueError(f"Molecule {payload['Molecules']} is known not to comply with standard CDMS format. "
185189
f"Try get_molecule({payload['Molecules']}) instead.")
186190

@@ -233,13 +237,31 @@ def _parse_result(self, response, *, verbose=False):
233237
soup = BeautifulSoup(response.text, 'html.parser')
234238
text = soup.find('pre').text
235239

240+
need_to_filter_bad_molecules = False
241+
for bad_molecule in self.MALFORMATTED_MOLECULE_LIST:
242+
if text.find(bad_molecule.split()[1]) > -1:
243+
need_to_filter_bad_molecules = True
244+
break
245+
if need_to_filter_bad_molecules:
246+
text_new = ''
247+
text = text.split('\n')
248+
for line in text:
249+
need_to_include_line = True
250+
for bad_molecule in self.MALFORMATTED_MOLECULE_LIST:
251+
if line.find(bad_molecule.split()[1]) > -1:
252+
need_to_include_line = False
253+
break
254+
if need_to_include_line:
255+
text_new = text_new + '\n' + line
256+
text = text_new
257+
236258
starts = {'FREQ': 0,
237259
'ERR': 14,
238260
'LGINT': 25,
239261
'DR': 36,
240262
'ELO': 38,
241263
'GUP': 47,
242-
'MOLWT': 51,
264+
'MOLWT': 50,
243265
'TAG': 54,
244266
'QNFMT': 58,
245267
'Ju': 61,
@@ -486,13 +508,13 @@ def parse_letternumber(st):
486508
From the CDMS docs:
487509
"Exactly two characters are available for each quantum number. Therefore, half
488510
integer quanta are rounded up ! In addition, capital letters are used to
489-
indicate quantum numbers larger than 99. E. g. A0 is 100, Z9 is 359. Small
490-
types are used to signal corresponding negative quantum numbers."
511+
indicate quantum numbers larger than 99. E. g. A0 is 100, Z9 is 359. Lower case characters
512+
are used similarly to signal negative quantum numbers smaller than –9. e. g., a0 is –10, b0 is –20, etc."
491513
"""
492514
asc = string.ascii_lowercase
493515
ASC = string.ascii_uppercase
494-
newst = ''.join(['-' + str(asc.index(x)+10) if x in asc else
495-
str(ASC.index(x)+10) if x in ASC else
516+
newst = ''.join(['-' + str((asc.index(x)+1)) if x in asc else
517+
str((ASC.index(x)+10)) if x in ASC else
496518
x for x in st])
497519
return int(newst)
498520

astroquery/linelists/cdms/tests/test_cdms_remote.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,3 +134,21 @@ def test_regression_allcats(self, row):
134134
tag = f"{row['tag']:06d}"
135135
result = CDMS.get_molecule(tag)
136136
assert len(result) >= 1
137+
138+
@pytest.mark.remote_data
139+
def test_remote_all_species():
140+
tbl = CDMS.query_lines(min_frequency=100.3 * u.GHz,
141+
max_frequency=100.5 * u.GHz,
142+
min_strength=-5)
143+
assert isinstance(tbl, Table)
144+
145+
AlS_is_in_table = False
146+
Propanediol_is_in_table = False
147+
for row in tbl:
148+
if row['name'].strip() == 'AlS':
149+
AlS_is_in_table = True
150+
if row['name'].strip() == "aG'g-1,2-Propanediol":
151+
Propanediol_is_in_table = True
152+
153+
assert AlS_is_in_table
154+
assert Propanediol_is_in_table

0 commit comments

Comments
 (0)