Skip to content

Commit 5fa9f7a

Browse files
keflavichburnout87
authored and committed
revise the docstring to more accurately... and verbosely, and maybe
confusingly.... describe how string matching is done. Also do some minor performance cleanup
1 parent 9cc42f4 commit 5fa9f7a

File tree

1 file changed

+13
-17
lines changed

1 file changed

+13
-17
lines changed

astroquery/linelists/cdms/core.py

Lines changed: 13 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -53,8 +53,13 @@ def query_lines_async(self, min_frequency, max_frequency, *,
5353
molecule : list, string of regex if parse_name_locally=True, optional
5454
Identifiers of the molecules to search for. If this parameter
5555
is not provided the search will match any species. Default is 'All'.
56-
Note that if the molecule name contains parentheses, they must be
57-
escaped. For exmaple, 'H2C(CN)2' must be specified as 'H2C\\(CN\\)2'.
56+
As a first pass, the molecule will be searched for with a direct
57+
string match. If no string match is found, a regular expression
58+
match is attempted. Note that if the molecule name regex contains
59+
parentheses, they must be escaped. For example, 'H2C(CN)2.*' must be
60+
specified as 'H2C\\(CN\\)2.*' (but because of the first-attempt
61+
full-string matching, 'H2C(CN)2' will match that molecule
62+
successfully).
5863
5964
temperature_for_intensity : float
6065
The temperature to use when computing the intensity Smu^2. Set
@@ -126,15 +131,6 @@ def query_lines_async(self, min_frequency, max_frequency, *,
126131
self._last_query_temperature = temperature_for_intensity
127132

128133
if molecule is not None:
129-
130-
# escape parentheses in molecule names if needed
131-
# (assumes _no_ escapes done; if you give 'XY\(ZG)', i.e.,
132-
# escape one and not the other, this won't work)
133-
# if re.search("[()]", molecule):
134-
# if len(re.findall(r'\(', molecule)) != len(re.findall(r'(', molecule)):
135-
# molecule = re.sub(r'(', r'\(', molecule)
136-
# if len(re.findall(r'\)', molecule)) != len(re.findall(r')', molecule))
137-
# molecule = re.sub(r')', r'\)', molecule)
138134
if parse_name_locally:
139135
self.lookup_ids = build_lookup()
140136
luts = self.lookup_ids.find(molecule, flags)
@@ -339,11 +335,11 @@ def parse_letternumber(st):
339335
"""
340336
Parse CDMS's two-letter QNs
341337
342-
Very Important:
343-
Exactly two characters are available for each quantum number. Therefore, half
338+
From the CDMS docs:
339+
"Exactly two characters are available for each quantum number. Therefore, half
344340
integer quanta are rounded up ! In addition, capital letters are used to
345341
indicate quantum numbers larger than 99. E. g. A0 is 100, Z9 is 359. Small
346-
types are used to signal corresponding negative quantum numbers.
342+
types are used to signal corresponding negative quantum numbers."
347343
"""
348344
asc = string.ascii_lowercase
349345
ASC = string.ascii_uppercase
@@ -377,12 +373,12 @@ def find(self, st, flags):
377373
378374
"""
379375

380-
R = re.compile(st, flags)
381-
382376
out = {}
383377

384378
for kk, vv in self.items():
385-
match = (st in kk) or R.search(str(kk))
379+
# note that the string-match attempt here differs from the jplspec
380+
# implementation
381+
match = (st in kk) or re.search(st, str(kk), flags=flags)
386382
if match:
387383
out[kk] = vv
388384

0 commit comments

Comments
 (0)