Skip to content

Commit 6a49b89

Browse files
Gerit Wagner (author) committed "revise parse_query()"
1 parent a43ccce commit 6a49b89

File tree

1 file changed

+25
-14
lines changed

1 file changed

+25
-14
lines changed

search_query/parser_pubmed.py

Lines changed: 25 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -197,21 +197,28 @@ def get_operator_type(self, operator: str) -> str:
197197
def parse_query_tree(self, tokens: list) -> Query:
198198
"""Parse a query from a list of tokens"""
199199

200-
operator_indices = self._get_operator_indices(tokens)
201-
202-
if operator_indices:
203-
query = self._parse_compound_query(tokens, operator_indices)
204-
return query
205-
206-
current_token = tokens[0][0]
200+
if self.is_compound_query(tokens):
201+
query = self._parse_compound_query(tokens)
207202

208-
if current_token == "(":
203+
elif self.is_nested_query(tokens):
209204
query = self._parse_nested_query(tokens)
210-
return query
211205

212-
if self.is_term(current_token):
206+
elif self.is_term_query(tokens):
213207
query = self._parse_search_term(tokens)
214-
return query
208+
209+
else:
210+
raise ValueError()
211+
212+
return query
213+
214+
def is_term_query(self, tokens):
215+
return self.is_term(tokens[0][0]) and len(tokens) == 1
216+
217+
def is_compound_query(self, tokens):
218+
return bool(self._get_operator_indices(tokens))
219+
220+
def is_nested_query(self, tokens):
221+
return tokens[0][0] == "(" and tokens[-1][0] == ")"
215222

216223
def _get_operator_indices(self, tokens: list) -> list:
217224
"""Get indices of top-level operators in the token list"""
@@ -243,10 +250,10 @@ def _get_operator_indices(self, tokens: list) -> list:
243250

244251
return operator_indices
245252

246-
def _parse_compound_query(self, tokens: list, operator_indices: list) -> Query:
253+
def _parse_compound_query(self, tokens: list) -> Query:
247254
"""Parse a compound query consisting of two or more subqueries connected by a boolean operator"""
248-
query_start_pos = tokens[0][1][0]
249-
query_end_pos = tokens[-1][1][1]
255+
256+
operator_indices = self._get_operator_indices(tokens)
250257

251258
# Divide tokens into separate lists based on top-level operator positions.
252259
token_lists = []
@@ -262,8 +269,12 @@ def _parse_compound_query(self, tokens: list, operator_indices: list) -> Query:
262269
query = self.parse_query_tree(token_list)
263270
children.append(query)
264271

272+
# TODO : assert operators equal?
265273
operator_type = self.get_operator_type(tokens[operator_indices[0]][0])
266274

275+
query_start_pos = tokens[0][1][0]
276+
query_end_pos = tokens[-1][1][1]
277+
267278
return Query(
268279
value=operator_type,
269280
operator=True,

0 commit comments

Comments
 (0)