Skip to content

Commit 83697d2

Browse files
author
Gerit Wagner
committed
test/refactor
1 parent 9044169 commit 83697d2

File tree

15 files changed

+291
-14
lines changed

15 files changed

+291
-14
lines changed

docs/source/lint/E0005.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@ E0005 — invalid-proximity-use
55

66
**Error Code**: E0005
77

8-
**Message**: ``Invalid use of the proximity operator :~``
8+
**Message**: ``Invalid use of the proximity operator``
99

10-
**Scope**: PLATFORM.PUBMED
10+
**Scope**: PLATFORM.PUBMED, PLATFORM.EBSCO
1111

12-
**Description**: Invalid use of the proximity operator :~
12+
**Description**: Invalid use of the proximity operator
1313

1414
**Back to**: :ref:`lint`

docs/source/lint/F3003.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@ F3003 — invalid-list-reference
55

66
**Error Code**: F3003
77

8-
**Message**: ``Invalid list reference in list query (not found)``
8+
**Message**: ``Invalid list reference in list query``
99

1010
**Scope**: PLATFORM.WOS, PLATFORM.PUBMED
1111

12-
**Description**: Invalid list reference in list query (not found)
12+
**Description**: Invalid list reference in list query
1313

1414
**Back to**: :ref:`lint`

docs/source/lint/W0015.rst

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
.. _W0015:
2+
3+
W0015 — unsupported-prefix
4+
==========================
5+
6+
**Error Code**: W0015
7+
8+
**Message**: ``Unsupported prefix in search query``
9+
10+
**Scope**: PLATFORM.PUBMED
11+
12+
**Description**: Unsupported prefix in search query
13+
14+
**Back to**: :ref:`lint`

docs/source/lint/W0016.rst

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
.. _W0016:
2+
3+
W0016 — unsupported-suffix
4+
==========================
5+
6+
**Error Code**: W0016
7+
8+
**Message**: ``Unsupported suffix in search query``
9+
10+
**Scope**: PLATFORM.PUBMED
11+
12+
**Description**: Unsupported suffix in search query
13+
14+
**Back to**: :ref:`lint`

docs/source/lint/errors_index.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,3 +105,7 @@ Warnings
105105
W0013
106106

107107
W0014
108+
109+
W0015
110+
111+
W0016

docs/source/platforms/pubmed.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ The advanced PubMed interface offers a dropdown for search fields such as `[Titl
3939
- If you apply the **same field** to the entire query, it is safe to store this value in the `general_search_field`.
4040
- If the query contains **multiple search fields**, `search-query` will treat each term individually, and **leaving `general_search_field` empty is preferred**.
4141

42+
TODO : explain list search: https://library.bath.ac.uk/pubmed/combine-searches
43+
-> searches can be reused/combined with the "add" button (or with "#1" references)
44+
4245
Resources
4346
---------
4447

search_query/constants.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -590,6 +590,20 @@ class QueryErrorCode(Enum):
590590
"Journal (or publication name) filter in subquery",
591591
"",
592592
)
593+
UNSUPPORTED_PREFIX = (
594+
[PLATFORM.PUBMED],
595+
"W0015",
596+
"unsupported-prefix",
597+
"Unsupported prefix in search query",
598+
"",
599+
)
600+
UNSUPPORTED_SUFFIX = (
601+
[PLATFORM.PUBMED],
602+
"W0016",
603+
"unsupported-suffix",
604+
"Unsupported suffix in search query",
605+
"",
606+
)
593607

594608
# pylint: disable=too-many-arguments
595609
# pylint: disable=too-many-positional-arguments

search_query/ebsco/parser.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,9 @@ def parse(self) -> Query:
284284
self.query_str = self.linter.handle_nonstandard_quotes_in_query_str(
285285
self.query_str
286286
)
287+
self.query_str = self.query_str = self.linter.handle_suffix_in_query_str(
288+
self.query_str
289+
)
287290

288291
self.tokenize()
289292

search_query/linter_base.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,81 @@ def check_boolean_operator_readability(
385385
)
386386
# Replace?
387387

388+
def handle_prefix_in_query_str(self, query_str: str) -> str:
    """Remove unsupported text that precedes the first parenthesis.

    Everything before the first ``(`` is treated as an unsupported prefix
    unless the first line of the query contains a Boolean operator
    (``AND``, ``OR`` or ``NOT``), in which case the leading text is
    assumed to be part of the query and nothing is removed.

    When a prefix is removed, an ``UNSUPPORTED_PREFIX`` linter message is
    added whose position covers the prefix (without trailing whitespace).

    Only applies if quotes are balanced (even number of quotes).

    Args:
        query_str: The raw query string.

    Returns:
        The query string without the unsupported prefix, or the unchanged
        string if there is nothing to remove.
    """
    if query_str.count('"') % 2 != 0:
        return query_str  # unbalanced quotes, do not attempt trimming

    prefix_match = re.search(
        r"^(?!.*\b(?:AND|OR|NOT\b)\s*)[^()]*?(?=\()", query_str
    )
    if prefix_match is None:
        return query_str

    # The trailing lookahead does not consume "(", so the match is exactly
    # the text in front of the parenthesis; only whitespace is trimmed.
    prefix = prefix_match.group(0).rstrip()
    if not prefix:
        return query_str

    # The pattern is anchored at the start, so the prefix begins at 0.
    self.add_linter_message(
        QueryErrorCode.UNSUPPORTED_PREFIX,
        positions=[(0, len(prefix))],
        details="Removed unsupported text at the beginning of the query.",
    )

    return query_str[len(prefix) :].lstrip()
424+
425+
def handle_suffix_in_query_str(self, query_str: str) -> str:
    """Remove unsupported text that follows the last closing parenthesis.

    The text after the final ``)`` is treated as an unsupported suffix
    unless it starts with a Boolean operator (``AND``, ``OR`` or ``NOT``)
    or contains parentheses or square brackets. A suffix consisting only
    of whitespace is left untouched.

    When a suffix is removed, an ``UNSUPPORTED_SUFFIX`` linter message is
    added whose position covers the removed text.

    Only applies if quotes are balanced (even number of quotes).

    Args:
        query_str: The raw query string.

    Returns:
        The query string without the unsupported suffix, or the unchanged
        string if there is nothing to remove.
    """
    if query_str.count('"') % 2 != 0:
        return query_str  # unbalanced quotes, do not attempt trimming

    # NOT is a valid connecting operator as well: without it,
    # "(a OR b) NOT c" would silently be truncated to "(a OR b)".
    suffix_match = re.search(
        r"\)(?!\s*(AND|OR|NOT\b))[^()\[\]]*$", query_str
    )
    if suffix_match is None:
        return query_str

    # The match starts with the closing parenthesis, which must be kept.
    suffix = suffix_match.group(0)[1:]
    if not suffix.strip():
        return query_str

    start = suffix_match.start() + 1
    end = start + len(suffix)
    self.add_linter_message(
        QueryErrorCode.UNSUPPORTED_SUFFIX,
        positions=[(start, end)],
        details="Removed unsupported text at the end of the query.",
    )

    return query_str[: -len(suffix)].rstrip()
462+
388463
def handle_fully_quoted_query_str(self, query_str: str) -> str:
389464
"""Handle fully quoted query string."""
390465
if '"' == query_str[0] and '"' == query_str[-1] and "(" in query_str:

search_query/parser_base.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,50 @@ def combine_subsequent_terms(self) -> None:
7373

7474
self.tokens = combined_tokens
7575

76+
def split_operators_with_missing_whitespace(self) -> None:
    """Detach operators that are glued to the end of a search term.

    Walks ``self.tokens`` and, for every search-term token whose value
    ends in an upper-case ``AND``, ``OR`` or ``NOT`` and whose successor
    is not already a logic operator, cuts the operator off the term and
    inserts it as a separate logic-operator token directly behind the
    term. The positions of both tokens are adjusted accordingly.

    The last token is never inspected.
    """
    idx = 0
    # The list grows while iterating, so the bound is re-evaluated each pass.
    while idx < len(self.tokens) - 1:
        term = self.tokens[idx]
        successor = self.tokens[idx + 1]

        trailing = re.search(r"(AND|OR|NOT)$", term.value)

        if (
            term.type != TokenTypes.SEARCH_TERM
            or successor.type == TokenTypes.LOGIC_OPERATOR
            or not trailing
        ):
            idx += 1
            continue

        operator = trailing.group(0)
        width = len(operator)

        # Shrink the term so that it ends where the operator begins.
        term.value = term.value[:-width]
        term.position = (term.position[0], term.position[1] - width)

        boundary = term.position[1]
        self.tokens.insert(
            idx + 1,
            Token(
                value=operator,
                type=TokenTypes.LOGIC_OPERATOR,
                position=(boundary, boundary + width),
            ),
        )

        # Jump past the term and the operator token that was just added.
        idx += 2
119+
76120
@abstractmethod
77121
def parse(self) -> Query:
78122
"""Parse the query."""

0 commit comments

Comments
 (0)