Skip to content

Commit 2f52f26

Browse files
author
Gerit Wagner
committed
revise linter messages/QUALITY
1 parent 58989bb commit 2f52f26

File tree

17 files changed

+632
-125
lines changed

17 files changed

+632
-125
lines changed

docs/source/dev_docs/_autosummary/search_query.serializer_structured.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,4 @@ search\_query.serializer\_structured
1414
.. autosummary::
1515

1616
to_string_structured
17+
to_string_structured_2

docs/source/index.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,11 +80,11 @@ Note how the syntax is translated and how the search for :literal:`Title/Abstrac
8080
from search_query.parser import parse
8181
8282
query_string = '("digital health"[Title/Abstract]) AND ("privacy"[Title/Abstract])'
83-
query = parse(query_string, platform="pubmed")
84-
wos_query = query.translate(target_syntax="wos")
83+
pubmed_query = parse(query_string, platform="pubmed")
84+
wos_query = pubmed_query.translate(target_syntax="wos")
8585
print(wos_query.to_string())
8686
# Output:
87-
# ((AB="digital health" OR TI="digital health") AND (AB="privacy" OR TI="privacy"))
87+
# (AB="digital health" OR TI="digital health") AND (AB="privacy" OR TI="privacy")
8888
8989
Demo
9090
============

docs/source/lint/QUALITY_0004.rst

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
.. _QUALITY_0004:
2+
3+
QUALITY_0004 — unnecessary-parentheses
4+
======================================
5+
6+
**Error Code**: QUALITY_0004
7+
8+
**Message**: ``Unnecessary parentheses in queries``
9+
10+
**Problematic query**:
11+
12+
.. code-block:: text
13+
14+
("digital health" OR "eHealth") OR ("remote monitoring" OR "telehealth")
15+
16+
**Recommended query**:
17+
18+
.. code-block:: text
19+
20+
"digital health" OR "eHealth" OR "remote monitoring" OR "telehealth"
21+
22+
**Explanation**: Parentheses are unnecessary when all operators used are **associative and have equal precedence** (like a series of ORs or a series of ANDs). In such cases, the grouping does not influence the evaluation result and adds unnecessary complexity.
23+
24+
**Back to**: :ref:`lint`

docs/source/lint/QUALITY_0005.rst

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
.. _QUALITY_0005:
2+
3+
QUALITY_0005 — redundant-term
4+
=============================
5+
6+
**Error Code**: QUALITY_0005
7+
8+
**Message**: ``Redundant term in the query``
9+
10+
**Problematic query (AND)**:
11+
12+
.. code-block:: text
13+
14+
"digital health" AND "health"
15+
16+
**Recommended query (AND)**:
17+
18+
.. code-block:: text
19+
20+
"digital health"
21+
22+
.. note::
23+
24+
The term "digital health" is more specific than "health".
25+
The AND query will not retrieve results that match "health" but not "digital health".
26+
Therefore, the more specific term ("digital health") is sufficient.
27+
28+
**Problematic query (OR)**:
29+
30+
.. code-block:: text
31+
32+
"digital health" OR "health"
33+
34+
**Recommended query (OR)**:
35+
36+
.. code-block:: text
37+
38+
"health"
39+
40+
.. note::
41+
42+
The term "health" is broader than "digital health".
43+
In the OR query, all results that match "digital health" will also match "health".
44+
Therefore, the broader term ("health") is sufficient.
45+
46+
**Typical fix**: Remove redundant terms that do not add value to the query.
47+
48+
**Back to**: :ref:`lint`

docs/source/lint/errors_index.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,3 +131,7 @@ Best practice qualities
131131
QUALITY_0002
132132

133133
QUALITY_0003
134+
135+
QUALITY_0004
136+
137+
QUALITY_0005

search_query/constants.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -706,6 +706,71 @@ class QueryErrorCode(Enum):
706706
""",
707707
)
708708

709+
UNNECESSARY_PARENTHESES = (
710+
"QUALITY_0004",
711+
"unnecessary-parentheses",
712+
"Unnecessary parentheses in queries",
713+
"""
714+
715+
**Problematic query**:
716+
717+
.. code-block:: text
718+
719+
("digital health" OR "eHealth") OR ("remote monitoring" OR "telehealth")
720+
721+
**Recommended query**:
722+
723+
.. code-block:: text
724+
725+
"digital health" OR "eHealth" OR "remote monitoring" OR "telehealth"
726+
727+
**Explanation**: Parentheses are unnecessary when all operators used are **associative and have equal precedence** (like a series of ORs or a series of ANDs). In such cases, the grouping does not influence the evaluation result and adds unnecessary complexity.""",
728+
)
729+
730+
REDUNDANT_TERM = (
731+
"QUALITY_0005",
732+
"redundant-term",
733+
"Redundant term in the query",
734+
"""
735+
**Problematic query (AND)**:
736+
737+
.. code-block:: text
738+
739+
"digital health" AND "health"
740+
741+
**Recommended query (AND)**:
742+
743+
.. code-block:: text
744+
745+
"digital health"
746+
747+
.. note::
748+
749+
The term "digital health" is more specific than "health".
750+
The AND query will not retrieve results that match "health" but not "digital health".
751+
Therefore, the more specific term ("digital health") is sufficient.
752+
753+
**Problematic query (OR)**:
754+
755+
.. code-block:: text
756+
757+
"digital health" OR "health"
758+
759+
**Recommended query (OR)**:
760+
761+
.. code-block:: text
762+
763+
"health"
764+
765+
.. note::
766+
767+
The term "health" is broader than "digital health".
768+
In the OR query, all results that match "digital health" will also match "health".
769+
Therefore, the broader term ("health") is sufficient.
770+
771+
**Typical fix**: Remove redundant terms that do not add value to the query.""",
772+
)
773+
709774
# -------------------------------------------------------
710775
# Structural
711776
# -------------------------------------------------------

search_query/ebsco/linter.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,10 +184,16 @@ def check_invalid_token_sequences(self) -> None:
184184
fatal=True,
185185
)
186186

187-
for i, token in enumerate(self.tokens):
187+
i = -1
188+
while i < len(self.tokens) - 1:
189+
i += 1
188190
if i == 0:
189191
continue
190192

193+
token = self.tokens[i]
194+
token_type = token.type
195+
prev_type = self.tokens[i - 1].type
196+
191197
token_type = token.type
192198
prev_type = self.tokens[i - 1].type
193199

search_query/ebsco/parser.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,21 @@ def is_potential_term(token_str: str) -> bool:
150150
# Reclassify the second field token as a TERM
151151
next_token.type = TokenTypes.TERM
152152

153+
# Operator followed by a field token followed by a closing parenthesis
154+
for i in range(len(self.tokens) - 2):
155+
current = self.tokens[i]
156+
next_token = self.tokens[i + 1]
157+
next_next_token = self.tokens[i + 2]
158+
159+
if (
160+
current.type
161+
in [TokenTypes.LOGIC_OPERATOR, TokenTypes.PROXIMITY_OPERATOR]
162+
and next_token.type == TokenTypes.FIELD
163+
and next_next_token.type == TokenTypes.PARENTHESIS_CLOSED
164+
):
165+
# Reclassify the field token as a TERM
166+
next_token.type = TokenTypes.TERM
167+
153168
def tokenize(self) -> None:
154169
"""Tokenize the query_str."""
155170

@@ -375,7 +390,10 @@ def _pre_tokenization_checks(self) -> None:
375390
self.linter.handle_suffix_in_query_str(self)
376391
self.linter.handle_prefix_in_query_str(
377392
self,
378-
prefix_regex=re.compile(r"^EBSCOHost.*\:\s*|PsycInfo", flags=re.IGNORECASE),
393+
prefix_regex=re.compile(
394+
r"^EBSCOHost.*\:\s*|PsycInfo|ERIC|CINAHL with Full Text",
395+
flags=re.IGNORECASE,
396+
),
379397
)
380398

381399
def parse(self) -> Query:

0 commit comments

Comments
 (0)