Skip to content

Commit c72b348

Browse files
PubMed: Serializer, NEAR Query Translation, Artificial Parentheses (#42)
* fix artificial_parentheses * add artificial_parentheses for Pubmed * exclude NOT operator from unsupported suffix * fix serialization of NOT operator for Pubmed * fix Pubmed serializer * update details in Pubmed test * import annotations * revise PubMed serializer * NEAR query serialization * Auto-update documentation indices * parse NEAR queries * adjust NEARQuery, update serializer * fix f-string issue * adjust Pubmed linter tests * NEAR query translation * fix proximity validation * collapse NEAR queries --------- Co-authored-by: github-actions <actions@github.com>
1 parent cfc9d81 commit c72b348

File tree

8 files changed

+414
-95
lines changed

8 files changed

+414
-95
lines changed

search_query/linter_base.py

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -436,7 +436,7 @@ def handle_suffix_in_query_str(self, query_str: str) -> str:
436436
if quote_count % 2 != 0:
437437
return query_str # unbalanced quotes, do not attempt trimming
438438

439-
suffix_match = re.search(r"\)(?!\s*(AND|OR))[^()\[\]]*$", query_str)
439+
suffix_match = re.search(r"\)(?!\s*(AND|OR|NOT))[^()\[\]]*$", query_str)
440440

441441
original_query_str = query_str # preserve for position calculation
442442

@@ -704,6 +704,8 @@ def add_artificial_parentheses_for_operator_precedence(
704704
previous_value = -1
705705
# Added artificial parentheses
706706
art_par = 0
707+
# Start index
708+
start_index = index
707709

708710
self._print_unequal_precedence_warning(index)
709711

@@ -721,16 +723,28 @@ def add_artificial_parentheses_for_operator_precedence(
721723
if self.tokens[index].type == TokenTypes.PARENTHESIS_CLOSED:
722724
output.append(self.tokens[index])
723725
index += 1
724-
# Add closed parenthesis in case there are still open ones
725-
while art_par > 0:
726-
output.append(
727-
Token(
728-
value=")",
729-
type=TokenTypes.PARENTHESIS_CLOSED,
730-
position=(-1, -1),
726+
# Add parentheses in case there are missing ones
727+
if art_par > 0:
728+
while art_par > 0:
729+
output.append(
730+
Token(
731+
value=")",
732+
type=TokenTypes.PARENTHESIS_CLOSED,
733+
position=(-1, -1),
734+
)
731735
)
732-
)
733-
art_par -= 1
736+
art_par -= 1
737+
if art_par < 0:
738+
while art_par < 0:
739+
output.insert(
740+
start_index,
741+
Token(
742+
value="(",
743+
type=TokenTypes.PARENTHESIS_OPEN,
744+
position=(-1, -1),
745+
),
746+
)
747+
art_par += 1
734748
return index, output
735749

736750
if self.tokens[index].type in [

search_query/pubmed/linter.py

Lines changed: 149 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
#!/usr/bin/env python3
22
"""Pubmed query linter."""
3+
from __future__ import annotations
4+
35
import re
46
import typing
57

8+
from search_query.constants import Colors
69
from search_query.constants import ListTokenTypes
710
from search_query.constants import OperatorNodeTokenTypes
811
from search_query.constants import PLATFORM
@@ -213,6 +216,147 @@ def check_invalid_token_sequences(self) -> None:
213216
details=f"Cannot end with {self.tokens[-1].type.value}",
214217
)
215218

219+
def _print_unequal_precedence_warning(self, index: int) -> None:
220+
unequal_precedence_operators = self._get_unequal_precedence_operators(
221+
self.tokens[index:]
222+
)
223+
if not unequal_precedence_operators:
224+
return
225+
226+
precedence_list = [o.value for o in unequal_precedence_operators]
227+
precedence_lines = []
228+
for idx, op in enumerate(precedence_list):
229+
if idx == 0:
230+
precedence_lines.append(
231+
f"Operator {Colors.GREEN}{op}{Colors.END} at position {idx + 1} is evaluated first "
232+
f"because it is the leftmost operator."
233+
)
234+
elif idx == len(precedence_list) - 1:
235+
precedence_lines.append(
236+
f"Operator {Colors.ORANGE}{op}{Colors.END} at position {idx + 1} is evaluated last "
237+
f"because it is the rightmost operator."
238+
)
239+
else:
240+
precedence_lines.append(
241+
f"Operator {Colors.ORANGE}{op}{Colors.END} at position {idx + 1} is evaluated next."
242+
)
243+
244+
precedence_info = "\n".join(precedence_lines)
245+
246+
details = (
247+
"The query uses multiple operators, but without parentheses to make the intended logic explicit. "
248+
"PubMed evaluates queries strictly from left to right without applying traditional operator precedence. "
249+
"This can lead to unexpected interpretations of the query.\n\n"
250+
"Specifically:\n"
251+
f"{precedence_info}\n\n"
252+
"To fix this, search-query adds artificial parentheses around operators "
253+
"based on their left-to-right position in the query.\n\n"
254+
)
255+
256+
self.add_linter_message(
257+
QueryErrorCode.IMPLICIT_PRECEDENCE,
258+
positions=[o.position for o in unequal_precedence_operators],
259+
details=details,
260+
)
261+
262+
def add_artificial_parentheses_for_operator_precedence(
263+
self,
264+
index: int = 0,
265+
output: typing.Optional[list] = None,
266+
) -> tuple[int, list[Token]]:
267+
"""
268+
Adds artificial parentheses with position (-1, -1)
269+
to enforce PubMed operator precedence.
270+
"""
271+
if output is None:
272+
output = []
273+
# Value of operator
274+
value = 0
275+
# Value of previous operator
276+
previous_value = -1
277+
# Added artificial parentheses
278+
art_par = 0
279+
# Start index
280+
start_index = index
281+
282+
self._print_unequal_precedence_warning(index)
283+
284+
while index < len(self.tokens):
285+
# Forward iteration through tokens
286+
287+
if self.tokens[index].type == TokenTypes.PARENTHESIS_OPEN:
288+
output.append(self.tokens[index])
289+
index += 1
290+
index, output = self.add_artificial_parentheses_for_operator_precedence(
291+
index, output
292+
)
293+
continue
294+
295+
if self.tokens[index].type == TokenTypes.PARENTHESIS_CLOSED:
296+
output.append(self.tokens[index])
297+
index += 1
298+
# Add opening parentheses in case there are missing ones
299+
if art_par < 0:
300+
while art_par < 0:
301+
output.insert(
302+
start_index,
303+
Token(
304+
value="(",
305+
type=TokenTypes.PARENTHESIS_OPEN,
306+
position=(-1, -1),
307+
),
308+
)
309+
art_par += 1
310+
return index, output
311+
312+
if self.tokens[index].type in [
313+
TokenTypes.LOGIC_OPERATOR,
314+
TokenTypes.PROXIMITY_OPERATOR,
315+
]:
316+
value = self.get_precedence(self.tokens[index].value.upper())
317+
318+
if previous_value in (value, -1):
319+
# Same precedence → just add to output
320+
output.append(self.tokens[index])
321+
previous_value = value
322+
323+
elif value != previous_value:
324+
# Different precedence → close parenthesis
325+
output.append(
326+
Token(
327+
value=")",
328+
type=TokenTypes.PARENTHESIS_CLOSED,
329+
position=(-1, -1),
330+
)
331+
)
332+
previous_value -= 1
333+
art_par -= 1
334+
output.append(self.tokens[index])
335+
previous_value = value
336+
337+
index += 1
338+
continue
339+
340+
# Default: search terms, fields, etc.
341+
output.append(self.tokens[index])
342+
index += 1
343+
344+
# Add opening parentheses in case there are missing ones
345+
if art_par < 0:
346+
while art_par < 0:
347+
output.insert(
348+
0,
349+
Token(
350+
value="(", type=TokenTypes.PARENTHESIS_OPEN, position=(-1, -1)
351+
),
352+
)
353+
art_par += 1
354+
355+
if index == len(self.tokens):
356+
self.flatten_redundant_artificial_nesting(output)
357+
358+
return index, output
359+
216360
def check_invalid_wildcard(self, query: Query) -> None:
217361
"""Check search term for invalid wildcard *"""
218362

@@ -262,15 +406,12 @@ def check_invalid_proximity_operator(self) -> None:
262406
continue
263407

264408
nr_of_terms = len(search_phrase_token.value.strip('"').split())
265-
if nr_of_terms >= 2 and not (
409+
if nr_of_terms < 2 or not (
266410
search_phrase_token.value[0] == '"'
267411
and search_phrase_token.value[-1] == '"'
268412
):
269413
details = (
270-
"When using proximity operators, "
271-
+ "search terms consisting of 2 or more words "
272-
+ f"(i.e., {search_phrase_token.value}) "
273-
+ "must be enclosed in double quotes"
414+
"Proximity search requires 2 or more search terms enclosed in double quotes."
274415
)
275416
self.add_linter_message(
276417
QueryErrorCode.INVALID_PROXIMITY_USE,
@@ -292,8 +433,6 @@ def check_invalid_proximity_operator(self) -> None:
292433
positions=[field_token.position],
293434
details=details,
294435
)
295-
# Update search field token
296-
self.tokens[index].value = field_value
297436

298437
def validate_query_tree(self, query: Query) -> None:
299438
"""Validate the query tree"""
@@ -420,10 +559,10 @@ class PubmedQueryListLinter(QueryListLinter):
420559

421560
def __init__(
422561
self,
423-
parser: "PubmedListParser",
424-
string_parser_class: typing.Type["QueryStringParser"],
562+
parser: PubmedListParser,
563+
string_parser_class: typing.Type[QueryStringParser],
425564
):
426-
self.parser: "PubmedListParser" = parser
565+
self.parser: PubmedListParser = parser
427566
self.string_parser_class = string_parser_class
428567
super().__init__(parser, string_parser_class)
429568

search_query/pubmed/parser.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from search_query.query import Query
1919
from search_query.query import SearchField
2020
from search_query.query_term import Term
21+
from search_query.query_near import NEARQuery
2122

2223

2324
class PubmedParser(QueryStringParser):
@@ -205,6 +206,26 @@ def _parse_search_term(self, tokens: list) -> Query:
205206

206207
# Determine the search field of the search term.
207208
if len(tokens) > 1 and tokens[1].type == TokenTypes.FIELD:
209+
if ":~" in tokens[1].value:
210+
# Parse NEAR query
211+
field_value, prox_value = self.PROXIMITY_REGEX.match(tokens[1].value).groups()
212+
field_value = "[" + field_value + "]"
213+
return NEARQuery(
214+
value=Operators.NEAR,
215+
search_field=None,
216+
children=[
217+
Term(
218+
value=search_term_token.value,
219+
search_field=SearchField(value=field_value, position=tokens[1].position),
220+
position=tokens[0].position,
221+
platform="deactivated"
222+
)
223+
],
224+
position=(tokens[0].position[0], tokens[1].position[1]),
225+
distance=prox_value,
226+
platform="deactivated"
227+
)
228+
208229
search_field = SearchField(
209230
value=tokens[1].value, position=tokens[1].position
210231
)

search_query/pubmed/serializer.py

Lines changed: 30 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -12,49 +12,39 @@
1212

1313
def to_string_pubmed(query: Query) -> str:
1414
"""Serialize the Query tree into a PubMed search string."""
15-
16-
# to do combine querys for PLATFORM_COMBINED_FIELDS_MAP
17-
1815
if not query.children:
19-
# query has no children, so it is a leaf node
20-
if query.search_field:
21-
return f"{query.value}{query.search_field.value}"
22-
return query.value
23-
16+
# Serialize term query
17+
return (
18+
f"{query.value}" f"{query.search_field.value if query.search_field else ''}"
19+
)
20+
if query.value == Operators.NEAR:
21+
# Serialize near query
22+
distance = query.distance if hasattr(query, 'distance') else 0
23+
return (
24+
f"{query.children[0].value}"
25+
f"{query.children[0].search_field.value[:-1]}"
26+
f":~{distance}]"
27+
)
28+
if query.value == Operators.RANGE:
29+
# Serialize range query
30+
return (
31+
f"{query.children[0].value}:{query.children[1].value}"
32+
f"{query.children[0].search_field.value}"
33+
)
34+
# Serialize compound query
2435
result = ""
25-
for child in query.children:
26-
if not child.operator:
27-
# query is not an operator
28-
if (child == query.children[0]) & (child != query.children[-1]):
29-
# current element is first but not only child element
30-
# -->operator does not need to be appended again
31-
result = (
32-
f"{result}({child.value}"
33-
f"{child.search_field.value if child.search_field else ''}"
34-
)
35-
36-
else:
37-
# current element is not first child
38-
result = (
39-
f"{result} {query.value} {child.value}"
40-
f"{child.search_field.value if child.search_field else ''}"
41-
)
42-
43-
if child == query.children[-1]:
44-
# current Element is last Element -> closing parenthesis
45-
result = f"{result})"
46-
36+
for i, child in enumerate(query.children):
37+
if i > 0:
38+
# Add operator between query children
39+
result += f" {query.value} "
40+
if isinstance(child, str):
41+
result += child
4742
else:
48-
# query is operator query
49-
if child.value == Operators.NOT:
50-
# current element is NOT Operator -> no parenthesis in PubMed
51-
result = f"{result}{to_string_pubmed(child)}"
43+
# Recursively serialize query children
44+
result += to_string_pubmed(child)
5245

53-
elif (child == query.children[0]) & (child != query.children[-1]):
54-
result = f"{result}({to_string_pubmed(child)}"
55-
else:
56-
result = f"{result} {query.value} {to_string_pubmed(child)}"
46+
if query.get_parent():
47+
# Add parentheses around nested queries
48+
result = "(" + result + ")"
5749

58-
if (child == query.children[-1]) & (child.value != Operators.NOT):
59-
result = f"{result})"
6050
return result

0 commit comments

Comments (0)