Skip to content

Commit e6502c7

Browse files
author
Gerit Wagner
committed
precedence: mark all operators in linter message
1 parent 03b205e commit e6502c7

File tree

4 files changed

+114
-37
lines changed

4 files changed

+114
-37
lines changed

search_query/linter_base.py

Lines changed: 104 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -424,7 +424,7 @@ def handle_nonstandard_quotes_in_query_str(self, query_str: str) -> str:
424424
def add_higher_value(
425425
self,
426426
output: list[Token],
427-
current_value: int,
427+
previous_value: int,
428428
value: int,
429429
art_par: int,
430430
) -> tuple[list[Token], int]:
@@ -452,11 +452,7 @@ def add_higher_value(
452452
# depth_lvl ensures that already existing blocks are ignored
453453

454454
# Insert open parenthesis after operator
455-
while current_value < value:
456-
self.add_linter_message(
457-
QueryErrorCode.IMPLICIT_PRECEDENCE,
458-
positions=[token.position],
459-
)
455+
while previous_value < value:
460456
# Insert open parenthesis after operator
461457
temp.insert(
462458
1,
@@ -466,7 +462,7 @@ def add_higher_value(
466462
position=(-1, -1),
467463
),
468464
)
469-
current_value += 1
465+
previous_value += 1
470466
art_par += 1
471467
break
472468

@@ -534,11 +530,82 @@ def flatten_redundant_artificial_nesting(self, tokens: list[Token]) -> None:
534530

535531
def get_precedence(self, token: str) -> int:
    """Look up the precedence value of an operator.

    Returns the entry stored for ``token`` in ``self.OPERATOR_PRECEDENCE``;
    any ``token`` that is not a key of that mapping yields ``-1``
    (i.e. it is not an operator).
    """
    precedence_table = self.OPERATOR_PRECEDENCE
    return precedence_table[token] if token in precedence_table else -1
541536

537+
def _get_unequal_precedence_operators(
    self, tokens: list[Token]
) -> list[Token]:
    """Collect the operator tokens at which the precedence changes.

    Only operators at nesting level 0 of ``tokens`` are compared; anything
    inside a parenthesized group is skipped. Scanning stops as soon as a
    closing parenthesis drops the level below 0.

    Returns the operator tokens (not their positions): when the first
    change of precedence is found, the operator preceding it is included
    as well, followed by every later operator whose precedence differs
    from that of its predecessor. The list is empty when no change occurs.
    """
    unequal_precedence_operators: list[Token] = []
    previous_value = -1
    level = 0
    prev_token = None
    for token in tokens:
        if token.type == TokenTypes.PARENTHESIS_CLOSED:
            level -= 1
        elif token.type == TokenTypes.PARENTHESIS_OPEN:
            level += 1
        if level < 0:
            # More closing than opening parentheses: the group the scan
            # started in has ended.
            break

        if level != 0:
            # Token sits inside a nested group; not compared here.
            continue
        if token.type in [TokenTypes.LOGIC_OPERATOR, TokenTypes.PROXIMITY_OPERATOR]:
            value = self.get_precedence(token.value.upper())
            if value != previous_value and previous_value != -1:
                if not unequal_precedence_operators:
                    # First change: also report the operator before it.
                    unequal_precedence_operators.append(prev_token)
                unequal_precedence_operators.append(token)
            previous_value = value
            prev_token = token
    return unequal_precedence_operators
564+
565+
def _print_unequal_precedence_warning(self, index: int) -> None:
    """Add one IMPLICIT_PRECEDENCE linter message for ``self.tokens[index:]``.

    Does nothing when ``_get_unequal_precedence_operators`` reports no
    operators. Otherwise a single message is added whose positions are
    those of all reported operators and whose details list each distinct
    operator together with its precedence level, highest first.
    """
    unequal_precedence_operators = self._get_unequal_precedence_operators(
        self.tokens[index:]
    )
    if not unequal_precedence_operators:
        return

    # De-duplicate case-insensitively ("and" and "AND" are one operator) and
    # keep first-occurrence order, so the message is deterministic; a plain
    # set of strings iterates in hash-randomized order.
    spelling_by_operator: dict = {}
    for operator_token in unequal_precedence_operators:
        spelling_by_operator.setdefault(
            operator_token.value.upper(), operator_token.value
        )

    # sorted() is stable (also with reverse=True), so operators that share a
    # precedence level stay in order of first occurrence.
    precedence_list = sorted(
        (
            (spelling, self.get_precedence(normalized))
            for normalized, spelling in spelling_by_operator.items()
        ),
        key=lambda item: item[1],
        reverse=True,
    )

    last_idx = len(precedence_list) - 1
    precedence_lines = []
    for idx, (op, prec) in enumerate(precedence_list):
        if idx == 0:
            precedence_lines.append(
                f"Operator {Colors.GREEN}{op}{Colors.END} is evaluated first because it has the highest precedence level ({prec})."
            )
        elif idx == last_idx:
            precedence_lines.append(
                f"Operator {Colors.ORANGE}{op}{Colors.END} is evaluated last because it has the lowest precedence level ({prec})."
            )
        else:
            precedence_lines.append(
                f"Operator {Colors.ORANGE}{op}{Colors.END} has precedence level {prec}."
            )

    precedence_info = "\n".join(precedence_lines)

    details = (
        "The query uses multiple operators with different precedence levels, "
        "but without parentheses to make the intended logic explicit. "
        "This can lead to unexpected interpretations of the query.\n\n"
        "Specifically:\n"
        f"{precedence_info}\n\n"
        "✅ To fix this, search-query adds artificial parentheses around operator groups with higher precedence.\n\n"
    )

    self.add_linter_message(
        QueryErrorCode.IMPLICIT_PRECEDENCE,
        positions=[o.position for o in unequal_precedence_operators],
        details=details,
    )
608+
542609
# pylint: disable=too-many-branches
543610
def add_artificial_parentheses_for_operator_precedence(
544611
self,
@@ -554,10 +621,12 @@ def add_artificial_parentheses_for_operator_precedence(
554621
# Value of operator
555622
value = 0
556623
# Value of previous operator
557-
current_value = -1
624+
previous_value = -1
558625
# Added artificial parentheses
559626
art_par = 0
560627

628+
self._print_unequal_precedence_warning(index)
629+
561630
while index < len(self.tokens):
562631
# Forward iteration through tokens
563632

@@ -590,28 +659,24 @@ def add_artificial_parentheses_for_operator_precedence(
590659
]:
591660
value = self.get_precedence(self.tokens[index].value.upper())
592661

593-
if current_value in (value, -1):
662+
if previous_value in (value, -1):
594663
# Same precedence → just add to output
595664
output.append(self.tokens[index])
596-
current_value = value
665+
previous_value = value
597666

598-
elif value > current_value:
667+
elif value > previous_value:
599668
# Higher precedence → start wrapping with artificial parenthesis
600669
temp, art_par = self.add_higher_value(
601-
output, current_value, value, art_par
670+
output, previous_value, value, art_par
602671
)
603672

604673
output.extend(temp)
605674
output.append(self.tokens[index])
606-
current_value = value
675+
previous_value = value
607676

608-
elif value < current_value:
677+
elif value < previous_value:
609678
# Insert close parenthesis for each point in value difference
610-
while current_value > value:
611-
self.add_linter_message(
612-
QueryErrorCode.IMPLICIT_PRECEDENCE,
613-
positions=[self.tokens[index].position],
614-
)
679+
while previous_value > value:
615680
# Lower precedence → close parenthesis
616681
output.append(
617682
Token(
@@ -620,10 +685,10 @@ def add_artificial_parentheses_for_operator_precedence(
620685
position=(-1, -1),
621686
)
622687
)
623-
current_value -= 1
688+
previous_value -= 1
624689
art_par -= 1
625690
output.append(self.tokens[index])
626-
current_value = value
691+
previous_value = value
627692

628693
index += 1
629694
continue
@@ -996,10 +1061,20 @@ def check_status(self) -> None:
9961061
raise ListQuerySyntaxError(self)
9971062

9981063

999-
def _print_bullet_message(message: str, indent: int = 2, bullet: str = "-"):
1000-
wrapper = textwrap.TextWrapper(
1001-
initial_indent=" " * indent + bullet + " ",
1002-
subsequent_indent=" " * (indent + len(bullet) + 3),
1003-
width=120,
1004-
)
1005-
print(wrapper.fill(message))
1064+
def _print_bullet_message(message: str, indent: int = 2, bullet: str = "-") -> None:
    """Print ``message`` as one bullet item, wrapped at 120 columns.

    ``message`` is stripped and split on newlines. The first line is
    prefixed with ``indent`` spaces, ``bullet`` and one space; every other
    line, and every wrapped continuation line, starts at the column where
    the text after the bullet begins. Blank lines are preserved as empty
    output lines.
    """
    # Column at which the text after "<indent><bullet> " starts. Using it for
    # follow-on paragraphs as well keeps them aligned with wrapped lines for
    # any bullet length.
    text_indent = " " * (indent + len(bullet) + 1)
    first_wrapper = textwrap.TextWrapper(
        width=120,
        initial_indent=" " * indent + bullet + " ",
        subsequent_indent=text_indent,
    )
    follow_wrapper = textwrap.TextWrapper(
        width=120,
        initial_indent=text_indent,
        subsequent_indent=text_indent,
    )

    lines = []
    for idx, paragraph in enumerate(message.strip().split("\n")):
        if not paragraph.strip():
            lines.append("")  # preserve blank lines
            continue
        wrapper = first_wrapper if idx == 0 else follow_wrapper
        lines.append(wrapper.fill(paragraph))

    print("\n".join(lines))

test/test_cli.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,4 +55,7 @@ def test_linter_cli() -> None:
5555

5656
assert "Lint: search_history_file_2_linter.json (wos)" in result.stdout
5757
assert "Unbalanced closing parenthesis" in result.stdout
58-
assert "Operator changed at the same level" in result.stdout
58+
assert (
59+
"The query uses multiple operators with different precedence levels"
60+
in result.stdout
61+
)

test/test_pubmed.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -317,8 +317,8 @@ def test_pubmed_invalid_token_sequences(
317317
"label": "implicit-precedence",
318318
"message": "Operator changed at the same level (explicit parentheses are recommended)",
319319
"is_fatal": False,
320-
"position": [(18, 20)],
321-
"details": "",
320+
"position": [(18, 20), (49, 52)],
321+
"details": "The query uses multiple operators with different precedence levels, but without parentheses to make the intended logic explicit. This can lead to unexpected interpretations of the query.\n\nSpecifically:\nOperator \x1b[92mAND\x1b[0m is evaluated first because it has the highest precedence level (1).\nOperator \x1b[93mOR\x1b[0m is evaluated last because it has the lowest precedence level (0).\n\n✅ To fix this, search-query adds artificial parentheses around operator groups with higher precedence.\n\n",
322322
},
323323
{
324324
"code": "E0001",

test/test_wos.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -211,8 +211,8 @@ def test_tokenization(query_str: str, expected_tokens: list) -> None:
211211
"label": "implicit-precedence",
212212
"message": "Operator changed at the same level (explicit parentheses are recommended)",
213213
"is_fatal": False,
214-
"position": [(13, 15)],
215-
"details": "",
214+
"position": [(9, 12), (13, 15)],
215+
"details": "The query uses multiple operators with different precedence levels, but without parentheses to make the intended logic explicit. This can lead to unexpected interpretations of the query.\n\nSpecifically:\nOperator \x1b[92mAND\x1b[0m is evaluated first because it has the highest precedence level (1).\nOperator \x1b[93mOR\x1b[0m is evaluated last because it has the lowest precedence level (0).\n\n✅ To fix this, search-query adds artificial parentheses around operator groups with higher precedence.\n\n",
216216
},
217217
],
218218
),
@@ -733,7 +733,6 @@ def test_implicit_precedence(query_str: str, expected_query: str) -> None:
733733
assert msg["code"] == "W0007"
734734
assert msg["label"] == "implicit-precedence"
735735
assert msg["is_fatal"] is False
736-
assert msg["details"] == ""
737736

738737

739738
def test_query_parsing_basic_vs_advanced() -> None:
@@ -861,9 +860,9 @@ def test_artificial_parentheses() -> None:
861860
"code": "W0007",
862861
"label": "implicit-precedence",
863862
"message": "Operator changed at the same level (explicit parentheses are recommended)",
864-
"position": [(7, 9)],
865863
"is_fatal": False,
866-
"details": "",
864+
"position": [(7, 9), (17, 20)],
865+
"details": "The query uses multiple operators with different precedence levels, but without parentheses to make the intended logic explicit. This can lead to unexpected interpretations of the query.\n\nSpecifically:\nOperator \x1b[92mAND\x1b[0m is evaluated first because it has the highest precedence level (1).\nOperator \x1b[93mOR\x1b[0m is evaluated last because it has the lowest precedence level (0).\n\n✅ To fix this, search-query adds artificial parentheses around operator groups with higher precedence.\n\n",
867866
}
868867
assert query.to_generic_string() == "OR[ALL=][remote, AND[online, work]]"
869868

0 commit comments

Comments
 (0)