Skip to content

Commit 7a144fd

Browse files
author
Gerit Wagner
committed
refactoring and testing
1 parent 77d1095 commit 7a144fd

27 files changed

+422
-386
lines changed

.coverage

-68 KB
Binary file not shown.

.gitignore

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,18 @@ search_query/ebsco/__pycache__/*
66
search_query/pubmed/__pycache__/*
77
search_query/wos/__pycache__/*
88
search_query/generic/__pycache__/*
9+
10+
# Unit test / coverage reports
11+
htmlcov/
12+
.tox/
13+
.nox/
14+
.coverage
15+
.coverage.*
16+
.cache
17+
nosetests.xml
18+
coverage.xml
19+
*.cover
20+
*.py,cover
21+
.hypothesis/
22+
.pytest_cache/
23+
pytestdebug.log

search_query/ebsco/constants.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ def syntax_str_to_generic_search_field_set(field_value: str) -> set:
6868
if field_value == key:
6969
return deepcopy(value)
7070

71-
raise ValueError(f"Field {field_value} not supported by EBSCO")
71+
raise ValueError(f"Field {field_value} not supported by EBSCO") # pragma: no cover
7272

7373

7474
def generic_search_field_to_syntax_field(generic_search_field: str) -> str:
@@ -78,6 +78,6 @@ def generic_search_field_to_syntax_field(generic_search_field: str) -> str:
7878
if {generic_search_field} == value:
7979
return key
8080

81-
raise ValueError(
81+
raise ValueError( # pragma: no cover
8282
f"Generic search field set {generic_search_field} " "not supported by EBSCO"
8383
)

search_query/ebsco/linter.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,6 @@ class EBSCOQueryStringLinter(QueryStringLinter):
2222

2323
UNSUPPORTED_SEARCH_FIELD_REGEX = r"\b(?!OR\b)\b(?!S\d+\b)[A-Z]{2}\b"
2424

25-
OPERATOR_PRECEDENCE = {
26-
"NEAR": 3,
27-
"WITHIN": 3,
28-
"NOT": 2,
29-
"AND": 1,
30-
"OR": 0,
31-
}
3225
PLATFORM: PLATFORM = PLATFORM.EBSCO
3326
VALID_FIELDS_REGEX = VALID_FIELDS_REGEX
3427

@@ -250,7 +243,7 @@ def validate_query_tree(self, query: Query) -> None:
250243
This method is called after the query tree has been built.
251244
"""
252245

253-
self.check_quoted_search_terms_query(query)
246+
self.check_unbalanced_quotes_in_terms(query)
254247
self.check_operator_capitalization_query(query)
255248
self.check_invalid_characters_in_search_term_query(query, "@&%$^~\\<>{}()[]#")
256249
self.check_unsupported_search_fields_in_query(query)

search_query/ebsco/parser.py

Lines changed: 54 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,11 @@
55
import re
66
import typing
77

8-
from search_query.constants import GENERAL_ERROR_POSITION
98
from search_query.constants import LinterMode
109
from search_query.constants import PLATFORM
11-
from search_query.constants import QueryErrorCode
1210
from search_query.constants import Token
1311
from search_query.constants import TokenTypes
1412
from search_query.ebsco.linter import EBSCOQueryStringLinter
15-
from search_query.linter_base import QueryListLinter
16-
from search_query.parser_base import QueryListParser
1713
from search_query.parser_base import QueryStringParser
1814
from search_query.query import Query
1915
from search_query.query import SearchField
@@ -60,8 +56,6 @@ def __init__(
6056

6157
def combine_subsequent_tokens(self) -> None:
6258
"""Combine subsequent tokens based on specific conditions."""
63-
if not self.tokens:
64-
return
6559

6660
combined_tokens = []
6761
i = 0
@@ -106,18 +100,12 @@ def combine_subsequent_tokens(self) -> None:
106100

107101
self.tokens = combined_tokens
108102

109-
def convert_proximity_operators(
110-
self, token: str, token_type: str
111-
) -> tuple[str, int]:
103+
def _extract_proximity_distance(self, token: Token) -> int:
112104
"""Convert proximity operator token into operator and distance components"""
113-
if token_type != TokenTypes.PROXIMITY_OPERATOR:
114-
raise ValueError(
115-
f"Invalid token type: {token_type}. Expected 'PROXIMITY_OPERATOR'."
116-
)
117105

118106
# Extract the operator (first character) and distance (rest of the string)
119-
operator = token[:1]
120-
distance_string = token[1:]
107+
operator = token.value[:1]
108+
distance_string = token.value[1:]
121109

122110
# Change value of operator to fit construction of operator query
123111
if operator == "N":
@@ -128,19 +116,17 @@ def convert_proximity_operators(
128116
# Validate and convert the distance
129117
if not distance_string.isdigit():
130118
raise ValueError(
131-
f"Invalid proximity operator format: '{token}'. "
119+
f"Invalid proximity operator format: '{token.value}'. "
132120
"Expected a number after the operator."
133121
)
134122

135123
distance = int(distance_string)
136-
return operator, distance
124+
token.value = operator
125+
return distance
137126

138127
def tokenize(self) -> None:
139128
"""Tokenize the query_str."""
140129

141-
if self.query_str is None:
142-
raise ValueError("No string provided to parse.")
143-
144130
self.tokens = []
145131
token_type = TokenTypes.UNKNOWN
146132
for match in self.pattern.finditer(self.query_str):
@@ -200,7 +186,7 @@ def append_operator(
200186

201187
def _check_for_none(self, root: typing.Optional[Query]) -> Query:
202188
"""Check if root is none"""
203-
if root is None:
189+
if root is None: # pragma: no cover
204190
raise ValueError("Failed to construct a valid query tree.")
205191
return root
206192

@@ -244,9 +230,7 @@ def parse_query_tree(
244230

245231
elif token.type == TokenTypes.PROXIMITY_OPERATOR:
246232
# Split token into NEAR/WITHIN and distance
247-
token.value, distance = self.convert_proximity_operators(
248-
token.value, token.type
249-
)
233+
distance = self._extract_proximity_distance(token)
250234

251235
# Create new proximity_operator from token (N3, W1, N13, ...)
252236
proximity_node = Query(
@@ -323,44 +307,49 @@ def parse(self) -> Query:
323307
return query
324308

325309

326-
class EBSCOListParser(QueryListParser):
327-
"""Parser for EBSCO (list format) queries."""
328-
329-
def __init__(self, query_list: str, search_field_general: str, mode: str) -> None:
330-
"""Initialize with a query list and use EBSCOParser for parsing each query."""
331-
super().__init__(
332-
query_list=query_list,
333-
parser_class=EBSCOParser,
334-
search_field_general=search_field_general,
335-
mode=mode,
336-
)
337-
self.linter = QueryListLinter(parser=self, string_parser_class=EBSCOParser)
338-
339-
def get_token_str(self, token_nr: str) -> str:
340-
"""Format the token string for output or processing."""
341-
342-
# Match string combinators such as S1 AND S2 ... ; #1 AND #2 ; ...
343-
pattern = rf"(S|#){token_nr}"
344-
345-
match = re.search(pattern, self.query_list)
346-
347-
if match:
348-
# Return the preceding character if found
349-
return f"{match.group(1)}{token_nr}"
350-
351-
# Log a linter message and return the token number
352-
# 1 AND 2 ... are still possible,
353-
# however for standardization purposes it should be S/#
354-
self.linter.add_linter_message(
355-
QueryErrorCode.INVALID_LIST_REFERENCE,
356-
list_position=GENERAL_ERROR_POSITION,
357-
positions=[(-1, -1)],
358-
details="Connecting lines possibly failed. "
359-
"Please use this format for connection: "
360-
"S1 OR S2 OR S3 / #1 OR #2 OR #3",
361-
)
362-
return token_nr
363-
364-
def parse(self) -> Query:
365-
"""Parse the query in list format."""
366-
raise NotImplementedError("List parsing not implemented yet.")
310+
# from search_query.constants import GENERAL_ERROR_POSITION
311+
# from search_query.constants import QueryErrorCode
312+
# from search_query.linter_base import QueryListLinter
313+
# from search_query.parser_base import QueryListParser
314+
315+
# class EBSCOListParser(QueryListParser):
316+
# """Parser for EBSCO (list format) queries."""
317+
318+
# def __init__(self, query_list: str, search_field_general: str, mode: str) -> None:
319+
# """Initialize with a query list and use EBSCOParser for parsing each query."""
320+
# super().__init__(
321+
# query_list=query_list,
322+
# parser_class=EBSCOParser,
323+
# search_field_general=search_field_general,
324+
# mode=mode,
325+
# )
326+
# self.linter = QueryListLinter(parser=self, string_parser_class=EBSCOParser)
327+
328+
# def get_token_str(self, token_nr: str) -> str:
329+
# """Format the token string for output or processing."""
330+
331+
# # Match string combinators such as S1 AND S2 ... ; #1 AND #2 ; ...
332+
# pattern = rf"(S|#){token_nr}"
333+
334+
# match = re.search(pattern, self.query_list)
335+
336+
# if match:
337+
# # Return the preceding character if found
338+
# return f"{match.group(1)}{token_nr}"
339+
340+
# # Log a linter message and return the token number
341+
# # 1 AND 2 ... are still possible,
342+
# # however for standardization purposes it should be S/#
343+
# self.linter.add_linter_message(
344+
# QueryErrorCode.INVALID_LIST_REFERENCE,
345+
# list_position=GENERAL_ERROR_POSITION,
346+
# positions=[(-1, -1)],
347+
# details="Connecting lines possibly failed. "
348+
# "Please use this format for connection: "
349+
# "S1 OR S2 OR S3 / #1 OR #2 OR #3",
350+
# )
351+
# return token_nr
352+
353+
# def parse(self) -> Query:
354+
# """Parse the query in list format."""
355+
# raise NotImplementedError("List parsing not implemented yet.")

search_query/generic/linter.py

Lines changed: 0 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,6 @@
1515
class GenericLinter(QueryStringLinter):
1616
"""Linter for Generic Query Strings"""
1717

18-
PRECEDENCE = {
19-
"NEAR": 3,
20-
"WITHIN": 3,
21-
"NOT": 2,
22-
"AND": 1,
23-
"OR": 0,
24-
}
2518
PLATFORM: PLATFORM = PLATFORM.GENERIC
2619

2720
# Extract unique string values
@@ -33,39 +26,6 @@ class GenericLinter(QueryStringLinter):
3326

3427
VALID_FIELDS_REGEX = re.compile(r"\b(?:" + "|".join(sorted(field_codes)) + r")\b")
3528

36-
# VALID_TOKEN_SEQUENCES = {
37-
# TokenTypes.FIELD: [
38-
# TokenTypes.SEARCH_TERM,
39-
# TokenTypes.PARENTHESIS_OPEN,
40-
# ],
41-
# TokenTypes.SEARCH_TERM: [
42-
# TokenTypes.SEARCH_TERM,
43-
# TokenTypes.LOGIC_OPERATOR,
44-
# TokenTypes.PROXIMITY_OPERATOR,
45-
# TokenTypes.PARENTHESIS_CLOSED,
46-
# ],
47-
# TokenTypes.LOGIC_OPERATOR: [
48-
# TokenTypes.SEARCH_TERM,
49-
# TokenTypes.FIELD,
50-
# TokenTypes.PARENTHESIS_OPEN,
51-
# ],
52-
# TokenTypes.PROXIMITY_OPERATOR: [
53-
# TokenTypes.SEARCH_TERM,
54-
# TokenTypes.PARENTHESIS_OPEN,
55-
# TokenTypes.FIELD,
56-
# ],
57-
# TokenTypes.PARENTHESIS_OPEN: [
58-
# TokenTypes.FIELD,
59-
# TokenTypes.SEARCH_TERM,
60-
# TokenTypes.PARENTHESIS_OPEN,
61-
# ],
62-
# TokenTypes.PARENTHESIS_CLOSED: [
63-
# TokenTypes.PARENTHESIS_CLOSED,
64-
# TokenTypes.LOGIC_OPERATOR,
65-
# TokenTypes.PROXIMITY_OPERATOR,
66-
# ],
67-
# }
68-
6929
def __init__(self, query_str: str = "") -> None:
7030
super().__init__(query_str=query_str)
7131

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,16 @@
1010

1111
def to_string_generic(query: Query) -> str:
1212
"""Convert the query to a string."""
13-
if not hasattr(query, "value"):
13+
if not hasattr(query, "value"): # pragma: no cover
1414
return " (?) "
1515

1616
result = ""
1717
query_content = query.value
1818
if query.search_field:
1919
query_content += f"[{query.search_field}]"
2020

21-
if hasattr(query, "near_param"):
22-
query_content += f"({query.near_param})"
21+
if hasattr(query, "distance") and query.distance:
22+
query_content += f"({query.distance})"
2323
result = f"{result}{query_content}"
2424
if query.children == []:
2525
return result

0 commit comments

Comments
 (0)