Skip to content

Commit ded794c

Browse files
author
Gerit Wagner
committed
extract ebsco-linter
1 parent b53988c commit ded794c

File tree

3 files changed

+143
-143
lines changed

3 files changed

+143
-143
lines changed

search_query/linter_ebsco.py

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
#!/usr/bin/env python3
2+
"""Validator for search queries."""
3+
from __future__ import annotations
4+
5+
import re
6+
import typing
7+
8+
from search_query.constants import QueryErrorCode
9+
from search_query.constants import TokenTypes
10+
from search_query.parser_validation import QueryStringValidator
11+
12+
13+
class EBSCOQueryStringValidator(QueryStringValidator):
    """Validate (lint) EBSCO query strings.

    The methods read ``self.query_str``, ``self.search_field_general`` and
    ``self.parser``; these are presumably initialised by
    ``QueryStringValidator`` (not visible here -- confirm against the base
    class). Findings are reported via ``self.parser.add_linter_message``.
    """

    # Matches any stand-alone two-letter upper-case word except the
    # operator "OR"; such words are treated as candidate search fields.
    UNSUPPORTED_SEARCH_FIELD_REGEX = r"\b(?!OR\b)\b(?!S\d+\b)[A-Z]{2}\b"

    # Two-letter field codes that are accepted without modification.
    SUPPORTED_SEARCH_FIELDS = frozenset(
        {
            "TI",
            "AU",
            "TX",
            "AB",
            "SO",
            "SU",
            "IS",
            "IB",
            "DE",
            "LA",
            "KW",
        }
    )

    # Fallback field used in non-strict mode for unsupported fields.
    DEFAULT_SEARCH_FIELD = "AB"

    # Maps the previous token type to the token types allowed to follow it.
    VALID_TOKEN_TRANSITIONS = {
        TokenTypes.FIELD: (
            TokenTypes.SEARCH_TERM,
            TokenTypes.PARENTHESIS_OPEN,
        ),
        # Consecutive SEARCH_TERMs are allowed (they get connected anyway).
        TokenTypes.SEARCH_TERM: (
            TokenTypes.SEARCH_TERM,
            TokenTypes.LOGIC_OPERATOR,
            TokenTypes.PROXIMITY_OPERATOR,
            TokenTypes.PARENTHESIS_CLOSED,
        ),
        TokenTypes.LOGIC_OPERATOR: (
            TokenTypes.SEARCH_TERM,
            TokenTypes.FIELD,
            TokenTypes.PARENTHESIS_OPEN,
        ),
        TokenTypes.PROXIMITY_OPERATOR: (
            TokenTypes.SEARCH_TERM,
            TokenTypes.PARENTHESIS_OPEN,
            TokenTypes.FIELD,
        ),
        TokenTypes.PARENTHESIS_OPEN: (
            TokenTypes.FIELD,
            TokenTypes.SEARCH_TERM,
            TokenTypes.PARENTHESIS_OPEN,
        ),
        TokenTypes.PARENTHESIS_CLOSED: (
            TokenTypes.PARENTHESIS_CLOSED,
            TokenTypes.LOGIC_OPERATOR,
            TokenTypes.PROXIMITY_OPERATOR,
        ),
    }

    def check_search_field_general(self, strict: str) -> None:
        """Report a non-empty general search field when linting strictly.

        Adds ``QueryErrorCode.SEARCH_FIELD_EXTRACTED`` (with an empty
        position) if ``self.search_field_general`` is not the empty string
        and ``strict`` equals the string ``"strict"``.

        NOTE(review): ``strict`` is a ``str`` here but a ``bool`` in
        ``filter_search_field`` -- confirm what callers actually pass.
        """
        if self.search_field_general != "" and strict == "strict":
            self.parser.add_linter_message(QueryErrorCode.SEARCH_FIELD_EXTRACTED, ())

    def filter_search_field(self, strict: bool) -> None:
        """Replace unsupported two-letter search fields in ``self.query_str``.

        Every match of ``UNSUPPORTED_SEARCH_FIELD_REGEX`` that is not in
        ``SUPPORTED_SEARCH_FIELDS`` is replaced:

        * ``strict`` truthy: the user is prompted (``input``) until a
          supported replacement is entered.
        * ``strict`` falsy: the field is replaced with ``"AB"`` and
          ``QueryErrorCode.SEARCH_FIELD_UNSUPPORTED`` is reported with the
          ``(start, end)`` span of the match.

        A match directly preceded by a double quote is treated as a quoted
        search term (e.g. ``"AI"``) and left alone.
        """
        # Edit a character list so that spans computed on the original
        # string stay valid: matches and replacements are both two
        # characters long, so offsets never shift.
        query_chars = list(self.query_str)

        for match in re.finditer(self.UNSUPPORTED_SEARCH_FIELD_REGEX, self.query_str):
            field = match.group()
            start, end = match.span()

            # Skip terms escaped by an opening quote. The ``start > 0`` guard
            # prevents ``start - 1`` from wrapping around to the last
            # character of the query when the match is at position 0.
            # NOTE: only the character immediately before the match is
            # inspected, so later words of a quoted phrase are not detected.
            if start > 0 and self.query_str[start - 1] == '"':
                continue

            if field in self.SUPPORTED_SEARCH_FIELDS:
                continue

            if strict:
                while True:
                    # Prompt the user to enter a replacement field
                    replacement = input(
                        f"Unsupported field '{field}' found. "
                        "Please enter a replacement (e.g., 'AB'): "
                    ).strip()
                    if replacement in self.SUPPORTED_SEARCH_FIELDS:
                        query_chars[start:end] = list(replacement)
                        print(f"Field '{field}' replaced with '{replacement}'.")
                        break
                    print(
                        f"'{replacement}' is not a supported field. "
                        "Please try again."
                    )
            else:
                # Replace the unsupported field with the default directly
                query_chars[start:end] = list(self.DEFAULT_SEARCH_FIELD)
                self.parser.add_linter_message(
                    QueryErrorCode.SEARCH_FIELD_UNSUPPORTED, (start, end)
                )

        self.query_str = "".join(query_chars)

    def validate_token_position(
        self,
        token_type: TokenTypes,
        previous_token_type: typing.Optional[TokenTypes],
        position: typing.Optional[tuple[int, int]],
    ) -> None:
        """Check that ``token_type`` may follow ``previous_token_type``.

        Nothing is checked for the first token (``previous_token_type`` is
        ``None``). Otherwise, if the transition is not listed in
        ``VALID_TOKEN_TRANSITIONS``,
        ``QueryErrorCode.INVALID_TOKEN_SEQUENCE`` is reported at
        ``position``; a missing position is reported as ``(-1, -1)``.
        """
        if previous_token_type is None:
            # First token, no validation required
            return

        if position is None:
            position = (-1, -1)

        allowed = self.VALID_TOKEN_TRANSITIONS.get(previous_token_type, ())
        if token_type not in allowed:
            self.parser.add_linter_message(
                QueryErrorCode.INVALID_TOKEN_SEQUENCE,
                position,
            )

search_query/parser_ebsco.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@
1010
from search_query.constants import QueryErrorCode
1111
from search_query.constants import Token
1212
from search_query.constants import TokenTypes
13+
from search_query.linter_ebsco import EBSCOQueryStringValidator
1314
from search_query.parser_base import QueryListParser
1415
from search_query.parser_base import QueryStringParser
15-
from search_query.parser_validation import EBSCOQueryStringValidator
1616
from search_query.parser_validation import QueryStringValidator
1717
from search_query.query import Query
1818
from search_query.query import SearchField

search_query/parser_validation.py

Lines changed: 0 additions & 142 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,9 @@
33
from __future__ import annotations
44

55
import re
6-
import typing
76

87
import search_query.parser_base
98
from search_query.constants import QueryErrorCode
10-
from search_query.constants import TokenTypes
11-
12-
if typing.TYPE_CHECKING:
13-
import search_query.parser_ebsco
149

1510

1611
# Could indeed be a general Validator class
@@ -65,143 +60,6 @@ def check_parenthesis(self) -> None:
6560
self.parser.add_linter_message(QueryErrorCode.UNBALANCED_PARENTHESES, ())
6661

6762

68-
class EBSCOQueryStringValidator:
69-
"""Class for EBSCO Query String Validation"""
70-
71-
UNSUPPORTED_SEARCH_FIELD_REGEX = r"\b(?!OR\b)\b(?!S\d+\b)[A-Z]{2}\b"
72-
73-
def __init__(self, parser: search_query.parser_ebsco.EBSCOParser):
74-
self.query_str = parser.query_str
75-
self.search_field_general = parser.search_field_general
76-
self.parser = parser
77-
78-
def check_search_field_general(self, strict: str) -> None:
79-
"""Check field 'Search Fields' in content."""
80-
81-
if self.search_field_general != "" and strict == "strict":
82-
self.parser.add_linter_message(QueryErrorCode.SEARCH_FIELD_EXTRACTED, ())
83-
84-
def filter_search_field(self, strict: bool) -> None:
85-
"""
86-
Filter out unsupported search_fields.
87-
Depending on strictness, automatically change or ask user
88-
"""
89-
90-
supported_fields = {
91-
"TI",
92-
"AU",
93-
"TX",
94-
"AB",
95-
"SO",
96-
"SU",
97-
"IS",
98-
"IB",
99-
"DE",
100-
"LA",
101-
"KW",
102-
}
103-
modified_query_list = list(
104-
self.query_str
105-
) # Convert to list for direct modification
106-
unsupported_fields = []
107-
108-
for match in re.finditer(self.UNSUPPORTED_SEARCH_FIELD_REGEX, self.query_str):
109-
field = match.group()
110-
field = field.strip()
111-
start, end = match.span()
112-
113-
# if escaped by quotes: continue (e.g., search term "AI")
114-
if self.query_str[start - 1] == '"':
115-
continue
116-
117-
if field not in supported_fields:
118-
unsupported_fields.append(field)
119-
if strict:
120-
while True:
121-
# Prompt the user to enter a replacement field
122-
replacement = input(
123-
f"Unsupported field '{field}' found. "
124-
"Please enter a replacement (e.g., 'AB'): "
125-
).strip()
126-
if replacement in supported_fields:
127-
# Replace directly in the modified query list
128-
modified_query_list[start:end] = list(replacement)
129-
print(f"Field '{field}' replaced with '{replacement}'.")
130-
break
131-
print(
132-
f"'{replacement}' is not a supported field. "
133-
"Please try again."
134-
)
135-
else:
136-
# Replace the unsupported field with 'AB' directly
137-
modified_query_list[start:end] = list("AB")
138-
self.parser.add_linter_message(
139-
QueryErrorCode.SEARCH_FIELD_UNSUPPORTED, (start, end)
140-
)
141-
142-
# Convert the modified list back to a string
143-
self.query_str = "".join(modified_query_list)
144-
145-
def validate_token_position(
146-
self,
147-
token_type: TokenTypes,
148-
previous_token_type: typing.Optional[TokenTypes],
149-
position: typing.Optional[tuple[int, int]],
150-
) -> None:
151-
"""
152-
Validate the position of the current token
153-
based on its type and the previous token type.
154-
"""
155-
156-
if previous_token_type is None:
157-
# First token, no validation required
158-
return
159-
160-
valid_transitions = {
161-
TokenTypes.FIELD: [
162-
TokenTypes.SEARCH_TERM,
163-
TokenTypes.PARENTHESIS_OPEN,
164-
], # After FIELD can be SEARCH_TERM; PARENTHESIS_OPEN
165-
TokenTypes.SEARCH_TERM: [
166-
TokenTypes.SEARCH_TERM,
167-
TokenTypes.LOGIC_OPERATOR,
168-
TokenTypes.PROXIMITY_OPERATOR,
169-
TokenTypes.PARENTHESIS_CLOSED,
170-
], # After SEARCH_TERM can be SEARCH_TERM (will get connected anyway);
171-
# LOGIC_OPERATOR; PROXIMITY_OPERATOR; PARENTHESIS_CLOSED
172-
TokenTypes.LOGIC_OPERATOR: [
173-
TokenTypes.SEARCH_TERM,
174-
TokenTypes.FIELD,
175-
TokenTypes.PARENTHESIS_OPEN,
176-
], # After LOGIC_OPERATOR can be SEARCH_TERM; FIELD; PARENTHESIS_OPEN
177-
TokenTypes.PROXIMITY_OPERATOR: [
178-
TokenTypes.SEARCH_TERM,
179-
TokenTypes.PARENTHESIS_OPEN,
180-
TokenTypes.FIELD,
181-
], # After PROXIMITY_OPERATOR can be SEARCH_TERM; PARENTHESIS_OPEN; FIELD
182-
TokenTypes.PARENTHESIS_OPEN: [
183-
TokenTypes.FIELD,
184-
TokenTypes.SEARCH_TERM,
185-
TokenTypes.PARENTHESIS_OPEN,
186-
], # After PARENTHESIS_OPEN can be FIELD; SEARCH_TERM; PARENTHESIS_OPEN
187-
TokenTypes.PARENTHESIS_CLOSED: [
188-
TokenTypes.PARENTHESIS_CLOSED,
189-
TokenTypes.LOGIC_OPERATOR,
190-
TokenTypes.PROXIMITY_OPERATOR,
191-
], # After PARENTHESIS_CLOSED can be PARENTHESIS_CLOSED;
192-
# LOGIC_OPERATOR; PROXIMITY_OPERATOR
193-
}
194-
195-
if position is None:
196-
position = (-1, -1)
197-
198-
if token_type not in valid_transitions.get(previous_token_type, []):
199-
self.parser.add_linter_message(
200-
QueryErrorCode.INVALID_TOKEN_SEQUENCE,
201-
position,
202-
)
203-
204-
20563
class QueryListValidator:
20664
"""Class for Query List Validation"""
20765

0 commit comments

Comments
 (0)