|
3 | 3 | from __future__ import annotations |
4 | 4 |
|
5 | 5 | import re |
6 | | -import typing |
7 | 6 |
|
8 | 7 | import search_query.parser_base |
9 | 8 | from search_query.constants import QueryErrorCode |
10 | | -from search_query.constants import TokenTypes |
11 | | - |
12 | | -if typing.TYPE_CHECKING: |
13 | | - import search_query.parser_ebsco |
14 | 9 |
|
15 | 10 |
|
16 | 11 | # Could indeed be a general Validator class |
@@ -65,143 +60,6 @@ def check_parenthesis(self) -> None: |
65 | 60 | self.parser.add_linter_message(QueryErrorCode.UNBALANCED_PARENTHESES, ()) |
66 | 61 |
|
67 | 62 |
|
class EBSCOQueryStringValidator:
    """Validate an EBSCO query string and report findings to the parser.

    The validator keeps its own copy of the parser's ``query_str`` and
    ``search_field_general`` and reports problems through
    ``parser.add_linter_message``.
    """

    UNSUPPORTED_SEARCH_FIELD_REGEX = r"\b(?!OR\b)\b(?!S\d+\b)[A-Z]{2}\b"

    # Two-letter field codes that ``filter_search_field`` leaves untouched.
    SUPPORTED_SEARCH_FIELDS = frozenset(
        {
            "TI",
            "AU",
            "TX",
            "AB",
            "SO",
            "SU",
            "IS",
            "IB",
            "DE",
            "LA",
            "KW",
        }
    )

    # Field substituted for an unsupported one when not in strict mode.
    DEFAULT_SEARCH_FIELD = "AB"

    def __init__(self, parser: search_query.parser_ebsco.EBSCOParser):
        """Store the parser and copy the attributes the checks read."""
        self.query_str = parser.query_str
        self.search_field_general = parser.search_field_general
        self.parser = parser

    def check_search_field_general(self, strict: str) -> None:
        """Check field 'Search Fields' in content.

        Adds a ``SEARCH_FIELD_EXTRACTED`` linter message when a general
        search field is set and ``strict`` equals the string ``"strict"``.

        NOTE: ``strict`` is a string here, whereas ``filter_search_field``
        takes a boolean flag of the same name.
        """
        if self.search_field_general != "" and strict == "strict":
            self.parser.add_linter_message(QueryErrorCode.SEARCH_FIELD_EXTRACTED, ())

    def filter_search_field(self, strict: bool) -> None:
        """Replace unsupported search fields in ``self.query_str``.

        Every match of ``UNSUPPORTED_SEARCH_FIELD_REGEX`` that is not
        directly preceded by a double quote and is not a supported field is
        replaced in place:

        * ``strict`` truthy: the user is prompted (via ``input``) until a
          supported replacement is entered.
        * ``strict`` falsy: the field is replaced with ``"AB"`` and a
          ``SEARCH_FIELD_UNSUPPORTED`` linter message is added for the span.
        """
        original = self.query_str
        # Work on a character list so spans can be overwritten in place.
        # Matches and replacements are both two characters long, so the
        # offsets computed on the original string stay valid.
        chars = list(original)

        for match in re.finditer(self.UNSUPPORTED_SEARCH_FIELD_REGEX, original):
            field = match.group()
            start, end = match.span()

            # If escaped by quotes: skip (e.g., search term "AI").
            # The ``start > 0`` guard matters: without it, index -1 would
            # wrap around and inspect the *last* character of the query.
            if start > 0 and original[start - 1] == '"':
                continue

            if field in self.SUPPORTED_SEARCH_FIELDS:
                continue

            if strict:
                replacement = self._prompt_for_supported_field(field)
                chars[start:end] = list(replacement)
                print(f"Field '{field}' replaced with '{replacement}'.")
            else:
                chars[start:end] = list(self.DEFAULT_SEARCH_FIELD)
                self.parser.add_linter_message(
                    QueryErrorCode.SEARCH_FIELD_UNSUPPORTED, (start, end)
                )

        self.query_str = "".join(chars)

    def _prompt_for_supported_field(self, field: str) -> str:
        """Ask the user for a replacement until a supported field is given."""
        while True:
            replacement = input(
                f"Unsupported field '{field}' found. "
                "Please enter a replacement (e.g., 'AB'): "
            ).strip()
            if replacement in self.SUPPORTED_SEARCH_FIELDS:
                return replacement
            print(
                f"'{replacement}' is not a supported field. "
                "Please try again."
            )

    def validate_token_position(
        self,
        token_type: TokenTypes,
        previous_token_type: typing.Optional[TokenTypes],
        position: typing.Optional[tuple[int, int]],
    ) -> None:
        """
        Validate the position of the current token
        based on its type and the previous token type.

        Adds an ``INVALID_TOKEN_SEQUENCE`` linter message when ``token_type``
        is not allowed directly after ``previous_token_type``. A missing
        ``position`` is reported as ``(-1, -1)``.
        """
        if previous_token_type is None:
            # First token, no validation required
            return

        # Maps a token type to the token types that may directly follow it.
        # A previous type that is not listed allows no successor at all.
        valid_transitions = {
            TokenTypes.FIELD: (
                TokenTypes.SEARCH_TERM,
                TokenTypes.PARENTHESIS_OPEN,
            ),
            # Consecutive SEARCH_TERMs are allowed (will get connected anyway).
            TokenTypes.SEARCH_TERM: (
                TokenTypes.SEARCH_TERM,
                TokenTypes.LOGIC_OPERATOR,
                TokenTypes.PROXIMITY_OPERATOR,
                TokenTypes.PARENTHESIS_CLOSED,
            ),
            TokenTypes.LOGIC_OPERATOR: (
                TokenTypes.SEARCH_TERM,
                TokenTypes.FIELD,
                TokenTypes.PARENTHESIS_OPEN,
            ),
            TokenTypes.PROXIMITY_OPERATOR: (
                TokenTypes.SEARCH_TERM,
                TokenTypes.PARENTHESIS_OPEN,
                TokenTypes.FIELD,
            ),
            TokenTypes.PARENTHESIS_OPEN: (
                TokenTypes.FIELD,
                TokenTypes.SEARCH_TERM,
                TokenTypes.PARENTHESIS_OPEN,
            ),
            TokenTypes.PARENTHESIS_CLOSED: (
                TokenTypes.PARENTHESIS_CLOSED,
                TokenTypes.LOGIC_OPERATOR,
                TokenTypes.PROXIMITY_OPERATOR,
            ),
        }

        if position is None:
            position = (-1, -1)

        if token_type not in valid_transitions.get(previous_token_type, ()):
            self.parser.add_linter_message(
                QueryErrorCode.INVALID_TOKEN_SEQUENCE,
                position,
            )
204 | | - |
205 | 63 | class QueryListValidator: |
206 | 64 | """Class for Query List Validation""" |
207 | 65 |
|
|
0 commit comments