@@ -22,34 +22,27 @@ class PubmedQueryStringValidator(QueryStringValidator):
2222 PROXIMITY_REGEX = r"^\[(.+):~(.*)\]$"
2323 parser : "PubmedParser"
2424
25- VALID_TOKEN_SEQUENCES = {
26- None : [
27- TokenTypes .SEARCH_TERM ,
28- TokenTypes .PARENTHESIS_OPEN
29- ],
25+ VALID_TOKEN_SEQUENCES : typing .Dict [TokenTypes , typing .List [TokenTypes ]] = {
3026 TokenTypes .PARENTHESIS_OPEN : [
3127 TokenTypes .SEARCH_TERM ,
3228 TokenTypes .PARENTHESIS_OPEN ,
3329 ],
3430 TokenTypes .PARENTHESIS_CLOSED : [
3531 TokenTypes .LOGIC_OPERATOR ,
3632 TokenTypes .PARENTHESIS_CLOSED ,
37- None
3833 ],
3934 TokenTypes .SEARCH_TERM : [
4035 TokenTypes .FIELD ,
4136 TokenTypes .LOGIC_OPERATOR ,
4237 TokenTypes .PARENTHESIS_CLOSED ,
43- None
4438 ],
4539 TokenTypes .FIELD : [
4640 TokenTypes .LOGIC_OPERATOR ,
4741 TokenTypes .PARENTHESIS_CLOSED ,
48- None
4942 ],
5043 TokenTypes .LOGIC_OPERATOR : [
5144 TokenTypes .SEARCH_TERM ,
52- TokenTypes .PARENTHESIS_OPEN
45+ TokenTypes .PARENTHESIS_OPEN ,
5346 ],
5447 }
5548
@@ -61,7 +54,7 @@ def validate_tokens(self, tokens: list) -> list:
6154 for index , token in enumerate (tokens ):
6255 if token .type == TokenTypes .SEARCH_TERM :
6356 self ._check_invalid_characters (token )
64- if '*' in token .value :
57+ if "*" in token .value :
6558 self ._check_invalid_wildcard (token )
6659
6760 if token .type == TokenTypes .FIELD :
@@ -105,8 +98,33 @@ def _check_unbalanced_parentheses(self, tokens: list) -> None:
10598 def _check_invalid_token_sequence (self , tokens : list ) -> None :
10699 """Check token list for invalid token sequences."""
107100 for i in range (0 , len (tokens ) + 1 ):
108- prev_type = tokens [i - 1 ].type if i > 0 else None
109- token_type = tokens [i ].type if i < len (tokens ) else None
101+ if i == len (tokens ):
102+ if tokens [i - 1 ].type in [
103+ TokenTypes .PARENTHESIS_OPEN ,
104+ TokenTypes .LOGIC_OPERATOR ,
105+ ]:
106+ self .parser .add_linter_message (
107+ QueryErrorCode .INVALID_TOKEN_SEQUENCE ,
108+ pos = tokens [i - 1 ].position ,
109+ details = f"Cannot end with { tokens [i - 1 ].type } " ,
110+ )
111+ break
112+
113+ token_type = tokens [i ].type # if i < len(tokens) else None
114+ if i == 0 :
115+ # Skip first token
116+ if token_type not in [
117+ TokenTypes .SEARCH_TERM ,
118+ TokenTypes .PARENTHESIS_OPEN ,
119+ ]:
120+ self .parser .add_linter_message (
121+ QueryErrorCode .INVALID_TOKEN_SEQUENCE ,
122+ pos = tokens [i ].position ,
123+ details = f"Cannot start with { token_type } " ,
124+ )
125+ continue
126+
127+ prev_type = tokens [i - 1 ].type
110128
111129 if token_type not in self .VALID_TOKEN_SEQUENCES [prev_type ]:
112130 if token_type == TokenTypes .FIELD :
@@ -117,17 +135,24 @@ def _check_invalid_token_sequence(self, tokens: list) -> None:
117135 details = "Invalid operator position"
118136 position = tokens [i ].position
119137
120- elif prev_type == TokenTypes .PARENTHESIS_OPEN and token_type == TokenTypes .PARENTHESIS_CLOSED :
138+ elif (
139+ prev_type == TokenTypes .PARENTHESIS_OPEN
140+ and token_type == TokenTypes .PARENTHESIS_CLOSED
141+ ):
121142 details = "Empty parenthesis"
122143 position = (tokens [i - 1 ].position [0 ], tokens [i ].position [1 ])
123144
124- elif token_type and prev_type and prev_type != TokenTypes .LOGIC_OPERATOR :
145+ elif (
146+ token_type and prev_type and prev_type != TokenTypes .LOGIC_OPERATOR
147+ ):
125148 details = "Missing operator"
126149 position = (tokens [i - 1 ].position [0 ], tokens [i ].position [1 ])
127150
128151 else :
129152 details = ""
130- position = tokens [i ].position if token_type else tokens [i - 1 ].position
153+ position = (
154+ tokens [i ].position if token_type else tokens [i - 1 ].position
155+ )
131156
132157 self .parser .add_linter_message (
133158 QueryErrorCode .INVALID_TOKEN_SEQUENCE ,
@@ -143,8 +168,7 @@ def _check_precedence(self, index: int, tokens: list) -> None:
143168 if token .type == TokenTypes .PARENTHESIS_OPEN :
144169 if i == 0 :
145170 return
146- else :
147- i -= 1
171+ i -= 1
148172 if token .type == TokenTypes .PARENTHESIS_CLOSED :
149173 i += 1
150174 if token .type == TokenTypes .LOGIC_OPERATOR and i == 0 :
@@ -153,12 +177,10 @@ def _check_precedence(self, index: int, tokens: list) -> None:
153177 if token .value .upper () not in operator_group :
154178 self .parser .add_linter_message (
155179 QueryErrorCode .IMPLICIT_PRECEDENCE ,
156- pos = tokens [index ].position
180+ pos = tokens [index ].position ,
157181 )
158182
159- def _check_invalid_characters (
160- self , token : Token
161- ) -> None :
183+ def _check_invalid_characters (self , token : Token ) -> None :
162184 """Check a search term for invalid characters"""
163185 invalid_characters = "!#$%+.;<>?\\ ^_{}~'()[]"
164186 value = token .value
@@ -167,10 +189,9 @@ def _check_invalid_characters(
167189 for i , char in enumerate (token .value ):
168190 if char in invalid_characters :
169191 self .parser .add_linter_message (
170- QueryErrorCode .INVALID_CHARACTER ,
171- pos = token .position
192+ QueryErrorCode .INVALID_CHARACTER , pos = token .position
172193 )
173- value = value [:i ] + " " + value [i + 1 :]
194+ value = value [:i ] + " " + value [i + 1 :]
174195 # Update token
175196 if value != token .value :
176197 token .value = value
@@ -270,12 +291,8 @@ def _check_redundant_terms(self, query: Query) -> None:
270291 ):
271292 continue
272293
273- field_a = self .parser .map_search_field (
274- term_a .search_field .value
275- )
276- field_b = self .parser .map_search_field (
277- term_b .search_field .value
278- )
294+ field_a = self .parser .map_search_field (term_a .search_field .value )
295+ field_b = self .parser .map_search_field (term_b .search_field .value )
279296
280297 if field_a == field_b and (
281298 term_a .value == term_b .value
@@ -346,7 +363,8 @@ def _check_unsupported_search_field(self, search_field: SearchField) -> None:
346363 search_field .position and search_field .value == "ab"
347364 ):
348365 self .parser .add_linter_message (
349- QueryErrorCode .SEARCH_FIELD_UNSUPPORTED , search_field .position
366+ QueryErrorCode .SEARCH_FIELD_UNSUPPORTED ,
367+ search_field .position or (- 1 , - 1 ),
350368 )
351369 search_field .value = Fields .ALL
352370 search_field .position = None
0 commit comments