1
1
import type { RegExpVisitor } from "@eslint-community/regexpp/visitor"
2
2
import type { RegExpContext } from "../utils"
3
3
import { canUnwrapped , createRule , defineRegexpVisitor } from "../utils"
4
+ import type {
5
+ CharacterClass ,
6
+ CharacterClassElement ,
7
+ ExpressionCharacterClass ,
8
+ UnicodeSetsCharacterClass ,
9
+ } from "@eslint-community/regexpp/ast"
10
+
11
// Characters that get a backslash prefix when an element is moved out of a
// flat (non-nested) character class in a pattern without the `u` flag.
const ESCAPES_OUTSIDE_CHARACTER_CLASS = new Set("$()*+./?[{|")
// Same as above, plus "}", used when the `u` flag is set.
const ESCAPES_OUTSIDE_CHARACTER_CLASS_WITH_U = new Set([
    ...ESCAPES_OUTSIDE_CHARACTER_CLASS,
    "}",
])
// The single characters that form a ClassSetReservedDoublePunctuator when
// written twice in a row:
// && !! ## $$ %% ** ++ ,, .. :: ;; << == >> ?? @@ ^^ `` ~~
const REGEX_CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR = new Set(
    "!#$%&*+,.:;<=>?@^`~",
)
4
21
5
22
export default createRule ( "no-useless-character-class" , {
6
23
meta : {
@@ -27,8 +44,10 @@ export default createRule("no-useless-character-class", {
27
44
} ,
28
45
] ,
29
46
messages : {
30
- unexpected :
47
+ unexpectedCharacterClassWith :
31
48
"Unexpected character class with one {{type}}. Can remove brackets{{additional}}." ,
49
+ unexpectedUnnecessaryNestingCharacterClass :
50
+ "Unexpected unnecessary nesting character class. Can remove brackets." ,
32
51
} ,
33
52
type : "suggestion" , // "problem",
34
53
} ,
@@ -37,88 +56,258 @@ export default createRule("no-useless-character-class", {
37
56
38
57
/**
 * Creates the regexp visitor for a single pattern.
 *
 * Reports non-negated character classes whose brackets can be removed:
 * - a class nested directly in another character class that does not hold
 *   exactly one element (e.g. `[a[bc]]`, `[a[]]`), and
 * - a class holding exactly one element (character, single-value range,
 *   character class escape, nested class, or - only when nested - a string
 *   literal).
 *
 * The fix replaces the class with its elements' raw text, adding a backslash
 * where the unwrapped text would otherwise change meaning.
 */
function createVisitor({
    node,
    pattern,
    flags,
    fixReplaceNode,
    getRegexpLocation,
}: RegExpContext): RegExpVisitor.Handlers {
    // Character classes (plain and expression) that are currently open
    // around the node being visited; empty means "not inside any class".
    const characterClassStack: (
        | CharacterClass
        | ExpressionCharacterClass
    )[] = []
    return {
        onExpressionCharacterClassEnter(eccNode) {
            characterClassStack.push(eccNode)
        },
        onExpressionCharacterClassLeave() {
            characterClassStack.pop()
        },
        onCharacterClassEnter(ccNode) {
            characterClassStack.push(ccNode)
        },
        onCharacterClassLeave(ccNode) {
            // Pop first: from here on a non-empty stack means ccNode itself
            // is nested inside another class.
            characterClassStack.pop()
            if (ccNode.negate) {
                return
            }
            let messageId: string,
                messageData: { type: string; additional?: string }
            // Replacement text, one entry per element. Only the first and
            // last elements can end up adjacent to outside text, so only
            // those are checked for escaping.
            // NOTE(review): the element is re-read by index instead of using
            // the callback parameter - presumably to keep its type under the
            // union-typed `elements` array; confirm before simplifying.
            const unwrapped: string[] = ccNode.elements.map(
                (_e, index) => {
                    const element = ccNode.elements[index]
                    return (
                        (index === 0
                            ? getEscapedFirstRawIfNeeded(element)
                            : null) ??
                        (index === ccNode.elements.length - 1
                            ? getEscapedLastRawIfNeeded(element)
                            : null) ??
                        element.raw
                    )
                },
            )
            if (
                ccNode.elements.length !== 1 &&
                ccNode.parent.type === "CharacterClass"
            ) {
                messageId = "unexpectedUnnecessaryNestingCharacterClass"
                messageData = {
                    type: "unnecessary nesting character class",
                }
                if (!ccNode.elements.length) {
                    // empty character class
                    const nextElement =
                        ccNode.parent.elements[
                            ccNode.parent.elements.indexOf(
                                ccNode as UnicodeSetsCharacterClass,
                            ) + 1
                        ]
                    if (
                        nextElement &&
                        isNeedEscapedForFirstElement(nextElement)
                    ) {
                        unwrapped.push("\\") // Add a backslash to escape the next character.
                    }
                }
            } else {
                if (ccNode.elements.length !== 1) {
                    return
                }
                const element = ccNode.elements[0]
                if (
                    ignores.length > 0 &&
                    ignores.includes(element.raw)
                ) {
                    return
                }
                if (element.type === "Character") {
                    if (element.raw === "\\b") {
                        // Backspace escape
                        return
                    }
                    if (
                        /^\\\d+$/u.test(element.raw) &&
                        !element.raw.startsWith("\\0")
                    ) {
                        // Avoid back reference
                        return
                    }
                    if (
                        ignores.length > 0 &&
                        ignores.includes(
                            String.fromCodePoint(element.value),
                        )
                    ) {
                        return
                    }
                    if (!canUnwrapped(ccNode, element.raw)) {
                        return
                    }
                    messageData = { type: "character" }
                } else if (element.type === "CharacterClassRange") {
                    if (element.min.value !== element.max.value) {
                        return
                    }
                    messageData = {
                        type: "character class range",
                        additional: " and range",
                    }
                    // A single-value range collapses to its `min` character.
                    unwrapped[0] =
                        getEscapedFirstRawIfNeeded(element.min) ??
                        getEscapedLastRawIfNeeded(element.min) ??
                        element.min.raw
                } else if (element.type === "ClassStringDisjunction") {
                    if (!characterClassStack.length) {
                        // Only nesting character class
                        return
                    }
                    messageData = { type: "string literal" }
                } else if (element.type === "CharacterSet") {
                    messageData = { type: "character class escape" }
                } else if (
                    element.type === "CharacterClass" ||
                    element.type === "ExpressionCharacterClass"
                ) {
                    messageData = { type: "character class" }
                } else {
                    return
                }
                messageId = "unexpectedCharacterClassWith"
            }

            context.report({
                node,
                loc: getRegexpLocation(ccNode),
                messageId,
                data: {
                    type: messageData.type,
                    additional: messageData.additional ?? "",
                },
                fix: fixReplaceNode(ccNode, unwrapped.join("")),
            })

            /**
             * Checks whether an escape is required if the given element is placed first
             * after character class replacement.
             */
            function isNeedEscapedForFirstElement(
                element: CharacterClassElement,
            ) {
                const char =
                    element.type === "Character"
                        ? element.raw
                        : element.type === "CharacterClassRange"
                          ? element.min.raw
                          : null
                if (char == null) {
                    return false
                }
                if (characterClassStack.length) {
                    // Nesting character class

                    // Avoid [A&&[&]] => [A&&&]
                    if (
                        REGEX_CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR.has(
                            char,
                        ) &&
                        // The previous character is the same
                        pattern[ccNode.start - 1] === char
                    ) {
                        return true
                    }

                    // Avoid [[]^] => [^]
                    return (
                        char === "^" &&
                        ccNode.parent.type === "CharacterClass" &&
                        ccNode.parent.elements[0] === ccNode
                    )
                }

                // Flat character class
                return (
                    flags.unicode
                        ? ESCAPES_OUTSIDE_CHARACTER_CLASS_WITH_U
                        : ESCAPES_OUTSIDE_CHARACTER_CLASS
                ).has(char)
            }

            /**
             * Checks whether an escape is required if the given element is placed last
             * after character class replacement.
             */
            function isNeedEscapedForLastElement(
                element: CharacterClassElement,
            ) {
                const char =
                    element.type === "Character"
                        ? element.raw
                        : element.type === "CharacterClassRange"
                          ? element.max.raw
                          : null
                if (char == null) {
                    return false
                }
                if (characterClassStack.length) {
                    // Nesting character class

                    // Avoid [A[&]&B] => [A&&B]
                    return (
                        REGEX_CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR.has(
                            char,
                        ) &&
                        // The next character is the same
                        pattern[ccNode.end] === char
                    )
                }
                return false
            }

            /**
             * Returns the escaped raw text, if the given first element requires escaping.
             * Otherwise, returns null.
             */
            function getEscapedFirstRawIfNeeded(
                firstElement: CharacterClassElement,
            ) {
                if (isNeedEscapedForFirstElement(firstElement)) {
                    return `\\${firstElement.raw}`
                }
                return null
            }

            /**
             * Returns the escaped raw text, if the given last element requires escaping.
             * Otherwise, returns null.
             */
            function getEscapedLastRawIfNeeded(
                lastElement: CharacterClassElement,
            ) {
                if (isNeedEscapedForLastElement(lastElement)) {
                    const lastRaw =
                        lastElement.type === "Character"
                            ? lastElement.raw
                            : lastElement.type === "CharacterClassRange"
                              ? lastElement.max.raw
                              : "" // never
                    // For a range, keep everything before `max` (the
                    // "min-" part) and escape only the trailing character.
                    const prefix = lastElement.raw.slice(
                        0,
                        -lastRaw.length,
                    )
                    return `${prefix}\\${lastRaw}`
                }
                return null
            }
        },
    }
}
0 commit comments