Skip to content

Commit b0f6396

Browse files
authored
Add support for v flag to regexp/no-useless-character-class rule (#593)
1 parent 40273b3 commit b0f6396

File tree

3 files changed

+365
-55
lines changed

3 files changed

+365
-55
lines changed

.changeset/gold-baboons-clap.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"eslint-plugin-regexp": minor
3+
---
4+
5+
Add support for v flag to `regexp/no-useless-character-class` rule

lib/rules/no-useless-character-class.ts

Lines changed: 243 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,23 @@
11
import type { RegExpVisitor } from "@eslint-community/regexpp/visitor"
22
import type { RegExpContext } from "../utils"
33
import { canUnwrapped, createRule, defineRegexpVisitor } from "../utils"
4+
import type {
5+
CharacterClass,
6+
CharacterClassElement,
7+
ExpressionCharacterClass,
8+
UnicodeSetsCharacterClass,
9+
} from "@eslint-community/regexpp/ast"
10+
11+
const ESCAPES_OUTSIDE_CHARACTER_CLASS = new Set("$()*+./?[{|")
12+
const ESCAPES_OUTSIDE_CHARACTER_CLASS_WITH_U = new Set([
13+
...ESCAPES_OUTSIDE_CHARACTER_CLASS,
14+
"}",
15+
])
16+
// The set of single characters that form a ClassSetReservedDoublePunctuator when doubled.
17+
// The ClassSetReservedDoublePunctuators are: && !! ## $$ %% ** ++ ,, .. :: ;; << == >> ?? @@ ^^ `` ~~
18+
const REGEX_CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR = new Set(
19+
"!#$%&*+,.:;<=>?@^`~",
20+
)
421

522
export default createRule("no-useless-character-class", {
623
meta: {
@@ -27,8 +44,10 @@ export default createRule("no-useless-character-class", {
2744
},
2845
],
2946
messages: {
30-
unexpected:
47+
unexpectedCharacterClassWith:
3148
"Unexpected character class with one {{type}}. Can remove brackets{{additional}}.",
49+
unexpectedUnnecessaryNestingCharacterClass:
50+
"Unexpected unnecessary nesting character class. Can remove brackets.",
3251
},
3352
type: "suggestion", // "problem",
3453
},
@@ -37,88 +56,258 @@ export default createRule("no-useless-character-class", {
3756

3857
function createVisitor({
3958
node,
59+
pattern,
4060
flags,
4161
fixReplaceNode,
4262
getRegexpLocation,
4363
}: RegExpContext): RegExpVisitor.Handlers {
64+
const characterClassStack: (
65+
| CharacterClass
66+
| ExpressionCharacterClass
67+
)[] = []
4468
return {
69+
onExpressionCharacterClassEnter(eccNode) {
70+
characterClassStack.push(eccNode)
71+
},
72+
onExpressionCharacterClassLeave() {
73+
characterClassStack.pop()
74+
},
4575
onCharacterClassEnter(ccNode) {
46-
if (ccNode.elements.length !== 1) {
47-
return
48-
}
76+
characterClassStack.push(ccNode)
77+
},
78+
onCharacterClassLeave(ccNode) {
79+
characterClassStack.pop()
4980
if (ccNode.negate) {
5081
return
5182
}
52-
const element = ccNode.elements[0]
53-
if (ignores.length > 0 && ignores.includes(element.raw)) {
54-
return
55-
}
56-
if (element.type === "Character") {
57-
if (element.raw === "\\b") {
58-
// Backspace escape
59-
return
83+
let messageId: string,
84+
messageData: { type: string; additional?: string }
85+
const unwrapped: string[] = ccNode.elements.map(
86+
(_e, index) => {
87+
const element = ccNode.elements[index]
88+
return (
89+
(index === 0
90+
? getEscapedFirstRawIfNeeded(element)
91+
: null) ??
92+
(index === ccNode.elements.length - 1
93+
? getEscapedLastRawIfNeeded(element)
94+
: null) ??
95+
element.raw
96+
)
97+
},
98+
)
99+
if (
100+
ccNode.elements.length !== 1 &&
101+
ccNode.parent.type === "CharacterClass"
102+
) {
103+
messageId = "unexpectedUnnecessaryNestingCharacterClass"
104+
messageData = {
105+
type: "unnecessary nesting character class",
60106
}
61-
if (
62-
/^\\\d+$/u.test(element.raw) &&
63-
!element.raw.startsWith("\\0")
64-
) {
65-
// Avoid back reference
107+
if (!ccNode.elements.length) {
108+
// empty character class
109+
const nextElement =
110+
ccNode.parent.elements[
111+
ccNode.parent.elements.indexOf(
112+
ccNode as UnicodeSetsCharacterClass,
113+
) + 1
114+
]
115+
if (
116+
nextElement &&
117+
isNeedEscapedForFirstElement(nextElement)
118+
) {
119+
unwrapped.push("\\") // Add a backslash to escape the next character.
120+
}
121+
}
122+
} else {
123+
if (ccNode.elements.length !== 1) {
66124
return
67125
}
126+
const element = ccNode.elements[0]
68127
if (
69128
ignores.length > 0 &&
70-
ignores.includes(
71-
String.fromCodePoint(element.value),
72-
)
129+
ignores.includes(element.raw)
73130
) {
74131
return
75132
}
76-
if (!canUnwrapped(ccNode, element.raw)) {
77-
return
78-
}
79-
} else if (element.type === "CharacterClassRange") {
80-
if (element.min.value !== element.max.value) {
133+
if (element.type === "Character") {
134+
if (element.raw === "\\b") {
135+
// Backspace escape
136+
return
137+
}
138+
if (
139+
/^\\\d+$/u.test(element.raw) &&
140+
!element.raw.startsWith("\\0")
141+
) {
142+
// Avoid back reference
143+
return
144+
}
145+
if (
146+
ignores.length > 0 &&
147+
ignores.includes(
148+
String.fromCodePoint(element.value),
149+
)
150+
) {
151+
return
152+
}
153+
if (!canUnwrapped(ccNode, element.raw)) {
154+
return
155+
}
156+
messageData = { type: "character" }
157+
} else if (element.type === "CharacterClassRange") {
158+
if (element.min.value !== element.max.value) {
159+
return
160+
}
161+
messageData = {
162+
type: "character class range",
163+
additional: " and range",
164+
}
165+
unwrapped[0] =
166+
getEscapedFirstRawIfNeeded(element.min) ??
167+
getEscapedLastRawIfNeeded(element.min) ??
168+
element.min.raw
169+
} else if (element.type === "ClassStringDisjunction") {
170+
if (!characterClassStack.length) {
171+
// Only report a string literal when this class is nested inside another character class
172+
return
173+
}
174+
messageData = { type: "string literal" }
175+
} else if (element.type === "CharacterSet") {
176+
messageData = { type: "character class escape" }
177+
} else if (
178+
element.type === "CharacterClass" ||
179+
element.type === "ExpressionCharacterClass"
180+
) {
181+
messageData = { type: "character class" }
182+
} else {
81183
return
82184
}
83-
} else if (element.type !== "CharacterSet") {
84-
return
185+
messageId = "unexpectedCharacterClassWith"
85186
}
86187

87188
context.report({
88189
node,
89190
loc: getRegexpLocation(ccNode),
90-
messageId: "unexpected",
191+
messageId,
91192
data: {
92-
type:
93-
element.type === "Character"
94-
? "character"
95-
: element.type === "CharacterClassRange"
96-
? "character class range"
97-
: "character class escape",
98-
additional:
99-
element.type === "CharacterClassRange"
100-
? " and range"
101-
: "",
193+
type: messageData.type,
194+
additional: messageData.additional || "",
102195
},
103-
fix: fixReplaceNode(ccNode, () => {
104-
let text: string =
105-
element.type === "CharacterClassRange"
106-
? element.min.raw
107-
: element.raw
196+
fix: fixReplaceNode(ccNode, unwrapped.join("")),
197+
})
198+
199+
/**
200+
* Checks whether an escape is required if the given element is placed first
201+
* after character class replacement.
202+
*/
203+
function isNeedEscapedForFirstElement(
204+
element: CharacterClassElement,
205+
) {
206+
const char =
207+
element.type === "Character"
208+
? element.raw
209+
: element.type === "CharacterClassRange"
210+
? element.min.raw
211+
: null
212+
if (char == null) {
213+
return false
214+
}
215+
if (characterClassStack.length) {
216+
// Nesting character class
217+
218+
// Avoid [A&&[&]] => [A&&&]
108219
if (
109-
element.type === "Character" ||
110-
element.type === "CharacterClassRange"
220+
REGEX_CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR.has(
221+
char,
222+
) &&
223+
// The previous character is the same
224+
pattern[ccNode.start - 1] === char
111225
) {
112-
if (
113-
/^[$()*+./?[{|]$/u.test(text) ||
114-
(flags.unicode && text === "}")
115-
) {
116-
text = `\\${text}`
117-
}
226+
return true
118227
}
119-
return text
120-
}),
121-
})
228+
229+
// Avoid [[]^] => [^]
230+
return (
231+
char === "^" &&
232+
ccNode.parent.type === "CharacterClass" &&
233+
ccNode.parent.elements[0] === ccNode
234+
)
235+
}
236+
237+
// Flat character class
238+
return (
239+
flags.unicode
240+
? ESCAPES_OUTSIDE_CHARACTER_CLASS_WITH_U
241+
: ESCAPES_OUTSIDE_CHARACTER_CLASS
242+
).has(char)
243+
}
244+
245+
/**
246+
* Checks whether an escape is required if the given element is placed last
247+
* after character class replacement.
248+
*/
249+
function needEscapedForLastElement(
250+
element: CharacterClassElement,
251+
) {
252+
const char =
253+
element.type === "Character"
254+
? element.raw
255+
: element.type === "CharacterClassRange"
256+
? element.max.raw
257+
: null
258+
if (char == null) {
259+
return false
260+
}
261+
if (characterClassStack.length) {
262+
// Nesting character class
263+
264+
// Avoid [A[&]&B] => [A&&B]
265+
return (
266+
REGEX_CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR.has(
267+
char,
268+
) &&
269+
// The next character is the same
270+
pattern[ccNode.end] === char
271+
)
272+
}
273+
return false
274+
}
275+
276+
/**
277+
* Returns the escaped raw text, if the given first element requires escaping.
278+
* Otherwise, returns null.
279+
*/
280+
function getEscapedFirstRawIfNeeded(
281+
firstElement: CharacterClassElement,
282+
) {
283+
if (isNeedEscapedForFirstElement(firstElement)) {
284+
return `\\${firstElement.raw}`
285+
}
286+
return null
287+
}
288+
289+
/**
290+
* Returns the escaped raw text, if the given last element requires escaping.
291+
* Otherwise, returns null.
292+
*/
293+
function getEscapedLastRawIfNeeded(
294+
lastElement: CharacterClassElement,
295+
) {
296+
if (needEscapedForLastElement(lastElement)) {
297+
const lastRaw =
298+
lastElement.type === "Character"
299+
? lastElement.raw
300+
: lastElement.type === "CharacterClassRange"
301+
? lastElement.max.raw
302+
: "" // never
303+
const prefix = lastElement.raw.slice(
304+
0,
305+
-lastRaw.length,
306+
)
307+
return `${prefix}\\${lastRaw}`
308+
}
309+
return null
310+
}
122311
},
123312
}
124313
}

0 commit comments

Comments
 (0)