Skip to content

Commit 3b076a7

Browse files
Add regexp/strict rule (#220)
Co-authored-by: Michael Schmidt <[email protected]>
1 parent 0eeddf7 commit 3b076a7

File tree

6 files changed

+609
-0
lines changed

6 files changed

+609
-0
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ The rules with the following star :star: are included in the `plugin:regexp/reco
112112
| [regexp/no-useless-assertions](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-useless-assertions.html) | disallow assertions that are known to always accept (or reject) | |
113113
| [regexp/no-useless-backreference](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-useless-backreference.html) | disallow useless backreferences in regular expressions | :star: |
114114
| [regexp/no-useless-dollar-replacements](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-useless-dollar-replacements.html) | disallow useless `$` replacements in replacement string | |
115+
| [regexp/strict](https://ota-meshi.github.io/eslint-plugin-regexp/rules/strict.html) | disallow not strictly valid regular expressions | :wrench: |
115116

116117
### Best Practices
117118

docs/rules/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ The rules with the following star :star: are included in the `plugin:regexp/reco
2626
| [regexp/no-useless-assertions](./no-useless-assertions.md) | disallow assertions that are known to always accept (or reject) | |
2727
| [regexp/no-useless-backreference](./no-useless-backreference.md) | disallow useless backreferences in regular expressions | :star: |
2828
| [regexp/no-useless-dollar-replacements](./no-useless-dollar-replacements.md) | disallow useless `$` replacements in replacement string | |
29+
| [regexp/strict](./strict.md) | disallow not strictly valid regular expressions | :wrench: |
2930

3031
### Best Practices
3132

docs/rules/strict.md

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
---
2+
pageClass: "rule-details"
3+
sidebarDepth: 0
4+
title: "regexp/strict"
5+
description: "disallow not strictly valid regular expressions"
6+
---
7+
# regexp/strict
8+
9+
> disallow not strictly valid regular expressions
10+
11+
- :exclamation: <badge text="This rule has not been released yet." vertical="middle" type="error"> ***This rule has not been released yet.*** </badge>
12+
- :wrench: The `--fix` option on the [command line](https://eslint.org/docs/user-guide/command-line-interface#fixing-problems) can automatically fix some of the problems reported by this rule.
13+
14+
## :book: Rule Details
15+
16+
This rule disallows not strictly valid regular expressions.
17+
18+
An invalid pattern in a regular expression literal causes a `SyntaxError` when the code is parsed. However, patterns are not always checked strictly.
19+
20+
Because of the additional syntax defined in [Annex B] of the ECMAScript specification, some ambiguous patterns are still parsed as valid when the `u` flag is not set. This rule reports these ambiguous patterns.
21+
22+
[Annex B]: https://tc39.es/ecma262/#sec-regular-expressions-patterns
23+
24+
<eslint-code-block fix>
25+
26+
```js
27+
/* eslint regexp/strict: "error" */
28+
29+
/* ✓ GOOD */
30+
var foo = /\}/
31+
var foo = /\{/
32+
var foo = /\]/
33+
var foo = /\u{42}/u; // It matches "B".
34+
var foo = /u{42}/; // It matches a string of 42 consecutive "u"s.
35+
36+
/* ✗ BAD */
37+
var foo = /}/
38+
var foo = /{/
39+
var foo = /]/
40+
var foo = /\u{42}/; // It matches a string of 42 consecutive "u"s.
41+
```
42+
43+
</eslint-code-block>
44+
45+
## :wrench: Options
46+
47+
Nothing.
48+
49+
## :books: Further reading
50+
51+
- [ECMAScript® 2022 Language Specification > Annex B > B.1.4 Regular Expressions Patterns](https://tc39.es/ecma262/#sec-regular-expressions-patterns)
52+
53+
## :couple: Related rules
54+
55+
- [no-invalid-regexp]
56+
- [regexp/no-standalone-backslash]
57+
58+
[no-invalid-regexp]: https://eslint.org/docs/rules/no-invalid-regexp
59+
[regexp/no-standalone-backslash]: ./no-standalone-backslash.md
60+
61+
## :mag: Implementation
62+
63+
- [Rule source](https://github.com/ota-meshi/eslint-plugin-regexp/blob/master/lib/rules/strict.ts)
64+
- [Test source](https://github.com/ota-meshi/eslint-plugin-regexp/blob/master/tests/lib/rules/strict.ts)

lib/rules/strict.ts

Lines changed: 275 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,275 @@
1+
import { RegExpValidator } from "regexpp"
2+
import type { CharacterClassElement, Element } from "regexpp/ast"
3+
import type { RegExpVisitor } from "regexpp/visitor"
4+
import type { RegExpContext } from "../utils"
5+
import {
6+
isOctalEscape,
7+
createRule,
8+
defineRegexpVisitor,
9+
isEscapeSequence,
10+
} from "../utils"
11+
12+
const validator = new RegExpValidator({ strict: true, ecmaVersion: 2020 })
13+
14+
/**
 * Check a pattern for syntax errors using the module-level strict validator.
 *
 * @param pattern The regular expression pattern to validate.
 * @param uFlag Forwarded to the validator as its `uFlag` argument.
 * @returns The error message when validation throws, or `null` when the
 * pattern passes validation.
 */
function validateRegExpPattern(
    pattern: string,
    uFlag?: boolean,
): string | null {
    try {
        validator.validatePattern(pattern, undefined, undefined, uFlag)
        return null
    } catch (err) {
        // The caught value is not guaranteed to be an `Error`. Reading
        // `.message` off anything else yields `undefined`, which callers
        // would treat as "no syntax error" and silently drop the failure.
        return err instanceof Error ? err.message : String(err)
    }
}
29+
30+
// Characters whose identity escape (`\` + the character itself) is not
// reported as useless when the character sits inside a character class.
// Same as SYNTAX_CHARACTERS plus `-`.
const CHARACTER_CLASS_SYNTAX_CHARACTERS = new Set("\\/()[]{}^$.|-+*?".split(""))
31+
// Characters whose identity escape is not reported as useless outside of
// character classes.
const SYNTAX_CHARACTERS = new Set("\\/()[]{}^$.|+*?".split(""))
32+
33+
/**
 * Rule `strict`: reports regular expression syntax that the visitor below
 * considers not strictly valid, attaching an autofix wherever a replacement
 * string is passed to `report`. Patterns with the `u` flag are skipped.
 * When the visitor itself reports nothing, the pattern is additionally run
 * through `validateRegExpPattern` and any error message is reported as-is.
 */
export default createRule("strict", {
34+
meta: {
35+
docs: {
36+
description: "disallow not strictly valid regular expressions",
37+
category: "Possible Errors",
38+
// TODO Switch to recommended in the major version.
39+
// recommended: true,
40+
recommended: false,
41+
},
42+
fixable: "code",
43+
schema: [],
44+
messages: {
45+
// character escape
46+
invalidControlEscape:
47+
"Invalid or incomplete control escape sequence. Either use a valid control escape sequence or escaping the standalone backslash.",
48+
incompleteEscapeSequence:
49+
"Incomplete escape sequence '{{expr}}'. Either use a valid escape sequence or remove the useless escaping.",
50+
invalidPropertyEscape:
51+
"Invalid property escape sequence '{{expr}}'. Either use a valid property escape sequence or remove the useless escaping.",
52+
incompleteBackreference:
53+
"Incomplete backreference '{{expr}}'. Either use a valid backreference or remove the useless escaping.",
54+
unescapedSourceCharacter: "Unescaped source character '{{expr}}'.",
55+
octalEscape:
56+
"Invalid legacy octal escape sequence '{{expr}}'. Use a hexadecimal escape instead.",
57+
uselessEscape:
58+
"Useless identity escapes with non-syntax characters are forbidden.",
59+
60+
// character class
61+
invalidRange:
62+
"Invalid character class range. A character set cannot be the minimum or maximum of a character class range. Either escape the `-` or fix the character class range.",
63+
64+
// assertion
65+
quantifiedAssertion:
66+
"Assertion are not allowed to be quantified directly.",
67+
68+
// validator
69+
// `{{message}}` is filled with the string returned by validateRegExpPattern.
70+
regexMessage: "{{message}}.",
71+
},
72+
type: "suggestion",
73+
},
74+
create(context) {
75+
/**
76+
* Create visitor
77+
*/
78+
// Called with the context of one regular expression; the `reported` and
79+
// `hasNamedBackreference` flags below are local to each call.
80+
function createVisitor(
81+
regexpContext: RegExpContext,
82+
): RegExpVisitor.Handlers {
83+
const {
84+
node,
85+
flags,
86+
pattern,
87+
getRegexpLocation,
88+
fixReplaceNode,
89+
} = regexpContext
90+
91+
if (flags.unicode) {
92+
// the Unicode flag enables strict parsing mode automatically
93+
return {}
94+
}
95+
96+
// Set by `report`; while it is still false at the end of the pattern,
97+
// onPatternLeave falls back to the strict validator.
98+
let reported = false
99+
// Set when a backreference whose `ref` is a string has been visited.
100+
let hasNamedBackreference = false
101+
102+
/** Report */
103+
// Reports `messageId` at the location of `element`, exposing `element.raw`
104+
// as `{{expr}}`. An autofix replacing `element` is attached only when
105+
// `fix` is a non-empty string.
106+
function report(
107+
messageId: string,
108+
element: Element,
109+
fix?: string | null,
110+
): void {
111+
reported = true
112+
113+
context.report({
114+
node,
115+
loc: getRegexpLocation(element),
116+
messageId,
117+
data: {
118+
expr: element.raw,
119+
},
120+
fix: fix ? fixReplaceNode(element, fix) : null,
121+
})
122+
}
123+
124+
return {
125+
// The checks below run in order and return after the first report, so a
126+
// single character node is reported at most once.
127+
// eslint-disable-next-line complexity -- x
128+
onCharacterEnter(cNode) {
129+
if (cNode.raw === "\\") {
130+
// e.g. \c5 or \c
131+
report("invalidControlEscape", cNode)
132+
return
133+
}
134+
if (cNode.raw === "\\u" || cNode.raw === "\\x") {
135+
// e.g. \u000;
136+
report("incompleteEscapeSequence", cNode)
137+
return
138+
}
139+
if (cNode.raw === "\\p" || cNode.raw === "\\P") {
140+
// e.g. \p{H} or \p
141+
report("invalidPropertyEscape", cNode)
142+
return
143+
}
144+
if (cNode.value !== 0 && isOctalEscape(cNode.raw)) {
145+
// e.g. \023
146+
// autofix: rewrite as a hexadecimal escape, zero-padded to two digits
147+
report(
148+
"octalEscape",
149+
cNode,
150+
`\\x${cNode.value.toString(16).padStart(2, "0")}`,
151+
)
152+
return
153+
}
154+
155+
// true when the character is a direct member of a character class or
156+
// one end of a character class range
157+
const insideCharClass =
158+
cNode.parent.type === "CharacterClass" ||
159+
cNode.parent.type === "CharacterClassRange"
160+
161+
if (!insideCharClass) {
162+
if (cNode.raw === "\\k") {
163+
// e.g. \k<foo or \k
164+
report("incompleteBackreference", cNode)
165+
return
166+
}
167+
168+
if (
169+
cNode.raw === "{" ||
170+
cNode.raw === "}" ||
171+
cNode.raw === "]"
172+
) {
173+
// autofix: prefix the character with a backslash
174+
report(
175+
"unescapedSourceCharacter",
176+
cNode,
177+
`\\${cNode.raw}`,
178+
)
179+
return
180+
}
181+
}
182+
183+
if (isEscapeSequence(cNode.raw)) {
184+
// all remaining escape sequences are valid
185+
return
186+
}
187+
188+
if (cNode.raw.startsWith("\\")) {
189+
// `identity` is the raw text after the leading backslash
190+
const identity = cNode.raw.slice(1)
191+
const syntaxChars = insideCharClass
192+
? CHARACTER_CLASS_SYNTAX_CHARACTERS
193+
: SYNTAX_CHARACTERS
194+
195+
// The escape stands for the escaped character itself, and that
196+
// character is not a syntax character in this position.
197+
// autofix: drop the backslash
198+
if (
199+
cNode.value === identity.charCodeAt(0) &&
200+
!syntaxChars.has(identity)
201+
) {
202+
// e.g. \g or \;
203+
report("uselessEscape", cNode, identity)
204+
}
205+
}
206+
},
207+
// Looks for a CharacterSet element that has a literal `-` element between
208+
// it and a neighbouring element. Stops after the first report, so each
209+
// character class is reported at most once.
210+
onCharacterClassEnter(ccNode) {
211+
for (let i = 0; i < ccNode.elements.length; i++) {
212+
const current = ccNode.elements[i]
213+
214+
if (current.type === "CharacterSet") {
215+
const next: CharacterClassElement | undefined =
216+
ccNode.elements[i + 1]
217+
const nextNext: CharacterClassElement | undefined =
218+
ccNode.elements[i + 2]
219+
220+
if (next && next.raw === "-" && nextNext) {
221+
// e.g. [\w-a]
222+
report("invalidRange", current)
223+
return
224+
}
225+
226+
const prev: CharacterClassElement | undefined =
227+
ccNode.elements[i - 1]
228+
const prevPrev: CharacterClassElement | undefined =
229+
ccNode.elements[i - 2]
230+
// not reported when the element before the `-` is itself a range
231+
if (
232+
prev &&
233+
prev.raw === "-" &&
234+
prevPrev &&
235+
prevPrev.type !== "CharacterClassRange"
236+
) {
237+
// e.g. [a-\w]
238+
report("invalidRange", current)
239+
return
240+
}
241+
}
242+
}
243+
},
244+
onQuantifierEnter(qNode) {
245+
if (qNode.element.type === "Assertion") {
246+
// e.g. \b+
247+
// NOTE(review): Annex B only allows lookaheads to be quantified, so
248+
// presumably only patterns like (?=a)+ reach this point and \b+ is
249+
// rejected by the parser beforehand — confirm.
250+
// autofix: wrap the assertion in a non-capturing group and re-append
251+
// the part of the quantifier's raw text that follows the element
252+
report(
253+
"quantifiedAssertion",
254+
qNode,
255+
`(?:${qNode.element.raw})${qNode.raw.slice(
256+
qNode.element.end - qNode.start,
257+
)}`,
258+
)
259+
}
260+
},
261+
262+
onBackreferenceEnter(bNode) {
263+
if (typeof bNode.ref === "string") {
264+
hasNamedBackreference = true
265+
}
266+
},
267+
onPatternLeave() {
268+
if (hasNamedBackreference) {
269+
// There is a bug in regexpp that causes it throw a
270+
// syntax error for all non-Unicode regexes with named
271+
// backreferences.
272+
// TODO: Remove this workaround when the bug is fixed.
273+
return
274+
}
275+
276+
if (!reported) {
277+
// our own logic couldn't find any problems,
278+
// so let's use a real parser to do the job.
279+
280+
// NOTE(review): `flags.unicode` is always falsy here, because
281+
// createVisitor returns early for patterns with the `u` flag.
282+
const message = validateRegExpPattern(
283+
pattern,
284+
flags.unicode,
285+
)
286+
287+
// reported on the whole node (no `loc`) and without an autofix
288+
if (message) {
289+
context.report({
290+
node,
291+
messageId: "regexMessage",
292+
data: {
293+
message,
294+
},
295+
})
296+
}
297+
}
298+
},
299+
}
300+
}
301+
302+
return defineRegexpVisitor(context, {
303+
createVisitor,
304+
})
305+
},
306+
})

lib/utils/rules.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ import preferT from "../rules/prefer-t"
5858
import preferUnicodeCodepointEscapes from "../rules/prefer-unicode-codepoint-escapes"
5959
import preferW from "../rules/prefer-w"
6060
import sortFlags from "../rules/sort-flags"
61+
import strict from "../rules/strict"
6162
import unicodeEscape from "../rules/unicode-escape"
6263

6364
export const rules = [
@@ -120,5 +121,6 @@ export const rules = [
120121
preferUnicodeCodepointEscapes,
121122
preferW,
122123
sortFlags,
124+
strict,
123125
unicodeEscape,
124126
] as RuleModule[]

0 commit comments

Comments
 (0)