Skip to content

Commit 17239ab

Browse files
authored
Add regexp/require-unicode-sets-regexp rule (#598)
1 parent b0f6396 commit 17239ab

File tree

9 files changed

+311
-3
lines changed

9 files changed

+311
-3
lines changed

.changeset/sour-feet-explain.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"eslint-plugin-regexp": minor
3+
---
4+
5+
Add `regexp/require-unicode-sets-regexp` rule

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,7 @@ The `plugin:regexp/all` config enables all rules. It's meant for testing, not fo
166166
| [prefer-regexp-exec](https://ota-meshi.github.io/eslint-plugin-regexp/rules/prefer-regexp-exec.html) | enforce that `RegExp#exec` is used instead of `String#match` if no global flag is provided | | | | |
167167
| [prefer-regexp-test](https://ota-meshi.github.io/eslint-plugin-regexp/rules/prefer-regexp-test.html) | enforce that `RegExp#test` is used instead of `String#match` and `RegExp#exec` | | | 🔧 | |
168168
| [require-unicode-regexp](https://ota-meshi.github.io/eslint-plugin-regexp/rules/require-unicode-regexp.html) | enforce the use of the `u` flag | | | 🔧 | |
169+
| [require-unicode-sets-regexp](https://ota-meshi.github.io/eslint-plugin-regexp/rules/require-unicode-sets-regexp.html) | enforce the use of the `v` flag | | | 🔧 | |
169170
| [sort-alternatives](https://ota-meshi.github.io/eslint-plugin-regexp/rules/sort-alternatives.html) | sort alternatives if order doesn't matter | | | 🔧 | |
170171
| [use-ignore-case](https://ota-meshi.github.io/eslint-plugin-regexp/rules/use-ignore-case.html) | use the `i` flag if it simplifies the pattern || | 🔧 | |
171172

docs/rules/index.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ sidebarDepth: 0
7373
| [prefer-regexp-exec](prefer-regexp-exec.md) | enforce that `RegExp#exec` is used instead of `String#match` if no global flag is provided | | | | |
7474
| [prefer-regexp-test](prefer-regexp-test.md) | enforce that `RegExp#test` is used instead of `String#match` and `RegExp#exec` | | | 🔧 | |
7575
| [require-unicode-regexp](require-unicode-regexp.md) | enforce the use of the `u` flag | | | 🔧 | |
76+
| [require-unicode-sets-regexp](require-unicode-sets-regexp.md) | enforce the use of the `v` flag | | | 🔧 | |
7677
| [sort-alternatives](sort-alternatives.md) | sort alternatives if order doesn't matter | | | 🔧 | |
7778
| [use-ignore-case](use-ignore-case.md) | use the `i` flag if it simplifies the pattern || | 🔧 | |
7879

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
---
2+
pageClass: "rule-details"
3+
sidebarDepth: 0
4+
title: "regexp/require-unicode-sets-regexp"
5+
description: "enforce the use of the `v` flag"
6+
---
7+
# regexp/require-unicode-sets-regexp
8+
9+
🔧 This rule is automatically fixable by the [`--fix` CLI option](https://eslint.org/docs/latest/user-guide/command-line-interface#--fix).
10+
11+
<!-- end auto-generated rule header -->
12+
13+
> enforce the use of the `v` flag
14+
15+
## :book: Rule Details
16+
17+
This rule reports regular expressions without the `v` flag.
18+
19+
It will automatically replace the `u` flag with the `v` flag in regular expressions that already use the `u` flag, provided the change is statically guaranteed to be safe. In all other cases, the developer has to check that adding the `v` flag doesn't cause the regex to behave incorrectly.
20+
21+
If you want to automatically add the `v` flag to legacy regular expressions that don't use the `u` flag, use this rule together with the [regexp/require-unicode-regexp] rule.
22+
23+
<eslint-code-block fix>
24+
25+
```js
26+
/* eslint regexp/require-unicode-sets-regexp: "error" */
27+
28+
/* ✓ GOOD */
29+
var foo = /foo/v;
30+
var foo = /a\s+b/v;
31+
32+
/* ✗ BAD */
33+
var foo = /foo/;
34+
var foo = RegExp("a\\s+b");
35+
var foo = /[a-z]/i;
36+
var foo = /\S/;
37+
var foo = /foo/u;
38+
var foo = RegExp("a\\s+b", 'u');
39+
var foo = /[a-z]/iu;
40+
var foo = /\S/u;
41+
```
42+
43+
</eslint-code-block>
44+
45+
## :wrench: Options
46+
47+
Nothing.
48+
49+
## :couple: Related rules
50+
51+
- [regexp/require-unicode-regexp]
52+
53+
[regexp/require-unicode-regexp]: ./require-unicode-regexp.md
54+
55+
## :books: Further reading
56+
57+
- [require-unicode-regexp]
58+
59+
[require-unicode-regexp]: https://eslint.org/docs/rules/require-unicode-regexp
60+
61+
## :rocket: Version
62+
63+
:exclamation: <badge text="This rule has not been released yet." vertical="middle" type="error"> ***This rule has not been released yet.*** </badge>
64+
65+
## :mag: Implementation
66+
67+
- [Rule source](https://github.com/ota-meshi/eslint-plugin-regexp/blob/master/lib/rules/require-unicode-sets-regexp.ts)
68+
- [Test source](https://github.com/ota-meshi/eslint-plugin-regexp/blob/master/tests/lib/rules/require-unicode-sets-regexp.ts)
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
import type { RegExpVisitor } from "@eslint-community/regexpp/visitor"
2+
import type { RegExpContext } from "../utils"
3+
import { createRule, defineRegexpVisitor } from "../utils"
4+
import { RegExpParser, visitRegExpAST } from "@eslint-community/regexpp"
5+
import { toUnicodeSet } from "regexp-ast-analysis"
6+
7+
/**
 * Doubled punctuators whose presence in a character class makes the rule
 * refuse to auto-fix (`&&`, `!!`, ..., `--`).
 *
 * Each entry is one punctuation character repeated twice; the order of the
 * source string determines the order of the resulting list.
 */
const CLASS_SET_RESERVED_DOUBLE_PUNCTUATORS = Array.from(
    "&!#$%*+,.:;<=>?@^`~-",
    (char) => char + char,
)
29+
30+
/**
 * Tells whether the `v` flag can be added to the regex without changing its
 * behavior.
 *
 * Two independent checks are performed:
 * 1. Every character class must evaluate to the same set of characters with
 *    and without the `v` flag, and its raw text must not contain any of the
 *    reserved double punctuators.
 * 2. The whole pattern must parse successfully in `unicodeSets` mode.
 *
 * @param regexpContext The context of the regex to check.
 * @returns `true` if both checks pass, `false` otherwise.
 */
function isCompatible(regexpContext: RegExpContext): boolean {
    const { flags, patternAst, pattern } = regexpContext
    const flagsWithV = { ...flags, unicodeSets: true, unicode: false }

    // Unique marker object: throwing it is the only way to abort the AST
    // traversal early, and identity comparison tells it apart from real errors.
    const incompatibleSignal = {}

    try {
        visitRegExpAST(patternAst, {
            onCharacterClassEnter(characterClass) {
                const currentSet = toUnicodeSet(characterClass, flags)
                const setWithV = toUnicodeSet(
                    { ...characterClass, unicodeSets: true },
                    flagsWithV,
                )
                const containsReservedPunctuator = (): boolean =>
                    CLASS_SET_RESERVED_DOUBLE_PUNCTUATORS.some((punctuator) =>
                        characterClass.raw.includes(punctuator),
                    )

                if (
                    !currentSet.equals(setWithV) ||
                    containsReservedPunctuator()
                ) {
                    throw incompatibleSignal
                }
            },
        })
    } catch (error) {
        if (error !== incompatibleSignal) {
            // Not our marker: a genuine failure, so let it propagate.
            throw error
        }
        return false
    }

    // The `v` flag is stricter about which characters must be escaped, so the
    // pattern is re-parsed in `unicodeSets` mode; a parse error means the
    // pattern cannot be converted as-is.
    try {
        new RegExpParser().parsePattern(pattern, undefined, undefined, {
            unicodeSets: true,
        })
        return true
    } catch (_parseError) {
        return false
    }
}
81+
82+
export default createRule("require-unicode-sets-regexp", {
    meta: {
        docs: {
            description: "enforce the use of the `v` flag",
            category: "Best Practices",
            recommended: false,
        },
        schema: [],
        fixable: "code",
        messages: {
            require: "Use the 'v' flag.",
        },
        type: "suggestion",
    },
    create(context) {
        /**
         * Reports the regex if it lacks the `v` flag. All the work happens
         * when the visitor is created, so the returned handlers are always
         * empty.
         */
        function createVisitor(
            regexpContext: RegExpContext,
        ): RegExpVisitor.Handlers {
            const {
                node,
                flags,
                flagsString,
                getFlagsLocation,
                fixReplaceFlags,
            } = regexpContext

            // A null flags string means that there are flags (probably) but
            // they could not be evaluated; a regex that already has the `v`
            // flag needs no report either.
            if (flagsString === null || flags.unicodeSets) {
                return {}
            }

            context.report({
                node,
                loc: getFlagsLocation(),
                messageId: "require",
                fix: fixReplaceFlags(() => {
                    // Only patterns with the `u` flag are auto-fixed, and only
                    // when switching to `v` keeps the behavior. When migrating
                    // from legacy, first add the `u` flag with the
                    // `require-unicode-regexp` rule.
                    const canFix = flags.unicode && isCompatible(regexpContext)
                    return canFix
                        ? `${flagsString.replace(/u/gu, "")}v`
                        : null
                }),
            })

            return {}
        }

        return defineRegexpVisitor(context, { createVisitor })
    },
})

lib/utils/rules.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ import preferT from "../rules/prefer-t"
7272
import preferUnicodeCodepointEscapes from "../rules/prefer-unicode-codepoint-escapes"
7373
import preferW from "../rules/prefer-w"
7474
import requireUnicodeRegexp from "../rules/require-unicode-regexp"
75+
import requireUnicodeSetsRegexp from "../rules/require-unicode-sets-regexp"
7576
import sortAlternatives from "../rules/sort-alternatives"
7677
import sortCharacterClassElements from "../rules/sort-character-class-elements"
7778
import sortFlags from "../rules/sort-flags"
@@ -153,6 +154,7 @@ export const rules = [
153154
preferUnicodeCodepointEscapes,
154155
preferW,
155156
requireUnicodeRegexp,
157+
requireUnicodeSetsRegexp,
156158
sortAlternatives,
157159
sortCharacterClassElements,
158160
sortFlags,

tests/lib/rules-with-unknown-flag.ts

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,11 @@ describe("Don't crash even if with unknown flag.", () => {
5959
],
6060
}
6161

62-
for (const key of Object.keys(rules)) {
63-
const rule = rules[key]
62+
const pluginRules = Object.fromEntries(
63+
Object.values(rules).map((rule) => [rule.meta.docs.ruleId, rule]),
64+
)
65+
66+
for (const rule of Object.values(rules)) {
6467
const ruleId = rule.meta.docs.ruleId
6568

6669
it(ruleId, () => {
@@ -73,11 +76,14 @@ describe("Don't crash even if with unknown flag.", () => {
7376
rules: {
7477
[ruleId]: "error",
7578
"regexp/test": "error",
79+
...(ruleId === "regexp/require-unicode-sets-regexp"
80+
? { "regexp/require-unicode-regexp": "error" }
81+
: {}),
7682
},
7783
}
7884
// @ts-expect-error -- ignore
7985
linter.defineParser("@typescript-eslint/parser", parser)
80-
linter.defineRule(ruleId, rule)
86+
linter.defineRules(pluginRules)
8187

8288
linter.defineRule("regexp/test", TEST_RULE)
8389
const resultVue = linter.verifyAndFix(code, config, "test.js")

tests/lib/rules/no-useless-flag.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -846,6 +846,7 @@ describe("Don't conflict even if using the rules together.", () => {
846846
rulesConfig: {
847847
"regexp/no-useless-flag": ["error"],
848848
"regexp/require-unicode-regexp": "off",
849+
"regexp/require-unicode-sets-regexp": "off",
849850
"regexp/match-any": ["error", { allows: ["dotAll"] }],
850851
},
851852
messages: [
@@ -874,6 +875,7 @@ describe("Don't conflict even if using the rules together.", () => {
874875
rulesConfig: {
875876
"regexp/match-any": ["error", { allows: ["dotAll"] }],
876877
"regexp/require-unicode-regexp": "off",
878+
"regexp/require-unicode-sets-regexp": "off",
877879
"regexp/no-useless-flag": ["error"],
878880
},
879881
messages: [
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import { RuleTester } from "eslint"
2+
import rule from "../../../lib/rules/require-unicode-sets-regexp"
3+
4+
const tester = new RuleTester({
    parserOptions: { ecmaVersion: "latest", sourceType: "module" },
})

/**
 * Builds an invalid test case that must be reported but must not be
 * auto-fixed. A fresh object (and error list) is created for every case.
 */
function reportedWithoutFix(code: string) {
    return { code, output: null, errors: ["Use the 'v' flag."] }
}

// Doubled punctuators exercised inside a character class; `--` is covered by
// a dedicated case at the end.
const doubledPunctuators = Array.from(
    "&!#$%*+,.:;<=>?@^`~",
    (char) => char + char,
)

tester.run("require-unicode-sets-regexp", rule as any, {
    valid: [`/a/v`],
    invalid: [
        // It will not auto-fix if it does not have the u flag.
        reportedWithoutFix(`/a/`),
        {
            code: `/a/u`,
            output: `/a/v`,
            errors: ["Use the 'v' flag."],
        },
        {
            code: String.raw`/[\p{ASCII}]/iu`,
            output: String.raw`/[\p{ASCII}]/iv`,
            errors: ["Use the 'v' flag."],
        },
        // Converting to the v flag will result in a parsing error.
        reportedWithoutFix(`/[[]/u`),
        // Converting to the v flag changes the behavior of the character set.
        reportedWithoutFix(String.raw`/[^\P{Lowercase_Letter}]/giu`),
        reportedWithoutFix(String.raw`/[^\P{ASCII}]/iu`),
        reportedWithoutFix(String.raw`/[\P{ASCII}]/iu`),
        ...doubledPunctuators.map((punctuator) =>
            reportedWithoutFix(String.raw`/[a${punctuator}b]/u`),
        ),
        reportedWithoutFix(String.raw`/[+--b]/u`),
    ],
})

0 commit comments

Comments
 (0)