Skip to content

Commit c614858

Browse files
Added no-trivially-nested-quantifier rule (#146)
* Added `no-trivially-nested-quantifier` rule * Added missing docs * Simplified code
1 parent 4efda8d commit c614858

File tree

7 files changed

+454
-5
lines changed

7 files changed

+454
-5
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ The rules with the following star :star: are included in the `plugin:regexp/reco
102102
| [regexp/no-optional-assertion](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-optional-assertion.html) | disallow optional assertions | |
103103
| [regexp/no-potentially-useless-backreference](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-potentially-useless-backreference.html) | disallow backreferences that reference a group that might not be matched | |
104104
| [regexp/no-trivially-nested-assertion](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-trivially-nested-assertion.html) | disallow trivially nested assertions | :wrench: |
105+
| [regexp/no-trivially-nested-quantifier](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-trivially-nested-quantifier.html) | disallow nested quantifiers that can be rewritten as one quantifier | :wrench: |
105106
| [regexp/no-unused-capturing-group](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-unused-capturing-group.html) | disallow unused capturing group | |
106107
| [regexp/no-useless-assertions](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-useless-assertions.html) | disallow assertions that are known to always accept (or reject) | |
107108
| [regexp/no-useless-backreference](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-useless-backreference.html) | disallow useless backreferences in regular expressions | :star: |

docs/rules/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ The rules with the following star :star: are included in the `plugin:regexp/reco
3030
| [regexp/no-optional-assertion](./no-optional-assertion.md) | disallow optional assertions | |
3131
| [regexp/no-potentially-useless-backreference](./no-potentially-useless-backreference.md) | disallow backreferences that reference a group that might not be matched | |
3232
| [regexp/no-trivially-nested-assertion](./no-trivially-nested-assertion.md) | disallow trivially nested assertions | :wrench: |
33+
| [regexp/no-trivially-nested-quantifier](./no-trivially-nested-quantifier.md) | disallow nested quantifiers that can be rewritten as one quantifier | :wrench: |
3334
| [regexp/no-unused-capturing-group](./no-unused-capturing-group.md) | disallow unused capturing group | |
3435
| [regexp/no-useless-assertions](./no-useless-assertions.md) | disallow assertions that are known to always accept (or reject) | |
3536
| [regexp/no-useless-backreference](./no-useless-backreference.md) | disallow useless backreferences in regular expressions | :star: |
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
---
2+
pageClass: "rule-details"
3+
sidebarDepth: 0
4+
title: "regexp/no-trivially-nested-quantifier"
5+
description: "disallow nested quantifiers that can be rewritten as one quantifier"
6+
---
7+
# regexp/no-trivially-nested-quantifier
8+
9+
> disallow nested quantifiers that can be rewritten as one quantifier
10+
11+
- :exclamation: <badge text="This rule has not been released yet." vertical="middle" type="error"> ***This rule has not been released yet.*** </badge>
12+
- :wrench: The `--fix` option on the [command line](https://eslint.org/docs/user-guide/command-line-interface#fixing-problems) can automatically fix some of the problems reported by this rule.
13+
14+
## :book: Rule Details
15+
16+
In some cases, nested quantifiers can be rewritten as one quantifier (e.g. `(?:a{1,2}){3}` -> `a{3,6}`).
17+
18+
<eslint-code-block fix>
19+
20+
```js
21+
/* eslint regexp/no-trivially-nested-quantifier: "error" */
22+
23+
/* ✓ GOOD */
24+
var foo = /(a{1,2})+/; // the rule won't touch capturing groups
25+
var foo = /(?:a{2})+/;
26+
27+
/* ✗ BAD */
28+
var foo = /(?:a{1,2})+/;
29+
var foo = /(?:a{1,2}){3,4}/;
30+
var foo = /(?:a{4,}){5}/;
31+
```
32+
33+
</eslint-code-block>
34+
35+
## :wrench: Options
36+
37+
Nothing.
38+
39+
## :mag: Implementation
40+
41+
- [Rule source](https://github.com/ota-meshi/eslint-plugin-regexp/blob/master/lib/rules/no-trivially-nested-quantifier.ts)
42+
- [Test source](https://github.com/ota-meshi/eslint-plugin-regexp/blob/master/tests/lib/rules/no-trivially-nested-quantifier.ts)
Lines changed: 278 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,278 @@
1+
import type { Expression } from "estree"
2+
import type { RegExpVisitor } from "regexpp/visitor"
3+
import type { Group, Quantifier } from "regexpp/ast"
4+
import type { Quant } from "../utils"
5+
import {
6+
fixReplaceNode,
7+
fixReplaceQuant,
8+
quantToString,
9+
createRule,
10+
defineRegexpVisitor,
11+
getRegexpLocation,
12+
} from "../utils"
13+
14+
/**
 * Tries to merge a quantifier with the single quantifier nested directly
 * inside of it.
 *
 * @param parent The outer quantifier, i.e. `{c,d}` in `(?:R{a,b}){c,d}`.
 * @param child The inner quantifier, i.e. `{a,b}` in `(?:R{a,b}){c,d}`.
 * @returns The quant `{a*c,b*d}` if it accepts exactly the same repetition
 * counts as the nested form. `null` if either maximum is 0, if the two
 * quantifiers differ in greediness, or if the combined range would contain
 * counts that the nested form cannot produce.
 */
function getCombinedQuant(parent: Quantifier, child: Quantifier): Quant | null {
    if (parent.max === 0 || child.max === 0) {
        // other rules deal with this case
        return null
    }
    if (parent.greedy !== child.greedy) {
        return null
    }

    // Why the check below is correct:
    //
    // We are given a regular expression of the form `(R{a,b}){c,d}` with
    // a<=b, c<=d, b>0, and d>0. The question is: for which numbers a,b,c,d is
    // `(R{a,b}){c,d}` == `R{a*c,b*d}`?
    //
    // Let's restate the question in terms of integer intervals. Definitions:
    //   x∈[a,b] ⇔ a <= x <= b
    //   [a,b]*x = [a*x, b*x] for x != 0
    //           = [0, 0]     for x == 0
    //
    // The question: for which intervals [a,b] and [c,d] is X=Y for
    //   X = [a*c, b*d] and
    //   Y = { x | x ∈ [a,b]*i where i∈[c,d] } ?
    //
    // First, note that X ⊇ Y, so we only have to show X\Y = ∅. The elements
    // of X\Y can be thought of as holes in Y. Holes can only appear between
    // the intervals [a,b]*j and [a,b]*(j+1), so let's look at a hole h
    // between [a,b]*c and [a,b]*(c+1):
    //
    // 1. [a,b]*(c+1) ⊆ Y iff c+1 <= d ⇔ c != d, since we are dealing with
    //    integers only and know that c<=d.
    // 2. h > b*c and h < a*(c+1). Picking h=b*c+1 gives b*c+1 < a*(c+1).
    //
    // So the condition for _no_ hole between [a,b]*c and [a,b]*(c+1) is:
    //   c=d ∨ b*c+1 >= a*(c+1)
    //
    // However, this condition is not defined for b=∞ and c=0. Since
    // [a,b]*x = [0, 0] for x == 0, we simply define 0*∞ = 0. That makes
    // sense for our problem, so the condition for b=∞ and c=0 is:
    //   a <= 1
    //
    // Now to prove that it is sufficient to only check for a hole between
    // the first two intervals. We want to show that if h=b*c+1 is not a hole,
    // then there is no j, c<j<d, such that b*j+1 is a hole. The first thing
    // to note is that j can only exist if c!=d, so the condition for h to not
    // exist simplifies to b*c+1 >= a*(c+1).
    //
    // 1) b=∞ and c=0:
    //    b*c+1 >= a*(c+1) ⇔ 1 >= a ⇔ a <= 1. If a <= 1, then h does not
    //    exist, and since b=∞, the union of the next interval
    //    [a,∞]*1 = [a,∞] and [0,0] = [a,∞]*0 is [0,∞]. [0,∞] is the largest
    //    possible interval, so there cannot be any holes after it.
    //    Therefore, a j, c<j<d, cannot exist.
    // 2) b=∞ and c>0:
    //    b*c+1 >= a*(c+1) ⇔ ∞ >= a*(c+1) is trivially true, so the hole h
    //    between [a,b]*c and [a,b]*(c+1) cannot exist. There can also be no
    //    other holes because [a,b]*c = [a*c,∞] ⊇ [a,b]*i = [a*i,∞] for all
    //    i>c.
    // 3) b<∞:
    //    b*c+1 >= a*(c+1). If c+x is also not a hole for any x >= 0, then
    //    there can be no holes.
    //    b*(c+x)+1 >= a*(c+x+1) ⇔ b >= a + (a-1)/(c+x). We know that this is
    //    true for x=0, and increasing x only makes (a-1)/(c+x) smaller, so it
    //    is always true. Therefore, there can be no j, c<j<d, such that
    //    b*j+1 is a hole.
    //
    // We have shown that if there is no hole h between the first and the
    // second interval, then there can be no other holes. Therefore, it is
    // sufficient to only check for the first hole.

    // a, b, c, d from the explanation above
    const childMin = child.min
    const childMax = child.max
    const parentMin = parent.min
    const parentMax = parent.max

    let hasNoHoles: boolean
    if (childMax === Infinity && parentMin === 0) {
        // ∞*0 would be NaN, so this case gets its own condition
        hasNoHoles = childMin <= 1
    } else if (parentMin === parentMax) {
        hasNoHoles = true
    } else {
        hasNoHoles = childMax * parentMin + 1 >= childMin * (parentMin + 1)
    }

    if (!hasNoHoles) {
        return null
    }

    return {
        min: childMin * parentMin,
        max: childMax * parentMax,
        greedy: parent.greedy,
    }
}
94+
95+
/**
 * Computes a simpler quant for a child quantifier based on the quantifier
 * that wraps it.
 *
 * Two simplifications are applied:
 * - If both the child and the parent have a minimum of 0, the child's
 *   minimum is raised to 1.
 * - If the parent is unbounded and the (possibly raised) child minimum is 0
 *   or 1, a child maximum greater than 1 is lowered to 1.
 *
 * @param parent The outer quantifier.
 * @param child The nested quantifier to simplify.
 * @returns The simplified quant (equal to the child's own range if nothing
 * could be simplified), or `null` if either maximum is 0 or the two
 * quantifiers differ in greediness.
 */
function getSimplifiedChildQuant(
    parent: Quantifier,
    child: Quantifier,
): Quant | null {
    // this rule doesn't handle quantifiers with a maximum of 0
    const hasZeroMax = parent.max === 0 || child.max === 0
    // maybe some optimization is possible for mixed greediness, but I'm not
    // sure, so let's be safe
    if (hasZeroMax || parent.greedy !== child.greedy) {
        return null
    }

    const min = child.min === 0 && parent.min === 0 ? 1 : child.min

    const canLowerMax =
        parent.max === Infinity && (min === 0 || min === 1) && child.max > 1
    const max = canLowerMax ? 1 : child.max

    return { min, max, greedy: child.greedy }
}
122+
123+
/**
124+
* Returns whether the given quantifier is a trivial constant zero or constant
125+
* one quantifier.
126+
*/
127+
function isTrivialQuantifier(quant: Quantifier): boolean {
128+
return quant.min === quant.max && (quant.min === 0 || quant.min === 1)
129+
}
130+
131+
/**
132+
* Iterates over the alternatives of the given group and yields all quantifiers
133+
* that are the only element of their respective alternative.
134+
*/
135+
function* iterateSingleQuantifiers(group: Group): Iterable<Quantifier> {
136+
for (const { elements } of group.alternatives) {
137+
if (elements.length === 1) {
138+
const single = elements[0]
139+
if (single.type === "Quantifier") {
140+
yield single
141+
}
142+
}
143+
}
144+
}
145+
146+
export default createRule("no-trivially-nested-quantifier", {
    meta: {
        docs: {
            description:
                "disallow nested quantifiers that can be rewritten as one quantifier",
            // TODO Switch to recommended in the major version.
            // recommended: true,
            recommended: false,
        },
        fixable: "code",
        schema: [],
        messages: {
            nested:
                "These two quantifiers are trivially nested and can be replaced with '{{quant}}'.",
            childOne: "This nested quantifier can be removed.",
            childSimpler:
                "This nested quantifier can be simplified to '{{quant}}'.",
        },
        type: "suggestion", // "problem",
    },
    create(context) {
        const sourceCode = context.getSourceCode()

        /**
         * Creates the regexp visitor for one regexp expression.
         * @param node The expression node the reports are attached to.
         */
        function createVisitor(node: Expression): RegExpVisitor.Handlers {
            /**
             * Handles a group with exactly one alternative: reports the
             * outer quantifier if both quantifiers can be merged into one.
             * @param outer The quantifier around the group.
             * @param inner The quantifier that is the group's only element.
             */
            function checkOnlyChild(outer: Quantifier, inner: Quantifier) {
                const combined = getCombinedQuant(outer, inner)
                if (!combined) {
                    return
                }

                const quantStr = quantToString(combined)

                context.report({
                    node,
                    loc: getRegexpLocation(sourceCode, node, outer),
                    messageId: "nested",
                    data: { quant: quantStr },
                    // the whole outer quantifier (group included) is
                    // replaced with the inner element + merged quantifier
                    fix: fixReplaceNode(
                        sourceCode,
                        node,
                        outer,
                        inner.element.raw + quantStr,
                    ),
                })
            }

            /**
             * Handles a group with several alternatives: reports the inner
             * quantifier if the outer one allows it to be simplified.
             * @param outer The quantifier around the group.
             * @param inner A quantifier that is a whole alternative.
             */
            function checkOneOfManyChildren(
                outer: Quantifier,
                inner: Quantifier,
            ) {
                const simplified = getSimplifiedChildQuant(outer, inner)
                if (!simplified) {
                    return
                }

                const unchanged =
                    simplified.min === inner.min &&
                    simplified.max === inner.max
                if (unchanged) {
                    // quantifier could not be simplified
                    return
                }

                const loc = getRegexpLocation(sourceCode, node, inner)

                if (simplified.min === 1 && simplified.max === 1) {
                    context.report({
                        node,
                        loc,
                        messageId: "childOne",
                        // TODO: This fix depends on `qNode`
                        fix: fixReplaceNode(
                            sourceCode,
                            node,
                            inner,
                            inner.element.raw,
                        ),
                    })
                    return
                }

                // `greedy: undefined` is what the original passes on to
                // `quantToString` and `fixReplaceQuant`
                const replacementQuant = { ...simplified, greedy: undefined }

                context.report({
                    node,
                    loc,
                    messageId: "childSimpler",
                    data: { quant: quantToString(replacementQuant) },
                    // TODO: This fix depends on `qNode`
                    fix: fixReplaceQuant(
                        sourceCode,
                        node,
                        inner,
                        replacementQuant,
                    ),
                })
            }

            return {
                onQuantifierEnter(qNode) {
                    if (isTrivialQuantifier(qNode)) {
                        return
                    }

                    const group = qNode.element
                    if (group.type !== "Group") {
                        return
                    }

                    // with only one alternative, let's see whether we can
                    // rewrite the quantifier; otherwise the child isn't the
                    // only child of the parent quantifier
                    const check =
                        group.alternatives.length === 1
                            ? checkOnlyChild
                            : checkOneOfManyChildren

                    for (const child of iterateSingleQuantifiers(group)) {
                        if (!isTrivialQuantifier(child)) {
                            check(qNode, child)
                        }
                    }
                },
            }
        }

        return defineRegexpVisitor(context, { createVisitor })
    },
})

lib/utils/index.ts

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -530,7 +530,7 @@ export function fixReplaceQuant(
530530
sourceCode: SourceCode,
531531
node: ESTree.Expression,
532532
quantifier: Quantifier,
533-
replacement: string | (() => string | null),
533+
replacement: string | Quant | (() => string | Quant | null),
534534
) {
535535
return (fixer: Rule.RuleFixer): Rule.Fix | null => {
536536
const range = getRegexpRange(sourceCode, node, quantifier)
@@ -539,7 +539,7 @@ export function fixReplaceQuant(
539539
}
540540

541541
let text
542-
if (typeof replacement === "string") {
542+
if (typeof replacement !== "function") {
543543
text = replacement
544544
} else {
545545
text = replacement()
@@ -548,10 +548,21 @@ export function fixReplaceQuant(
548548
}
549549
}
550550

551-
const [startOffset, endOffset] = getQuantifierOffsets(quantifier)
551+
const offset = getQuantifierOffsets(quantifier)
552+
553+
if (typeof text !== "string") {
554+
if (
555+
text.greedy !== undefined &&
556+
text.greedy !== quantifier.greedy
557+
) {
558+
// we also change the greediness of the quantifier
559+
offset[1] += 1
560+
}
561+
text = quantToString(text)
562+
}
552563

553564
return fixer.replaceTextRange(
554-
[range[0] + startOffset, range[0] + endOffset],
565+
[range[0] + offset[0], range[0] + offset[1]],
555566
text,
556567
)
557568
}
@@ -602,7 +613,7 @@ export function quantToString(quant: Readonly<Quant>): string {
602613
value = `{${quant.min},${quant.max}}`
603614
}
604615

605-
if (!quant.greedy) {
616+
if (quant.greedy === false) {
606617
return `${value}?`
607618
}
608619
return value

0 commit comments

Comments
 (0)