Skip to content

Commit 99367b0

Browse files
Fixed false negatives around edges and added second phase of reporting (#486)
* Fixed false negatives around edges and added second phase of reporting * Added another test * More efficient traversal for phase 2
1 parent 0b263da commit 99367b0

File tree

2 files changed

+257
-35
lines changed

2 files changed

+257
-35
lines changed

lib/rules/no-useless-assertions.ts

Lines changed: 193 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,17 @@ import type { RegExpVisitor } from "regexpp/visitor"
22
import type {
33
Assertion,
44
EdgeAssertion,
5+
Element,
56
LookaroundAssertion,
67
WordBoundaryAssertion,
78
} from "regexpp/ast"
89
import type { RegExpContext } from "../utils"
910
import { createRule, defineRegexpVisitor } from "../utils"
11+
import type {
12+
MatchingDirection,
13+
ReadonlyFlags,
14+
FirstLookChar,
15+
} from "regexp-ast-analysis"
1016
import {
1117
Chars,
1218
getFirstCharAfter,
@@ -15,17 +21,101 @@ import {
1521
getMatchingDirectionFromAssertionKind,
1622
hasSomeDescendant,
1723
isPotentiallyEmpty,
24+
isZeroLength,
1825
FirstConsumedChars,
1926
} from "regexp-ast-analysis"
2027
import { mention } from "../utils/mention"
2128

29+
/**
30+
* Combines 2 look chars such that the result is equivalent to 2 adjacent
31+
* assertions `(?=a)(?=b)`.
32+
*/
33+
function firstLookCharsIntersection(
34+
a: FirstLookChar,
35+
b: FirstLookChar,
36+
): FirstLookChar {
37+
const char = a.char.intersect(b.char)
38+
return {
39+
char: a.char.intersect(b.char),
40+
exact: (a.exact && b.exact) || char.isEmpty,
41+
edge: a.edge && b.edge,
42+
}
43+
}
44+
45+
type GetFirstCharAfter = (
46+
afterThis: Assertion,
47+
direction: MatchingDirection,
48+
flags: ReadonlyFlags,
49+
) => FirstLookChar
50+
51+
/**
52+
* Creates a {@link GetFirstCharAfter} function that will reorder assertions to
53+
* get the maximum information after the characters after the given assertions.
54+
*
55+
* Conceptually, this will reorder adjacent assertions such that given
56+
* assertion is moved as far as possible in the opposite direction of natural
57+
* matching direction. E.g. when given `$` in `a(?!a)(?<=\w)$`, the characters
58+
* after `$` will be returned as if the pattern was `a$(?!a)(?<=\w)`.
59+
*
60+
* @param forbidden A list of assertions that may not be reordered.
61+
*/
62+
function createReorderingGetFirstCharAfter(
63+
forbidden: ReadonlySet<Assertion>,
64+
): GetFirstCharAfter {
65+
/** Whether the given element or one of its descendants is forbidden. */
66+
function hasForbidden(element: Element): boolean {
67+
if (element.type === "Assertion" && forbidden.has(element)) {
68+
return true
69+
}
70+
for (const f of forbidden) {
71+
if (hasSomeDescendant(element, f)) {
72+
return true
73+
}
74+
}
75+
return false
76+
}
77+
78+
return (afterThis, direction, flags) => {
79+
let result = getFirstCharAfter(afterThis, direction, flags)
80+
81+
if (afterThis.parent.type === "Alternative") {
82+
const { elements } = afterThis.parent
83+
84+
const inc = direction === "ltr" ? -1 : +1
85+
const start = elements.indexOf(afterThis)
86+
for (let i = start + inc; i >= 0 && i < elements.length; i += inc) {
87+
const other = elements[i]
88+
if (!isZeroLength(other)) {
89+
break
90+
}
91+
if (hasForbidden(other)) {
92+
// we hit an element that cannot be reordered
93+
break
94+
}
95+
96+
const otherResult = FirstConsumedChars.toLook(
97+
getFirstConsumedChar(other, direction, flags),
98+
)
99+
100+
result = firstLookCharsIntersection(result, otherResult)
101+
}
102+
}
103+
104+
return result
105+
}
106+
}
107+
22108
const messages = {
23109
alwaysRejectByChar:
24110
"{{assertion}} will always reject because it is {{followedOrPreceded}} by a character.",
111+
alwaysAcceptByChar:
112+
"{{assertion}} will always accept because it is never {{followedOrPreceded}} by a character.",
25113
alwaysRejectByNonLineTerminator:
26114
"{{assertion}} will always reject because it is {{followedOrPreceded}} by a non-line-terminator character.",
27115
alwaysAcceptByLineTerminator:
28116
"{{assertion}} will always accept because it is {{followedOrPreceded}} by a line-terminator character.",
117+
alwaysAcceptByLineTerminatorOrEdge:
118+
"{{assertion}} will always accept because it is {{followedOrPreceded}} by a line-terminator character or the {{startOrEnd}} of the input string.",
29119
alwaysAcceptOrRejectFollowedByWord:
30120
"{{assertion}} will always {{acceptOrReject}} because it is preceded by a non-word character and followed by a word character.",
31121
alwaysAcceptOrRejectFollowedByNonWord:
@@ -61,12 +151,16 @@ export default createRule("no-useless-assertions", {
61151
flags,
62152
getRegexpLocation,
63153
}: RegExpContext): RegExpVisitor.Handlers {
154+
const reported = new Set<Assertion>()
155+
64156
/** Report */
65157
function report(
66158
assertion: Assertion,
67159
messageId: keyof typeof messages,
68160
data: Record<string, string>,
69161
) {
162+
reported.add(assertion)
163+
70164
context.report({
71165
node,
72166
loc: getRegexpLocation(assertion),
@@ -81,20 +175,48 @@ export default createRule("no-useless-assertions", {
81175
/**
82176
* Verify for `^` or `$`
83177
*/
84-
function verifyStartOrEnd(assertion: EdgeAssertion): void {
178+
function verifyStartOrEnd(
179+
assertion: EdgeAssertion,
180+
getFirstCharAfterFn: GetFirstCharAfter,
181+
): void {
85182
// Note: /^/ is the same as /(?<!.)/s and /^/m is the same as /(?<!.)/
86183
// Note: /$/ is the same as /(?!.)/s and /$/m is the same as /(?!.)/
87184

88185
// get the "next" character
89186
const direction = getMatchingDirectionFromAssertionKind(
90187
assertion.kind,
91188
)
92-
const next = getFirstCharAfter(assertion, direction, flags)
189+
const next = getFirstCharAfterFn(assertion, direction, flags)
93190

94191
const followedOrPreceded =
95192
assertion.kind === "end" ? "followed" : "preceded"
96193

97-
if (!next.edge) {
194+
const lineTerminator = Chars.lineTerminator(flags)
195+
196+
if (next.edge) {
197+
// the string might start/end after the assertion
198+
199+
if (!flags.multiline) {
200+
// ^/$ will always accept at an edge with no char before/after it
201+
if (next.char.isEmpty) {
202+
report(assertion, "alwaysAcceptByChar", {
203+
followedOrPreceded,
204+
})
205+
}
206+
} else {
207+
// ^/$ will always accept at an edge or line terminator before/after it
208+
if (next.char.isSubsetOf(lineTerminator)) {
209+
report(
210+
assertion,
211+
"alwaysAcceptByLineTerminatorOrEdge",
212+
{
213+
followedOrPreceded,
214+
startOrEnd: assertion.kind,
215+
},
216+
)
217+
}
218+
}
219+
} else {
98220
// there is always some character of `node`
99221

100222
if (!flags.multiline) {
@@ -105,8 +227,6 @@ export default createRule("no-useless-assertions", {
105227
} else {
106228
// only if the character is a sub set of /./, will the assertion trivially reject
107229

108-
const lineTerminator = Chars.lineTerminator(flags)
109-
110230
if (next.char.isDisjointWith(lineTerminator)) {
111231
report(
112232
assertion,
@@ -127,19 +247,15 @@ export default createRule("no-useless-assertions", {
127247
*/
128248
function verifyWordBoundary(
129249
assertion: WordBoundaryAssertion,
250+
getFirstCharAfterFn: GetFirstCharAfter,
130251
): void {
131252
const word = Chars.word(flags)
132253

133-
const next = getFirstCharAfter(assertion, "ltr", flags)
134-
const prev = getFirstCharAfter(assertion, "rtl", flags)
254+
const next = getFirstCharAfterFn(assertion, "ltr", flags)
255+
const prev = getFirstCharAfterFn(assertion, "rtl", flags)
135256

136-
if (prev.edge || next.edge) {
137-
// we can only do this analysis if we know the previous and next character
138-
return
139-
}
140-
141-
const nextIsWord = next.char.isSubsetOf(word)
142-
const prevIsWord = prev.char.isSubsetOf(word)
257+
const nextIsWord = next.char.isSubsetOf(word) && !next.edge
258+
const prevIsWord = prev.char.isSubsetOf(word) && !prev.edge
143259
const nextIsNonWord = next.char.isDisjointWith(word)
144260
const prevIsNonWord = prev.char.isDisjointWith(word)
145261

@@ -198,7 +314,10 @@ export default createRule("no-useless-assertions", {
198314
/**
199315
* Verify for LookaroundAssertion
200316
*/
201-
function verifyLookaround(assertion: LookaroundAssertion): void {
317+
function verifyLookaround(
318+
assertion: LookaroundAssertion,
319+
getFirstCharAfterFn: GetFirstCharAfter,
320+
): void {
202321
if (isPotentiallyEmpty(assertion.alternatives)) {
203322
// we don't handle trivial accept/reject based on emptiness
204323
return
@@ -207,10 +326,7 @@ export default createRule("no-useless-assertions", {
207326
const direction = getMatchingDirectionFromAssertionKind(
208327
assertion.kind,
209328
)
210-
const after = getFirstCharAfter(assertion, direction, flags)
211-
if (after.edge) {
212-
return
213-
}
329+
const after = getFirstCharAfterFn(assertion, direction, flags)
214330

215331
const firstOf = FirstConsumedChars.toLook(
216332
getFirstConsumedChar(
@@ -227,7 +343,10 @@ export default createRule("no-useless-assertions", {
227343
// Careful now! If exact is false, we are only guaranteed to have a superset of the actual character.
228344
// False negatives are fine but we can't have false positives.
229345

230-
if (after.char.isDisjointWith(firstOf.char)) {
346+
if (
347+
after.char.isDisjointWith(firstOf.char) &&
348+
!(after.edge && firstOf.edge)
349+
) {
231350
report(
232351
assertion,
233352
assertion.negate
@@ -240,6 +359,10 @@ export default createRule("no-useless-assertions", {
240359
)
241360
}
242361

362+
if (after.edge) {
363+
return
364+
}
365+
243366
// accept is harder because that can't generally be decided by the first character
244367

245368
// if this contains another assertion then that might reject. It's out of our control
@@ -273,23 +396,58 @@ export default createRule("no-useless-assertions", {
273396
}
274397
}
275398

399+
/**
400+
* Verify for Assertion
401+
*/
402+
function verifyAssertion(
403+
assertion: Assertion,
404+
getFirstCharAfterFn: GetFirstCharAfter,
405+
): void {
406+
switch (assertion.kind) {
407+
case "start":
408+
case "end":
409+
verifyStartOrEnd(assertion, getFirstCharAfterFn)
410+
break
411+
412+
case "word":
413+
verifyWordBoundary(assertion, getFirstCharAfterFn)
414+
break
415+
416+
case "lookahead":
417+
case "lookbehind":
418+
verifyLookaround(assertion, getFirstCharAfterFn)
419+
break
420+
default:
421+
}
422+
}
423+
424+
const allAssertions: Assertion[] = []
425+
276426
return {
277427
onAssertionEnter(assertion) {
278-
switch (assertion.kind) {
279-
case "start":
280-
case "end":
281-
verifyStartOrEnd(assertion)
282-
break
283-
284-
case "word":
285-
verifyWordBoundary(assertion)
286-
break
287-
288-
case "lookahead":
289-
case "lookbehind":
290-
verifyLookaround(assertion)
291-
break
292-
default:
428+
// Phase 1:
429+
// The context of assertions is determined by only looking
430+
// at elements after the current assertion. This means that
431+
// the order of assertions is kept as is.
432+
verifyAssertion(assertion, getFirstCharAfter)
433+
434+
// store all assertions for the second phase
435+
allAssertions.push(assertion)
436+
},
437+
onPatternLeave() {
438+
// Phase 2:
439+
// The context of assertions is determined by reordering
440+
// assertions such that as much information as possible can
441+
// be extracted from its surrounding assertions.
442+
const reorderingGetFirstCharAfter =
443+
createReorderingGetFirstCharAfter(reported)
444+
for (const assertion of allAssertions) {
445+
if (!reported.has(assertion)) {
446+
verifyAssertion(
447+
assertion,
448+
reorderingGetFirstCharAfter,
449+
)
450+
}
293451
}
294452
},
295453
}

0 commit comments

Comments
 (0)