@@ -2,11 +2,17 @@ import type { RegExpVisitor } from "regexpp/visitor"
2
2
import type {
3
3
Assertion ,
4
4
EdgeAssertion ,
5
+ Element ,
5
6
LookaroundAssertion ,
6
7
WordBoundaryAssertion ,
7
8
} from "regexpp/ast"
8
9
import type { RegExpContext } from "../utils"
9
10
import { createRule , defineRegexpVisitor } from "../utils"
11
+ import type {
12
+ MatchingDirection ,
13
+ ReadonlyFlags ,
14
+ FirstLookChar ,
15
+ } from "regexp-ast-analysis"
10
16
import {
11
17
Chars ,
12
18
getFirstCharAfter ,
@@ -15,17 +21,101 @@ import {
15
21
getMatchingDirectionFromAssertionKind ,
16
22
hasSomeDescendant ,
17
23
isPotentiallyEmpty ,
24
+ isZeroLength ,
18
25
FirstConsumedChars ,
19
26
} from "regexp-ast-analysis"
20
27
import { mention } from "../utils/mention"
21
28
29
/**
 * Combines 2 look chars such that the result is equivalent to 2 adjacent
 * assertions `(?=a)(?=b)`.
 *
 * @param a The look char of the first assertion.
 * @param b The look char of the second assertion.
 * @returns A look char whose `char` is the intersection of both inputs,
 * whose `edge` is set only if both inputs allow an edge, and whose `exact`
 * is set if both inputs are exact or the intersection is empty.
 */
function firstLookCharsIntersection(
    a: FirstLookChar,
    b: FirstLookChar,
): FirstLookChar {
    // Compute the intersection once; it is needed for both `char` and `exact`.
    const char = a.char.intersect(b.char)
    return {
        char,
        // an empty intersection is also treated as exact
        exact: (a.exact && b.exact) || char.isEmpty,
        edge: a.edge && b.edge,
    }
}
44
+
45
/**
 * Signature of a function that determines the first look char after the
 * given assertion when matching in the given direction.
 *
 * The verification functions of this rule take such a function as a
 * parameter so that the strategy for computing the surrounding characters
 * can be swapped.
 */
type GetFirstCharAfter = (
    afterThis: Assertion,
    direction: MatchingDirection,
    flags: ReadonlyFlags,
) => FirstLookChar
50
+
51
/**
 * Creates a {@link GetFirstCharAfter} function that will reorder assertions to
 * get the maximum information about the characters after the given assertion.
 *
 * Conceptually, this will reorder adjacent assertions such that the given
 * assertion is moved as far as possible in the opposite direction of the
 * natural matching direction. E.g. when given `$` in `a(?!a)(?<=\w)$`, the
 * characters after `$` will be returned as if the pattern was
 * `a$(?!a)(?<=\w)`.
 *
 * @param forbidden A list of assertions that may not be reordered.
 * @returns A function that combines the look char after the given assertion
 * with the look chars of the reorderable zero-length elements adjacent to it.
 */
function createReorderingGetFirstCharAfter(
    forbidden: ReadonlySet<Assertion>,
): GetFirstCharAfter {
    /** Whether the given element or one of its descendants is forbidden. */
    const hasForbidden = (element: Element): boolean =>
        (element.type === "Assertion" && forbidden.has(element)) ||
        Array.from(forbidden).some((f) => hasSomeDescendant(element, f))

    return (afterThis, direction, flags) => {
        const own = getFirstCharAfter(afterThis, direction, flags)

        const { parent } = afterThis
        if (parent.type !== "Alternative") {
            // no siblings to reorder with
            return own
        }

        const siblings = parent.elements

        // walk away from the assertion, against the matching direction
        const step = direction === "ltr" ? -1 : +1

        let combined = own
        let index = siblings.indexOf(afterThis) + step
        while (index >= 0 && index < siblings.length) {
            const sibling = siblings[index]

            // Stop at the first element that consumes characters or that
            // cannot be reordered.
            if (!isZeroLength(sibling) || hasForbidden(sibling)) {
                break
            }

            const siblingLook = FirstConsumedChars.toLook(
                getFirstConsumedChar(sibling, direction, flags),
            )
            combined = firstLookCharsIntersection(combined, siblingLook)

            index += step
        }

        return combined
    }
}
107
+
22
108
const messages = {
23
109
alwaysRejectByChar :
24
110
"{{assertion}} will always reject because it is {{followedOrPreceded}} by a character." ,
111
+ alwaysAcceptByChar :
112
+ "{{assertion}} will always accept because it is never {{followedOrPreceded}} by a character." ,
25
113
alwaysRejectByNonLineTerminator :
26
114
"{{assertion}} will always reject because it is {{followedOrPreceded}} by a non-line-terminator character." ,
27
115
alwaysAcceptByLineTerminator :
28
116
"{{assertion}} will always accept because it is {{followedOrPreceded}} by a line-terminator character." ,
117
+ alwaysAcceptByLineTerminatorOrEdge :
118
+ "{{assertion}} will always accept because it is {{followedOrPreceded}} by a line-terminator character or the {{startOrEnd}} of the input string." ,
29
119
alwaysAcceptOrRejectFollowedByWord :
30
120
"{{assertion}} will always {{acceptOrReject}} because it is preceded by a non-word character and followed by a word character." ,
31
121
alwaysAcceptOrRejectFollowedByNonWord :
@@ -61,12 +151,16 @@ export default createRule("no-useless-assertions", {
61
151
flags,
62
152
getRegexpLocation,
63
153
} : RegExpContext ) : RegExpVisitor . Handlers {
154
+ const reported = new Set < Assertion > ( )
155
+
64
156
/** Report */
65
157
function report (
66
158
assertion : Assertion ,
67
159
messageId : keyof typeof messages ,
68
160
data : Record < string , string > ,
69
161
) {
162
+ reported . add ( assertion )
163
+
70
164
context . report ( {
71
165
node,
72
166
loc : getRegexpLocation ( assertion ) ,
@@ -81,20 +175,48 @@ export default createRule("no-useless-assertions", {
81
175
/**
82
176
* Verify for `^` or `$`
83
177
*/
84
- function verifyStartOrEnd ( assertion : EdgeAssertion ) : void {
178
+ function verifyStartOrEnd (
179
+ assertion : EdgeAssertion ,
180
+ getFirstCharAfterFn : GetFirstCharAfter ,
181
+ ) : void {
85
182
// Note: /^/ is the same as /(?<!.)/s and /^/m is the same as /(?<!.)/
86
183
// Note: /$/ is the same as /(?!.)/s and /$/m is the same as /(?!.)/
87
184
88
185
// get the "next" character
89
186
const direction = getMatchingDirectionFromAssertionKind (
90
187
assertion . kind ,
91
188
)
92
- const next = getFirstCharAfter ( assertion , direction , flags )
189
+ const next = getFirstCharAfterFn ( assertion , direction , flags )
93
190
94
191
const followedOrPreceded =
95
192
assertion . kind === "end" ? "followed" : "preceded"
96
193
97
- if ( ! next . edge ) {
194
+ const lineTerminator = Chars . lineTerminator ( flags )
195
+
196
+ if ( next . edge ) {
197
+ // the string might start/end after the assertion
198
+
199
+ if ( ! flags . multiline ) {
200
+ // ^/$ will always accept at an edge with no char before/after it
201
+ if ( next . char . isEmpty ) {
202
+ report ( assertion , "alwaysAcceptByChar" , {
203
+ followedOrPreceded,
204
+ } )
205
+ }
206
+ } else {
207
+ // ^/$ will always accept at an edge or line terminator before/after it
208
+ if ( next . char . isSubsetOf ( lineTerminator ) ) {
209
+ report (
210
+ assertion ,
211
+ "alwaysAcceptByLineTerminatorOrEdge" ,
212
+ {
213
+ followedOrPreceded,
214
+ startOrEnd : assertion . kind ,
215
+ } ,
216
+ )
217
+ }
218
+ }
219
+ } else {
98
220
// there is always some character of `node`
99
221
100
222
if ( ! flags . multiline ) {
@@ -105,8 +227,6 @@ export default createRule("no-useless-assertions", {
105
227
} else {
106
228
// only if the character is a sub set of /./, will the assertion trivially reject
107
229
108
- const lineTerminator = Chars . lineTerminator ( flags )
109
-
110
230
if ( next . char . isDisjointWith ( lineTerminator ) ) {
111
231
report (
112
232
assertion ,
@@ -127,19 +247,15 @@ export default createRule("no-useless-assertions", {
127
247
*/
128
248
function verifyWordBoundary (
129
249
assertion : WordBoundaryAssertion ,
250
+ getFirstCharAfterFn : GetFirstCharAfter ,
130
251
) : void {
131
252
const word = Chars . word ( flags )
132
253
133
- const next = getFirstCharAfter ( assertion , "ltr" , flags )
134
- const prev = getFirstCharAfter ( assertion , "rtl" , flags )
254
+ const next = getFirstCharAfterFn ( assertion , "ltr" , flags )
255
+ const prev = getFirstCharAfterFn ( assertion , "rtl" , flags )
135
256
136
- if ( prev . edge || next . edge ) {
137
- // we can only do this analysis if we know the previous and next character
138
- return
139
- }
140
-
141
- const nextIsWord = next . char . isSubsetOf ( word )
142
- const prevIsWord = prev . char . isSubsetOf ( word )
257
+ const nextIsWord = next . char . isSubsetOf ( word ) && ! next . edge
258
+ const prevIsWord = prev . char . isSubsetOf ( word ) && ! prev . edge
143
259
const nextIsNonWord = next . char . isDisjointWith ( word )
144
260
const prevIsNonWord = prev . char . isDisjointWith ( word )
145
261
@@ -198,7 +314,10 @@ export default createRule("no-useless-assertions", {
198
314
/**
199
315
* Verify for LookaroundAssertion
200
316
*/
201
- function verifyLookaround ( assertion : LookaroundAssertion ) : void {
317
+ function verifyLookaround (
318
+ assertion : LookaroundAssertion ,
319
+ getFirstCharAfterFn : GetFirstCharAfter ,
320
+ ) : void {
202
321
if ( isPotentiallyEmpty ( assertion . alternatives ) ) {
203
322
// we don't handle trivial accept/reject based on emptiness
204
323
return
@@ -207,10 +326,7 @@ export default createRule("no-useless-assertions", {
207
326
const direction = getMatchingDirectionFromAssertionKind (
208
327
assertion . kind ,
209
328
)
210
- const after = getFirstCharAfter ( assertion , direction , flags )
211
- if ( after . edge ) {
212
- return
213
- }
329
+ const after = getFirstCharAfterFn ( assertion , direction , flags )
214
330
215
331
const firstOf = FirstConsumedChars . toLook (
216
332
getFirstConsumedChar (
@@ -227,7 +343,10 @@ export default createRule("no-useless-assertions", {
227
343
// Careful now! If exact is false, we are only guaranteed to have a superset of the actual character.
228
344
// False negatives are fine but we can't have false positives.
229
345
230
- if ( after . char . isDisjointWith ( firstOf . char ) ) {
346
+ if (
347
+ after . char . isDisjointWith ( firstOf . char ) &&
348
+ ! ( after . edge && firstOf . edge )
349
+ ) {
231
350
report (
232
351
assertion ,
233
352
assertion . negate
@@ -240,6 +359,10 @@ export default createRule("no-useless-assertions", {
240
359
)
241
360
}
242
361
362
+ if ( after . edge ) {
363
+ return
364
+ }
365
+
243
366
// accept is harder because that can't generally be decided by the first character
244
367
245
368
// if this contains another assertion then that might reject. It's out of our control
@@ -273,23 +396,58 @@ export default createRule("no-useless-assertions", {
273
396
}
274
397
}
275
398
399
/**
 * Verify for Assertion
 *
 * Dispatches the assertion to the verification function for its kind.
 * Assertions of any other kind are ignored.
 *
 * @param assertion The assertion to verify.
 * @param getFirstCharAfterFn The function used by the verification to
 * determine the characters around the assertion.
 */
function verifyAssertion(
    assertion: Assertion,
    getFirstCharAfterFn: GetFirstCharAfter,
): void {
    if (assertion.kind === "start" || assertion.kind === "end") {
        verifyStartOrEnd(assertion, getFirstCharAfterFn)
        return
    }

    if (assertion.kind === "word") {
        verifyWordBoundary(assertion, getFirstCharAfterFn)
        return
    }

    if (assertion.kind === "lookahead" || assertion.kind === "lookbehind") {
        verifyLookaround(assertion, getFirstCharAfterFn)
    }
}
423
+
424
+ const allAssertions : Assertion [ ] = [ ]
425
+
276
426
return {
277
427
onAssertionEnter(assertion) {
    // Phase 1:
    // Verify the assertion with the plain `getFirstCharAfter`, i.e. its
    // context is determined by only looking at the elements after it.
    // The order of assertions is kept as is.
    verifyAssertion(assertion, getFirstCharAfter)

    // Remember the assertion so that the second phase can revisit it
    // once the whole pattern has been traversed.
    allAssertions.push(assertion)
},
437
onPatternLeave() {
    // Phase 2:
    // Verify the remaining assertions again, this time with a function
    // that reorders assertions such that as much information as possible
    // can be extracted from the surrounding assertions.
    // Already reported assertions are passed as the set of assertions
    // that may not be reordered.
    const getFirstCharAfterFn = createReorderingGetFirstCharAfter(reported)

    for (const assertion of allAssertions) {
        // `reported` may grow while this loop runs, so it is checked
        // for each assertion individually.
        if (reported.has(assertion)) {
            continue
        }
        verifyAssertion(assertion, getFirstCharAfterFn)
    }
},
295
453
}
0 commit comments