@@ -4,9 +4,11 @@ import type {
4
4
Character ,
5
5
CharacterClass ,
6
6
CharacterSet ,
7
+ ClassStringDisjunction ,
7
8
Element ,
8
- Node ,
9
+ ExpressionCharacterClass ,
9
10
Pattern ,
11
+ StringAlternative ,
10
12
} from "@eslint-community/regexpp/ast"
11
13
import type { RegExpContext } from "../utils"
12
14
import {
@@ -30,12 +32,14 @@ import {
30
32
hasSomeDescendant ,
31
33
canReorder ,
32
34
getLongestPrefix ,
33
- toCharSet ,
34
35
getConsumedChars ,
36
+ toUnicodeSet ,
37
+ hasStrings ,
35
38
} from "regexp-ast-analysis"
36
39
import type { CharSet , Word , ReadonlyWord } from "refa"
37
40
import { NFA , JS , transform } from "refa"
38
41
import { getParser } from "../utils/regexp-ast"
42
+ import { getLexicographicallySmallestInConcatenation } from "../utils/lexicographically-smallest"
39
43
40
44
interface AllowedChars {
41
45
allowed : CharSet
@@ -45,7 +49,10 @@ const cache = new Map<string, Readonly<AllowedChars>>()
45
49
46
50
function getAllowedChars ( flags : ReadonlyFlags ) {
47
51
assertValidFlags ( flags )
48
- const cacheKey = ( flags . ignoreCase ? "i" : "" ) + ( flags . unicode ? "u" : "" )
52
+ const cacheKey =
53
+ ( flags . ignoreCase ? "i" : "" ) +
54
+ ( flags . unicode ? "u" : "" ) +
55
+ ( flags . unicodeSets ? "v" : "" )
49
56
let result = cache . get ( cacheKey )
50
57
if ( result === undefined ) {
51
58
result = {
@@ -86,7 +93,8 @@ function containsOnlyLiterals(
86
93
d . type === "Backreference" ||
87
94
d . type === "CharacterSet" ||
88
95
( d . type === "Quantifier" && d . max === Infinity ) ||
89
- ( d . type === "CharacterClass" && d . negate )
96
+ ( d . type === "CharacterClass" && d . negate ) ||
97
+ ( d . type === "ExpressionCharacterClass" && d . negate )
90
98
)
91
99
} ,
92
100
( d ) => d . type !== "Assertion" ,
@@ -156,29 +164,45 @@ function approximateLexicographicallySmallest(
156
164
return getLexicographicallySmallestFromCharSets ( prefix )
157
165
}
158
166
167
+ function getLexicographicallySmallestFromAlternative (
168
+ alternative : Alternative ,
169
+ parser : JS . Parser ,
170
+ flags : ReadonlyFlags ,
171
+ ) : Word | undefined
172
+ function getLexicographicallySmallestFromAlternative (
173
+ alternative : StringAlternative ,
174
+ parser : JS . Parser ,
175
+ flags : ReadonlyFlags ,
176
+ ) : Word
159
177
/**
160
178
* If defined, this will return the lexicographically smallest string accepted
161
179
* by the given alternative (ignoring assertions).
162
180
*/
163
181
function getLexicographicallySmallestFromAlternative (
164
- alternative : Alternative ,
182
+ alternative : Alternative | StringAlternative ,
165
183
parser : JS . Parser ,
166
184
flags : ReadonlyFlags ,
167
185
) : Word | undefined {
168
- const { elements } = alternative
169
- if ( isOnlyCharacters ( elements ) ) {
186
+ if (
187
+ alternative . type === "StringAlternative" ||
188
+ hasOnlyCharacters ( alternative , flags )
189
+ ) {
170
190
// fast path to avoid converting simple alternatives into NFAs
171
191
const smallest : Word = [ ]
172
- for ( const e of elements ) {
173
- // FIXME: TS Error
174
- // @ts -expect-error -- FIXME
175
- const cs = toCharSet ( e , flags )
192
+ for ( const e of alternative . elements ) {
193
+ const cs = toUnicodeSet ( e , flags ) . chars
176
194
if ( cs . isEmpty ) return undefined
177
195
smallest . push ( cs . ranges [ 0 ] . min )
178
196
}
179
197
return smallest
180
198
}
181
199
200
+ if ( isOnlyCharacterElements ( alternative . elements ) ) {
201
+ return getLexicographicallySmallestInConcatenation (
202
+ alternative . elements . map ( ( e ) => toUnicodeSet ( e , flags ) ) ,
203
+ )
204
+ }
205
+
182
206
try {
183
207
const result = parser . parseElement ( alternative , {
184
208
assertions : "unknown" ,
@@ -212,15 +236,45 @@ function getLexicographicallySmallestFromAlternative(
212
236
}
213
237
}
214
238
215
- /** Returns whether the given array of nodes contains only characters. */
216
- function isOnlyCharacters (
217
- nodes : readonly Node [ ] ,
218
- ) : nodes is readonly ( Character | CharacterClass | CharacterSet ) [ ] {
239
/**
 * Checks that every node in the given array is a character-like element:
 * a `Character`, `CharacterClass`, `CharacterSet`, or
 * `ExpressionCharacterClass`.
 *
 * Note that with the `v` flag a character class may still contain strings;
 * this check only looks at the node types.
 */
function isOnlyCharacterElements(
    nodes: Element[],
): nodes is (
    | Character
    | CharacterClass
    | CharacterSet
    | ExpressionCharacterClass
)[] {
    for (const node of nodes) {
        switch (node.type) {
            case "Character":
            case "CharacterClass":
            case "CharacterSet":
            case "ExpressionCharacterClass":
                // character-like element; keep scanning
                continue
            default:
                return false
        }
    }
    return true
}
259
+
260
/**
 * Returns whether the given alternative consists only of character-like
 * elements (see `isOnlyCharacterElements`) and `hasStrings` reports `false`
 * for every one of them, i.e. even with the `v` flag no element contains
 * strings.
 */
function hasOnlyCharacters(
    alternative: Alternative,
    flags: ReadonlyFlags,
): alternative is Alternative & {
    elements: readonly (
        | Character
        | CharacterClass
        | CharacterSet
        | ExpressionCharacterClass
    )[]
} {
    if (!isOnlyCharacterElements(alternative.elements)) {
        return false
    }
    // A single element that can contain strings disqualifies the alternative.
    return !alternative.elements.some((element) => hasStrings(element, flags))
}
226
280
@@ -433,6 +487,28 @@ function sortAlternatives(
433
487
} )
434
488
}
435
489
490
/**
 * Sorts the given string alternatives in place.
 *
 * Alternatives are ordered by comparing their lexicographically smallest
 * strings (LSS) with `compareWords`.
 *
 * For more information on why we use LSS-based comparison and how it works,
 * see https://github.com/ota-meshi/eslint-plugin-regexp/pull/423.
 */
function sortStringAlternatives(
    alternatives: StringAlternative[],
    parser: JS.Parser,
    flags: ReadonlyFlags,
): void {
    // LSS of a single string alternative.
    const lssOf = (alternative: StringAlternative) =>
        getLexicographicallySmallestFromAlternative(alternative, parser, flags)

    alternatives.sort((left, right) => compareWords(lssOf(left), lssOf(right)))
}
511
+
436
512
/**
437
513
* Returns whether the given string is a valid integer.
438
514
* @param str
@@ -446,7 +522,9 @@ function isIntegerString(str: string): boolean {
446
522
* This tries to sort the given alternatives by assuming that all alternatives
447
523
* are a number.
448
524
*/
449
- function trySortNumberAlternatives ( alternatives : Alternative [ ] ) : void {
525
+ function trySortNumberAlternatives (
526
+ alternatives : ( Alternative | StringAlternative ) [ ] ,
527
+ ) : void {
450
528
const runs = getRuns ( alternatives , ( a ) => isIntegerString ( a . raw ) )
451
529
for ( const { startIndex, elements } of runs ) {
452
530
elements . sort ( ( a , b ) => {
@@ -528,7 +606,7 @@ export default createRule("sort-alternatives", {
528
606
fixable : "code" ,
529
607
schema : [ ] ,
530
608
messages : {
531
- sort : "The alternatives of this group can be sorted without affecting the regex." ,
609
+ sort : "The {{ alternatives}} can be sorted without affecting the regex." ,
532
610
} ,
533
611
type : "suggestion" , // "problem",
534
612
} ,
@@ -551,7 +629,6 @@ export default createRule("sort-alternatives", {
551
629
let chars = possibleCharsCache . get ( a )
552
630
if ( chars === undefined ) {
553
631
chars = getConsumedChars ( a , flags ) . chars
554
- possibleCharsCache . set ( a , chars )
555
632
}
556
633
return chars
557
634
}
@@ -590,14 +667,19 @@ export default createRule("sort-alternatives", {
590
667
}
591
668
}
592
669
593
- enforceSorted ( run )
670
+ enforceSorted ( run , "alternatives of this group" )
594
671
}
595
672
596
673
/**
597
674
* Creates a report if the sorted alternatives are different from
598
675
* the unsorted ones.
599
676
*/
600
- function enforceSorted ( run : Run < Alternative > ) : void {
677
+ function enforceSorted (
678
+ run : Run < Alternative | StringAlternative > ,
679
+ alternatives :
680
+ | "alternatives of this group"
681
+ | "string alternatives" ,
682
+ ) : void {
601
683
const sorted = run . elements
602
684
const parent = sorted [ 0 ] . parent
603
685
const unsorted = parent . alternatives . slice (
@@ -619,6 +701,7 @@ export default createRule("sort-alternatives", {
619
701
node,
620
702
loc,
621
703
messageId : "sort" ,
704
+ data : { alternatives } ,
622
705
fix : fixReplaceNode ( parent , ( ) => {
623
706
const prefix = parent . raw . slice (
624
707
0 ,
@@ -682,10 +765,29 @@ export default createRule("sort-alternatives", {
682
765
}
683
766
}
684
767
768
/**
 * The handler for ClassStringDisjunction.
 *
 * Sorts a copy of the disjunction's string alternatives (first by LSS, then
 * by the number-aware pass) and hands the result to `enforceSorted` as one
 * run starting at index 0. Disjunctions with fewer than two alternatives
 * are ignored.
 */
function onClassStringDisjunction(parent: ClassStringDisjunction): void {
    const original = parent.alternatives
    if (original.length < 2) {
        return
    }

    // Work on a copy so the AST's own array is never reordered.
    const alternatives = original.slice()
    sortStringAlternatives(alternatives, parser, flags)
    trySortNumberAlternatives(alternatives)

    const run: Run<StringAlternative> = {
        startIndex: 0,
        elements: alternatives.slice(),
    }
    enforceSorted(run, "string alternatives")
}
785
+
685
786
return {
686
787
onGroupEnter : onParent ,
687
788
onPatternEnter : onParent ,
688
789
onCapturingGroupEnter : onParent ,
790
+ onClassStringDisjunctionEnter : onClassStringDisjunction ,
689
791
}
690
792
}
691
793
0 commit comments