@@ -3,6 +3,10 @@ import type { Alternative, Element, Pattern } from "regexpp/ast"
33import type { RegExpContext } from "../utils"
44import {
55 CP_MINUS ,
6+ CP_PLUS ,
7+ CP_STAR ,
8+ CP_QUESTION ,
9+ CP_SLASH ,
610 CP_SPACE ,
711 CP_APOSTROPHE ,
812 createRule ,
@@ -16,7 +20,6 @@ import {
1620} from "regexp-ast-analysis"
1721import type { CharSet } from "refa"
1822import { JS } from "refa"
19- import type { SourceLocation , Position } from "estree"
2023import { canReorder } from "../utils/reorder-alternatives"
2124import { getPossiblyConsumedChar } from "../utils/regexp-ast"
2225
@@ -36,8 +39,15 @@ function getAllowedChars(flags: ReadonlyFlags) {
3639 [
3740 { kind : "word" , negate : false } ,
3841 { min : CP_SPACE , max : CP_SPACE } ,
42+
43+ // common punctuation and operators
44+ { min : CP_PLUS , max : CP_PLUS } ,
3945 { min : CP_MINUS , max : CP_MINUS } ,
46+ { min : CP_STAR , max : CP_STAR } ,
47+ { min : CP_SLASH , max : CP_SLASH } ,
48+
4049 { min : CP_APOSTROPHE , max : CP_APOSTROPHE } ,
50+ { min : CP_QUESTION , max : CP_QUESTION } ,
4151 ] ,
4252 flags ,
4353 ) ,
@@ -133,50 +143,12 @@ function isIntegerString(str: string): boolean {
133143 * are a number.
134144 */
135145function trySortNumberAlternatives ( alternatives : Alternative [ ] ) : void {
136- const numberRanges : [ number , number ] [ ] = [ ]
137- {
138- let start = 0
139- for ( let i = 0 ; i < alternatives . length ; i ++ ) {
140- if ( ! isIntegerString ( alternatives [ i ] . raw ) ) {
141- if ( start < i ) {
142- numberRanges . push ( [ start , i ] )
143- }
144- start = i + 1
145- }
146- }
147- if ( start < alternatives . length ) {
148- numberRanges . push ( [ start , alternatives . length ] )
149- }
150- }
151-
152- for ( const [ start , end ] of numberRanges ) {
153- const slice = alternatives . slice ( start , end )
154-
155- slice . sort ( ( a , b ) => {
146+ const runs = getRuns ( alternatives , ( a ) => isIntegerString ( a . raw ) )
147+ for ( const { startIndex, elements } of runs ) {
148+ elements . sort ( ( a , b ) => {
156149 return Number ( a . raw ) - Number ( b . raw )
157150 } )
158-
159- alternatives . splice ( start , end - start , ...slice )
160- }
161- }
162-
163- /**
164- * Returns the combined source location of the two given locations.
165- */
166- function unionLocations ( a : SourceLocation , b : SourceLocation ) : SourceLocation {
167- /** x < y */
168- function less ( x : Position , y : Position ) : boolean {
169- if ( x . line < y . line ) {
170- return true
171- } else if ( x . line > y . line ) {
172- return false
173- }
174- return x . column < y . column
175- }
176-
177- return {
178- start : { ...( less ( a . start , b . start ) ? a . start : b . start ) } ,
179- end : { ...( less ( a . end , b . end ) ? b . end : a . end ) } ,
151+ alternatives . splice ( startIndex , elements . length , ...elements )
180152 }
181153}
182154
@@ -208,7 +180,7 @@ function getReorderingBounds<T>(
208180}
209181
210182interface Run < T > {
211- index : number
183+ startIndex : number
212184 elements : T [ ]
213185}
214186
@@ -226,14 +198,19 @@ function getRuns<T>(iter: Iterable<T>, condFn: (item: T) => boolean): Run<T>[] {
226198 elements . push ( item )
227199 } else {
228200 if ( elements . length > 0 ) {
229- runs . push ( { index, elements } )
201+ runs . push ( { startIndex : index - elements . length , elements } )
230202 elements = [ ]
231203 }
232204 }
233205
234206 index ++
235207 }
236208
209+ if ( elements . length > 0 ) {
210+ runs . push ( { startIndex : index - elements . length , elements } )
211+ elements = [ ]
212+ }
213+
237214 return runs
238215}
239216
@@ -253,6 +230,8 @@ export default createRule("sort-alternatives", {
253230 type : "suggestion" , // "problem",
254231 } ,
255232 create ( context ) {
233+ const sliceMinLength = 3
234+
256235 /**
257236 * Create visitor
258237 */
@@ -268,23 +247,76 @@ export default createRule("sort-alternatives", {
268247
269248 const allowedChars = getAllowedChars ( flags )
270249
250+ const possibleCharsCache = new Map < Alternative , CharSet > ( )
251+
252+ /** A cached version of getPossiblyConsumedChar */
253+ function getPossibleChars ( a : Alternative ) : CharSet {
254+ let chars = possibleCharsCache . get ( a )
255+ if ( chars === undefined ) {
256+ chars = getPossiblyConsumedChar ( a , regexpContext ) . char
257+ possibleCharsCache . set ( a , chars )
258+ }
259+ return chars
260+ }
261+
262+ /** Tries to sort the given alternatives. */
263+ function trySortRun ( run : Run < Alternative > ) : void {
264+ const alternatives = run . elements
265+
266+ if ( canReorder ( alternatives , regexpContext ) ) {
267+ // alternatives can be reordered freely
268+ sortAlternatives ( alternatives , regexpContext )
269+ trySortNumberAlternatives ( alternatives )
270+ } else {
271+ const consumedChars = Chars . empty ( flags ) . union (
272+ ...alternatives . map ( getPossibleChars ) ,
273+ )
274+ if ( ! consumedChars . isDisjointWith ( Chars . digit ( flags ) ) ) {
275+ // let's try to at least sort numbers
276+ const runs = getRuns ( alternatives , ( a ) =>
277+ isIntegerString ( a . raw ) ,
278+ )
279+
280+ for ( const { startIndex : index , elements } of runs ) {
281+ if (
282+ elements . length > 1 &&
283+ canReorder ( elements , regexpContext )
284+ ) {
285+ trySortNumberAlternatives ( elements )
286+ alternatives . splice (
287+ index ,
288+ elements . length ,
289+ ...elements ,
290+ )
291+ }
292+ }
293+ }
294+ }
295+
296+ enforceSorted ( run )
297+ }
298+
271299 /**
272300 * Creates a report if the sorted alternatives are different from
273301 * the unsorted ones.
274302 */
275- function enforceSorted ( sorted : Alternative [ ] ) : void {
303+ function enforceSorted ( run : Run < Alternative > ) : void {
304+ const sorted = run . elements
276305 const parent = sorted [ 0 ] . parent
277- const unsorted = parent . alternatives
306+ const unsorted = parent . alternatives . slice (
307+ run . startIndex ,
308+ run . startIndex + sorted . length ,
309+ )
278310
279311 const bounds = getReorderingBounds ( unsorted , sorted )
280312 if ( ! bounds ) {
281313 return
282314 }
283315
284- const loc = unionLocations (
285- getRegexpLocation ( unsorted [ bounds [ 0 ] ] ) ,
286- getRegexpLocation ( unsorted [ bounds [ 1 ] ] ) ,
287- )
316+ const loc = getRegexpLocation ( {
317+ start : unsorted [ bounds [ 0 ] ] . start ,
318+ end : unsorted [ bounds [ 1 ] ] . end ,
319+ } )
288320
289321 context . report ( {
290322 node,
@@ -293,11 +325,10 @@ export default createRule("sort-alternatives", {
293325 fix : fixReplaceNode ( parent , ( ) => {
294326 const prefix = parent . raw . slice (
295327 0 ,
296- parent . alternatives [ 0 ] . start - parent . start ,
328+ unsorted [ 0 ] . start - parent . start ,
297329 )
298330 const suffix = parent . raw . slice (
299- parent . alternatives [ parent . alternatives . length - 1 ]
300- . end - parent . start ,
331+ unsorted [ unsorted . length - 1 ] . end - parent . start ,
301332 )
302333
303334 return (
@@ -313,65 +344,46 @@ export default createRule("sort-alternatives", {
313344 return
314345 }
315346
316- if ( ! containsOnlyLiterals ( parent ) ) {
317- return
318- }
319-
320- if (
321- hasSomeDescendant (
322- parent ,
323- ( d ) => d !== parent && d . type === "CapturingGroup" ,
324- )
325- ) {
326- // it's not safe to reorder alternatives with capturing groups
327- return
328- }
347+ const runs = getRuns ( parent . alternatives , ( a ) => {
348+ if ( ! containsOnlyLiterals ( a ) ) {
349+ return false
350+ }
329351
330- const consumedChars = getPossiblyConsumedChar (
331- parent ,
332- regexpContext ,
333- ) . char
334- if ( consumedChars . isEmpty ) {
335- // all alternatives are either empty or only contain
336- // assertions
337- return
338- }
339- if ( ! consumedChars . isSubsetOf ( allowedChars . allowed ) ) {
340- // contains some chars that are not allowed
341- return
342- }
343- if ( consumedChars . isDisjointWith ( allowedChars . required ) ) {
344- // doesn't contain required chars
345- return
346- }
352+ const consumedChars = getPossibleChars ( a )
353+ if ( consumedChars . isEmpty ) {
354+ // the alternative is either empty or only contains
355+ // assertions
356+ return false
357+ }
358+ if ( ! consumedChars . isSubsetOf ( allowedChars . allowed ) ) {
359+ // contains some chars that are not allowed
360+ return false
361+ }
362+ if ( consumedChars . isDisjointWith ( allowedChars . required ) ) {
363+ // doesn't contain required chars
364+ return false
365+ }
347366
348- const alternatives = [ ...parent . alternatives ]
367+ return true
368+ } )
349369
350- if ( canReorder ( alternatives , regexpContext ) ) {
351- // alternatives can be reordered freely
352- sortAlternatives ( alternatives , regexpContext )
353- trySortNumberAlternatives ( alternatives )
354- } else if ( ! consumedChars . isDisjointWith ( Chars . digit ( flags ) ) ) {
355- // let's try to at least sort numbers
356- const runs = getRuns ( alternatives , ( a ) =>
357- isIntegerString ( a . raw ) ,
358- )
359- for ( const { index, elements } of runs ) {
370+ if (
371+ runs . length === 1 &&
372+ runs [ 0 ] . elements . length === parent . alternatives . length
373+ ) {
374+ // All alternatives are to be sorted
375+ trySortRun ( runs [ 0 ] )
376+ } else {
377+ // Some slices are to be sorted
378+ for ( const run of runs ) {
360379 if (
361- elements . length > 1 &&
362- canReorder ( elements , regexpContext )
380+ run . elements . length >= sliceMinLength &&
381+ run . elements . length >= 2
363382 ) {
364- trySortNumberAlternatives ( elements )
365- alternatives . splice (
366- index ,
367- elements . length ,
368- ...elements ,
369- )
383+ trySortRun ( run )
370384 }
371385 }
372386 }
373-
374- enforceSorted ( alternatives )
375387 }
376388
377389 return {
0 commit comments