Skip to content

Commit e8bd77c

Browse files
Improve sort-alternatives (#297)
1 parent 941500c commit e8bd77c

File tree

2 files changed

+128
-106
lines changed

2 files changed

+128
-106
lines changed

lib/rules/sort-alternatives.ts

Lines changed: 117 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@ import type { Alternative, Element, Pattern } from "regexpp/ast"
33
import type { RegExpContext } from "../utils"
44
import {
55
CP_MINUS,
6+
CP_PLUS,
7+
CP_STAR,
8+
CP_QUESTION,
9+
CP_SLASH,
610
CP_SPACE,
711
CP_APOSTROPHE,
812
createRule,
@@ -16,7 +20,6 @@ import {
1620
} from "regexp-ast-analysis"
1721
import type { CharSet } from "refa"
1822
import { JS } from "refa"
19-
import type { SourceLocation, Position } from "estree"
2023
import { canReorder } from "../utils/reorder-alternatives"
2124
import { getPossiblyConsumedChar } from "../utils/regexp-ast"
2225

@@ -36,8 +39,15 @@ function getAllowedChars(flags: ReadonlyFlags) {
3639
[
3740
{ kind: "word", negate: false },
3841
{ min: CP_SPACE, max: CP_SPACE },
42+
43+
// common punctuation and operators
44+
{ min: CP_PLUS, max: CP_PLUS },
3945
{ min: CP_MINUS, max: CP_MINUS },
46+
{ min: CP_STAR, max: CP_STAR },
47+
{ min: CP_SLASH, max: CP_SLASH },
48+
4049
{ min: CP_APOSTROPHE, max: CP_APOSTROPHE },
50+
{ min: CP_QUESTION, max: CP_QUESTION },
4151
],
4252
flags,
4353
),
@@ -133,50 +143,12 @@ function isIntegerString(str: string): boolean {
133143
* are a number.
134144
*/
135145
function trySortNumberAlternatives(alternatives: Alternative[]): void {
136-
const numberRanges: [number, number][] = []
137-
{
138-
let start = 0
139-
for (let i = 0; i < alternatives.length; i++) {
140-
if (!isIntegerString(alternatives[i].raw)) {
141-
if (start < i) {
142-
numberRanges.push([start, i])
143-
}
144-
start = i + 1
145-
}
146-
}
147-
if (start < alternatives.length) {
148-
numberRanges.push([start, alternatives.length])
149-
}
150-
}
151-
152-
for (const [start, end] of numberRanges) {
153-
const slice = alternatives.slice(start, end)
154-
155-
slice.sort((a, b) => {
146+
const runs = getRuns(alternatives, (a) => isIntegerString(a.raw))
147+
for (const { startIndex, elements } of runs) {
148+
elements.sort((a, b) => {
156149
return Number(a.raw) - Number(b.raw)
157150
})
158-
159-
alternatives.splice(start, end - start, ...slice)
160-
}
161-
}
162-
163-
/**
164-
* Returns the combined source location of the two given locations.
165-
*/
166-
function unionLocations(a: SourceLocation, b: SourceLocation): SourceLocation {
167-
/** x < y */
168-
function less(x: Position, y: Position): boolean {
169-
if (x.line < y.line) {
170-
return true
171-
} else if (x.line > y.line) {
172-
return false
173-
}
174-
return x.column < y.column
175-
}
176-
177-
return {
178-
start: { ...(less(a.start, b.start) ? a.start : b.start) },
179-
end: { ...(less(a.end, b.end) ? b.end : a.end) },
151+
alternatives.splice(startIndex, elements.length, ...elements)
180152
}
181153
}
182154

@@ -208,7 +180,7 @@ function getReorderingBounds<T>(
208180
}
209181

210182
interface Run<T> {
211-
index: number
183+
startIndex: number
212184
elements: T[]
213185
}
214186

@@ -226,14 +198,19 @@ function getRuns<T>(iter: Iterable<T>, condFn: (item: T) => boolean): Run<T>[] {
226198
elements.push(item)
227199
} else {
228200
if (elements.length > 0) {
229-
runs.push({ index, elements })
201+
runs.push({ startIndex: index - elements.length, elements })
230202
elements = []
231203
}
232204
}
233205

234206
index++
235207
}
236208

209+
if (elements.length > 0) {
210+
runs.push({ startIndex: index - elements.length, elements })
211+
elements = []
212+
}
213+
237214
return runs
238215
}
239216

@@ -253,6 +230,8 @@ export default createRule("sort-alternatives", {
253230
type: "suggestion", // "problem",
254231
},
255232
create(context) {
233+
const sliceMinLength = 3
234+
256235
/**
257236
* Create visitor
258237
*/
@@ -268,23 +247,76 @@ export default createRule("sort-alternatives", {
268247

269248
const allowedChars = getAllowedChars(flags)
270249

250+
const possibleCharsCache = new Map<Alternative, CharSet>()
251+
252+
/** A cached version of getPossiblyConsumedChar */
253+
function getPossibleChars(a: Alternative): CharSet {
254+
let chars = possibleCharsCache.get(a)
255+
if (chars === undefined) {
256+
chars = getPossiblyConsumedChar(a, regexpContext).char
257+
possibleCharsCache.set(a, chars)
258+
}
259+
return chars
260+
}
261+
262+
/** Tries to sort the given alternatives. */
263+
function trySortRun(run: Run<Alternative>): void {
264+
const alternatives = run.elements
265+
266+
if (canReorder(alternatives, regexpContext)) {
267+
// alternatives can be reordered freely
268+
sortAlternatives(alternatives, regexpContext)
269+
trySortNumberAlternatives(alternatives)
270+
} else {
271+
const consumedChars = Chars.empty(flags).union(
272+
...alternatives.map(getPossibleChars),
273+
)
274+
if (!consumedChars.isDisjointWith(Chars.digit(flags))) {
275+
// let's try to at least sort numbers
276+
const runs = getRuns(alternatives, (a) =>
277+
isIntegerString(a.raw),
278+
)
279+
280+
for (const { startIndex: index, elements } of runs) {
281+
if (
282+
elements.length > 1 &&
283+
canReorder(elements, regexpContext)
284+
) {
285+
trySortNumberAlternatives(elements)
286+
alternatives.splice(
287+
index,
288+
elements.length,
289+
...elements,
290+
)
291+
}
292+
}
293+
}
294+
}
295+
296+
enforceSorted(run)
297+
}
298+
271299
/**
272300
* Creates a report if the sorted alternatives are different from
273301
* the unsorted ones.
274302
*/
275-
function enforceSorted(sorted: Alternative[]): void {
303+
function enforceSorted(run: Run<Alternative>): void {
304+
const sorted = run.elements
276305
const parent = sorted[0].parent
277-
const unsorted = parent.alternatives
306+
const unsorted = parent.alternatives.slice(
307+
run.startIndex,
308+
run.startIndex + sorted.length,
309+
)
278310

279311
const bounds = getReorderingBounds(unsorted, sorted)
280312
if (!bounds) {
281313
return
282314
}
283315

284-
const loc = unionLocations(
285-
getRegexpLocation(unsorted[bounds[0]]),
286-
getRegexpLocation(unsorted[bounds[1]]),
287-
)
316+
const loc = getRegexpLocation({
317+
start: unsorted[bounds[0]].start,
318+
end: unsorted[bounds[1]].end,
319+
})
288320

289321
context.report({
290322
node,
@@ -293,11 +325,10 @@ export default createRule("sort-alternatives", {
293325
fix: fixReplaceNode(parent, () => {
294326
const prefix = parent.raw.slice(
295327
0,
296-
parent.alternatives[0].start - parent.start,
328+
unsorted[0].start - parent.start,
297329
)
298330
const suffix = parent.raw.slice(
299-
parent.alternatives[parent.alternatives.length - 1]
300-
.end - parent.start,
331+
unsorted[unsorted.length - 1].end - parent.start,
301332
)
302333

303334
return (
@@ -313,65 +344,46 @@ export default createRule("sort-alternatives", {
313344
return
314345
}
315346

316-
if (!containsOnlyLiterals(parent)) {
317-
return
318-
}
319-
320-
if (
321-
hasSomeDescendant(
322-
parent,
323-
(d) => d !== parent && d.type === "CapturingGroup",
324-
)
325-
) {
326-
// it's not safe to reorder alternatives with capturing groups
327-
return
328-
}
347+
const runs = getRuns(parent.alternatives, (a) => {
348+
if (!containsOnlyLiterals(a)) {
349+
return false
350+
}
329351

330-
const consumedChars = getPossiblyConsumedChar(
331-
parent,
332-
regexpContext,
333-
).char
334-
if (consumedChars.isEmpty) {
335-
// all alternatives are either empty or only contain
336-
// assertions
337-
return
338-
}
339-
if (!consumedChars.isSubsetOf(allowedChars.allowed)) {
340-
// contains some chars that are not allowed
341-
return
342-
}
343-
if (consumedChars.isDisjointWith(allowedChars.required)) {
344-
// doesn't contain required chars
345-
return
346-
}
352+
const consumedChars = getPossibleChars(a)
353+
if (consumedChars.isEmpty) {
354+
// the alternative is either empty or only contains
355+
// assertions
356+
return false
357+
}
358+
if (!consumedChars.isSubsetOf(allowedChars.allowed)) {
359+
// contains some chars that are not allowed
360+
return false
361+
}
362+
if (consumedChars.isDisjointWith(allowedChars.required)) {
363+
// doesn't contain required chars
364+
return false
365+
}
347366

348-
const alternatives = [...parent.alternatives]
367+
return true
368+
})
349369

350-
if (canReorder(alternatives, regexpContext)) {
351-
// alternatives can be reordered freely
352-
sortAlternatives(alternatives, regexpContext)
353-
trySortNumberAlternatives(alternatives)
354-
} else if (!consumedChars.isDisjointWith(Chars.digit(flags))) {
355-
// let's try to at least sort numbers
356-
const runs = getRuns(alternatives, (a) =>
357-
isIntegerString(a.raw),
358-
)
359-
for (const { index, elements } of runs) {
370+
if (
371+
runs.length === 1 &&
372+
runs[0].elements.length === parent.alternatives.length
373+
) {
374+
// All alternatives are to be sorted
375+
trySortRun(runs[0])
376+
} else {
377+
// Some slices are to be sorted
378+
for (const run of runs) {
360379
if (
361-
elements.length > 1 &&
362-
canReorder(elements, regexpContext)
380+
run.elements.length >= sliceMinLength &&
381+
run.elements.length >= 2
363382
) {
364-
trySortNumberAlternatives(elements)
365-
alternatives.splice(
366-
index,
367-
elements.length,
368-
...elements,
369-
)
383+
trySortRun(run)
370384
}
371385
}
372386
}
373-
374-
enforceSorted(alternatives)
375387
}
376388

377389
return {

tests/lib/rules/sort-alternatives.ts

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ const tester = new RuleTester({
99
})
1010

1111
tester.run("sort-alternatives", rule as any, {
12-
valid: [String.raw`/\b(?:a|\d+|c|b)\b/`],
12+
valid: [String.raw`/\b(?:a|\d+|c|b)\b/`, String.raw`/\b(?:\^|c|b)\b/`],
1313
invalid: [
1414
{
1515
code: String.raw`/c|b|a/`,
@@ -103,5 +103,15 @@ tester.run("sort-alternatives", rule as any, {
103103
"The alternatives of this group can be sorted without affecting the regex.",
104104
],
105105
},
106+
107+
// sorting slices
108+
109+
{
110+
code: String.raw`/\b(?:\^|c|b|a)\b/`,
111+
output: String.raw`/\b(?:\^|a|b|c)\b/`,
112+
errors: [
113+
"The alternatives of this group can be sorted without affecting the regex.",
114+
],
115+
},
106116
],
107117
})

0 commit comments

Comments
 (0)